## Generate videos for each aligned gesture (for real pairs)

### Import packages and define paths

In [1]:
import os
import pandas as pd
from moviepy.editor import VideoFileClip, clips_array
from tqdm import tqdm

### input/output locations, relative to this notebook (one level up from the current directory)
### mediapipe_folder: source videos that the side-by-side clips are cut from
mediapipe_folder = '../data/mediapipe/output_videos/'
### output_folder: destination of the combined side-by-side clips
output_folder = "../data/videos/"

### Concatenate two videos into one to display two videos side by side
Next, we concatenate the two video clips of each gesture pair so that they are displayed side by side. This lets us check visually whether the aligned gestures look alike and whether the pose was estimated properly.

In [3]:
### prepare the dataframe containing the gesture alignment data
# gest_align_file = '../data/elan_annotation/elan_annotation_gestural_alignment_processed.csv'
# df_gest_align = pd.read_csv(gest_align_file)

dtw_file_path = '../data/processed/05_dtw_distance/dtw_distance.csv'
dtw_file = pd.read_csv(dtw_file_path)

### create the output folder if it does not exist yet so the clips can be written
os.makedirs(output_folder, exist_ok=True)

### for each row in the dataframe, extract the pair, comparison_id, and start and end times,
### then write one side-by-side clip (gesture 1 on the left, gesture 2 on the right)
for _, row in tqdm(dtw_file.iterrows(), total=len(dtw_file)):
    pair = f"{row['pair']:03d}"  # zero-padded to three digits to match the video file names
    comparison_id = row['comparison_id']
    average_distance = row['average_distance']
    speaker_1 = row['speaker_1'].lower()
    speaker_2 = row['speaker_2'].lower()
    round_1 = f"R{int(row['round_1'])}"
    round_2 = f"R{int(row['round_2'])}"
    start_time_1 = row['begin_time_1_adj'] / 1000  # convert to seconds
    end_time_1 = row['end_time_1_adj'] / 1000  # convert to seconds
    start_time_2 = row['begin_time_2_adj'] / 1000  # convert to seconds
    end_time_2 = row['end_time_2_adj'] / 1000  # convert to seconds

    ### get the video files for gesture
    video_file_1 = mediapipe_folder + f'{pair}_{speaker_1}.mp4'
    video_file_2 = mediapipe_folder + f'{pair}_{speaker_2}.mp4'

    output_path = output_folder + f'{pair}_{comparison_id}_{round_1}{round_2}_{round(average_distance, 2)}.mp4'

    ### initialise the handles so the cleanup below works even if loading fails half-way
    video_1 = video_2 = combined_video = None
    try:
        ### load the video file and extract the relevant portion
        video_1 = VideoFileClip(video_file_1).subclip(start_time_1, end_time_1)
        video_2 = VideoFileClip(video_file_2).subclip(start_time_2, end_time_2)

        ### combine the video clips and save the combined video
        combined_video = clips_array([[video_1, video_2]])
        combined_video.write_videofile(output_path, codec='libx264', verbose=False, logger=None)
    finally:
        ### close the videos, also when an error occurred, so no reader is left open
        for clip in (video_1, video_2, combined_video):
            if clip is not None:
                clip.close()

100%|██████████| 1304/1304 [43:35<00:00,  2.01s/it] 


### Generate videos for non-aligned gesture pairs

In [None]:
### NOTE: `dtw_file` is rebound here to the path of the non-aligned DTW results
dtw_file = '../data/processed/06_dtw_distance/dtw_distance_non_aligned_gestures.csv'
df_dtw = pd.read_csv(dtw_file)


def _has_value(value):
    """Return True if a cell read from the CSV holds a non-missing, non-empty value."""
    # pd.read_csv loads empty cells as NaN, so comparing against "" alone never detects them
    return pd.notna(value) and value != ""


### NOTE(review): `output_non_aligned_folder` is not defined in the cells shown here;
### it must be set (to a folder path ending in '/') before this cell is run.
### create the output folder if it does not exist yet so the clips can be written
os.makedirs(output_non_aligned_folder, exist_ok=True)

### for each row in the dataframe, extract the pair, comparison_id, and start and end times,
### then write one side-by-side clip (gesture 1 on the left, gesture 2 on the right)
for _, row in tqdm(df_dtw.iterrows(), total=len(df_dtw)):
    pair = row['pair_x']
    comparison_id = row['comparison_id']
    average_distance = row['average_distance']
    speaker_1 = row['speaker_1']
    speaker_2 = row['speaker_2']
    round_1 = row['round_1']
    round_2 = row['round_2']
    ### take the referent from the A column when it is filled in, otherwise from the B column
    ### (the same rule is applied to both gestures)
    referent_1 = row['A_gesture_referent_1'] if _has_value(row['A_gesture_referent_1']) else row['B_gesture_referent_1']
    referent_2 = row['A_gesture_referent_2'] if _has_value(row['A_gesture_referent_2']) else row['B_gesture_referent_2']
    start_time_1 = row['begin_time_1'] / 1000  # convert to seconds
    end_time_1 = row['end_time_1'] / 1000  # convert to seconds
    start_time_2 = row['begin_time_2'] / 1000  # convert to seconds
    end_time_2 = row['end_time_2'] / 1000  # convert to seconds

    ### get the video files for gesture
    video_file_1 = mediapipe_folder + f'{pair}_synced_pp{speaker_1}.mp4'
    video_file_2 = mediapipe_folder + f'{pair}_synced_pp{speaker_2}.mp4'

    output_path = output_non_aligned_folder + f'{pair}_{comparison_id}_{round_1}{round_2}_{referent_1}{referent_2}_{round(average_distance, 2)}.mp4'

    ### initialise the handles so the cleanup below works even if loading fails half-way
    video_1 = video_2 = combined_video = None
    try:
        ### load the video file and extract the relevant portion
        video_1 = VideoFileClip(video_file_1).subclip(start_time_1, end_time_1)
        video_2 = VideoFileClip(video_file_2).subclip(start_time_2, end_time_2)

        ### combine the video clips and save the combined video
        combined_video = clips_array([[video_1, video_2]])
        combined_video.write_videofile(output_path, codec='libx264', verbose=False, logger=None)
    finally:
        ### close the videos, also when an error occurred, so no reader is left open
        for clip in (video_1, video_2, combined_video):
            if clip is not None:
                clip.close()

  0%|          | 0/1121 [00:00<?, ?it/s]

100%|██████████| 1121/1121 [46:21<00:00,  2.48s/it]
