# Merging Mediapipe Output with ELAN Annotations

## Set up folders and check data formats

For this module we will only demonstrate the steps for the cartoon retelling example that we have in our multimedia samples. For this sample we have already generated:

* A) a motion tracking time series for a set of body keypoints, sampled at 30 Hz
* B) an amplitude envelope time series of speech, sampled at 100 Hz
* C) annotations of the dominant-hand (right-handed) gestures that this person produced

So here we show a way to merge A, B, and C in a way that is convenient for further analysis. Let's first identify the relevant files and set the relevant folders.

In [None]:
import glob
import os           #folder designating
import pandas as pd  #data wrangling and data framing
import numpy as np

In [2]:
# Mediapipe hand-tracking files: one "*hands.csv" per sub-folder of
# merging_dataset/mediapipe. The pattern is built with os.path.join so the
# separators are correct on every operating system, and the matches are sorted
# because glob returns them in arbitrary order.
MT_files = sorted(glob.glob(os.path.join("merging_dataset", "mediapipe", "*", "*hands.csv")))

# ELAN annotations, read from a tab-separated text export.
ANNO = pd.read_csv(os.path.join("merging_dataset", "ELAN", "sharice_multiparty_data.txt"), sep='\t')

FileNotFoundError: [Errno 2] No such file or directory: 'merging_dataset\\ELAN\\sharice_multiparty_data.txt'

In [None]:
# Preview the first five annotation rows to check the column layout.
ANNO.head(n=5)

Unnamed: 0,Begin Time - msec,End Time - msec,Director_Speech,Type,Referent,Phase,Hand,File
0,863,13681,"So this is, looks like a, like a sun on the ho...",,set1_target11.png,,,1130_JS_Director_set1_test
1,18500,25045,"This one kind of looks like a carrot, a bit. A...",,set1_target6.png,,,1130_JS_Director_set1_test
2,28681,40499,"Uh, this has kind of an obtuse-looking, um, li...",,set1_target12.png,,,1130_JS_Director_set1_test
3,44886,56886,"Uh, this one looks like a 3D cube in the middl...",,set1_target16.png,,,1130_JS_Director_set1_test
4,61727,76817,"Uh, this one kind of, at first glance, looks a...",,set1_filler6.png,,,1130_JS_Director_set1_test


In [None]:
# Read the first tracking file found and preview its first five rows.
MT_0 = pd.read_csv(MT_files[0])
MT_0.head(n=5)

Unnamed: 0,time,X_LEFT_WRIST,Y_LEFT_WRIST,Z_LEFT_WRIST,X_LEFT_THUMB_CMC,Y_LEFT_THUMB_CMC,Z_LEFT_THUMB_CMC,X_LEFT_THUMB_MCP,Y_LEFT_THUMB_MCP,Z_LEFT_THUMB_MCP,...,Z_RIGHT_PINKY_FINGER_MCP,X_RIGHT_PINKY_FINGER_PIP,Y_RIGHT_PINKY_FINGER_PIP,Z_RIGHT_PINKY_FINGER_PIP,X_RIGHT_PINKY_FINGER_DIP,Y_RIGHT_PINKY_FINGER_DIP,Z_RIGHT_PINKY_FINGER_DIP,X_RIGHT_PINKY_FINGER_TIP,Y_RIGHT_PINKY_FINGER_TIP,Z_RIGHT_PINKY_FINGER_TIP
0,0.0,,,,,,,,,,...,,,,,,,,,,
1,33.333333,,,,,,,,,,...,,,,,,,,,,
2,66.666667,,,,,,,,,,...,,,,,,,,,,
3,100.0,,,,,,,,,,...,,,,,,,,,,
4,133.333333,,,,,,,,,,...,,,,,,,,,,


In [None]:
# Switch on pandas' "mode.copy_on_write" option for the cells that follow.
pd.set_option("mode.copy_on_write", True)

## Adding annotations and saving data



In [None]:
# Start from an empty frame; the per-file loop rebinds this name on every pass.
merged_data = pd.DataFrame()
# One merged frame per tracking file is appended here and concatenated later.
merged_data_list = []

def load_in_event(time_original, anno, col):
    """Project interval annotations onto the samples of a time series.

    Parameters
    ----------
    time_original : array-like
        Sample times of the time series, on the same clock and in the same
        unit as the annotation begin/end columns.
    anno : pandas.DataFrame
        Annotation table with the columns 'Begin Time - msec',
        'End Time - msec' and ``col``. Its index may hold any labels; rows
        are processed by position, top to bottom.
    col : str
        Name of the annotation column whose value is copied onto the samples.

    Returns
    -------
    numpy.ndarray
        Object array with one entry per sample: the value of ``col`` for the
        annotation whose [begin, end] interval (both ends inclusive) contains
        the sample time, or NaN when no annotation covers it. If intervals
        overlap, the annotation that comes later in ``anno`` wins.
    """
    times = np.asarray(time_original)
    output = np.full(len(times), np.nan, dtype=object)  # NaN = "not annotated"

    # Pull the columns out as plain arrays so rows are addressed purely by
    # position; label-based lookups would fail on a filtered frame whose
    # index no longer runs 0..n-1.
    begins = anno['Begin Time - msec'].to_numpy()
    ends = anno['End Time - msec'].to_numpy()
    values = anno[col].to_numpy()

    for begin, end, value in zip(begins, ends, values):
        # Stamp the annotation value on every sample inside its interval.
        output[(times >= begin) & (times <= end)] = value
    return output

for mt_file in MT_files:
    mdata = pd.read_csv(mt_file)
    # The recording name is the file name with its last "_"-separated part
    # removed; it is matched against the 'File' column of the annotations.
    fname = '_'.join(os.path.basename(mt_file).split('_')[:-1])
    adata = ANNO[ANNO['File'] == fname].reset_index(drop=True)

    # Take an explicit copy so that adding columns below never depends on the
    # global copy-on-write setting and never touches `mdata`.
    merged_data = mdata[['time', 'X_LEFT_WRIST', 'Y_LEFT_WRIST', 'Z_LEFT_WRIST']].copy()

    # Attach the annotation values that cover each tracking sample.
    merged_data['Referent'] = load_in_event(merged_data['time'], adata, 'Referent')
    merged_data['Director_Speech'] = load_in_event(merged_data['time'], adata, 'Director_Speech')
    merged_data['File'] = fname  # same recording name on every row

    merged_data_list.append(merged_data)

# Concatenate all per-file DataFrames into one DataFrame
final_merged_data = pd.concat(merged_data_list, ignore_index=True)

# Save the merged data; the path is built portably and the output folder is
# created first in case it does not exist yet.
output_dir = os.path.join('merging_dataset', 'output')
os.makedirs(output_dir, exist_ok=True)
final_merged_data.to_csv(os.path.join(output_dir, 'merged_data.csv'), index=False)

In [None]:
# Display only the rows whose 'Referent' column is not NaN, i.e. the samples
# that fall inside an annotation.
final_merged_data.dropna(subset=['Referent'])

Unnamed: 0,time,X_LEFT_WRIST,Y_LEFT_WRIST,Z_LEFT_WRIST,Referent,Director_Speech,File
26,866.666667,,,,set1_target11.png,"So this is, looks like a, like a sun on the ho...",1130_JS_Director_set1_test
27,900.000000,,,,set1_target11.png,"So this is, looks like a, like a sun on the ho...",1130_JS_Director_set1_test
28,933.333333,,,,set1_target11.png,"So this is, looks like a, like a sun on the ho...",1130_JS_Director_set1_test
29,966.666667,,,,set1_target11.png,"So this is, looks like a, like a sun on the ho...",1130_JS_Director_set1_test
30,1000.000000,,,,set1_target11.png,"So this is, looks like a, like a sun on the ho...",1130_JS_Director_set1_test
...,...,...,...,...,...,...,...
58601,438767.321591,0.431728,0.581675,5.363898e-08,set1_target2.png,"Uh, it's a black square and there's white ring...",5137_JJ_Director_set1_test
58602,438800.654974,0.430130,0.594905,4.861727e-08,set1_target2.png,"Uh, it's a black square and there's white ring...",5137_JJ_Director_set1_test
58603,438833.988357,0.428552,0.608484,5.894018e-08,set1_target2.png,"Uh, it's a black square and there's white ring...",5137_JJ_Director_set1_test
58604,438867.321740,0.428470,0.624811,4.816133e-08,set1_target2.png,"Uh, it's a black square and there's white ring...",5137_JJ_Director_set1_test
