In [64]:
import pickle
import pandas as pd
import numpy as np
from copy import deepcopy
import math
from sklearn.model_selection import train_test_split

# Activity descriptions listed in class-id order: the position of a
# description in this tuple is the integer class id it is mapped to.
_ACTIVITY_DESCRIPTIONS = (
    'Get/replace items from refrigerator/cabinets/drawers',
    'Peel a cucumber',
    'Clear cutting board',
    'Slice a cucumber',
    'Peel a potato',
    'Slice a potato',
    'Slice bread',
    'Spread almond butter on a bread slice',
    'Spread jelly on a bread slice',
    'Open/close a jar of almond butter',
    'Pour water from a pitcher into a glass',
    'Clean a plate with a sponge',
    'Clean a plate with a towel',
    'Clean a pan with a sponge',
    'Clean a pan with a towel',
    'Get items from cabinets: 3 each large/small plates, bowls, mugs, glasses, sets of utensils',
    'Set table: 3 each large/small plates, bowls, mugs, glasses, sets of utensils',
    'Stack on table: 3 each large/small plates, bowls',
    'Load dishwasher: 3 each large/small plates, bowls, mugs, glasses, sets of utensils',
    'Unload dishwasher: 3 each large/small plates, bowls, mugs, glasses, sets of utensils',
)

# Lookup table from activity description to integer class id (0-19).
# NOTE(review): descriptions missing from this table map to NaN when used
# with Series.map -- confirm the keys match the spelling used in the data.
activities_to_classify = {
    description: class_id
    for class_id, description in enumerate(_ACTIVITY_DESCRIPTIONS)
}

# Frames per second used to turn timestamps into frame indices.
FPS = 30
# Clip length used by `augmentation`; multiplied by FPS to get frames per clip.
ACTION_LENGTH = 5

def augmentation(data_frame, fps=None, action_length=None):
    """Split every annotated action into consecutive fixed-length clips.

    Each input row is replaced by one row per clip. A clip covers
    ``fps * action_length`` frames, except the last clip of an action,
    which is truncated at the action's original ``stop_frame``.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Must contain ``start_frame`` and ``stop_frame`` (inclusive bounds).
    fps : number, optional
        Frames per second. Defaults to the module-level ``FPS``.
    action_length : number, optional
        Clip length; multiplied by ``fps`` to get frames per clip.
        Defaults to the module-level ``ACTION_LENGTH``.

    Returns
    -------
    pandas.DataFrame
        One row per clip, restricted to the columns of ``data_frame`` and
        carrying a fresh 0..n-1 index. ``start_frame``/``stop_frame`` hold
        the clip bounds; ``start_timestamp``/``stop_timestamp`` are set to
        the clip bounds divided by ``fps``.

    Raises
    ------
    ValueError
        If ``fps * action_length`` is not strictly positive.
    """
    fps = FPS if fps is None else fps
    action_length = ACTION_LENGTH if action_length is None else action_length

    # Loop-invariant: number of frames covered by one clip.
    interval_size = fps * action_length
    if interval_size <= 0:
        raise ValueError(
            f"fps * action_length must be positive, got {interval_size}")

    augmented_data = []

    for _, row in data_frame.iterrows():
        start_frame = row['start_frame']
        stop_frame = row['stop_frame']

        # Frame bounds are inclusive, hence the +1 when measuring the span.
        num_intervals = math.ceil((stop_frame - start_frame + 1) / interval_size)

        for i in range(num_intervals):
            new_start = start_frame + i * interval_size
            # Never run past the end of the original action.
            new_stop = min(new_start + interval_size - 1, stop_frame)
            new_row = row.copy()
            new_row['start_frame'] = new_start
            new_row['stop_frame'] = new_stop
            new_row['start_timestamp'] = new_start / fps
            new_row['stop_timestamp'] = new_stop / fps
            augmented_data.append(new_row)

    augmented_dataframe = pd.DataFrame(augmented_data, columns=data_frame.columns)
    return augmented_dataframe.reset_index(drop=True)

def create_annotations_file(timestamp_file, annotations_file, type='train'):
    """Build the annotation table for session ``S04_1`` from two pickles.

    Parameters
    ----------
    timestamp_file : path to a pickled DataFrame with ``start``/``stop``
        columns and the four ``myo_*`` columns (which are discarded).
    annotations_file : path to a pickled DataFrame with ``file``, ``labels``
        and ``index`` columns; only rows whose ``file`` is ``'S04_1.pkl'``
        are used.
    type : value stored in the ``type`` column of every returned row.

    Returns
    -------
    pandas.DataFrame with columns uid, participant_id, video_id, narration,
    start, stop, start_frame, stop_frame, verb, verb_class, type.
    """
    myo_columns = ['myo_left_timestamps', 'myo_right_timestamps',
                   'myo_left_readings', 'myo_right_readings']
    output_columns = ['uid', 'participant_id', 'video_id', 'narration', 'start', 'stop',
                      'start_frame', 'stop_frame', 'verb', 'verb_class']

    timing = pd.read_pickle(timestamp_file).drop(myo_columns, axis=1).reset_index()

    # Frames are counted from the start of the first row; the fractional
    # part is truncated by the integer cast.
    origin = timing['start'].iloc[0]

    def to_frames(column):
        return ((timing[column] - origin) * FPS).astype(int)

    timing['start_frame'] = to_frames('start')
    timing['stop_frame'] = to_frames('stop')

    labels = pd.read_pickle(annotations_file)
    labels = labels[labels['file'] == 'S04_1.pkl'].drop(['labels'], axis=1)

    merged = pd.merge(timing, labels, on='index', how='inner')

    # 'description_x' is the merge-suffixed column from the timestamp side
    # (presumably both inputs carry a 'description' column -- confirm).
    description = merged['description_x']
    merged = merged.assign(
        uid=merged['index'],
        participant_id='S04',
        video_id='S04_1',
        verb=description,
        narration=description,
        verb_class=description.map(activities_to_classify),
    )

    return merged[output_columns].assign(type=type)

def change_uid_to_emg(emg_data, split):
    """Number the rows of a pickled frame 1..n in a ``uid`` column.

    Loads the pickle at ``emg_data``, writes consecutive ids (starting at 1,
    in row order) into ``uid`` and saves the result as
    ``new_emg_data_<split>.pkl`` in the current working directory.
    """
    samples = pd.read_pickle(emg_data)
    row_count = len(samples)
    samples['uid'] = pd.RangeIndex(1, row_count + 1)
    samples.to_pickle(f'new_emg_data_{split}.pkl')


def take_S04_annotations_RGB(timestamps, emg_data, type):
    """Write frame-based annotations for session ``S04_1``.

    Parameters
    ----------
    timestamps : path to a pickled frame; the ``start`` value of its first
        row is the time origin for the frame indices.
    emg_data : path to a pickled frame with ``file``, ``description``,
        ``description_class``, ``start``, ``stop``, ``uid`` and ``emg_data``
        columns.
    type : split name used in the output file name.

    The result is saved to ``action_net_annotations_training/S04_<type>.pkl``;
    nothing is returned.
    """
    output_columns = ['uid', 'participant_id', 'video_id', 'narration', 'start', 'stop',
                      'start_frame', 'stop_frame', 'verb', 'verb_class']

    calibration_val = pd.read_pickle(timestamps)['start'].iloc[0]

    samples = pd.read_pickle(emg_data)
    session = samples.loc[samples['file'] == 'S04_1.pkl'].rename(
        columns={'file': 'video_id', 'description': 'narration', 'description_class': 'verb_class'})

    # Offset from the calibration time, scaled to frames and truncated.
    start_frame = ((session['start'] - calibration_val) * FPS).astype(int)
    stop_frame = ((session['stop'] - calibration_val) * FPS).astype(int)

    session = session.assign(
        participant_id='S04',
        video_id='S04_1',
        start_frame=start_frame,
        stop_frame=stop_frame,
        verb=session['narration'],
    )

    session = session.drop(['emg_data'], axis=1)[output_columns]
    session.to_pickle(f'action_net_annotations_training/S04_{type}.pkl')

def create_reduced_annotations(train_annotations, test_annotations, random_state=None):
    """Shuffle train and test annotations together, re-number them, and save.

    The two frames are concatenated, shuffled as one, and given a ``uid``
    equal to their position in the shuffled order, so uids are unique across
    both splits. Rows are then separated again by their ``type`` column
    (``'train'`` / ``'test'``), which is dropped before saving.

    Parameters
    ----------
    train_annotations, test_annotations : pandas.DataFrame
        Frames that carry a ``type`` column.
    random_state : optional
        Forwarded to ``DataFrame.sample``. Pass a fixed seed to get the same
        uid assignment on every run; the default ``None`` keeps the previous
        unseeded behaviour.

    Writes ``action_net_annotations_training/S04_train.pkl`` and
    ``action_net_annotations_training/S04_test.pkl``; returns nothing.
    """
    combined_df = pd.concat([train_annotations, test_annotations], ignore_index=True)

    shuffled = combined_df.sample(frac=1, random_state=random_state)

    # reset_index() without drop keeps the pre-shuffle position as an 'index'
    # column in the saved files; retained so the output schema is unchanged.
    shuffled = shuffled.reset_index()
    shuffled['uid'] = shuffled.index

    train_df_final = shuffled[shuffled['type'] == 'train'].drop(['type'], axis=1)
    test_df_final = shuffled[shuffled['type'] == 'test'].drop(['type'], axis=1)

    train_df_final.to_pickle("action_net_annotations_training/S04_train.pkl")
    test_df_final.to_pickle("action_net_annotations_training/S04_test.pkl")
    
def create_multimodal_annotations(full_data, split, spectogram=False):
    """Write the multimodal annotation file and the EMG features for ``S04_1``.

    Parameters
    ----------
    full_data : path to a pickled frame with ``file``, ``uid``,
        ``description``, ``description_class`` and ``emg_data`` columns.
    split : split name used in both output file names.
    spectogram : forwarded to ``create_emg_features`` to select the feature
        file name. Defaults to ``False``, matching ``create_emg_features``.

    Saves ``an_multimodal_annotations/S04_<split>.pkl`` (and, through
    ``create_emg_features``, the corresponding feature pickle). Returns
    nothing.
    """
    # Keep the path: create_emg_features loads the pickle itself, so it must
    # receive the path rather than the frame loaded below.
    full_data_path = full_data

    annotations = pd.read_pickle(full_data_path)
    annotations = annotations[annotations['file'] == 'S04_1.pkl']
    annotations = annotations.rename(columns={'description_class': 'verb_class'})
    annotations['participant_id'] = 'S04'
    annotations['video_id'] = 'S04_1'

    create_emg_features(full_data_path, split, spectogram)

    final_annotations = annotations[
        ['uid', 'participant_id', 'video_id', 'description', 'verb_class']]

    final_annotations.to_pickle(f"an_multimodal_annotations/S04_{split}.pkl")
    
def create_emg_features(full_data, split, spectogram=False):
    """Save the EMG features of session ``S04_1`` as a pickled dict.

    Parameters
    ----------
    full_data : path to a pickled DataFrame, or an already-loaded DataFrame.
        It must have ``file``, ``uid`` and ``emg_data`` columns.
    split : split name used in the output file name.
    spectogram : when true, the output goes to
        ``features_emg_spectogram_S04_<split>.pkl`` instead of
        ``features_emg_S04_<split>.pkl``. Only the file name changes.

    The saved object is ``{'features': {'uid': [...], 'features_EMG': [...]}}``
    with one list entry per ``S04_1.pkl`` row. Returns nothing.
    """
    if isinstance(full_data, pd.DataFrame):
        # Callers that already hold the frame can pass it directly.
        samples = full_data
    else:
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        samples = pd.read_pickle(full_data)

    samples = samples[samples['file'] == 'S04_1.pkl']
    samples = samples.rename(columns={'emg_data': 'features_EMG'})

    emg_features = {'features': samples[['uid', 'features_EMG']].to_dict('list')}

    if spectogram:
        features_name = f'saved_features_an_multimodal/features_emg_spectogram_S04_{split}.pkl'
    else:
        features_name = f'saved_features_an_multimodal/features_emg_S04_{split}.pkl'

    with open(features_name, 'wb') as f:
        pickle.dump(emg_features, f)

In [22]:
# Re-number the samples of both EMG pickles. Each file is overwritten in
# place: the input name equals the name change_uid_to_emg derives from `split`.
for split_name in ('test', 'train'):
    change_uid_to_emg(f'new_emg_data_{split_name}.pkl', split=split_name)

In [45]:
# Export the S04_1 frame-level annotations for both splits.
for split_name in ('test', 'train'):
    take_S04_annotations_RGB(
        timestamps='action-net/S04_1.pkl',
        emg_data=f'new_emg_data_{split_name}.pkl',
        type=split_name,
    )

In [88]:
# Write the multimodal annotations and the raw (non-spectrogram) EMG features
# for both splits. `spectogram` is passed explicitly because
# create_multimodal_annotations takes it as a third argument.
create_multimodal_annotations(full_data="new_emg_data_train.pkl", split='train', spectogram=False)
create_multimodal_annotations(full_data="new_emg_data_test.pkl", split='test', spectogram=False)
create_emg_features(full_data="new_emg_data_train.pkl", split='train')
create_emg_features(full_data="new_emg_data_test.pkl", split='test')

In [65]:
# Regenerate the raw EMG feature pickles for both splits.
for split_name in ('train', 'test'):
    create_emg_features(full_data=f"new_emg_data_{split_name}.pkl", split=split_name)

In [46]:
# Load the saved S04 test annotations and display them (the bare last
# expression renders the frame as the cell output).
data2 = pd.read_pickle('action_net_annotations_training/S04_test.pkl')

data2

Unnamed: 0,uid,participant_id,video_id,narration,start,stop,start_frame,stop_frame,verb,verb_class
14,15,S04,S04_1,Open a jar of almond butter,1.655241e+09,1.655241e+09,69539,69837,Open a jar of almond butter,8
22,23,S04,S04_1,Clear cutting board,1.655241e+09,1.655241e+09,60114,60410,Clear cutting board,18
34,35,S04,S04_1,"Set table: 3 each large/small plates, bowls, m...",1.655242e+09,1.655242e+09,88254,88551,"Set table: 3 each large/small plates, bowls, m...",15
37,38,S04,S04_1,Spread almond butter on a bread slice,1.655241e+09,1.655241e+09,64379,64677,Spread almond butter on a bread slice,6
39,40,S04,S04_1,Open a jar of almond butter,1.655241e+09,1.655241e+09,69431,69729,Open a jar of almond butter,8
...,...,...,...,...,...,...,...,...,...,...
850,851,S04,S04_1,"Set table: 3 each large/small plates, bowls, m...",1.655242e+09,1.655242e+09,88506,88803,"Set table: 3 each large/small plates, bowls, m...",15
855,856,S04,S04_1,Open a jar of almond butter,1.655241e+09,1.655241e+09,69530,69828,Open a jar of almond butter,8
880,881,S04,S04_1,Open a jar of almond butter,1.655241e+09,1.655241e+09,69503,69801,Open a jar of almond butter,8
889,890,S04,S04_1,Spread almond butter on a bread slice,1.655241e+09,1.655241e+09,64334,64632,Spread almond butter on a bread slice,6


In [42]:
# Show only the S04_1 rows of the re-numbered EMG test data.
data2 = pd.read_pickle('new_emg_data_test.pkl').loc[
    lambda frame: frame['file'] == 'S04_1.pkl']
data2

Unnamed: 0,index,file,description,labels,start,stop,emg_data,description_class,uid
14,34,S04_1.pkl,Open a jar of almond butter,Open/Close,1.655241e+09,1.655241e+09,"[[-0.5310362499607831, -0.17112101585733755, -...",8,15
22,23,S04_1.pkl,Clear cutting board,Clear,1.655241e+09,1.655241e+09,"[[-0.06765700081485183, -0.19264746771383182, ...",18,23
34,56,S04_1.pkl,"Set table: 3 each large/small plates, bowls, m...",Set,1.655242e+09,1.655242e+09,"[[-0.8781975985563716, -0.8737516029999017, -0...",15,35
37,26,S04_1.pkl,Spread almond butter on a bread slice,Spread,1.655241e+09,1.655241e+09,"[[-0.5560823621930757, -0.3788598220978606, -0...",6,38
39,34,S04_1.pkl,Open a jar of almond butter,Open/Close,1.655241e+09,1.655241e+09,"[[-0.6652617227946247, -0.35463073586700183, -...",8,40
...,...,...,...,...,...,...,...,...,...
850,56,S04_1.pkl,"Set table: 3 each large/small plates, bowls, m...",Set,1.655242e+09,1.655242e+09,"[[-0.8913982997448366, -0.8646481789321555, -0...",15,851
855,34,S04_1.pkl,Open a jar of almond butter,Open/Close,1.655241e+09,1.655241e+09,"[[-0.32349131425931404, -0.19182720534648318, ...",8,856
880,34,S04_1.pkl,Open a jar of almond butter,Open/Close,1.655241e+09,1.655241e+09,"[[-0.5547529179641489, -0.3992432882881073, -0...",8,881
889,26,S04_1.pkl,Spread almond butter on a bread slice,Spread,1.655241e+09,1.655241e+09,"[[-0.3703662356774267, -0.49132828506191123, -...",6,890


In [66]:
# Load the saved feature dict, wrap it in a DataFrame and show its single
# 'features' column (indexed by 'uid' and 'features_EMG').
data2 = pd.DataFrame.from_dict(
    pd.read_pickle('saved_features_an_multimodal/features_emg_S04_test.pkl'))
data2['features']

features_EMG    [[[-0.5310362499607831, -0.17112101585733755, ...
uid             [15, 23, 35, 38, 40, 48, 49, 68, 109, 115, 126...
Name: features, dtype: object

In [19]:
# Load and display the saved S04 test annotations.
# NOTE(review): this path is written by both take_S04_annotations_RGB
# (type='test') and create_reduced_annotations, so the content shown depends
# on which of the two ran last.
data2 = pd.read_pickle('action_net_annotations_training/S04_test.pkl')

data2

Unnamed: 0,index,uid,participant_id,video_id,narration,start,stop,start_frame,stop_frame,verb,verb_class
14,27,14,S04,S04_1,Clear cutting board,1.655241e+09,1.655241e+09,60114,60410,Clear cutting board,18
19,74,19,S04,S04_1,"Set table: 3 each large/small plates, bowls, m...",1.655242e+09,1.655242e+09,89644,89941,"Set table: 3 each large/small plates, bowls, m...",15
20,69,20,S04,S04_1,Spread almond butter on a bread slice,1.655241e+09,1.655241e+09,64322,64620,Spread almond butter on a bread slice,6
22,39,22,S04,S04_1,Spread almond butter on a bread slice,1.655241e+09,1.655241e+09,64439,64737,Spread almond butter on a bread slice,6
36,10,36,S04,S04_1,"Set table: 3 each large/small plates, bowls, m...",1.655242e+09,1.655242e+09,87366,87663,"Set table: 3 each large/small plates, bowls, m...",15
...,...,...,...,...,...,...,...,...,...,...,...
772,63,772,S04,S04_1,Open a jar of almond butter,1.655241e+09,1.655241e+09,69512,69810,Open a jar of almond butter,8
781,16,781,S04,S04_1,Clear cutting board,1.655241e+09,1.655241e+09,60198,60494,Clear cutting board,18
783,67,783,S04,S04_1,Clear cutting board,1.655241e+09,1.655241e+09,60252,60548,Clear cutting board,18
786,58,786,S04,S04_1,"Set table: 3 each large/small plates, bowls, m...",1.655242e+09,1.655242e+09,87999,88296,"Set table: 3 each large/small plates, bowls, m...",15
