## Subseq Dict Generation

This notebook takes each singer-wise CSV file and converts it into a "subseq dict" (subsequence dictionary) pickle file for that singer.

The keys of the subseq dict file are tuples in the following format (one tuple corresponds to one stable or non-stable note):

**(filename, index, label)**

`label` is `st` or `ns`, corresponding to stable or non-stable, respectively.

The values of the subseq dict are dataframes holding the pitch contour and gesture contours for that segment. This simply organises the data better for downstream tasks.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
warnings.filterwarnings('ignore')
import time
import pickle

pd.set_option('display.max_columns', None)

from sklearn.model_selection import train_test_split, cross_val_score, KFold, GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report, f1_score, accuracy_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVC
from sklearn.inspection import permutation_importance

In [2]:
# A segment is kept only if MIN_NOTE_DURATION <= duration < MAX_NOTE_DURATION
# (same units as the Start/End columns of the SDS / stable-note CSVs).
MIN_NOTE_DURATION = 0.5
MAX_NOTE_DURATION = 5
# Folder that receives one subseq-dict pickle per singer.
SUBSEQ_DICT_DIR = './Subseq_Dict_With_Elbow'

for singer in ['AG','AK','AP','CC','MG','MP','NM','RV','SCh','SM','SS']:

    print(singer)
    df = pd.read_csv(f'./../../Data/check_output_Jan20/check_output_v2/{singer}_gesture_pitch_pd_with_vel_accln.csv')
#     df = df.drop(columns = [x for x in df.columns.values if 'Elbow' in x])
    ## Uncomment above line if you want to take only wrist and drop elbow contours

    filelist = list(df['filename'].unique())

    # One row per candidate segment: [start, end, 'st' | 'ns', filename]
    notes = []
    for filename in filelist:

        # Reading stable note and SDS timestamps
        sds = pd.read_csv(f'./SDS_Dataframes/{filename}')
        stable = pd.read_csv(f'./Stable_Notes/{filename}')

        # Build the "timestamp dict": every boundary time (rounded to 2
        # decimals) mapped to the kind of boundary it is. The groups are
        # inserted in the order SDS start, SDS end, stable start, stable end,
        # so a later group overwrites an earlier one on the same rounded time.
        # NOTE(review): boundaries that round to the same time therefore lose
        # one event (e.g. a stable note starting exactly at an SDS start drops
        # the 'start_sds' event) - confirm whether that needs explicit handling.
        timestamp_dict = {}
        for frame, column, event in [(sds, 'Start', 'start_sds'),
                                     (sds, 'End', 'end_sds'),
                                     (stable, 'Start', 'start_st'),
                                     (stable, 'End', 'end_st')]:
            for value in frame[column]:
                timestamp_dict[round(float(value), 2)] = event

        st = None  # start time of the stable note currently open
        ns = None  # start time of the non-stable stretch currently open

        # Walk the boundaries in time order to mark stable and non-stable notes
        for timestamp in sorted(timestamp_dict):
            event = timestamp_dict[timestamp]
            if event == 'start_sds':
                # A new SDS opens: the non-stable stretch starts here.
                ns = timestamp
            elif event == 'start_st':
                # The stable note closes the non-stable stretch before it.
                st = timestamp
                if ns is not None:
                    notes.append([ns,timestamp,'ns',filename])
            elif event == 'end_st':
                if st is not None:
                    notes.append([st,timestamp,'st',filename])
                # The next non-stable stretch begins where this stable note
                # ends. Without this, every later 'ns' segment in the same SDS
                # would restart at the SDS start and overlap the stable notes.
                ns = timestamp
            elif event == 'end_sds':
                if ns is not None:
                    notes.append([ns,timestamp,'ns',filename])
                # Nothing is open once the SDS has closed; a stale start must
                # not leak into the next SDS.
                ns = None

    notes = pd.DataFrame(notes,columns=['Start','End','Target','Filename'])
    print('Dataframe created')

    notes['Duration'] = notes['End']-notes['Start']
    # Drop notes whose duration is outside [MIN_NOTE_DURATION, MAX_NOTE_DURATION)
    duration_ok = (notes['Duration'] >= MIN_NOTE_DURATION) & (notes['Duration'] < MAX_NOTE_DURATION)
    notes = notes.loc[duration_ok].reset_index(drop=True)

    subseq_dict_new = {}

    # Gesture processing starts here
    for filename in filelist:

        notes_this_file = notes.loc[notes['Filename']==filename].reset_index(drop=True)
        data_this_file = df.loc[df['filename']==filename].reset_index(drop=True)

        for i in notes_this_file.index.values:
            # Cut out the rows of this recording whose time lies inside the
            # note, both ends inclusive (vectorised; no per-row Python lambda).
            note_start = notes_this_file['Start'].iloc[i]
            note_end = notes_this_file['End'].iloc[i]
            note_gesture = data_this_file.loc[data_this_file['time'].between(note_start, note_end)]
            # Make an entry for this note in our "subseq dict";
            # key = (filename, index of the note within this file, 'st' | 'ns')
            subseq_dict_new[(filename, i, notes_this_file['Target'].iloc[i])] = note_gesture
        print(filename)

    # Save the subseq dict file for further tasks (create the folder on a
    # fresh checkout so the write does not fail)
    os.makedirs(SUBSEQ_DICT_DIR, exist_ok=True)
    with open(f'{SUBSEQ_DICT_DIR}/subseq_dict_{singer}.pkl', 'wb') as file:
        pickle.dump(subseq_dict_new, file)
    print()

AG
Dataframe created
AG_Aalap1_Bag.csv
AG_Aalap1_Bahar.csv
AG_Aalap1_Bilas.csv
AG_Aalap1_Jaun.csv
AG_Aalap1_Kedar.csv
AG_Aalap1_MM.csv
AG_Aalap1_Marwa.csv
AG_Aalap1_Nand.csv
AG_Aalap1_Shree.csv
AG_Aalap2_Bag.csv
AG_Aalap2_Bahar.csv
AG_Aalap2_Bilas.csv
AG_Aalap2_Jaun.csv
AG_Aalap2_MM.csv
AG_Aalap2_Marwa.csv
AG_Aalap2_Nand.csv
AG_Aalap2_Shree.csv
AG_Pakad_Bag.csv
AG_Pakad_Bahar.csv
AG_Pakad_Bilas.csv
AG_Pakad_Jaun.csv
AG_Pakad_Kedar.csv
AG_Pakad_MM.csv
AG_Pakad_Marwa.csv
AG_Pakad_Nand.csv
AG_Pakad_Shree.csv

AK
Dataframe created
AK_Aalap1_Bag.csv
AK_Aalap1_Bahar.csv
AK_Aalap1_Bilas.csv
AK_Aalap1_Jaun.csv
AK_Aalap1_Kedar.csv
AK_Aalap1_MM.csv
AK_Aalap1_Marwa.csv
AK_Aalap1_Nand.csv
AK_Aalap1_Shree.csv
AK_Aalap2_Bag.csv
AK_Aalap2_Bahar.csv
AK_Aalap2_Bilas.csv
AK_Aalap2_Jaun.csv
AK_Aalap2_Kedar.csv
AK_Aalap2_MM.csv
AK_Aalap2_Marwa.csv
AK_Aalap2_Nand.csv
AK_Aalap2_Shree.csv
AK_Pakad_Bag.csv
AK_Pakad_Bahar.csv
AK_Pakad_Bilas.csv
AK_Pakad_Jaun.csv
AK_Pakad_Kedar.csv
AK_Pakad_MM.csv
AK_Pakad_Marw