In [None]:
import numpy as np
import pandas as pd
import tensorflow
np.random.seed(0)
tensorflow.random.set_seed(0)
from scipy import stats
from sklearn.preprocessing import MinMaxScaler
from sklearn import svm
from sklearn.metrics import recall_score, confusion_matrix
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import train_test_split
import itertools
from itertools import combinations, product
import glob
import os.path
import sys
import tensorflow as tf
from keras.models import Sequential, Model
from tensorflow.keras import layers
from keras.layers import Dense, LSTM, Dropout,GRU,TimeDistributed, SimpleRNN,Bidirectional,RepeatVector,Flatten
from keras import optimizers
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder

In [None]:
# Class labels, in the order passed to recall_score / confusion_matrix
classes = ['L', 'M', 'H']

# Task
task_name = 'ComParE2020_USOMS-e'

# Enter your team name HERE
team_name = 'NYIT'

# Enter your submission number HERE
submission_index = 1

# Options
show_confusion = True  # Display confusion matrix on devel
majority_vote_story_id = True  # Perform a majority vote over all audio file prediction, based on SpeakerID + Story

# Configuration
complexities = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0]

# Every feature set maps to a tuple
# (number of features, offset/index of first feature, separator, header option).
feat_conf_c = dict([('ComParE', (6373, 1, ';', 'infer'))])
feat_conf_d = dict([('DeepSpectrum_resnet50', (2048, 1, ',', 'infer'))])

# BoAW-<N> sets hold 2*N features each.
feat_conf_b = {f'BoAW-{size}': (2 * size, 1, ';', None)
               for size in (125, 250, 500, 1000, 2000)}

# auDeep sets: four single-window variants plus their fusion.
feat_conf_a = {f'auDeep-{window}': (1024, 2, ',', 'infer')
               for window in (30, 45, 60, 75)}
feat_conf_a['auDeep-fused'] = (4096, 2, ',', 'infer')

fisher_path = './fisher_vector/'
fisher_option = [8, 16, 32, 64]

# Fisher-vector sets carry no loader configuration (empty tuple).
feat_conf_f = {f'fisher_vector_{size}': () for size in fisher_option}

# adding early fusion features (names only, no loader configuration)
feat_conf_vv = dict.fromkeys(
    ['auDeep-45-BoAW-500-ComParE-DeepSpectrum_resnet50',
     'DeepSpectrum_resnet50-auDeep-75-BoAW-250',
     'auDeep-60-BoAW-250-ComParE-DeepSpectrum_resnet50'],
    ())
feat_conf_aa = dict.fromkeys(
    ['DeepSpectrum_resnet50-auDeep-75-BoAW-125',
     'DeepSpectrum_resnet50-auDeep-75-BoAW-250',
     'DeepSpectrum_resnet50-auDeep-60-BoAW-250'],
    ())

# Full feature list per label category. Key order matters: later cells address
# feature sets by their position in these dictionaries.
feat_conf_vcat = {}
for feature_group in (feat_conf_a, feat_conf_b, feat_conf_c, feat_conf_d, feat_conf_f, feat_conf_vv):
    feat_conf_vcat.update(feature_group)

feat_conf_acat = {}
for feature_group in (feat_conf_a, feat_conf_b, feat_conf_c, feat_conf_d, feat_conf_f, feat_conf_aa):
    feat_conf_acat.update(feature_group)

# Locations of the feature files and of the label table
features_path = '../features/'
label_file = '../lab/labels.csv'

# Label table; later cells read its filename_audio, filename_text, V_cat and
# A_cat columns.
df_labels = pd.read_csv(label_file)

# Label categories. The position of each category is reused to index feat_conf.
label_options = ['V_cat', 'A_cat']

# Per-category result accumulators
UAR_A_cat, UAR_V_cat = [], []

In [None]:
# One feature-set dictionary per label category, in the same order as
# label_options (index 0 -> V_cat, index 1 -> A_cat). Later cells index both
# lists with the same position, so the two orders must stay in sync.
feat_conf = [feat_conf_vcat,feat_conf_acat]

In [None]:
# Figures are written to <project root>/images, created on first use.
PROJECT_ROOT_DIR = "."
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images")
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under IMAGES_PATH.

    Parameters
    ----------
    fig_id : str
        File name without extension.
    tight_layout : bool
        When true, ``plt.tight_layout()`` is applied before saving.
    fig_extension : str
        File extension, also passed to matplotlib as the output format.
    resolution : int
        Resolution in dots per inch.
    """
    file_name = ".".join((fig_id, fig_extension))
    target = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)

In [None]:
# Reading csv file path
#
# For every label category, collect the decision-function CSV files of each
# feature set (file names start with "<feature>_<label>_"). train_file[s] and
# devel_file[s] hold the paths for label_options[s], in feature order.
#
# The directories are assembled with os.path.join so that the lookup also works
# on POSIX systems (a literal '.\\Decision_Fun_CSV\\...' prefix only resolves on
# Windows). glob returns matches in arbitrary order, so they are sorted to keep
# the train/devel lists paired deterministically; the next cell zips them.

decision_csv_train_dir = os.path.join('.', 'Decision_Fun_CSV', 'Train')
decision_csv_devel_dir = os.path.join('.', 'Decision_Fun_CSV', 'Devel')

train_file = []
devel_file = []
for s in range(len(label_options)):
    current_label = label_options[s]
    csv_train = []
    csv_devel = []
    for current_feature in feat_conf[s]:
        print(current_feature)
        pattern = f"{current_feature}_{current_label}_*"
        csv_train += sorted(glob.glob(os.path.join(decision_csv_train_dir, pattern)))
        csv_devel += sorted(glob.glob(os.path.join(decision_csv_devel_dir, pattern)))
    train_file.append(csv_train)
    devel_file.append(csv_devel)

In [None]:
# Preparing Features
#
# Load every train/devel CSV pair found above. The first CSV column is used as
# the index, the remaining columns become a plain array. X_rnn_train[s][k] and
# X_rnn_devel[s][k] hold the k-th file of label category s.

X_rnn_train = []
X_rnn_devel = []
for train_paths, devel_paths in zip(train_file, devel_file):
    loaded_pairs = [
        (pd.read_csv(train_path, index_col=0).values,
         pd.read_csv(devel_path, index_col=0).values)
        for train_path, devel_path in zip(train_paths, devel_paths)
    ]
    X_rnn_train.append([train_values for train_values, _ in loaded_pairs])
    X_rnn_devel.append([devel_values for _, devel_values in loaded_pairs])

In [None]:
# RNN & LSTM Algorithm Implementation
#
# For each label category and each feature set, the per-sentence class scores
# in X_rnn_train / X_rnn_devel are min-max scaled, grouped by story
# (filename_text), zero-padded to a common length and fed to a single-layer
# LSTM that predicts the story-level label. Devel UAR (macro recall), confusion
# matrices and softmax outputs are collected per category for the later cells.

feature_best_UAR_cat = []
prob_funsion_cat = []
UAR_p_a = []
UAR_p_v = []
cfs_a = []
cfs_v = []
pred_a_train = []
pred_v_train = []
pred_a_devel = []
pred_v_devel = []


def _majority_label(labels):
    """Return the most frequent value of a label Series; ties go to the alphabetically first label."""
    counts = labels.value_counts().sort_index()
    # mergesort is stable, so equal counts keep their alphabetical order
    return counts.sort_values(ascending=False, kind='mergesort').index[0]


def _story_frame(audio_names, story_ids, scores, labels):
    """Build the per-sentence table (file, story, L/M/H scores, true label).

    `scores` is read column-wise as (H, L, M) -- presumably the alphabetical
    class order of the upstream classifier; confirm against the CSV writer.
    """
    return pd.DataFrame(
        data={'filename_audio': audio_names,
              'filename_text': story_ids,  # filename_text == ID_Story
              'L': scores[:, 1],
              'M': scores[:, 2],
              'H': scores[:, 0],
              'true': labels.flatten()},
        columns=['filename_audio', 'filename_text', 'L', 'M', 'H', 'true'])


def _story_sequences(story_df, n_steps):
    """Return an (n_stories, n_steps, 3) array of per-story score sequences.

    Stories are emitted in sorted filename_text order, i.e. the same order in
    which groupby(...).agg(...) returns the story labels, so features and labels
    stay aligned. Shorter stories are zero-padded; longer ones are truncated to
    n_steps so devel data always matches the model's input length.
    """
    sequences = []
    for _, story in story_df.groupby('filename_text'):
        scores = story[['L', 'M', 'H']].to_numpy()[:n_steps]
        padding = np.zeros((n_steps - scores.shape[0], scores.shape[1]))
        sequences.append(np.vstack((scores, padding)))
    return np.asarray(sequences)


for count1 in range(len(X_rnn_train)):
    current_label = label_options[count1]
    feature_names = list(feat_conf[count1])
    print('\nRunning ' + task_name+' '+current_label+ ' LSTM' + ' baseline ... (this might take a while) \n')

    # Labels and file names depend only on the category, so they are prepared
    # once here instead of once per feature set.
    is_train = df_labels['filename_audio'].str.startswith('train')
    is_devel = df_labels['filename_audio'].str.startswith('devel')
    y_train = df_labels[current_label][is_train].values
    y_devel = df_labels[current_label][is_devel].values
    train_audio = df_labels['filename_audio'][is_train].values
    train_text = df_labels['filename_text'][is_train].values
    devel_audio = df_labels['filename_audio'][is_devel].values
    devel_text = df_labels['filename_text'][is_devel].values

    # Upsampling / Balancing: each class is repeated floor(max_count / count)
    # times. NOTE(review): this assumes the train score CSVs were written with
    # the same upsampling so that row counts match -- confirm.
    print('Upsampling ... ')
    num_samples_train = [len(y_train[y_train == label]) for label in classes]
    for label, ns_tr in zip(classes, num_samples_train):
        factor_tr = np.max(num_samples_train) // ns_tr
        # y_train must be extended last: its mask selects rows in the two arrays above it
        train_audio = np.concatenate((train_audio, np.tile(train_audio[y_train == label], (factor_tr - 1))))
        train_text = np.concatenate((train_text, np.tile(train_text[y_train == label], (factor_tr - 1))))
        y_train = np.concatenate((y_train, np.tile(y_train[y_train == label], (factor_tr - 1))))

    UAR_p = []
    cfs_matrix = []
    predic_train = []
    predic_devel = []
    for i in range(len(X_rnn_train[count1])):
        current_feature = feature_names[i]

        # Scale the scores to [0, 1] using train statistics only
        scaler = MinMaxScaler()
        X_train = scaler.fit_transform(X_rnn_train[count1][i])
        X_devel = scaler.transform(X_rnn_devel[count1][i])

        df_with_stories_train = _story_frame(train_audio, train_text, X_train, y_train)
        df_with_stories_devel = _story_frame(devel_audio, devel_text, X_devel, y_devel)

        # The sequence length is fixed by the longest train story
        max_length_train = df_with_stories_train.groupby('filename_text').size().max()
        max_length_devel = df_with_stories_devel.groupby('filename_text').size().max()
        if max_length_devel > max_length_train:
            print(f'Warning: devel stories longer than {max_length_train} sentences are truncated')

        print('Working on Train_story_prob')
        feature_stories_train = _story_sequences(df_with_stories_train, max_length_train)

        print('Working on Devel_story_prob')
        feature_stories_devel = _story_sequences(df_with_stories_devel, max_length_train)

        # Story label = majority vote over the sentence labels of the story
        story_label_train = df_with_stories_train.groupby('filename_text')['true'].agg(_majority_label).values
        story_label_devel = df_with_stories_devel.groupby('filename_text')['true'].agg(_majority_label).values

        lb = LabelBinarizer()
        binLabel_train = lb.fit_transform(story_label_train)
        binLabel_devel = lb.transform(story_label_devel)

        print(f'current feature sets: '+current_feature)
        model = Sequential()
        model.add(LSTM(64,
                       kernel_regularizer=l2(0.0003),
                       recurrent_regularizer=l2(0.0003),
                       input_shape=(feature_stories_train.shape[1], feature_stories_train.shape[2])))
        model.add(Dropout(0.3))
        model.add(Dense(3, activation='softmax'))
        optimizer = optimizers.Adam(learning_rate=0.001)
        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
        history = model.fit(feature_stories_train, binLabel_train,
                            epochs=200, batch_size=32,
                            validation_data=(feature_stories_devel, binLabel_devel),
                            verbose=1, shuffle=False)

        plt.plot(history.history['loss'], label='train')
        plt.plot(history.history['val_loss'], label='test')
        plt.legend()
        save_fig(f'feature sets_{current_feature}_{current_label}_training_LSTM')
        plt.show()

        y_pred_p_train = model.predict(feature_stories_train)
        y_pred_p_devel = model.predict(feature_stories_devel)

        # Softmax columns follow lb.classes_, so decode with it rather than a
        # hand-maintained class list.
        y_pred_f_train = lb.classes_[np.argmax(y_pred_p_train, axis=1)].tolist()
        y_pred_f_devel = lb.classes_[np.argmax(y_pred_p_devel, axis=1)].tolist()

        y_test_p_r = lb.inverse_transform(binLabel_devel)

        score_p = recall_score(y_test_p_r, y_pred_f_devel, labels=classes, average='macro')
        UAR_p.append(score_p)
        print(score_p*100)
        print(classes)
        devel_confusion = confusion_matrix(y_test_p_r, y_pred_f_devel, labels=classes)
        print(devel_confusion)
        cfs_matrix.append(devel_confusion)
        predic_train.append(y_pred_p_train)
        predic_devel.append(y_pred_p_devel)

    if(current_label=='A_cat'):
        UAR_p_a.append(UAR_p)
        cfs_a.append(cfs_matrix)
        pred_a_train.append(predic_train)
        pred_a_devel.append(predic_devel)
    else:
        UAR_p_v.append(UAR_p)
        cfs_v.append(cfs_matrix)
        pred_v_train.append(predic_train)
        pred_v_devel.append(predic_devel)
    

In [None]:
# Present all A_cat Result
# Ranks the A_cat feature sets by their devel macro recall, best first.
# feat_conf[1] is the A_cat dictionary; zipping a dict iterates its keys, so
# each score is paired positionally with a feature-set name.

sorted(zip(UAR_p_a[0],feat_conf[1]),reverse=True)

In [None]:
# Present all V_cat Result
# Ranks the V_cat feature sets by their devel macro recall, best first.
# feat_conf[0] is the V_cat dictionary; zipping a dict iterates its keys, so
# each score is paired positionally with a feature-set name.

sorted(zip(UAR_p_v[0],feat_conf[0]),reverse=True)

In [None]:
# Present Top 3 A_cat Result & Their confusion Matrix
# The ranking is computed once; each confusion matrix is looked up through the
# position of the feature name in the A_cat feature dictionary.

print('A_cat Top 3:')
ranked_a_cat = sorted(zip(UAR_p_a[0], feat_conf[1]), reverse=True)
a_cat_feature_names = list(feat_conf[1].keys())
for rank in range(3):
    score, feature = ranked_a_cat[rank]
    print(f'Feature: {feature}, Score: {score}')
    print('Confusion Matrix:')
    print(classes)
    print(cfs_a[0][a_cat_feature_names.index(feature)])

In [None]:
# Present Top 3 V_cat Result & Their confusion Matrix
# The ranking is computed once; each confusion matrix is looked up through the
# position of the feature name in the V_cat feature dictionary. The matrices
# come from cfs_v (the V_cat runs), not from the A_cat list.

print('V_cat Top 3:')
ranked_v_cat = sorted(zip(UAR_p_v[0], feat_conf[0]), reverse=True)
v_cat_feature_names = list(feat_conf[0].keys())
for x in range(3):
    score, feature = ranked_v_cat[x]
    print(f'Feature: {feature}, Score: {score}')
    print('Confusion Matrix:')
    print(classes)
    print(cfs_v[0][v_cat_feature_names.index(feature)])

In [None]:
# Preparing Fusion Features
# Stack the per-feature-set LSTM softmax outputs of each category into one
# array (first axis = feature set). The V_cat arrays are built from the V_cat
# predictions; using the A_cat lists here would silently duplicate A_cat.

fu_a_train = np.array(pred_a_train[0])
fu_a_devel = np.array(pred_a_devel[0])
fu_v_train = np.array(pred_v_train[0])
fu_v_devel = np.array(pred_v_devel[0])

In [None]:
# Option for Feature Selection
#
# Picks the feature sets that take part in the fusion, separately for A_cat
# and V_cat:
#   best == False -> every feature set whose devel UAR exceeds `thresh`
#   best == True  -> the `top` best-scoring feature sets
# Scores are paired with the feature dictionary of their own category
# (feat_conf[1] for A_cat, feat_conf[0] for V_cat); the two dictionaries differ
# in their early-fusion entries, so mixing them yields wrong feature names.

thresh = 0.38
top = 3
# best = True
best = False # Use All feature or Top 3

ranked_a = sorted(zip(UAR_p_a[0], feat_conf[1]), reverse=True)
ranked_v = sorted(zip(UAR_p_v[0], feat_conf[0]), reverse=True)

if not best:
    useful_a = [feature for score, feature in ranked_a if score > thresh]
    useful_v = [feature for score, feature in ranked_v if score > thresh]
else:
    # Slicing tolerates fewer than `top` available feature sets
    useful_a = [feature for _, feature in ranked_a[:top]]
    useful_v = [feature for _, feature in ranked_v[:top]]

In [None]:
# Fusion Settings
#
# Maps each base feature set to its LSTM predictions as a tuple
# (A_cat train, A_cat devel, V_cat train, V_cat devel). The index is the
# position of the feature set in the merged feature dictionaries:
# auDeep 0-4, BoAW 5-9, ComParE 10, DeepSpectrum 11, Fisher vectors 12-15.
# NOTE(review): this positional mapping assumes exactly one CSV file was found
# per feature set -- confirm against train_file / devel_file.

def _fusion_entry(position):
    """Return the four prediction arrays of the feature set at `position`."""
    return (fu_a_train[position], fu_a_devel[position],
            fu_v_train[position], fu_v_devel[position])

feat_conf_c_fu = {'ComParE': _fusion_entry(10)}
# All four arrays use index 11; mixing in index 12 would pull the A_cat devel
# predictions of the first Fisher-vector set instead.
feat_conf_d_fu = {'DeepSpectrum_resnet50': _fusion_entry(11)}

feat_conf_b_fu = {'BoAW-125':  _fusion_entry(5),
                  'BoAW-250':  _fusion_entry(6),
                  'BoAW-500':  _fusion_entry(7),
                  'BoAW-1000': _fusion_entry(8),
                  'BoAW-2000': _fusion_entry(9)}

feat_conf_a_fu = {'auDeep-30':    _fusion_entry(0),
                  'auDeep-45':    _fusion_entry(1),
                  'auDeep-60':    _fusion_entry(2),
                  'auDeep-75':    _fusion_entry(3),
                  'auDeep-fused': _fusion_entry(4)}

feat_conf_f_fu = {f'fisher_vector_{fisher_option[0]}': _fusion_entry(12),
                  f'fisher_vector_{fisher_option[1]}': _fusion_entry(13),
                  f'fisher_vector_{fisher_option[2]}': _fusion_entry(14),
                  f'fisher_vector_{fisher_option[3]}': _fusion_entry(15)}

In [None]:
# Preparing Label for Each datasets on each Category
#
# For every partition (train / devel) and category (A_cat / V_cat) this yields
# the sentence-level labels, a per-sentence table and the story-level labels
# (majority vote per filename_text, ties resolved alphabetically).

def _story_level_labels(partition, category):
    """Return (sentence labels, per-sentence frame, story labels) for one partition/category."""
    in_partition = df_labels['filename_audio'].str.startswith(partition)
    sentence_labels = df_labels[category][in_partition].values
    # The 'L', 'M', 'H' columns are requested but not supplied, so they are
    # created empty (NaN), as before.
    sentence_frame = pd.DataFrame(
        data={'filename_audio': df_labels['filename_audio'][in_partition].values,
              'filename_text': df_labels['filename_text'][in_partition].values,  # filename_text == ID_Story
              'true': sentence_labels.flatten()},
        columns=['filename_audio', 'filename_text', 'L', 'M', 'H', 'true'])
    story_labels = sentence_frame.groupby(['filename_text'])['true'].agg(
        lambda x: x.value_counts().sort_index().sort_values(ascending=False, kind='mergesort').index[0]).values
    return sentence_labels, sentence_frame, story_labels


y_train_a, df_with_stories_train_a, story_label_train_a = _story_level_labels('train', 'A_cat')
y_train_v, df_with_stories_train_v, story_label_train_v = _story_level_labels('train', 'V_cat')
y_devel_a, df_with_stories_devel_a, story_label_devel_a = _story_level_labels('devel', 'A_cat')
y_devel_v, df_with_stories_devel_v, story_label_devel_v = _story_level_labels('devel', 'V_cat')



In [None]:
# Merge Features and Label
# Bundle the fusion inputs and story labels per category, in the order
# [A_cat, V_cat]; the DNN cell below indexes these lists with the position in
# its `cat` list, which uses the same order.
# NOTE(review): X_train_fu_DNN_a / X_train_fu_DNN_v / X_devel_fu_DNN_a /
# X_devel_fu_DNN_v are not defined anywhere in the visible notebook; this cell
# fails on a fresh kernel unless another cell creates them -- confirm.

X_train_fu_DNN=[X_train_fu_DNN_a,X_train_fu_DNN_v]
X_devel_fu_DNN=[X_devel_fu_DNN_a,X_devel_fu_DNN_v]
story_label_train=[story_label_train_a,story_label_train_v]
story_label_devel=[story_label_devel_a,story_label_devel_v]

In [None]:
# DNN Model Implementation
#
# Trains one small dense network per category on the fused features and
# evaluates it on devel. Per category it stores the macro recall (UAR), the
# confusion matrix and the predicted story labels.

cat = ['A_cat','V_cat']
UAR_p_DNN = []
cfs_matrix_DNN = []
predic_DNN = []
for l in range(len(cat)):
    X_train_DNN = X_train_fu_DNN[l]
    X_devel_DNN = X_devel_fu_DNN[l]
    y_train_label_DNN = story_label_train[l]
    y_devel_label_DNN = story_label_devel[l]

    # A fresh binarizer per category, so this cell does not depend on an
    # instance left over from an earlier cell.
    lb = LabelBinarizer()
    y_train_label_DNN_bin = lb.fit_transform(y_train_label_DNN)
    y_devel_label_DNN_bin = lb.transform(y_devel_label_DNN)

    model1 = Sequential()
    # NOTE(review): this hidden layer has no activation, so the network is
    # linear up to the softmax -- confirm that this is intended.
    model1.add(Dense(64,input_shape=(X_train_DNN.shape[1],)))
    model1.add(Dense(3, activation='softmax'))
    optimizer = optimizers.Adam(learning_rate=0.0001)
    model1.compile(loss='categorical_crossentropy',optimizer=optimizer,metrics=['categorical_accuracy'])
    history = model1.fit(X_train_DNN, y_train_label_DNN_bin,
                         epochs=200, batch_size=25,
                         validation_data=(X_devel_DNN, y_devel_label_DNN_bin),
                         verbose=2, shuffle=False)

    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='test')
    plt.legend()
    plt.show()

    y_pred_p_DNN = model1.predict(X_devel_DNN)

    # Softmax columns follow lb.classes_, so decode with it rather than a
    # hand-maintained class list.
    y_pred_f_DNN = lb.classes_[np.argmax(y_pred_p_DNN, axis=1)].tolist()
    y_devel_p_r = lb.inverse_transform(y_devel_label_DNN_bin)

    score_p_DNN = recall_score(y_devel_p_r, y_pred_f_DNN, labels=classes, average='macro')
    UAR_p_DNN.append(score_p_DNN)
    print(cat[l])
    print(score_p_DNN*100)
    print(classes)
    devel_confusion_DNN = confusion_matrix(y_devel_p_r, y_pred_f_DNN, labels=classes)
    print(devel_confusion_DNN)
    cfs_matrix_DNN.append(devel_confusion_DNN)
    predic_DNN.append(y_pred_f_DNN)


In [None]:
# Present DNN result
# One devel macro-recall value per category, in the order of `cat`
# (A_cat first, then V_cat).

UAR_p_DNN

In [None]:
# Present Confusion Matrix
# One devel confusion matrix per category, in the order of `cat`; rows and
# columns follow `classes` (passed as labels= to confusion_matrix).

cfs_matrix_DNN