In [13]:
import pandas as pd
import os
import librosa
import numpy as np
import scipy
import re


#Data handling
from sklearn.model_selection import train_test_split

#Pickling
from six.moves import cPickle as pickle

# Models 
from sklearn.linear_model import LogisticRegression, Perceptron, SGDClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

#Metrics
from sklearn.metrics import recall_score, precision_score, f1_score

#HTTP
import requests
import json

#
import collections

import matplotlib.pyplot as plt
import matplotlib.style as ms
import librosa.display
import IPython.display as ipd
import seaborn
import codecs
ms.use('seaborn-muted')
%matplotlib inline

# Row labels that load_data() drops from the dataset before any processing:
# the first list is dropped unconditionally, the second is described there as
# rows that "have wrong Alignment file".
no_alignment_file = [4764]
wrong_alignment = [3730]

In [14]:
pfief_path = 'Pattern_construction_code/luis_pattern_half/patterns_ignore_5'
# pat_table = pd.read_csv('Pattern_construction_code/luis_pattern_half/patterns_ignore_5',sep='\t')
# pat_table

# Read the whole file as UTF-8 text, then count the distinct values found in
# the first tab-separated field of each line.
with codecs.open(pfief_path,'r','utf-8') as content_file:
    content = content_file.read()
len(set(map(lambda x: x.split('\t')[0] ,content.split('\n'))))   

44

# Models

In [15]:
import basic_models

In [21]:
def get_pattern(text):
    """Ask the remote pattern service for the patterns found in ``text``.

    ``text`` is JSON-encoded and POSTed as the ``input_tweets`` form field to
    the ``/api/get_patt`` endpoint. The response body is parsed as JSON and is
    expected to be a sequence of objects that each carry a ``'pattern'`` entry.

    Returns:
        list: the ``'pattern'`` value of every response item, in response
        order. A real list is returned (not a lazy ``map`` object) so callers
        can index the result, e.g. ``get_pattern([sentence])[0]``.
    """
    text = json.dumps(text)
    url = 'http://192.168.2.101:7878/api/get_patt'
    data = dict(input_tweets = text)
    resp = requests.post(url=url, data=data)
    r = json.loads(resp.text)
    # In Python 3 ``map`` is an iterator and cannot be subscripted, which broke
    # the ``[0]`` lookup done by extract_patterns(); materialise it instead.
    return [item['pattern'] for item in r]
    
def get_deep_emotion(text):
    """Query the remote ``/api/get_emo`` endpoint for ``text``.

    The input is JSON-encoded and sent as the ``input_tweets`` form field of a
    POST request; the decoded JSON response is returned unchanged.
    """
    endpoint = 'http://192.168.2.101:7878/api/get_emo'
    form = {'input_tweets': json.dumps(text)}
    response = requests.post(url=endpoint, data=form)
    return json.loads(response.text)

def clean_text(text, remove_actions = True):
    """Normalise a transcript line to lower-case words separated by single spaces.

    Args:
        text: the raw transcript string.
        remove_actions: when true, every ``[...]`` annotation (together with
            one optional leading space) is deleted before punctuation is
            handled. When false the brackets are treated as ordinary
            punctuation and the annotation words are kept.

    Returns:
        str: ``text`` with the listed punctuation characters replaced by
        spaces, runs of spaces collapsed, lower-cased and stripped.
    """
    punct_str = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~«»“…‘”'
    if(remove_actions):
        # Match up to the *first* closing bracket. The previous class ``[^)]``
        # let a single match run from the first "[" to the last "]", deleting
        # the spoken words between two annotations.
        text = re.sub(r" ?\[[^\]]+\]", "", text)
    for p in punct_str:
        text = text.replace(p,' ')
    text = re.sub(' +', ' ', text)
    return text.lower().strip()

def get_f1_score(precision,recall):
    """Return the harmonic mean of ``precision`` and ``recall``.

    When both inputs are zero the harmonic mean is undefined; ``0.0`` is
    returned in that case instead of dividing by zero.
    """
    denominator = precision + recall
    if denominator == 0:
        return 0.0
    return 2 * (precision * recall) / denominator

def get_patterns_load(data,patterns_df, emotion):
    """Collect every pattern belonging to the rows of ``data`` labelled ``emotion``.

    The ``index`` column of the matching rows is used to look rows up in
    ``patterns_df``; the non-null cells of those rows are returned as one flat
    list (duplicates kept, row order preserved).
    """
    wanted = list(data.loc[data.emotion == emotion, 'index'])
    selected = patterns_df.loc[wanted]
    load = []
    for _, cells in selected.iterrows():
        load.extend(cells.dropna())
    return load


def extract_patterns(data,extract=False):
    """Return a ``{row index value: set of patterns}`` mapping for ``data``.

    With ``extract=False`` (the default) the mapping is read from
    ``pickles/patterns/pattern.pickle`` and restricted to the values of
    ``data['index']``. Any failure (missing file, missing key, ...) is
    printed and ``None`` is returned.

    With ``extract=True`` the patterns of every row's ``text`` are requested
    through get_pattern(), the full mapping is written to the same pickle
    file on a best-effort basis, and the mapping is returned whether or not
    saving succeeded.

    NOTE(review): unpickling executes arbitrary code; only load pickle files
    produced by this notebook.
    """
    pickle_path = 'pickles/patterns/pattern.pickle'

    if not extract:
        try:
            with open(pickle_path,'rb') as f:
                cached = pickle.load(f)['patterns']
            return {key: cached[key] for key in list(data['index'])}
        except Exception as e:
            print('Error loading base datasets pickle: ', e)
            return None

    patterns = {}
    for position, row in data.iterrows():
        found = set(get_pattern([row['text']])[0].values())
        utterance = row['index']
        patterns[utterance] = found
        print('Extracted pattern from '+ utterance + ' index:'+ str(position))
        print('Size: ', len(found), 'Patterns size', len(patterns))
    try:
        print('Saving Pickle')
        with open(pickle_path,'wb') as f:
            pickle.dump({'patterns' : patterns},f,pickle.HIGHEST_PROTOCOL)
            print('Successfully saved in pattern.pickle')
    except Exception as e:
        print('Unable to save data to pickle', e)
        print('Patterns probably not saved.')
    return patterns
            
def build_emotions_counter(data,patterns_df):
    """Count pattern occurrences separately for every emotion present in ``data``.

    Returns a dict mapping each distinct value of ``data['emotion']`` to a
    ``collections.Counter`` over the patterns gathered by get_patterns_load().
    """
    return {
        emotion: collections.Counter(get_patterns_load(data,patterns_df,emotion))
        for emotion in list(data['emotion'].unique())
    }

def build_frequencyframe(all_patterns,emotions_counter):
    """Build a pattern-by-emotion frequency table.

    Rows are the patterns in ``all_patterns`` (repeated entries collapse into
    one row), columns are the keys of ``emotions_counter`` and each cell is
    ``emotions_counter[emotion][pattern]``.
    """
    table = {
        pattern: {
            emotion: counter[pattern]
            for emotion, counter in emotions_counter.items()
        }
        for pattern in all_patterns
    }
    return pd.DataFrame(table).T

def build_pfief(df_patt):
    """Turn a pattern-by-emotion count table into the score table used for prediction.

    With add-one smoothing on every count:
      * ``ief`` = log10((row total + 1) / (count + 1) + 1)
      * ``pf``  = log10((column total + 1) / (count + 1))
    The element-wise product ``ief * pf`` is returned, so a higher count gives
    a lower score.
    """
    smoothed = df_patt + 1
    row_totals = df_patt.sum(axis=1) + 1
    column_totals = df_patt.sum(axis=0) + 1
    ief = np.log10(smoothed.rdiv(row_totals, axis=0) + 1)
    pf = np.log10(column_totals / smoothed)
    return ief * pf

def balance_data(data):
    """Down-sample every emotion to the size of the rarest one and shuffle.

    The sampling is random and unseeded, so repeated calls return different
    rows.
    """
    smallest = min(data.groupby('emotion').count()['index'])
    per_emotion = [
        data[data.emotion == emotion].sample(n=smallest)
        for emotion in list(data['emotion'].unique())
    ]
    return pd.concat(per_emotion).sample(frac=1)
        
def two_emotions(data,emotional_mapping,emotion1,emotion2):
    """Return a shuffled, size-balanced subset containing only two emotions.

    Rows are selected by ``emotion_code`` using the codes that
    ``emotional_mapping`` assigns to ``emotion1`` and ``emotion2``; the larger
    group is randomly down-sampled to the size of the smaller one.
    """
    first = data[data.emotion_code == emotional_mapping[emotion1]]
    second = data[data.emotion_code == emotional_mapping[emotion2]]
    if len(first) > len(second):
        first = first.sample(n=len(second))
    else:
        second = second.sample(n=len(first))
    return pd.concat([first, second]).sample(frac=1)

def filter_word_count(data, n_count):
    """Keep the rows whose ``text`` has at least ``n_count`` space-separated tokens."""
    long_enough = [len(sentence.split(' ')) >= n_count for sentence in data['text']]
    return data[long_enough]

def remove_empty_patterns(data,patterns):
    """Drop every utterance for which no pattern was found.

    Args:
        data: DataFrame with an ``index`` column whose values are the keys of
            ``patterns``.
        patterns: dict mapping those keys to a sized collection of patterns.

    Returns:
        tuple: ``(data, patterns)`` where ``patterns`` no longer contains
        empty entries and ``data`` no longer contains the corresponding rows.
        Row labels, column order and column dtypes of ``data`` are preserved.
    """
    empty_keys = {key for key, found in patterns.items() if len(found) < 1}
    kept_patterns = {key: found for key, found in patterns.items() if key not in empty_keys}
    # A boolean mask replaces the former ``DataFrame.from_items(...).T``
    # round-trip: ``from_items`` no longer exists in pandas >= 1.0 and the
    # row-wise rebuild turned every column into ``object`` dtype.
    kept_data = data[~data['index'].isin(empty_keys)]
    return kept_data, kept_patterns


def remove_multiwildcard(patterns):
    """Keep only the patterns that contain exactly one ``.+`` wildcard token.

    ``patterns`` is modified in place (each value is replaced by a filtered
    set) and the same dict is returned.
    """
    for key in patterns:
        patterns[key] = {
            candidate for candidate in patterns[key]
            if candidate.split(' ').count('.+') == 1
        }
    return patterns


In [22]:
def load_data(word_count,emotional_mapping):
    """Load the IEMOCAP sentence table and the cached patterns that go with it.

    Steps, in order:
      1. read ``data/IEMOCAP_sentences_votebased.csv`` (first column as index);
      2. add ``emotion_code`` from ``emotional_mapping`` and keep codes below 4;
      3. drop the row labels listed in ``no_alignment_file`` and
         ``wrong_alignment`` (a failed drop is printed and otherwise ignored);
      4. clean the ``text`` column with clean_text();
      5. keep rows with at least ``word_count`` words;
      6. load the patterns of the remaining rows and discard rows without any.

    Returns:
        tuple: ``(data, patterns)`` as produced by remove_empty_patterns().
    """
    # full = generate_IEMOCAP_df()
    data = pd.read_csv('data/IEMOCAP_sentences_votebased.csv',index_col=0)
    data['emotion_code'] = data['emotion'].map( emotional_mapping ).astype(int)
    # Only the emotions coded 0-3 are kept; the remaining classes are dropped
    # ("Not enough data" per the original note).
    data = data[data.emotion_code < 4]

    # Remove the rows with a missing or wrong alignment file.
    for bad_rows in (no_alignment_file, wrong_alignment):
        try:
            data = data.drop(bad_rows)
        except Exception as e:
            print('Error at: ',e)

    # Clean transcripts, then filter by word count.
    data['text'] = data['text'].apply(clean_text)
    data = filter_word_count(data, word_count)

    patterns = extract_patterns(data)
    return remove_empty_patterns(data,patterns)

def build_model(data,patterns):
    """Build the pattern-by-emotion score table from a labelled data set.

    Args:
        data: DataFrame with ``index`` and ``emotion`` columns; every value of
            ``index`` must be a key of ``patterns``.
        patterns: dict mapping utterance keys to their patterns.

    Returns:
        DataFrame indexed by pattern with one column per emotion, as computed
        by build_pfief() on the pattern frequency table.
    """
    transcript_order = list(data['index'])
    patterns_df = pd.DataFrame.from_dict(patterns, orient='index')
    patterns_df = patterns_df.loc[transcript_order]
    # Count on the frame that was passed in. The previous code read the global
    # ``X_train`` here, silently ignoring the ``data`` argument.
    emotions_counter = build_emotions_counter(data,patterns_df)
    all_patterns = []
    for _, cells in patterns_df.iterrows():
        # extend() avoids re-copying the accumulated list for every row.
        all_patterns.extend(cells.dropna())

    df_patt = build_frequencyframe(all_patterns,emotions_counter)
    em_df = build_pfief(df_patt)
    return em_df

def get_frequency_vectors(data,patterns_list):
    """Encode every utterance of ``data`` as a 0/1 vector over ``patterns_list``.

    The cached patterns of ``data`` are loaded through extract_patterns(); a
    cell is 1 when the pattern of that column occurs in the utterance. Rows
    are ordered like ``data['index']``.
    """
    patterns = extract_patterns(data)
    transcript_order = list(data['index'])
    membership = [
        np.isin(patterns_list, np.array(list(found)))
        for found in patterns.values()
    ]
    vectors = pd.DataFrame(membership,columns=patterns_list,index=patterns.keys())
    # Multiplying by 1 converts the boolean membership flags to integers.
    return vectors.loc[transcript_order] * 1
    
def calculate_scores(em_df,vectors,emotion_codes=None):
    """Score every utterance against every emotion and pick the best match.

    Args:
        em_df: pattern-by-emotion score table (rows = patterns).
        vectors: utterance-by-pattern table; its columns must be in the same
            order as the rows of ``em_df`` (values are combined by position).
        emotion_codes: optional mapping from emotion name to integer code.
            Defaults to the notebook-level ``emotional_mapping``.

    Returns:
        DataFrame indexed like ``vectors`` with one score column per emotion,
        plus ``pred_emotion`` (the emotion with the *lowest* score) and
        ``pred_code`` (its integer code).
    """
    if emotion_codes is None:
        emotion_codes = emotional_mapping
    # ``.values`` replaces ``DataFrame.as_matrix()``, which was removed in
    # pandas 1.0; one matrix product replaces the per-row dot products.
    raw_scores = vectors.values.dot(em_df.values)
    scores = pd.DataFrame(raw_scores,columns=list(em_df.columns),index=list(vectors.index))
    # Computed before the prediction columns are added, so only emotion
    # columns take part in the comparison.
    scores['pred_emotion'] = scores.idxmin(axis=1)
    scores['pred_code'] = scores['pred_emotion'].map(emotion_codes).astype(int)
    return scores

In [23]:
# Emotion label -> integer code. load_data() keeps only the codes below 4
# (ang, sad, hap, neu).
emotional_mapping = {'ang': 0, 'sad': 1, 'hap': 2, 'neu': 3,'fru': 4,'exc': 5,'fea': 6,'sur': 7,'dis': 8, 'xxx':9,'oth':10}
# Utterances with at least 3 words, together with their cached patterns.
data,patterns = load_data(3,emotional_mapping)
# data = two_emotions(data,emotional_mapping,'sad','exc')
# Balance Data
# data = balance_data(data)
y = data.emotion_code
# X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.2)

# X_train.groupby('emotion').count()['index'] #  6,453 Total
# X_test.groupby('emotion').count()['index']
# Load the train/test split from a pickle instead of re-splitting, so every
# run uses the same split.
# NOTE(review): if loading fails only a message is printed; X_train, X_test,
# y_train and y_test then stay undefined and the following cells will fail.
try:
    with open('pickles/matrix_basedata.pickle','rb') as f:
        save = pickle.load(f)
        X_train = save['X_train']
        X_test = save['X_test']
        y_train = save['y_train']
        y_test = save['y_test']
        del save
except Exception as e:
    print('Error loading base datasets pickle: ', e)

Error at:  labels [4764] not contained in axis


In [24]:
data.groupby('emotion').count()['index']

emotion
ang    1141
hap     680
neu    1440
sad     947
Name: index, dtype: int64

In [10]:
# try:
#     print('Saving Pickle')
#     with open('pickles/matrix_basedata.pickle','wb') as f:
#         save = {
#             'X_train' : X_train,
#             'X_test' : X_test,
#             'y_train' : y_train,
#             'y_test': y_test
#         }
#         pickle.dump(save,f,pickle.HIGHEST_PROTOCOL)
#         print('Successfully saved in matrix_basedata.pickle')
# except Exception as e:
#     print('Unable to save data to pickle', e)


In [25]:
em_df = build_model(X_train,patterns)
patterns_list = np.array(list(em_df.index))

In [27]:
print(len(em_df))
# em_df.head()

5953


# Score - Training Data

In [58]:
vectors = get_frequency_vectors(X_train,patterns_list)

In [59]:
scores = calculate_scores(em_df,vectors)
pred_y = list(scores['pred_code'])

In [60]:
# pred_y, y_train
precision = precision_score(list(y_train),pred_y,average='macro')
recall = recall_score(list(y_train),pred_y,average='macro')
f1 = get_f1_score(precision,recall)
print('Precision Macro',precision)
print('Recall Macro',recall)
print('F1 Macro',f1)
print(' ')
precision = precision_score(list(y_train),pred_y,average='micro')
recall = recall_score(list(y_train),pred_y,average='micro')
f1 = get_f1_score(precision,recall)
print('Precision Micro',precision)
print('Recall Micro',recall)
print('F1 Micro',f1)
print(' ')
precision = precision_score(list(y_train),pred_y,average='weighted')
recall = recall_score(list(y_train),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print('Precision Weighted',precision)
print('Recall Weighted',recall)
print('F1 Weighted',f1)

Precision Macro 0.75391121567249
Recall Macro 0.6167756377209378
F1 Macro 0.6784833015362706
 
Precision Micro 0.6838978015448604
Recall Micro 0.6838978015448604
F1 Micro 0.6838978015448604
 
Precision Weighted 0.7256284813320032
Recall Weighted 0.6838978015448604
F1 Weighted 0.7041453985638744


#  Testing

In [352]:
vectors = get_frequency_vectors(X_test,patterns_list)

In [353]:
scores = calculate_scores(em_df,vectors)
pred_y = list(scores['pred_code'])

In [354]:
precision = precision_score(list(y_test),pred_y,average='macro')
recall = recall_score(list(y_test),pred_y,average='macro')
f1 = get_f1_score(precision,recall)
print('Precision Macro',precision)
print('Recall Macro',recall)
print('F1 Macro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='micro')
recall = recall_score(list(y_test),pred_y,average='micro')
f1 = get_f1_score(precision,recall)
print('Precision Micro',precision)
print('Recall Micro',recall)
print('F1 Micro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print('Precision Weighted',precision)
print('Recall Weighted',recall)
print('F1 Weighted',f1)

Precision Macro 0.6599539131684076
Recall Macro 0.5276922251210888
F1 Macro 0.5864584368855466
 
Precision Micro 0.5831353919239906
Recall Micro 0.5831353919239906
F1 Micro 0.5831353919239906
 
Precision Weighted 0.633415830621282
Recall Weighted 0.5831353919239906
F1 Weighted 0.6072365582230232


# Without multiple wild-card patterns

In [28]:
def remove_multiwildcard(patterns):
    """Keep only the patterns that contain exactly one ``.+`` wildcard token.

    ``patterns`` is modified in place (each value is replaced by a filtered
    set) and the same dict is returned.
    """
    for key in patterns:
        patterns[key] = {
            candidate for candidate in patterns[key]
            if candidate.split(' ').count('.+') == 1
        }
    return patterns


patterns = remove_multiwildcard(patterns)
# data = two_emotions(data,emotional_mapping,'sad','exc')
# Balance Data
# data = balance_data(data)
# y = data.emotion_code
# X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.3)

# data.groupby('emotion').count()['index'] #  6,453 Total

In [None]:
em_df = build_model(X_train,patterns)
patterns_list = np.array(list(em_df.index))

In [70]:
print(len(em_df))
em_df.head()
# em_df.to_pickle('pickles/patterns/pfief_matrix.pickle')

5648


Unnamed: 0,ang,hap,neu,sad
.+ a,1.392265,1.770622,1.215614,1.678883
.+ a big,1.786332,3.8863,1.525532,4.065756
.+ a bit,1.609671,2.451982,1.62127,2.565206
.+ a day,1.489879,3.441792,2.452887,2.383968
.+ a dog,2.073588,1.94315,1.226444,2.032878


# Score - Training Data

In [358]:
vectors = get_frequency_vectors(X_train,patterns_list)
scores = calculate_scores(em_df,vectors)
pred_y = list(scores['pred_code'])

In [359]:
# pred_y, y_train
# precision = precision_score(list(y_train),pred_y,average='macro')
# recall = recall_score(list(y_train),pred_y,average='macro')
# f1 = get_f1_score(precision,recall)
# print('Precision Macro',precision)
# print('Recall Macro',recall)
# print('F1 Macro',f1)
# print(' ')
# precision = precision_score(list(y_train),pred_y,average='micro')
# recall = recall_score(list(y_train),pred_y,average='micro')
# f1 = get_f1_score(precision,recall)
# print('Precision Micro',precision)
# print('Recall Micro',recall)
# print('F1 Micro',f1)
# print(' ')
precision = precision_score(list(y_train),pred_y,average='weighted')
recall = recall_score(list(y_train),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print('Precision Weighted',precision)
print('Recall Weighted',recall)
print('F1 Weighted',f1)


# Precision Macro 0.7664926811354967
# Recall Macro 0.6423609709732143
# F1 Macro 0.6989583086378197
 
# Precision Micro 0.6947368421052632
# Recall Micro 0.6947368421052632
# F1 Micro 0.6947368421052632
 
# Precision Weighted 0.7386741269995074
# Recall Weighted 0.6947368421052632
# F1 Weighted 0.7160320960247799

Precision Weighted 0.752504104064495
Recall Weighted 0.7225806451612903
F1 Weighted 0.7372388620882049


#  Testing

In [360]:
vectors = get_frequency_vectors(X_test,patterns_list)

In [361]:
scores = calculate_scores(em_df,vectors)
pred_y = list(scores['pred_code'])

In [362]:
# precision = precision_score(list(y_test),pred_y,average='macro')
# recall = recall_score(list(y_test),pred_y,average='macro')
# f1 = get_f1_score(precision,recall)
# print('Precision Macro',precision)
# print('Recall Macro',recall)
# print('F1 Macro',f1)
# print(' ')
# precision = precision_score(list(y_test),pred_y,average='micro')
# recall = recall_score(list(y_test),pred_y,average='micro')
# f1 = get_f1_score(precision,recall)
# print('Precision Micro',precision)
# print('Recall Micro',recall)
# print('F1 Micro',f1)
# print(' ')
precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print('Precision Weighted',precision)
print('Recall Weighted',recall)
print('F1 Weighted',f1)


# Precision Macro 0.6295216882336137
# Recall Macro 0.5249158266314806
# F1 Macro 0.5724794856483989
 
# Precision Micro 0.5874901029295329
# Recall Micro 0.5874901029295329
# F1 Micro 0.5874901029295329
 
# Precision Weighted 0.6153167839850325
# Recall Weighted 0.5874901029295329
# F1 Weighted 0.6010815612885869

Precision Weighted 0.6081336974610542
Recall Weighted 0.5843230403800475
F1 Weighted 0.5959906465057085


# Acoustic Weights

In [376]:
# Load the three pre-computed pattern feature tables from one pickle file.
# NOTE(review): on failure only a message is printed, so the three tables stay
# undefined and the matrix-building cell will fail.
try:
    with open('pickles/patterns/scaledmfcc20_pattern_features4emo.pickle','rb') as f:
        save = pickle.load(f)
        full_feature_table = save['full_feature_table']
        wc_feature_table = save['wc_feature_table']
        cw_feature_table = save['cw_feature_table']
        del save
except Exception as e:
    print('Error loading pattern features pickle: ', e)
    

##############################MATRIX OPERATIONS###############################################
def calculate_final_matrix(em_df,matrix):
    """Multiply every row of ``em_df`` by the row of ``matrix`` with the same label.

    Returns a new DataFrame with one row per row of ``em_df``; a label missing
    from ``matrix`` raises ``KeyError``.
    """
    return pd.DataFrame([
        scores * matrix.loc[label]
        for label, scores in em_df.iterrows()
    ])

def calculate_final_matrix_mul(em_df,matrix):
    """Boost every row of ``em_df`` by its product with the matching row of ``matrix``.

    Each output row is ``row + row * matrix.loc[label]``.
    """
    boosted = []
    for label, scores in em_df.iterrows():
        weights = matrix.loc[label]
        boosted.append(scores + (scores * weights))
    return pd.DataFrame(boosted)

def calculate_final_matrix_sum(em_df,matrix):
    """Add to every row of ``em_df`` the row of ``matrix`` with the same label."""
    return pd.DataFrame([
        scores + matrix.loc[label]
        for label, scores in em_df.iterrows()
    ])

##############################MULTI MATRIX OPERATIONS###############################################
def calculate_final_multimatrix(em_df,multimatrix):
    """Multiply every row of ``em_df`` by the matching row of each matrix in turn.

    ``multimatrix`` is a list of DataFrames indexed like ``em_df``; the
    matrices are applied in list order.
    """
    weighted_rows = []
    for label, scores in em_df.iterrows():
        product = scores
        for coefficients in multimatrix:
            product = product * coefficients.loc[label]
        weighted_rows.append(product)
    return pd.DataFrame(weighted_rows)

def calculate_final_multimatrix_mul(em_df,multimatrix):
    """Scale every row of ``em_df`` by ``(1 + weights)`` for each matrix in turn.

    This is the multi-matrix counterpart of calculate_final_matrix_mul():
    for a single matrix, ``row * (1 + m)`` equals ``row + row * m``.

    Args:
        em_df: pattern-by-emotion score table.
        multimatrix: list of DataFrames indexed like ``em_df``.

    Returns:
        DataFrame with one row per row of ``em_df``.
    """
    final = []
    mmatrix_size = len(multimatrix)
    for val in em_df.iterrows():
        temp_val = val[1]
        for i in range(mmatrix_size):
            # The multiplication operator was missing here, which made the
            # expression *call* the Series and raise TypeError.
            temp_val = temp_val * (1 + multimatrix[i].loc[val[0]])
        final.append(temp_val)
    return pd.DataFrame(final)

def calculate_final_multimatrix_sum(em_df,multimatrix):
    """Add to every row of ``em_df`` the matching row of each matrix in turn.

    ``multimatrix`` is a list of DataFrames indexed like ``em_df``; the
    matrices are applied in list order.
    """
    summed_rows = []
    for label, scores in em_df.iterrows():
        total = scores
        for offsets in multimatrix:
            total = total + offsets.loc[label]
        summed_rows.append(total)
    return pd.DataFrame(summed_rows)


################################### MATRIX BUILD OPERATIONS ######################################################

def build_acumatrix(data,feature_table,saveToPickle = False, savePath = ''):
    """Average one scalar feature per (pattern, emotion) pair.

    Args:
        data: DataFrame with ``index`` and ``emotion`` columns.
        feature_table: mapping ``utterance key -> {feature key: value}``. The
            pattern name is the second ``'_'``-separated field of the feature
            key (presumably keys look like ``'<n>_<pattern>'``; verify against
            the code that builds the table).
        saveToPickle: when true and ``savePath`` is non-empty, the resulting
            DataFrame is also written with ``DataFrame.to_pickle``.
        savePath: destination used when ``saveToPickle`` is true.

    Returns:
        DataFrame indexed by pattern with one column per emotion holding the
        mean feature value; combinations that never occur are NaN.
    """
    collected = {}
    for _, row in data.iterrows():
        emo = row.emotion
        key = row['index']
        for patt, value in feature_table[key].items():
            tpatt = patt.split('_')[1]
            # setdefault replaces the manual "create if missing" checks.
            collected.setdefault(tpatt, {}).setdefault(emo, []).append(value)
    averaged = {
        tpatt: {emo: np.mean(values) for emo, values in per_emotion.items()}
        for tpatt, per_emotion in collected.items()
    }
    matrix = pd.DataFrame(averaged).T
    if(saveToPickle and savePath != ''):
        matrix.to_pickle(savePath)
    return matrix

def build_multiacumatrix(data,feature_table,saveToPickle = False, savePath = '',size = 20):
    """Average a ``size``-component feature per (pattern, emotion) pair.

    ``feature_table`` maps ``utterance key -> {feature key: sequence}`` where
    each sequence has at least ``size`` components and the pattern name is the
    second ``'_'``-separated field of the feature key. One DataFrame is built
    per component: rows are patterns, columns are all emotions found in
    ``data`` and each cell is the mean of that component (NaN when the
    pattern never occurs with the emotion).

    When ``saveToPickle`` is true and ``savePath`` is non-empty the list is
    also pickled as ``{'multimatrix': [...]}``; a failed save is only printed.

    Returns:
        list: ``size`` DataFrames, one per feature component.
    """
    multimatrix = [dict() for _ in range(size)]
    emotions_list = list(data['emotion'].unique())

    # Gather the raw values per component, pattern and emotion.
    for _, row in data.iterrows():
        emo = row.emotion
        key = row['index']
        for patt in feature_table[key].keys():
            tpatt = patt.split('_')[1]
            if tpatt not in multimatrix[0]:
                # First sighting: create an empty bucket for every emotion in
                # every component matrix.
                for component in multimatrix:
                    component[tpatt] = {emotion: [] for emotion in emotions_list}
            for position in range(size):
                multimatrix[position][tpatt][emo].append(feature_table[key][patt][position])

    # Reduce every bucket to its mean and convert each component to a frame.
    for position in range(size):
        averaged = {
            tpatt: {
                emotion: (np.mean(values) if len(values) > 0 else np.nan)
                for emotion, values in buckets.items()
            }
            for tpatt, buckets in multimatrix[position].items()
        }
        multimatrix[position] = pd.DataFrame(averaged).T

    if saveToPickle and savePath != '':
        try:
            print('Saving Pickle')
            with open(savePath,'wb') as f:
                pickle.dump({'multimatrix' : multimatrix},f,pickle.HIGHEST_PROTOCOL)
                print('Successfully saved matrix to '+savePath)
        except Exception as e:
            print('Unable to save data to pickle', e)
            print('Patterns probably not saved.')
    return multimatrix

############################## LOAD MATRICES ###############################################

def get_acumatrix(filePath):
    """Load a pickled matrix with ``pd.read_pickle``.

    Any error is printed and ``None`` is returned instead of raising.
    """
    try:
        return pd.read_pickle(filePath)
    except Exception as e:
        print('Error loading matrix: ', e)
    return None

def get_multiacumatrix(filePath):
    """Load the ``'multimatrix'`` entry of a pickle written by build_multiacumatrix().

    Any error is printed and ``None`` is returned instead of raising.
    """
    try:
        with open(filePath,'rb') as f:
            return pickle.load(f)['multimatrix']
    except Exception as e:
        print('Error loading matrix: ', e)
    return None

In [377]:
# full_matrix = build_acumatrix(X_train,full_feature_table,saveToPickle=True,savePath = 'pickles/patterns/full_rmse_matrix_fd.pickle')
# full_matrix = full_matrix.fillna(np.max(full_matrix))

# wc_matrix = build_acumatrix(X_train,wc_feature_table,saveToPickle=True,savePath = 'pickles/patterns/wc_rmse_matrix_fd.pickle')
# wc_matrix = wc_matrix.fillna(np.max(wc_matrix))

# cw_matrix = build_acumatrix(X_train,cw_feature_table,saveToPickle=True,savePath = 'pickles/patterns/cw_rmse_matrix_fd.pickle')
# cw_matrix = cw_matrix.fillna(np.max(cw_matrix))

#FOR MFCC 20 
# Number of feature components per pattern; one matrix is built per component.
MATRIX_SIZE = 20
# NOTE(review): the three matrices below are built from the full `data` frame,
# while the commented-out single-matrix variants above use `X_train`. If
# `data` also contains the test utterances, their labels leak into the
# weights - confirm which frame is intended.
# NOTE(review): missing cells are filled with np.max(<matrix>), which for a
# DataFrame presumably yields the per-column maximum - confirm.
savePath = 'pickles/patterns/full_mfcc20_matrix_fd.pickle'
# full_matrices = get_multiacumatrix(savePath)
full_matrices = build_multiacumatrix(data,full_feature_table,size = MATRIX_SIZE,saveToPickle=True,savePath=savePath)
for i in range(MATRIX_SIZE):
    full_matrices[i] = full_matrices[i].fillna(np.max(full_matrices[i]))
    
savePath = 'pickles/patterns/wc_mfcc20_matrix_fd.pickle'
# wc_matrices = get_multiacumatrix(savePath)
wc_matrices = build_multiacumatrix(data,wc_feature_table,size = MATRIX_SIZE,saveToPickle=True,savePath=savePath)
for i in range(MATRIX_SIZE):
    wc_matrices[i] = wc_matrices[i].fillna(np.max(wc_matrices[i]))

savePath = 'pickles/patterns/cw_mfcc20_matrix_fd.pickle'
# cw_matrices = get_multiacumatrix(savePath)
cw_matrices = build_multiacumatrix(data,cw_feature_table,size = MATRIX_SIZE,saveToPickle=True,savePath=savePath)
for i in range(MATRIX_SIZE):
    cw_matrices[i] = cw_matrices[i].fillna(np.max(cw_matrices[i]))


Saving Pickle
Successfully saved matrix to pickles/patterns/full_mfcc20_matrix_fd.pickle
Saving Pickle
Successfully saved matrix to pickles/patterns/wc_mfcc20_matrix_fd.pickle
Saving Pickle
Successfully saved matrix to pickles/patterns/cw_mfcc20_matrix_fd.pickle


In [334]:
# MFCC 1 
full_matrix = full_matrices[0]
wc_matrix = wc_matrices[0]
cw_matrix = cw_matrices[0]

In [335]:
summatrix = calculate_final_matrix_sum(em_df,full_matrix)
mulmatrix = calculate_final_matrix(em_df,full_matrix)
mul2matrix = calculate_final_matrix_mul(em_df,full_matrix)
em_df.head()

########### FOR MFCC 20 ################
# summatrix = calculate_final_multimatrix_sum(em_df,full_matrices)
# mulmatrix = calculate_final_multimatrix(em_df,full_matrices)
# mul2matrix = calculate_final_multimatrix_mul(em_df,full_matrices)
# em_df.head()

Unnamed: 0,ang,hap,neu,sad
.+ a,1.335632,1.773339,1.233435,1.653975
.+ a big,1.514692,3.875035,2.202922,2.782855
.+ a bit,1.21858,1.937517,2.090332,2.043222
.+ a day,1.387807,3.667312,2.665174,2.600678
.+ a dog,2.075031,1.937517,1.228233,2.043222


In [336]:
mulmatrix.head()

Unnamed: 0,ang,hap,neu,sad
.+ a,0.65208,0.680055,0.435924,0.479885
.+ a big,0.648942,0.0,0.765781,0.806431
.+ a bit,0.28783,0.0,0.0,0.0
.+ a day,0.738959,0.0,0.613493,1.219806
.+ a dog,0.0,0.0,0.420985,0.0


In [337]:
# print(summatrix.shape,mulmatrix.shape,em_df.shape,full_matrix.shape,cw_matrix.shape,wc_matrix.shape)

# Full Pattern Feature

# Train

In [338]:
vectors = get_frequency_vectors(X_train,patterns_list)
scores = calculate_scores(mulmatrix,vectors)
pred_y = list(scores['pred_code'])

In [306]:
precision = precision_score(list(y_train),pred_y,average='weighted')
recall = recall_score(list(y_train),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.3187676633676476
0.30220713073005095
0.3102665738819127


In [307]:
vectors = get_frequency_vectors(X_train,patterns_list)
scores = calculate_scores(summatrix,vectors)
pred_y = list(scores['pred_code'])

precision = precision_score(list(y_train),pred_y,average='weighted')
recall = recall_score(list(y_train),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.7448454113066184
0.702546689303905
0.7230779794030133


In [308]:
vectors = get_frequency_vectors(X_train,patterns_list)
scores = calculate_scores(mul2matrix,vectors)
pred_y = list(scores['pred_code'])

precision = precision_score(list(y_train),pred_y,average='weighted')
recall = recall_score(list(y_train),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.7222409677772846
0.669269949066214
0.6947472274444265


## Testing

In [309]:
vectors = get_frequency_vectors(X_test,patterns_list)
scores = calculate_scores(mulmatrix,vectors)
pred_y = list(scores['pred_code'])

In [310]:
precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.28410839933825766
0.2921615201900237
0.2880786903379087


In [311]:
vectors = get_frequency_vectors(X_test,patterns_list)
scores = calculate_scores(mul2matrix,vectors)
pred_y = list(scores['pred_code'])
precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.5986706435045359
0.5629453681710214
0.5802586438778765


In [312]:
vectors = get_frequency_vectors(X_test,patterns_list)
scores = calculate_scores(summatrix,vectors)
pred_y = list(scores['pred_code'])
precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.6113322892569504
0.5748218527315915
0.5925151659567698


# WildCard Weights

## Train

In [313]:
summatrix = calculate_final_matrix_sum(em_df,wc_matrix)
mulmatrix = calculate_final_matrix(em_df,wc_matrix)
mul2matrix = calculate_final_matrix_mul(em_df,wc_matrix)
em_df.head()

# mulmatrix = em_df + matrix

######## FOR MFCC 20 #########
# summatrix = calculate_final_multimatrix_sum(em_df,wc_matrices)
# mulmatrix = calculate_final_multimatrix(em_df,wc_matrices)
# mul2matrix = calculate_final_multimatrix_mul(em_df,wc_matrices)
# em_df.head()

Unnamed: 0,ang,hap,neu,sad
.+ a,1.335632,1.773339,1.233435,1.653975
.+ a big,1.514692,3.875035,2.202922,2.782855
.+ a bit,1.21858,1.937517,2.090332,2.043222
.+ a day,1.387807,3.667312,2.665174,2.600678
.+ a dog,2.075031,1.937517,1.228233,2.043222


In [314]:
vectors = get_frequency_vectors(X_train,patterns_list)
scores = calculate_scores(summatrix,vectors)
pred_y = list(scores['pred_code'])

precision = precision_score(list(y_train),pred_y,average='weighted')
recall = recall_score(list(y_train),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.7415662562860448
0.700169779286927
0.7202737105535195


In [315]:
vectors = get_frequency_vectors(X_train,patterns_list)
scores = calculate_scores(mulmatrix,vectors)
pred_y = list(scores['pred_code'])

precision = precision_score(list(y_train),pred_y,average='weighted')
recall = recall_score(list(y_train),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.3466977506030511
0.31171477079796267
0.3282768973938682


In [316]:
vectors = get_frequency_vectors(X_train,patterns_list)
scores = calculate_scores(mul2matrix,vectors)
pred_y = list(scores['pred_code'])

precision = precision_score(list(y_train),pred_y,average='weighted')
recall = recall_score(list(y_train),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.7205521557316455
0.6706281833616299
0.694694382369676


## Test

In [317]:
vectors = get_frequency_vectors(X_test,patterns_list)
scores = calculate_scores(summatrix,vectors)
pred_y = list(scores['pred_code'])

precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.6146234821838491
0.5748218527315915
0.594056739541341


In [318]:
vectors = get_frequency_vectors(X_test,patterns_list)
scores = calculate_scores(mulmatrix,vectors)
pred_y = list(scores['pred_code'])

precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.3049228908295152
0.3008709422011085
0.3028833654631287


In [319]:
vectors = get_frequency_vectors(X_test,patterns_list)
scores = calculate_scores(mul2matrix,vectors)
pred_y = list(scores['pred_code'])

precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.6072264621141198
0.564528899445764
0.5850997531008266


# ContentWord Weights

In [320]:
summatrix = calculate_final_matrix_sum(em_df,cw_matrix)
mulmatrix = calculate_final_matrix(em_df,cw_matrix)
mul2matrix = calculate_final_matrix_mul(em_df,cw_matrix)
em_df.head()

############ FOR MFCC 20 ############
# summatrix = calculate_final_multimatrix_sum(em_df,cw_matrices)
# mulmatrix = calculate_final_multimatrix(em_df,cw_matrices)
# mul2matrix = calculate_final_multimatrix_mul(em_df,cw_matrices)


Unnamed: 0,ang,hap,neu,sad
.+ a,1.335632,1.773339,1.233435,1.653975
.+ a big,1.514692,3.875035,2.202922,2.782855
.+ a bit,1.21858,1.937517,2.090332,2.043222
.+ a day,1.387807,3.667312,2.665174,2.600678
.+ a dog,2.075031,1.937517,1.228233,2.043222


## Train

In [321]:
vectors = get_frequency_vectors(X_train,patterns_list)
scores = calculate_scores(summatrix,vectors)
pred_y = list(scores['pred_code'])

precision = precision_score(list(y_train),pred_y,average='weighted')
recall = recall_score(list(y_train),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.7420164285845253
0.7011884550084889
0.7210249342488


In [322]:
vectors = get_frequency_vectors(X_train,patterns_list)
scores = calculate_scores(mulmatrix,vectors)
pred_y = list(scores['pred_code'])

precision = precision_score(list(y_train),pred_y,average='weighted')
recall = recall_score(list(y_train),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.3257189908340611
0.31069609507640067
0.3180302314788517


In [323]:
vectors = get_frequency_vectors(X_train,patterns_list)
scores = calculate_scores(mul2matrix,vectors)
pred_y = list(scores['pred_code'])

precision = precision_score(list(y_train),pred_y,average='weighted')
recall = recall_score(list(y_train),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.7229990440256301
0.6730050933786078
0.6971068725367464


## Test

In [324]:
vectors = get_frequency_vectors(X_test,patterns_list)
scores = calculate_scores(summatrix,vectors)
pred_y = list(scores['pred_code'])

precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.6152201537413479
0.5756136183689627
0.5947582392813809


In [325]:
vectors = get_frequency_vectors(X_test,patterns_list)
scores = calculate_scores(mulmatrix,vectors)
pred_y = list(scores['pred_code'])

precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.27840602671199105
0.2921615201900237
0.285117961706323


In [326]:
vectors = get_frequency_vectors(X_test,patterns_list)
scores = calculate_scores(mul2matrix,vectors)
pred_y = list(scores['pred_code'])

precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print(precision)
print(recall)
print(f1)

0.5972116221812408
0.5613618368962787
0.5787320788625733
