In [4]:
import pandas as pd
import os
import librosa
import numpy as np
import scipy
import re


#Data handling
from sklearn.model_selection import train_test_split

#Pickling
from six.moves import cPickle as pickle

# Models 
from sklearn.linear_model import LogisticRegression, Perceptron, SGDClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

#Metrics
from sklearn.metrics import recall_score, precision_score, f1_score

#HTTP
import requests
import json

#
import collections

import matplotlib.pyplot as plt
import matplotlib.style as ms
import librosa.display
import IPython.display as ipd
import seaborn
ms.use('seaborn-muted')
%matplotlib inline


# Models

In [5]:
def log_reg(x_train,y_train,x_test,y_test, delete = True):
    logreg = LogisticRegression()
    logreg.fit(x_train, y_train)
    Y_pred = logreg.predict(x_test)
    acc_log = round(logreg.score(x_test, y_test) * 100, 2)
    precision = precision_score(y_test, Y_pred)
    recall = recall_score(y_test,Y_pred)
    if(delete):
        del logreg
        return Y_pred, acc_log, precision, recall
    return logreg,Y_pred,acc_log, precision, recall

# _, acc_log = log_reg(x_train,ang_train, x_test, ang_test)
# print(acc_log)

def svc(x_train,y_train,x_test,y_test, delete = True):
    svc = SVC()
    svc.fit(x_train,y_train)
    Y_svcpred = svc.predict(x_test)
    acc_svc = round(svc.score(x_test, y_test) * 100,2)
    precision = precision_score(y_test, Y_svcpred)
    recall = recall_score(y_test,Y_svcpred)
    if(delete):
        del svc 
        return Y_svcpred, acc_svc,precision, recall
    return svc, Y_svcpred, acc_svc,precision,recall

# _, acc_svc = svc(x_train,ang_train,x_test,ang_test)
# print(acc_svc)

def knn(x_train,y_train,x_test,y_test,n_neighbors = 3,delete = True):
    knn = KNeighborsClassifier(n_neighbors = n_neighbors)
    knn.fit(x_train, y_train)
    Y_knnpred = knn.predict(x_test)
    acc_knn = round(knn.score(x_test, y_test) * 100, 2)
    precision = precision_score(y_test, Y_knnpred)
    recall = recall_score(y_test,Y_knnpred)
    if(delete):
        del knn
        return Y_knnpred, acc_knn,precision,recall
    return knn, Y_knnpred, acc_knn, precision,recall
# _,acc_knn = knn(x_train,ang_train,x_test,ang_test)
# print(acc_knn)

def gaussian(x_train,y_train,x_test,y_test,delete = True):
    gaussian = GaussianNB()
    gaussian.fit(x_train, y_train)
    Y_gaussianpred = gaussian.predict(x_test)
    acc_gaussian = round(gaussian.score(x_test, y_test) * 100, 2)
    precision = precision_score(y_test, Y_gaussianpred)
    recall = recall_score(y_test,Y_gaussianpred)
    if(delete):
        del gaussian
        return Y_gaussianpred, acc_gaussian, precision,recall
    return gaussian, Y_gaussianpred, acc_gaussian,precision,recall

# _, acc_gaussian = gaussian(x_train,ang_train,x_test,ang_test)
# print(acc_gaussian)

def perceptron(x_train,y_train,x_test,y_test,delete = True):
    perceptron = Perceptron()
    perceptron.fit(x_train, y_train)
    Y_perceptronpred = perceptron.predict(x_test)
    acc_perceptron = round(perceptron.score(x_test, y_test) * 100, 2)
    precision = precision_score(y_test, Y_perceptronpred)
    recall = recall_score(y_test,Y_perceptronpred)
    if(delete):
        del perceptron
        return Y_perceptronpred, acc_perceptron,precision,recall
    return perceptron, Y_perceptronpred ,acc_perceptron,precision,recall
# _, acc_perceptron = perceptron(x_train,ang_train,x_test,ang_test)
# print(acc_perceptron)

def linear_svc(x_train,y_train,x_test,y_test,delete = True):
    linear_svc = LinearSVC()
    linear_svc.fit(x_train, y_train)
    Y_linearsvcpred = linear_svc.predict(x_test)
    acc_linear_svc = round(linear_svc.score(x_test, y_test) * 100, 2)
    precision = precision_score(y_test, Y_linearsvcpred)
    recall = recall_score(y_test,Y_linearsvcpred)
    if(delete):
        del linear_svc
        return Y_linearsvcpred, acc_linear_svc,precision,recall
    return linear_svc, Y_linearsvcpred, acc_linear_svc,precision,recall
# _, acc_linear_svc = linear_svc(x_train,ang_train,x_test,ang_test)
# print(acc_linear_svc)

def sgd(x_train,y_train,x_test,y_test,delete = True):
    sgd = SGDClassifier()
    sgd.fit(x_train, y_train)
    Y_linearsgdpred = sgd.predict(x_test)
    acc_sgd = round(sgd.score(x_test, y_test) * 100, 2)
    precision = precision_score(y_test, Y_linearsgdpred)
    recall = recall_score(y_test,Y_linearsgdpred)
    if(delete):
        del sgd
        return Y_linearsgdpred , acc_sgd,precision,recall
    return sgd, Y_linearsgdpred, acc_sgd,precision,recall
# _, acc_sgd = sgd(x_train,ang_train,x_test,ang_test)
# print(acc_sgd)

def decision_tree(x_train,y_train,x_test,y_test,delete = True):
    decision_tree = DecisionTreeClassifier()
    decision_tree.fit(x_train, y_train)
    Y_dectreepred = decision_tree.predict(x_test)
    acc_decision_tree = round(decision_tree.score(x_test, y_test) * 100, 2)
    precision = precision_score(y_test, Y_dectreepred)
    recall = recall_score(y_test,Y_dectreepred)
    if(delete):
        del decision_tree
        return Y_dectreepred , acc_decision_tree,precision,recall
    return decision_tree, Y_dectreepred , acc_decision_tree,precision,recall
# _, acc_decision_tree = decision_tree(x_train,ang_train,x_test,ang_test)
# print(acc_decision_tree)

def random_forest(x_train,y_train,x_test,y_test,n_estimators = 100, delete = True):
    random_forest = RandomForestClassifier(n_estimators= n_estimators)
    random_forest.fit(x_train, y_train)
    Y_pred = random_forest.predict(x_test)
    acc_random_forest = round(random_forest.score(x_test, y_test) * 100, 2)
    precision = precision_score(y_test, Y_pred)
    recall = recall_score(y_test,Y_pred)
    if(delete):
        del random_forest
        return Y_pred, acc_random_forest,precision,recall
    return random_forest, Y_pred, acc_random_forest,precision,recall
# _, acc_random_forest = random_forest(x_train,ang_train, x_test , ang_test)
# print(acc_random_forest)

def get_f1_score(precision,recall):
    return 2 * (precision * recall) / (precision + recall)

def compare_models(x_train, y_train, x_test, y_test):
    _,acc_svc,pre_svc,rec_svc = svc(x_train,y_train,x_test,y_test)
    f1_svc = get_f1_score(pre_svc,rec_svc)
    _,acc_knn,pre_knn,rec_knn = knn(x_train,y_train,x_test,y_test)
    f1_knn = get_f1_score(pre_knn,rec_knn)
    _,acc_log,pre_log,rec_log = log_reg(x_train,y_train,x_test,y_test)
    f1_log = get_f1_score(pre_log,rec_log)
    _,acc_random_forest,pre_rf,rec_rf = random_forest(x_train,y_train,x_test,y_test)
    f1_rf = get_f1_score(pre_rf,rec_rf)
    _,acc_gaussian,pre_gau,rec_gau = gaussian(x_train,y_train,x_test,y_test)
    f1_gau = get_f1_score(pre_gau,rec_gau)
    _,acc_perceptron,pre_per,rec_per = perceptron(x_train,y_train,x_test,y_test)
    f1_per = get_f1_score(pre_per,rec_per)
    _,acc_sgd,pre_sgd,rec_sgd = sgd(x_train,y_train,x_test,y_test)
    f1_sgd =get_f1_score(pre_sgd,rec_sgd)
    _,acc_linear_svc,pre_lsvc,rec_lsvc = linear_svc(x_train,y_train,x_test,y_test)
    f1_lsvc = get_f1_score(pre_lsvc,rec_lsvc)
    _,acc_decision_tree,pre_dtree,rec_dtree =  decision_tree(x_train,y_train,x_test,y_test)
    f1_dtree = get_f1_score(pre_dtree,rec_dtree)
    models = pd.DataFrame({
        'Model': ['Support Vector Machines', 'KNN', 'Logistic Regression', 
                  'Random Forest', 'Naive Bayes', 'Perceptron', 
                  'Stochastic Gradient Decent', 'Linear SVC', 
                  'Decision Tree'],
        'Accuracy': [acc_svc, acc_knn, acc_log, 
                  acc_random_forest, acc_gaussian, acc_perceptron, 
                  acc_sgd, acc_linear_svc, acc_decision_tree],
        'Precision': [pre_svc, pre_knn,pre_log,
                      pre_rf,pre_gau,pre_per,pre_sgd,
                      pre_lsvc,pre_dtree],
        'Recall': [rec_svc,rec_knn,rec_log,rec_rf,rec_gau,rec_per,rec_sgd,rec_lsvc,rec_dtree],
        'F1': [f1_svc,f1_knn,f1_log,f1_rf,f1_gau,f1_per,f1_sgd,f1_lsvc,f1_dtree]
    })
    return models[['Model','Accuracy','Precision','Recall','F1']] #.sort_values(by='Score', ascending=False)

def map_for_emotion(y_list, emotional_mapping,emotion):
    return list(map(lambda x: 1 if x == emotional_mapping[emotion] else 0,y_list))

def compare_models_x_emotions(x_train, x_test, emotions):
    result = {'Model': ['Support Vector Machines', 'KNN', 'Logistic Regression', 
                  'Random Forest', 'Naive Bayes', 'Perceptron', 
                  'Stochastic Gradient Decent', 'Linear SVC', 
                  'Decision Tree']}
    for emotion,data in emotions.iteritems():
        y_train = data[0]
        y_test = data[1]
        _,acc_svc = svc(x_train,y_train,x_test,y_test)
        _,acc_knn = knn(x_train,y_train,x_test,y_test)
        _,acc_log = log_reg(x_train,y_train,x_test,y_test)
        _,acc_random_forest = random_forest(x_train,y_train,x_test,y_test)
        _,acc_gaussian = gaussian(x_train,y_train,x_test,y_test)
        _,acc_perceptron = perceptron(x_train,y_train,x_test,y_test)
        _,acc_sgd = sgd(x_train,y_train,x_test,y_test)
        _,acc_linear_svc = linear_svc(x_train,y_train,x_test,y_test)
        _,acc_decision_tree =  decision_tree(x_train,y_train,x_test,y_test)
        result[emotion] = [acc_svc, acc_knn, acc_log, 
                  acc_random_forest, acc_gaussian, acc_perceptron, 
                  acc_sgd, acc_linear_svc, acc_decision_tree]
    models = pd.DataFrame(result)
    models = models[['Model'] + list(emotions.iterkeys())]
    return models
        
    

In [25]:
def get_pattern(text):
    text = json.dumps(text)
    url = 'http://192.168.2.101:7878/api/get_patt'
    data = dict(input_tweets = text)
    resp = requests.post(url=url, data=data)
    r = json.loads(resp.text)
    return map(lambda x: x['pattern'],r)
    
def get_deep_emotion(text):
    text = json.dumps(text)
    url = 'http://192.168.2.101:7878/api/get_emo'
    data = dict(input_tweets = text)
    resp = requests.post(url=url, data=data)
    r = json.loads(resp.text)
    return r

def clean_text(text, remove_actions = True):
    punct_str = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~«»“…‘”'
    if(remove_actions):
        text = re.sub(r" ?\[[^)]+\]", "", text)
    for p in punct_str:
        text = text.replace(p,' ')
    text = re.sub(' +', ' ', text)
    return text.lower().strip()

def get_f1_score(precision,recall):
    return 2 * (precision * recall) / (precision + recall)

def get_patterns_load(data,patterns_df, emotion):
    index = data[data.emotion == emotion ]['index']
    patterns = patterns_df.loc[list(index)]
    load = []
    for pat in patterns.iterrows():
        load = load + list(pat[1].dropna())
    return load


def extract_patterns(data,extract=False):
    if(extract):
        patterns = {}
        for index, row in data.iterrows():
            patterns[row['index']] = set(get_pattern([row['text']])[0].values())
            print('Extracted pattern from '+ row['index'] + ' index:'+ str(index))
            print('Size: ', len(patterns[row['index']]), 'Patterns size', len(patterns))
        try:
            print('Saving Pickle')
            with open('pickles/patterns/pattern.pickle','wb') as f:
                save = {
                    'patterns' : patterns
                }
                pickle.dump(save,f,pickle.HIGHEST_PROTOCOL)
                print('Successfully saved in pattern.pickle')
                return patterns
        except Exception as e:
            print('Unable to save data to pickle', e)
            print('Patterns probably not saved.')
            return patterns
    else:
        try:
            with open('pickles/patterns/pattern.pickle','rb') as f:
                save = pickle.load(f)
                patterns = save['patterns']
                del save
                returning = {}
                for key in list(data['index']):
                    returning[key] = patterns[key]
                return returning
        except Exception as e:
            print('Error loading base datasets pickle: ', e)
            

def build_emotions_counter(data,patterns_df):
    emotions_counter ={}
    emotions_list = list(data['emotion'].unique())
    for emotion in emotions_list:
        load = get_patterns_load(data,patterns_df,emotion)
        emotions_counter[emotion] = collections.Counter(load)
    return emotions_counter

def build_frequencyframe(all_patterns,emotions_counter):
    df_patt = {}
    for pattern in all_patterns:
        df_patt[pattern] = {}
        for emotion in emotions_counter:
            df_patt[pattern][emotion] = emotions_counter[emotion][pattern]
    return pd.DataFrame(df_patt).T

def build_pfief(df_patt):
    ief = ((df_patt+1).rdiv(df_patt.sum(axis=1)+1, axis=0)+1).apply(np.log10)
    pf = ((df_patt.sum(axis=0)+1)/(df_patt+1)).apply(np.log10)
    return ief * pf

def balance_data(data):
    min_sample = min(data.groupby('emotion').count()['index'])
    emotions_list = list(data['emotion'].unique())
    samples = []
    for emotion in emotions_list:
        samples.append(data[data.emotion == emotion].sample(n=min_sample))
    result = pd.concat(samples).sample(frac=1)
    return result
        
def two_emotions(data,emotional_mapping,emotion1,emotion2):
    emotion_code = emotional_mapping[emotion1]
    emotion_sample = data[data.emotion_code == emotion_code]
    emotion_code2 = emotional_mapping[emotion2]
    emotion_sample2 = data[data.emotion_code == emotion_code2]
    if(len(emotion_sample2) < len(emotion_sample)):
        emotion_sample = emotion_sample.sample(n=len(emotion_sample2))
    else:
        emotion_sample2 = emotion_sample2.sample(n=len(emotion_sample))
    sample = pd.concat([emotion_sample,emotion_sample2]).sample(frac=1)
    return sample

def filter_word_count(data, n_count):
    return data[map(lambda x: len(x.split(' ')) >= n_count,data['text'])]

def remove_empty_patterns(data,patterns):
    empty_patterns = [k for k, v in patterns.iteritems() if len(v) < 1]
    patterns = { k:v for k, v in patterns.iteritems() if len(v) > 1 }
    data = filter(lambda x: x[1]['index'] not in empty_patterns ,data.iterrows())
    data = pd.DataFrame.from_items(data).T
    return data,patterns





In [47]:
def load_data(word_count,emotional_mapping):
    # full = generate_IEMOCAP_df()
    data = pd.read_csv('data/IEMOCAP_sentences.csv',index_col=0)
    data['emotion_code'] = data['emotion'].map( emotional_mapping ).astype(int)
    # Take away fear, surprise,disgust, xxx and others. Not enough data
    data = data[data.emotion_code < 6]
    # Clean Transcripts
    data['text'] = data['text'].apply(clean_text)
    # Filter Word Count
    data = filter_word_count(data, word_count)
    patterns = extract_patterns(data)
    data,patterns = remove_empty_patterns(data,patterns)
    return data,patterns

def build_model(data,patterns):
    transcript_order = list(data['index'])
    patterns_df = pd.DataFrame.from_dict(patterns, orient='index')
    patterns_df = patterns_df.loc[transcript_order]
    emotions_counter = build_emotions_counter(X_train,patterns_df)
    all_patterns = []
    for pat in patterns_df.iterrows():
        all_patterns = all_patterns + list(pat[1].dropna())
        
    df_patt = build_frequencyframe(all_patterns,emotions_counter)
    em_df = build_pfief(df_patt)
    return em_df

def get_frequency_vectors(data,patterns_list):
    patterns = extract_patterns(data)
    transcript_order = list(data['index'])
    frequency_vectors = []
    for index in patterns:
        frequency_vectors.append(np.isin(patterns_list,np.array(list(patterns[index]))))
    vectors = pd.DataFrame(frequency_vectors,columns=patterns_list,index=patterns.keys())
    vectors = vectors.loc[transcript_order]
    vectors = vectors * 1
    return vectors
    
def calculate_scores(em_df,vectors):
    em_matrix = em_df.T.as_matrix()
    emotional_scores = []
    for index, vector in vectors.iterrows():
        emotional_scores.append(em_matrix.dot(vector))
    emotions_list = list(em_df.columns)
    scores = pd.DataFrame(emotional_scores,columns=emotions_list,index=list(vectors.index))
    scores['pred_emotion'] = map(lambda x: x[1].idxmin(),scores.iterrows())
    scores['pred_code'] = scores['pred_emotion'].map(emotional_mapping).astype(int)
    return scores
    

In [57]:
emotional_mapping = {'ang': 0, 'sad': 1, 'exc': 2, 'neu': 3,'fru': 4,'hap': 5,'fea': 6,'sur': 7,'dis': 8, 'xxx':9,'oth':10}
data,patterns = load_data(3,emotional_mapping)
# data = two_emotions(data,emotional_mapping,'sad','exc')
# Balance Data
# data = balance_data(data)
y = data.emotion_code
X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.3)

data.groupby('emotion').count()['index'] #  6,453 Total

emotion
ang    1015
exc     915
fru    1654
hap     519
neu    1416
sad     934
Name: index, dtype: int64

In [58]:
em_df = build_model(X_train,patterns)
patterns_list = np.array(list(em_df.index))

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


In [69]:
em_df

Unnamed: 0,ang,exc,fru,hap,neu,sad
.+ .+ a,1.860466,1.956696,1.509439,2.672467,1.748746,2.140179
.+ .+ and,1.822112,1.986342,1.702201,2.710334,1.800145,1.866371
.+ .+ for,2.460330,2.471034,1.672305,3.043322,2.040137,2.114120
.+ .+ i,1.803318,1.938326,1.349158,2.262159,1.741773,1.693435
.+ .+ if,2.047230,2.710636,1.618180,3.618933,2.550082,3.621106
.+ .+ in,2.191232,2.081067,1.834967,2.677360,1.949644,2.498426
.+ .+ is,2.068165,2.162333,1.498328,3.106940,2.080736,3.184744
.+ .+ me,1.538971,2.580319,1.586477,3.639559,2.669524,2.136159
.+ .+ my,2.110334,2.118285,1.473528,3.307733,2.735106,3.327173
.+ .+ of,2.153954,2.212946,1.838814,2.617645,1.802802,1.830795


# Score - Training Data

In [59]:
vectors = get_frequency_vectors(X_train,patterns_list)

In [60]:
scores = calculate_scores(em_df,vectors)
pred_y = list(scores['pred_code'])

In [61]:
# pred_y, y_train
precision = precision_score(list(y_train),pred_y,average='macro')
recall = recall_score(list(y_train),pred_y,average='macro')
f1 = get_f1_score(precision,recall)
print('Precision Macro',precision)
print('Recall Macro',recall)
print('F1 Macro',f1)
print(' ')
precision = precision_score(list(y_train),pred_y,average='micro')
recall = recall_score(list(y_train),pred_y,average='micro')
f1 = get_f1_score(precision,recall)
print('Precision Micro',precision)
print('Recall Micro',recall)
print('F1 Micro',f1)
print(' ')
precision = precision_score(list(y_train),pred_y,average='weighted')
recall = recall_score(list(y_train),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print('Precision Weighted',precision)
print('Recall Weighted',recall)
print('F1 Weighted',f1)

('Precision Macro', 0.6979970716381977)
('Recall Macro', 0.4262270280843039)
('F1 Macro', 0.5292631914390215)
 
('Precision Micro', 0.5063094974540624)
('Recall Micro', 0.5063094974540624)
('F1 Micro', 0.5063094974540624)
 
('Precision Weighted', 0.6512813891221755)
('Recall Weighted', 0.5063094974540624)
('F1 Weighted', 0.5697176034322818)


In [66]:
precision = precision_score(list(y_train),pred_y,average='macro')
recall = recall_score(list(y_train),pred_y,average='macro')
f1 = get_f1_score(precision,recall)
print('Precision Macro',precision)
print('Recall Macro',recall)
print('F1 Macro',f1)
print(' ')
precision = precision_score(list(y_train),pred_y,average='micro')
recall = recall_score(list(y_train),pred_y,average='micro')
f1 = get_f1_score(precision,recall)
print('Precision Micro',precision)
print('Recall Micro',recall)
print('F1 Micro',f1)
print(' ')
precision = precision_score(list(y_train),pred_y,average='weighted')
recall = recall_score(list(y_train),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print('Precision Weighted',precision)
print('Recall Weighted',recall)
print('F1 Weighted',f1)
 

ValueError: Found input variables with inconsistent numbers of samples: [4517, 1936]

#  Testing

In [62]:
vectors = get_frequency_vectors(X_test,patterns_list)

In [63]:
scores = calculate_scores(em_df,vectors)
pred_y = list(scores['pred_code'])

In [65]:
precision = precision_score(list(y_test),pred_y,average='macro')
recall = recall_score(list(y_test),pred_y,average='macro')
f1 = get_f1_score(precision,recall)
print('Precision Macro',precision)
print('Recall Macro',recall)
print('F1 Macro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='micro')
recall = recall_score(list(y_test),pred_y,average='micro')
f1 = get_f1_score(precision,recall)
print('Precision Micro',precision)
print('Recall Micro',recall)
print('F1 Micro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print('Precision Weighted',precision)
print('Recall Weighted',recall)
print('F1 Weighted',f1)

('Precision Macro', 0.5801429755949482)
('Recall Macro', 0.3260848618801862)
('F1 Macro', 0.4175017236166719)
 
('Precision Micro', 0.40702479338842973)
('Recall Micro', 0.40702479338842973)
('F1 Micro', 0.40702479338842973)
 
('Precision Weighted', 0.5394928243436844)
('Recall Weighted', 0.40702479338842973)
('F1 Weighted', 0.46398915614305364)


# Sad & Hap

In [75]:
precision = precision_score(list(y_test),pred_y,average='macro')
recall = recall_score(list(y_test),pred_y,average='macro')
f1 = get_f1_score(precision,recall)
print('Precision Macro',precision)
print('Recall Macro',recall)
print('F1 Macro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='micro')
recall = recall_score(list(y_test),pred_y,average='micro')
f1 = get_f1_score(precision,recall)
print('Precision Micro',precision)
print('Recall Micro',recall)
print('F1 Micro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print('Precision Weighted',precision)
print('Recall Weighted',recall)
print('F1 Weighted',f1)

('Precision Macro', 0.5055895790803927)
('Recall Macro', 0.5053285145028265)
('F1 Macro', 0.5054590130822941)
 
('Precision Micro', 0.5)
('Recall Micro', 0.5)
('F1 Micro', 0.5)
 
('Precision Weighted', 0.5068047049674346)
('Recall Weighted', 0.5)
('F1 Weighted', 0.5033793569566477)


# Anger & Hap

In [95]:
precision = precision_score(list(y_test),pred_y,average='macro')
recall = recall_score(list(y_test),pred_y,average='macro')
f1 = get_f1_score(precision,recall)
print('Precision Macro',precision)
print('Recall Macro',recall)
print('F1 Macro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='micro')
recall = recall_score(list(y_test),pred_y,average='micro')
f1 = get_f1_score(precision,recall)
print('Precision Micro',precision)
print('Recall Micro',recall)
print('F1 Micro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print('Precision Weighted',precision)
print('Recall Weighted',recall)
print('F1 Weighted',f1)

('Precision Macro', 0.546664167916042)
('Recall Macro', 0.546149569085349)
('F1 Macro', 0.5464067473400861)
 
('Precision Micro', 0.5432692307692307)
('Recall Micro', 0.5432692307692307)
('F1 Micro', 0.5432692307692307)
 
('Precision Weighted', 0.5480787250605467)
('Recall Weighted', 0.5432692307692307)
('F1 Weighted', 0.5456633803620117)


# Anger & Sadness

In [115]:
precision = precision_score(list(y_test),pred_y,average='macro')
recall = recall_score(list(y_test),pred_y,average='macro')
f1 = get_f1_score(precision,recall)
print('Precision Macro',precision)
print('Recall Macro',recall)
print('F1 Macro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='micro')
recall = recall_score(list(y_test),pred_y,average='micro')
f1 = get_f1_score(precision,recall)
print('Precision Micro',precision)
print('Recall Micro',recall)
print('F1 Micro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print('Precision Weighted',precision)
print('Recall Weighted',recall)
print('F1 Weighted',f1)

('Precision Macro', 0.5026881720430108)
('Recall Macro', 0.5026881720430108)
('F1 Macro', 0.5026881720430108)
 
('Precision Micro', 0.5026737967914439)
('Recall Micro', 0.5026737967914439)
('F1 Micro', 0.5026737967914439)
 
('Precision Weighted', 0.5027025472945776)
('Recall Weighted', 0.5026737967914439)
('F1 Weighted', 0.5026881716319253)


# Neutral & Happiness

In [134]:
precision = precision_score(list(y_test),pred_y,average='macro')
recall = recall_score(list(y_test),pred_y,average='macro')
f1 = get_f1_score(precision,recall)
print('Precision Macro',precision)
print('Recall Macro',recall)
print('F1 Macro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='micro')
recall = recall_score(list(y_test),pred_y,average='micro')
f1 = get_f1_score(precision,recall)
print('Precision Micro',precision)
print('Recall Micro',recall)
print('F1 Micro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print('Precision Weighted',precision)
print('Recall Weighted',recall)
print('F1 Weighted',f1)

('Precision Macro', 0.5026881720430108)
('Recall Macro', 0.5026881720430108)
('F1 Macro', 0.5026881720430108)
 
('Precision Micro', 0.5026737967914439)
('Recall Micro', 0.5026737967914439)
('F1 Micro', 0.5026737967914439)
 
('Precision Weighted', 0.5027025472945776)
('Recall Weighted', 0.5026737967914439)
('F1 Weighted', 0.5026881716319253)


# Neutral & Sadness

In [316]:
precision = precision_score(list(y_test),pred_y,average='macro')
recall = recall_score(list(y_test),pred_y,average='macro')
f1 = get_f1_score(precision,recall)
print('Precision Macro',precision)
print('Recall Macro',recall)
print('F1 Macro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='micro')
recall = recall_score(list(y_test),pred_y,average='micro')
f1 = get_f1_score(precision,recall)
print('Precision Micro',precision)
print('Recall Micro',recall)
print('F1 Micro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print('Precision Weighted',precision)
print('Recall Weighted',recall)
print('F1 Weighted',f1)

('Precision Macro', 0.47599318351262115)
('Recall Macro', 0.4760416666666667)
('F1 Macro', 0.47601742385512175)
 
('Precision Micro', 0.4769585253456221)
('Recall Micro', 0.4769585253456221)
('F1 Micro', 0.4769585253456221)
 
('Precision Weighted', 0.4765562995818717)
('Recall Weighted', 0.4769585253456221)
('F1 Weighted', 0.4767573276273286)


# Neutral & Anger

In [348]:
precision = precision_score(list(y_test),pred_y,average='macro')
recall = recall_score(list(y_test),pred_y,average='macro')
f1 = get_f1_score(precision,recall)
print('Precision Macro',precision)
print('Recall Macro',recall)
print('F1 Macro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='micro')
recall = recall_score(list(y_test),pred_y,average='micro')
f1 = get_f1_score(precision,recall)
print('Precision Micro',precision)
print('Recall Micro',recall)
print('F1 Micro',f1)
print(' ')
precision = precision_score(list(y_test),pred_y,average='weighted')
recall = recall_score(list(y_test),pred_y,average='weighted')
f1 = get_f1_score(precision,recall)
print('Precision Weighted',precision)
print('Recall Weighted',recall)
print('F1 Weighted',f1)

('Precision Macro', 0.46908434525763043)
('Recall Macro', 0.4709975369458128)
('F1 Macro', 0.4700389943019242)
 
('Precision Micro', 0.47737556561085975)
('Recall Micro', 0.47737556561085975)
('F1 Micro', 0.47737556561085975)
 
('Precision Weighted', 0.47071297782701477)
('Recall Weighted', 0.47737556561085975)
('F1 Weighted', 0.4740208614181354)


In [None]:
# TODO
#Word count higher than 3
#Filter out empty patterns

In [349]:
data['text']

5977      what who says that bananas don't grow overnight
8253    what do you mean as you can who makes up these...
6006           well i guess you're not trying hard enough
4582                 oh very amusing indeed amanda listen
1622                                             what huh
9810          twice is every time we've tried that's ever
1978    dumber than you i mean they're a lot worse off...
1339                     i'm going to ask her to marry me
5837                      that's how you want to leave it
5955    well i can't be in charge of this i mean i got...
2653                           stop it go away i hate you
4928                          what is the matter with you
573                                           whose is it
1814                           that's very amusing indeed
1993    no it didn't um it didn't come out and i don't...
4966                                          yes sir yes
5775    okay what is it that i did wrong on the form t...
8214          

In [352]:
patterns_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,195,196,197,198,199,200,201,202,203,204
Ses01F_script02_2_F041,the .+,we .+,the .+ .+,right .+,.+ wrong,.+ but .+,.+ but we,.+ the right,.+ but,so .+,...,,,,,,,,,,
Ses05F_script01_1_M013,.+ mom,.+ years,for .+,.+ back,.+ comes,back .+,.+ after,.+ after .+,three .+,nobody .+,...,,,,,,,,,,
Ses05F_script03_2_M041,.+ music,very .+,.+ indeed,music .+,,,,,,,...,,,,,,,,,,
Ses04F_impro07_M076,you .+ to,you .+,.+ .+ you,.+ for it,.+ but .+,.+ it,i .+,.+ yeah,.+ have .+,.+ but,...,,,,,,,,,,
Ses04F_impro07_M077,.+ .+ you,.+ campus,.+ do you,.+ live,.+ year,.+ on,.+ going to,.+ going,.+ to,well .+,...,,,,,,,,,,
Ses05F_script03_2_F041,,,,,,,,,,,...,,,,,,,,,,
Ses04F_impro07_M071,the .+,.+ the .+,the .+ .+,.+ want,thing .+,want to .+,and .+,get .+,.+ to,.+ and,...,,,,,,,,,,
Ses04F_impro07_M072,of .+,.+ of .+,.+ boys,.+ of,drunk .+,lots of .+,,,,,...,,,,,,,,,,
Ses03F_impro08_M021,about .+,three .+,.+ dollars,.+ three,,,,,,,...,,,,,,,,,,
Ses01M_script01_1_M000,.+ it,.+ saw,he .+ it,saw .+,he .+,,,,,,...,,,,,,,,,,
