In [1]:
import pandas as pd 
import pickle
import warnings
warnings.filterwarnings('ignore')
import numpy as np

from sklearn.svm import SVC
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing
import torch

In [2]:
def svm_classifier(X_train, X_test, Y_train, Y_test):
    """Grid-search an RBF-kernel SVM over ``C`` and score it on the test split.

    Parameters
    ----------
    X_train, X_test : array-like of shape (n_samples, n_features)
        Feature matrices for the training and test splits.
    Y_train, Y_test : array-like
        Labels for each split. Single-column DataFrames / column vectors are
        accepted and flattened to 1-D before use.

    Returns
    -------
    report : str
        ``classification_report`` text (4 digits) for the test predictions.
    best_estimator
        ``GridSearchCV.best_estimator_``: the SVC refit on the whole training
        split with the best ``C`` (``refit=True``).
    """
    # scikit-learn expects 1-D targets; flattening here avoids the
    # DataConversionWarning raised when an (n, 1) label frame is passed.
    Y_train = np.ravel(Y_train)
    Y_test = np.ravel(Y_test)

    svm_clf = svm.SVC(random_state=0, kernel="rbf", gamma="scale", class_weight="balanced")
    # Candidate regularisation strengths, from 1e-4 up to 10.
    C = [0.0001, 0.0003, 0.0005, 0.001, 0.003, 0.005, 0.01, 0.03, 0.05, 0.1, 0.3, 0.5, 1, 3, 5, 10]
    gscv_clf = GridSearchCV(
        estimator=svm_clf,
        param_grid=dict(C=C),
        n_jobs=-1,
        cv=10,
        scoring='f1_weighted',
        refit=True)

    gscv_clf.fit(X_train, Y_train)
    Y_test_pred = gscv_clf.predict(X_test)
    report = classification_report(Y_test, Y_test_pred, digits=4)
    return report, gscv_clf.best_estimator_

In [3]:
# Label tables for the train and test splits (paths are relative).
y_train, y_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../../../text_features/bert_emo_embeddings/train_labels_bert_emo.csv",
        "../../../text_features/bert_emo_embeddings/test_labels_bert_emo.csv",
    )
)

In [4]:
# Pickled embeddings of the target utterances, one file per split.
# NOTE(review): pickle.load can execute arbitrary code; only open files from a trusted source.
# encoding='latin1' is presumably needed because the pickles were written under Python 2 — TODO confirm.
with open('../../../text_features/bert_emo_embeddings/train_bert_emo_embeddings_target_.pkl', 'rb') as train_fh:
    x_train = pickle.load(train_fh, encoding='latin1')

with open('../../../text_features/bert_emo_embeddings/test_bert_emo_embeddings_target_.pkl', 'rb') as test_fh:
    x_test = pickle.load(test_fh, encoding='latin1')

In [5]:
# Keep the first element of every sample and turn it into a plain Python list.
# (presumably each sample is a (1, dim) tensor/array — TODO confirm)
x_train_vals = [sample[0].tolist() for sample in x_train["embeddings"]]
x_test_vals = [sample[0].tolist() for sample in x_test["embeddings"]]

# One list-valued 'embeddings' column per split.
x_train_df = pd.DataFrame({'embeddings': x_train_vals})
x_test_df = pd.DataFrame({'embeddings': x_test_vals})

def process_dataframes(data):
    """Expand the list-valued ``embeddings`` column into one column per dimension.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with an ``embeddings`` column whose cells are sequences of
        numbers. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        The columns of ``data`` other than ``embeddings``, followed by the
        expanded dimensions; every column name gets the ``feat_`` prefix
        (``feat_0``, ``feat_1``, ...). The index of ``data`` is preserved.
    """
    # Building the frame once from a list of rows replaces
    # ``Series.apply(pd.Series)``, which constructs one Series per row.
    expanded = pd.DataFrame(data["embeddings"].tolist(), index=data.index)
    others = data.drop(columns=["embeddings"])
    return pd.concat([others, expanded], axis=1).add_prefix("feat_")

# Replace each split's list-valued frame with its flattened feat_* columns.
x_train_df, x_test_df = map(process_dataframes, (x_train_df, x_test_df))

### Speaker Independent and Context Independent

In [6]:
# Pass the labels as 1-D arrays (as the context-dependent runs do) instead of
# single-column DataFrames, which scikit-learn only accepts with a warning.
report, best_est = svm_classifier(x_train_df, x_test_df, y_train.values.ravel(), y_test.values.ravel())

In [7]:
# Display the estimator returned by svm_classifier (GridSearchCV's best_estimator_).
best_est

In [8]:
# Classification report for the test-split predictions, as returned by svm_classifier.
print(report)

              precision    recall  f1-score   support

         0.0     0.6807    0.6694    0.6750       121
         1.0     0.6721    0.6833    0.6777       120

    accuracy                         0.6763       241
   macro avg     0.6764    0.6764    0.6763       241
weighted avg     0.6764    0.6763    0.6763       241



### Speaker Dependent and Context Independent

In [9]:
# Text/speaker tables for both splits; their 'speaker' column is appended to the features in later cells.
x_train_speakers, x_test_speakers = map(
    pd.read_csv,
    (
        "../../../text_features/bert_emo_embeddings/train_data_bert_emo.csv",
        "../../../text_features/bert_emo_embeddings/test_data_bert_emo.csv",
    ),
)
x_train_speakers

Unnamed: 0,target_,target_context,speaker
0,[CLS] I've been told it's a good way to move o...,[CLS] I've been told it's a good way to move o...,25
1,"[CLS] Yeah, sure. You slept with your husband....","[CLS] Yeah, sure. You slept with your husband....",1
2,[CLS] When are you coming home? [SEP],[CLS] When are you coming home? Okay. Alright....,16
3,[CLS] Riveting. [SEP],[CLS] Riveting. Bingo. Then I lifted the cushi...,0
4,"[CLS] No, this is just part of a daredevil gam...","[CLS] No, this is just part of a daredevil gam...",2
...,...,...,...
956,"[CLS] Oh, that's sweet, but today is all about...","[CLS] Oh, that's sweet, but today is all about...",7
957,[CLS] If you wanna put a label on it. [SEP],[CLS] If you wanna put a label on it. You mean...,24
958,[CLS] That you're an alcoholic? [SEP],[CLS] That you're an alcoholic? I realized som...,3
959,[CLS] All I see is a yellow smudge. [SEP],[CLS] All I see is a yellow smudge. Now go bac...,15


In [10]:
# Append the speaker id as one extra feature column (aligned on the row index).
# NOTE(review): the id is an integer code that the SVM will treat as a numeric
# magnitude — confirm this is intended rather than a one-hot encoding.
x_train_df = x_train_df.assign(speaker=x_train_speakers["speaker"])
x_test_df = x_test_df.assign(speaker=x_test_speakers["speaker"])
x_train_df

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_759,feat_760,feat_761,feat_762,feat_763,feat_764,feat_765,feat_766,feat_767,speaker
0,0.481820,0.396239,-0.108557,-0.824750,-0.144248,-0.073026,0.979758,0.433720,-0.316678,-0.434535,...,-0.501656,-0.570439,-0.090142,0.354013,0.600775,0.460253,-0.112635,-0.016665,0.536517,25
1,0.186422,-0.012715,-0.270104,-0.966090,-0.233625,-0.114313,0.726960,0.323623,0.011497,0.388439,...,-0.495940,-0.614765,-0.863458,0.146015,0.170616,0.566187,-0.506237,0.246275,0.213663,1
2,0.320359,0.196662,-0.419681,-1.139635,-0.073863,-0.443748,1.138606,0.024697,0.316930,0.445424,...,-0.291079,-0.193862,-0.685517,-0.135893,0.181228,0.520968,-0.438661,-0.345917,0.238039,16
3,-0.286904,0.300738,0.053697,-0.644980,-0.572975,-0.234221,0.913548,0.248886,0.171966,-0.380621,...,-1.099317,-0.750813,-0.887254,-0.098758,0.493229,0.721029,-0.185865,-0.017646,0.344524,0
4,-0.011912,-0.083431,-0.213718,-0.864216,-0.571687,-0.365077,0.900541,0.112959,-0.274653,0.641007,...,-0.185786,-0.433104,-0.495586,-0.382690,0.436223,0.608935,-0.549000,-0.157270,0.028428,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
956,-0.209318,-0.389856,0.090160,-0.263833,-0.206982,0.052016,0.932359,0.460748,0.106117,-0.405427,...,-0.899440,-0.532513,-0.072574,-0.027293,0.360943,0.307455,-0.115243,-0.450011,0.237368,7
957,0.028629,0.000684,-0.294860,-0.878969,-0.283896,-0.278684,0.795800,0.312894,0.100772,0.136129,...,-0.505149,-0.536615,-0.199074,-0.095573,0.222312,0.689857,-0.204491,-0.222941,0.187176,24
958,0.230703,1.102729,-0.857481,-1.371913,-0.730950,-0.635589,0.822024,0.146600,0.062294,0.648425,...,-0.073648,-0.842553,-0.645787,-0.083847,-0.035841,0.434346,-0.181223,0.311277,-0.203958,3
959,-0.088872,0.509529,0.109430,-0.872232,-0.417698,-0.755796,0.807031,0.306677,-0.247697,-0.483073,...,-0.258881,-0.453982,-0.080044,0.143520,0.438239,0.396124,-0.091045,-0.175569,0.820228,15


In [11]:
# Pass the labels as 1-D arrays (as the context-dependent runs do) instead of
# single-column DataFrames, which scikit-learn only accepts with a warning.
report, best_est = svm_classifier(x_train_df, x_test_df, y_train.values.ravel(), y_test.values.ravel())
best_est

In [12]:
print(report)

              precision    recall  f1-score   support

         0.0     0.6857    0.5950    0.6372       121
         1.0     0.6397    0.7250    0.6797       120

    accuracy                         0.6598       241
   macro avg     0.6627    0.6600    0.6584       241
weighted avg     0.6628    0.6598    0.6583       241



### Speaker Independent and Context Dependent

In [14]:
# Pickled embeddings of the target utterance together with its context, one file per split.
# NOTE(review): pickle.load can execute arbitrary code; only open files from a trusted source.
with open('../../../text_features/bert_emo_embeddings/train_bert_emo_embeddings_target_context.pkl', 'rb') as train_fh:
    x_train_context = pickle.load(train_fh, encoding='latin1')

with open('../../../text_features/bert_emo_embeddings/test_bert_emo_embeddings_target_context.pkl', 'rb') as test_fh:
    x_test_context = pickle.load(test_fh, encoding='latin1')

In [15]:
# Keep the first element of every context sample and turn it into a plain Python list.
# (presumably each sample is a (1, dim) tensor/array — TODO confirm)
x_train_vals = [sample[0].tolist() for sample in x_train_context["embeddings"]]
x_test_vals = [sample[0].tolist() for sample in x_test_context["embeddings"]]

# Rebind the feature frames to the context embeddings (one list-valued column each).
x_train_df = pd.DataFrame({'embeddings': x_train_vals})
x_test_df = pd.DataFrame({'embeddings': x_test_vals})

def process_dataframes(data):
    """Expand the list-valued ``embeddings`` column into one column per dimension.

    NOTE(review): this redefines the ``process_dataframes`` from the earlier
    feature-building cell; the duplicate could be dropped.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with an ``embeddings`` column whose cells are sequences of
        numbers. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        The columns of ``data`` other than ``embeddings``, followed by the
        expanded dimensions; every column name gets the ``feat_`` prefix
        (``feat_0``, ``feat_1``, ...). The index of ``data`` is preserved.
    """
    # Building the frame once from a list of rows replaces
    # ``Series.apply(pd.Series)``, which constructs one Series per row.
    expanded = pd.DataFrame(data["embeddings"].tolist(), index=data.index)
    others = data.drop(columns=["embeddings"])
    return pd.concat([others, expanded], axis=1).add_prefix("feat_")

# Replace each split's list-valued frame with its flattened feat_* columns.
x_train_df, x_test_df = map(process_dataframes, (x_train_df, x_test_df))

In [16]:
# Fit and evaluate on the context embeddings, with the label frames flattened to 1-D arrays.
report, best_est = svm_classifier(
    X_train=x_train_df,
    X_test=x_test_df,
    Y_train=y_train.values.ravel(),
    Y_test=y_test.values.ravel(),
)

In [17]:
# Display the estimator returned by svm_classifier (GridSearchCV's best_estimator_).
best_est

In [18]:
# Classification report for the test-split predictions, as returned by svm_classifier.
print(report)

              precision    recall  f1-score   support

         0.0     0.5912    0.6694    0.6279       121
         1.0     0.6154    0.5333    0.5714       120

    accuracy                         0.6017       241
   macro avg     0.6033    0.6014    0.5997       241
weighted avg     0.6033    0.6017    0.5998       241



### Speaker Dependent and Context Dependent

In [19]:
# Append the speaker id to the context features as one extra column (aligned on the row index).
# NOTE(review): the id is an integer code that the SVM will treat as a numeric
# magnitude — confirm this is intended rather than a one-hot encoding.
x_train_df = x_train_df.assign(speaker=x_train_speakers["speaker"])
x_test_df = x_test_df.assign(speaker=x_test_speakers["speaker"])
x_train_df

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_759,feat_760,feat_761,feat_762,feat_763,feat_764,feat_765,feat_766,feat_767,speaker
0,0.187400,-0.249752,-0.531189,-0.587696,-0.253145,0.251255,0.293818,0.102688,-0.029937,0.549572,...,-1.038667,-0.475906,-0.808535,-0.600013,0.356791,1.004951,-0.288849,-0.292046,0.170624,25
1,0.306072,-0.371220,-0.136578,-0.558405,-0.534847,0.092930,0.665439,-0.076283,-0.006559,0.253745,...,-0.582736,-0.286270,-0.805824,-0.217168,0.291527,0.816135,-0.236136,-0.092167,0.009652,1
2,0.000783,-0.011165,-0.009149,-0.985173,-0.560932,-0.335655,0.996077,0.319493,0.025274,0.294554,...,-0.111058,-0.542781,-0.469157,-0.135954,0.367989,0.389392,-0.408523,0.195740,0.186753,16
3,0.103643,-0.603226,-0.207312,-0.500240,-0.247118,-0.395290,0.654552,0.238343,-0.261050,-0.122142,...,-0.164629,-0.554498,-0.251535,-0.342107,0.068586,0.363279,-0.277995,0.019770,0.328778,0
4,0.136752,-0.041872,-0.349036,-1.014121,-0.599821,-0.623811,0.962766,0.145386,-0.263431,0.655999,...,-0.331218,-0.444341,-0.252407,-0.482110,0.396336,0.541347,-0.641876,0.100255,-0.103844,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
956,-0.164540,-0.184631,0.799775,-0.561728,-0.087351,-0.333588,0.670840,0.127491,-0.710809,-0.470882,...,-0.967548,-0.398648,-0.505340,0.639209,0.360454,0.233670,-0.127162,-0.304017,0.475064,7
957,-0.643545,0.265237,-0.420253,-0.545500,-0.326656,-0.336218,0.705373,0.341426,-0.079628,0.262987,...,-0.402745,-0.579635,-0.868935,-0.084060,-0.103174,0.685659,0.132169,0.430537,0.459013,24
958,0.219972,0.474826,-0.279532,-0.980356,-0.809623,-0.483326,0.647109,0.202451,0.043996,0.683331,...,0.020142,-0.806641,-0.558470,0.147179,0.005372,0.338175,0.059721,0.196580,0.059456,3
959,0.286993,-0.120376,0.096945,-0.735082,-0.296853,0.060511,0.594339,0.320356,-0.496224,0.074934,...,-0.281640,-0.855841,-0.595620,0.045928,0.479604,0.357689,-0.342236,-0.091274,0.453166,15


In [20]:
# Fit and evaluate on context embeddings plus the speaker column; labels are flattened to 1-D arrays.
report, best_est = svm_classifier(
    X_train=x_train_df,
    X_test=x_test_df,
    Y_train=y_train.values.ravel(),
    Y_test=y_test.values.ravel(),
)
best_est

In [21]:
# Classification report for the test-split predictions, as returned by svm_classifier.
print(report)

              precision    recall  f1-score   support

         0.0     0.6207    0.5950    0.6076       121
         1.0     0.6080    0.6333    0.6204       120

    accuracy                         0.6141       241
   macro avg     0.6143    0.6142    0.6140       241
weighted avg     0.6144    0.6141    0.6140       241

