In [1]:
import pandas as pd 
import pickle
import warnings
warnings.filterwarnings('ignore')
import numpy as np

from sklearn.svm import SVC
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing
import torch

In [2]:
def svm_classifier(X_train, X_test, Y_train, Y_test):
    """Grid-search an RBF-kernel SVM over ``C`` and evaluate it on the test split.

    Parameters
    ----------
    X_train, X_test : array-like of shape (n_samples, n_features)
        Feature matrices used for fitting and for the held-out evaluation.
    Y_train, Y_test : array-like
        Target labels. A single-column DataFrame / ``(n, 1)`` column vector is
        flattened to 1-D before use; 1-D inputs are passed through unchanged.

    Returns
    -------
    report : str
        Text produced by ``classification_report`` (4 decimal digits) for the
        predictions on ``X_test``.
    best_estimator : sklearn.svm.SVC
        The estimator refit on the full training data with the best ``C``.
    """

    def _as_1d(labels):
        # scikit-learn expects 1-D targets. A (n, 1) label frame would
        # otherwise raise a DataConversionWarning on every CV fit, which the
        # notebook-wide warnings filter silently hides.
        arr = np.asarray(labels)
        if arr.ndim == 2 and arr.shape[1] == 1:
            return arr.ravel()
        return arr

    Y_train = _as_1d(Y_train)
    Y_test = _as_1d(Y_test)

    svm_clf = svm.SVC(random_state=0, kernel="rbf", gamma="scale", class_weight="balanced")

    # Candidate regularisation strengths explored by the grid search.
    C = [0.0001, 0.0003, 0.0005, 0.001, 0.003, 0.005, 0.01, 0.03, 0.05, 0.1, 0.3, 0.5, 1, 3, 5, 10]
    gscv_clf = GridSearchCV(
        estimator=svm_clf,
        param_grid=dict(C=C),
        n_jobs=-1,
        cv=10,
        scoring='f1_weighted',
        refit=True)  # refit=True retrains the best configuration on all of X_train

    gscv_clf.fit(X_train, Y_train)
    Y_test_pred = gscv_clf.predict(X_test)
    report = classification_report(Y_test, Y_test_pred, digits=4)
    return report, gscv_clf.best_estimator_

In [3]:
# Read the label tables for the train and test splits (one CSV each).
y_train, y_test = (
    pd.read_csv(label_csv)
    for label_csv in (
        "../../text_features/bert_embeddings/train_labels.csv",
        "../../text_features/bert_embeddings/test_labels.csv",
    )
)

In [4]:
# Load the pickled embeddings for the target utterances (train and test).
# NOTE(review): pickle.load can execute arbitrary code -- only use files from a trusted source.
with open('../../text_features/bert_embeddings/train_bert_embeddings_target_.pkl', 'rb') as train_file:
    x_train = pickle.load(train_file, encoding='latin1')

with open('../../text_features/bert_embeddings/test_bert_embeddings_target_.pkl', 'rb') as test_file:
    x_test = pickle.load(test_file, encoding='latin1')

In [5]:
# For every stored entry keep element 0 and convert it to a plain Python list
# (presumably the sentence-level vector -- TODO confirm against the extraction script).
x_train_vals = [sample[0].tolist() for sample in x_train["embeddings"]]
x_test_vals = [sample[0].tolist() for sample in x_test["embeddings"]]

# One row per utterance, with the whole vector held in a single list-valued column.
x_train_df = pd.DataFrame({'embeddings': x_train_vals})
x_test_df = pd.DataFrame({'embeddings': x_test_vals})

def process_dataframes(data):
    """Expand the list-valued ``embeddings`` column into one column per dimension.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with an ``embeddings`` column whose cells are sequences of numbers.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index as ``data``: any other columns of
        ``data`` first, then one column per embedding dimension, every column
        name prefixed with ``feat_``. ``data`` itself is not modified.
    """
    # Build the wide frame in a single constructor call; ``apply(pd.Series)``
    # creates one Series object per row and is far slower on large inputs.
    expanded = pd.DataFrame(data["embeddings"].tolist(), index=data.index)
    remaining = data.drop(columns=["embeddings"])
    return pd.concat([remaining, expanded], axis=1).add_prefix("feat_")

# Replace the single-column frames with their wide, one-column-per-dimension form.
x_train_df, x_test_df = map(process_dataframes, (x_train_df, x_test_df))

### Speaker Independent and Context Independent

In [6]:
# Pass the labels as flat 1-D arrays (as the context-dependent runs further down do);
# handing over the single-column label frames relies on an implicit conversion
# whose warning is hidden by the global warnings filter.
report, best_est = svm_classifier(x_train_df, x_test_df, y_train.values.ravel(), y_test.values.ravel())

In [7]:
# Show the estimator returned by the grid search for this configuration.
best_est

In [8]:
# The report is a multi-line string; print() renders its line breaks as a table.
print(report)

              precision    recall  f1-score   support

         0.0     0.6789    0.6116    0.6435       121
         1.0     0.6439    0.7083    0.6746       120

    accuracy                         0.6598       241
   macro avg     0.6614    0.6600    0.6590       241
weighted avg     0.6615    0.6598    0.6590       241



### Speaker Dependent and Context Independent

In [9]:
# Read the per-utterance metadata tables; the `speaker` column is used below.
x_train_speakers, x_test_speakers = (
    pd.read_csv(metadata_csv)
    for metadata_csv in (
        "../../text_features/bert_embeddings/train_data.csv",
        "../../text_features/bert_embeddings/test_data.csv",
    )
)
x_train_speakers

Unnamed: 0,target_,target_context,speaker
0,[CLS] I've been told it's a good way to move o...,[CLS] I've been told it's a good way to move o...,25
1,"[CLS] Yeah, sure. You slept with your husband....","[CLS] Yeah, sure. You slept with your husband....",1
2,[CLS] When are you coming home? [SEP],[CLS] When are you coming home? Okay. Alright....,16
3,[CLS] Riveting. [SEP],[CLS] Riveting. Bingo. Then I lifted the cushi...,0
4,"[CLS] No, this is just part of a daredevil gam...","[CLS] No, this is just part of a daredevil gam...",2
...,...,...,...
956,"[CLS] Oh, that's sweet, but today is all about...","[CLS] Oh, that's sweet, but today is all about...",7
957,[CLS] If you wanna put a label on it. [SEP],[CLS] If you wanna put a label on it. You mean...,24
958,[CLS] That you're an alcoholic? [SEP],[CLS] That you're an alcoholic? I realized som...,3
959,[CLS] All I see is a yellow smudge. [SEP],[CLS] All I see is a yellow smudge. Now go bac...,15


In [10]:
# Append the speaker id as an extra feature column next to the embedding features.
# NOTE(review): the id enters the RBF kernel as a plain number; if it is a
# categorical code, one-hot encoding may be more appropriate -- confirm.
x_train_df = x_train_df.assign(speaker=x_train_speakers["speaker"])
x_test_df = x_test_df.assign(speaker=x_test_speakers["speaker"])
x_train_df

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_759,feat_760,feat_761,feat_762,feat_763,feat_764,feat_765,feat_766,feat_767,speaker
0,0.531574,-0.002677,-0.377449,-0.346509,-0.357004,-0.114968,-0.139359,0.510641,0.438597,-0.468832,...,-0.395492,-0.281222,-0.151269,0.183464,-0.227712,0.016151,-0.106852,-0.075954,0.546363,25
1,0.230405,-0.500298,-0.502653,-0.045785,-0.408853,-0.143921,-0.041986,0.714192,0.254998,-0.068283,...,-0.187939,-0.245101,-0.565826,0.227214,-0.206787,0.004460,-0.477272,0.176039,0.420941,1
2,0.198217,-0.065683,-0.453662,-0.159374,-0.330294,-0.241741,0.436773,0.423417,-0.068334,-0.008599,...,-0.114280,0.013610,-0.615553,0.082351,-0.084131,-0.245042,-0.630190,-0.246156,0.167829,16
3,-0.574364,0.166891,-0.308708,-0.125461,-0.856284,-0.635108,0.486445,0.802911,0.276931,-0.836928,...,-0.585530,-0.714161,-0.674144,-0.016587,-0.359648,-0.071838,-0.346258,-0.364343,0.525816,0
4,0.362476,-0.240912,-0.305236,-0.117662,-0.365053,-0.539978,0.266043,0.564625,0.414602,-0.354684,...,-0.275061,-0.292804,-0.609617,-0.576326,0.207753,-0.100599,0.058775,0.376348,0.522347,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
956,0.007841,-0.369434,-0.231455,-0.052095,-0.572994,-0.259294,0.049462,0.489817,0.617951,-0.202881,...,-0.369166,-0.290499,-0.400612,-0.223186,0.004507,0.193803,-0.150293,0.010396,0.528742,7
957,0.146791,-0.517841,-0.283962,-0.350781,-0.263554,-0.182034,0.057058,0.768285,0.151862,-0.097999,...,-0.022245,-0.462382,-0.275672,0.156045,-0.201733,0.256771,-0.526523,-0.258645,0.478225,24
958,0.230309,0.099182,-1.189187,-0.140705,-0.700605,-0.645015,0.148480,0.355590,0.082329,0.057534,...,-0.188880,-0.089098,-0.256621,-0.055487,-0.207184,-0.197762,-0.256311,0.264371,0.465491,3
959,-0.003287,-0.021032,-0.506837,-0.390883,-0.534432,-0.525337,0.261074,0.383272,0.367904,-0.899204,...,-0.267725,-0.162286,-0.130819,0.144708,0.137884,0.210116,-0.121688,-0.176527,0.891960,15


In [11]:
# Pass the labels as flat 1-D arrays (as the context-dependent runs further down do);
# handing over the single-column label frames relies on an implicit conversion
# whose warning is hidden by the global warnings filter.
report, best_est = svm_classifier(x_train_df, x_test_df, y_train.values.ravel(), y_test.values.ravel())
best_est

In [12]:
# The report is a multi-line string; print() renders its line breaks as a table.
print(report)

              precision    recall  f1-score   support

         0.0     0.7308    0.6281    0.6756       121
         1.0     0.6715    0.7667    0.7160       120

    accuracy                         0.6971       241
   macro avg     0.7012    0.6974    0.6958       241
weighted avg     0.7013    0.6971    0.6957       241



### Speaker Independent and Context Dependent

In [13]:
# Load the pickled embeddings computed on target + context text (train and test).
# NOTE(review): pickle.load can execute arbitrary code -- only use files from a trusted source.
with open('../../text_features/bert_embeddings/train_bert_embeddings_target_context.pkl', 'rb') as train_file:
    x_train_context = pickle.load(train_file, encoding='latin1')

with open('../../text_features/bert_embeddings/test_bert_embeddings_target_context.pkl', 'rb') as test_file:
    x_test_context = pickle.load(test_file, encoding='latin1')

In [14]:
# For every stored entry keep element 0 and convert it to a plain Python list
# (presumably the sentence-level vector -- TODO confirm against the extraction script).
x_train_vals = [sample[0].tolist() for sample in x_train_context["embeddings"]]
x_test_vals = [sample[0].tolist() for sample in x_test_context["embeddings"]]

# Rebuild the feature frames from the context embeddings; this replaces the
# frames (and the speaker column) used in the previous sections.
x_train_df = pd.DataFrame({'embeddings': x_train_vals})
x_test_df = pd.DataFrame({'embeddings': x_test_vals})

# NOTE(review): this re-defines (and shadows) the identically named helper from
# the earlier feature-building cell; consider keeping a single definition.
def process_dataframes(data):
    """Expand the list-valued ``embeddings`` column into one column per dimension.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with an ``embeddings`` column whose cells are sequences of numbers.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index as ``data``: any other columns of
        ``data`` first, then one column per embedding dimension, every column
        name prefixed with ``feat_``. ``data`` itself is not modified.
    """
    # Build the wide frame in a single constructor call; ``apply(pd.Series)``
    # creates one Series object per row and is far slower on large inputs.
    expanded = pd.DataFrame(data["embeddings"].tolist(), index=data.index)
    remaining = data.drop(columns=["embeddings"])
    return pd.concat([remaining, expanded], axis=1).add_prefix("feat_")

# Replace the single-column frames with their wide, one-column-per-dimension form.
x_train_df, x_test_df = map(process_dataframes, (x_train_df, x_test_df))

In [15]:
# Fit and evaluate on the context embeddings; labels are handed over as flat 1-D arrays.
report, best_est = svm_classifier(
    X_train=x_train_df,
    X_test=x_test_df,
    Y_train=y_train.values.ravel(),
    Y_test=y_test.values.ravel(),
)

In [16]:
# Show the estimator returned by the grid search for this configuration.
best_est

In [17]:
# The report is a multi-line string; print() renders its line breaks as a table.
print(report)

              precision    recall  f1-score   support

         0.0     0.5985    0.6529    0.6245       121
         1.0     0.6147    0.5583    0.5852       120

    accuracy                         0.6058       241
   macro avg     0.6066    0.6056    0.6048       241
weighted avg     0.6065    0.6058    0.6049       241



### Speaker Dependent and Context Dependent

In [18]:
# Append the speaker id as an extra feature column next to the context-embedding features.
# NOTE(review): the id enters the RBF kernel as a plain number; if it is a
# categorical code, one-hot encoding may be more appropriate -- confirm.
x_train_df = x_train_df.assign(speaker=x_train_speakers["speaker"])
x_test_df = x_test_df.assign(speaker=x_test_speakers["speaker"])
x_train_df

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_759,feat_760,feat_761,feat_762,feat_763,feat_764,feat_765,feat_766,feat_767,speaker
0,0.574635,-0.695604,-0.290111,-0.501525,-0.111114,0.033933,-0.064401,0.372059,0.276928,-0.154529,...,-0.542819,-0.312360,-0.841467,-0.249062,-0.188382,-0.034155,-0.304036,-0.002833,0.474112,25
1,-0.009401,-0.713261,-0.092869,-0.188030,-0.583256,0.038700,-0.012739,0.144762,-0.005990,-0.176591,...,-0.016617,-0.309293,-0.677562,-0.093774,0.027813,0.034902,0.021323,-0.005110,0.312115,1
2,0.210755,-0.549721,-0.031042,-0.318841,-0.471447,-0.455262,0.248838,0.352645,0.497266,-0.114562,...,-0.077138,-0.524917,-0.786684,-0.052215,0.113616,0.067762,-0.151070,0.065819,0.390655,16
3,0.032532,-0.666678,0.111073,-0.566707,-0.629884,-0.351460,0.293653,0.135274,-0.025899,-0.237507,...,-0.027932,-0.491872,-0.768082,-0.060602,-0.096596,-0.065277,-0.009771,0.048125,0.485839,0
4,0.511843,-0.130731,-0.288622,-0.338487,-0.499926,-0.658664,0.347511,0.401867,0.203338,-0.103752,...,-0.569898,-0.268881,-0.146304,-0.428675,0.079669,-0.125833,-0.068329,0.323887,0.390050,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
956,0.167014,-0.660699,0.046890,-0.320457,-0.222541,-0.203675,0.082216,0.091342,0.252586,-0.353422,...,-0.444565,-0.359013,-0.615910,0.007245,0.021878,0.238162,0.252142,0.068272,0.301823,7
957,0.138407,-0.448751,-0.334320,-0.477100,-0.440597,-0.229950,0.317996,0.434057,0.261300,-0.180775,...,-0.194911,-0.291315,-0.595891,-0.007889,0.206415,-0.019882,-0.210651,0.226619,0.770230,24
958,0.151957,-0.593120,-0.360247,-0.185420,-0.694859,-0.258243,-0.187632,0.114319,0.363445,-0.348429,...,0.021035,-0.220124,-0.451208,0.222772,-0.197227,-0.212523,0.363826,0.316671,0.352101,3
959,0.223559,-0.728716,0.137703,-0.223860,-0.123140,-0.209221,-0.093874,0.517305,-0.097130,-0.260613,...,-0.160411,-0.655506,-0.872614,0.312203,0.227483,-0.191332,-0.094120,0.325375,0.292900,15


In [19]:
# Fit and evaluate on context embeddings plus speaker id; labels are flat 1-D arrays.
report, best_est = svm_classifier(
    X_train=x_train_df,
    X_test=x_test_df,
    Y_train=y_train.values.ravel(),
    Y_test=y_test.values.ravel(),
)
best_est

In [20]:
# The report is a multi-line string; print() renders its line breaks as a table.
print(report)

              precision    recall  f1-score   support

         0.0     0.6015    0.6612    0.6299       121
         1.0     0.6204    0.5583    0.5877       120

    accuracy                         0.6100       241
   macro avg     0.6109    0.6097    0.6088       241
weighted avg     0.6109    0.6100    0.6089       241

