In [1]:
import pandas as pd 
import pickle
import warnings
warnings.filterwarnings('ignore')
import numpy as np

from sklearn.svm import SVC
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing

In [2]:
labels = pd.read_csv("../../../data/scene_labels.csv")

In [3]:
labels.head(5)

Unnamed: 0,SCENE,KEY,SPEAKER,SHOW,Sarcasm,Sarcasm_Type
0,1_10004,1_10004_u,SHELDON,BBT,0.0,NONE
1,1_10009,1_10009_u,PENNY,BBT,0.0,NONE
2,1_1001,1_1001_u,RAJ,BBT,0.0,NONE
3,1_1003,1_1003_u,HOWARD,BBT,1.0,PRO
4,1_10190,1_10190_u,SHELDON,BBT,0.0,NONE


#### Perform mean, median, max, min and sum pooling on audio feature data

In [4]:
def get_model_data(audio_features, context_audio_features):
    model_data = pd.DataFrame(columns=['audio_feature','sarcasm','sarcasm_type', 'speaker'])
    for index, row in labels.iterrows():
        audio_key = row["SCENE"] + "_u.wav"
        context_audio_key = row["SCENE"] + "_c.wav"
        model_data = model_data.append({'context_audio_feature': context_audio_features[context_audio_key],
                                    'audio_feature': audio_features[audio_key],
                                    'sarcasm' : row["Sarcasm"],
                                    'sarcasm_type' : row["Sarcasm_Type"],
                                    'speaker' : row["SPEAKER"]},
                                  ignore_index=True)
    return model_data

In [5]:
def get_train_test_split(model_data, x_columns, y_column, stratify_column):
    X_train, X_test, Y_train, Y_test = train_test_split(
        model_data[x_columns],
        model_data[y_column],
        train_size=0.8, 
        test_size=0.2, 
        random_state=42, 
        shuffle=True,
        stratify=model_data[stratify_column])
    
    print("Train: ",X_train.shape, Y_train.shape,
      "Test: ",(X_test.shape, Y_test.shape))
    print(type(X_train))
    return X_train, X_test, Y_train, Y_test

def process_dataframes_pool(data):
    temp_concat_utt = pd.concat([data, data.audio_feature.apply(pd.Series)], axis=1)
    temp_concat_utt.drop(columns=['audio_feature', 'context_audio_feature'], inplace = True)
    temp_concat_ctxt = pd.concat([data, data.context_audio_feature.apply(pd.Series)], axis=1)
    temp_concat_ctxt.drop(columns=['audio_feature','context_audio_feature'], inplace = True)
    final = pd.merge(temp_concat_ctxt, temp_concat_utt, left_index=True, right_index=True)
    return final.add_prefix('feat_')
    

In [6]:
def svm_classifier(X_train, X_test, Y_train, Y_test):
    svm_clf = svm.SVC(random_state=0, kernel = "rbf", gamma = "scale", class_weight = "balanced")
    C = C = [0.0001, 0.0003, 0.0005, 0.001, 0.003, 0.005, 0.01, 0.03, 0.05, 0.1, 0.3, 0.5, 1, 3, 5, 10]
    gscv_clf = GridSearchCV(
        estimator=svm_clf, 
        param_grid=dict(C=C),
        n_jobs=-1, 
        cv = 10, 
        scoring = 'f1_micro', 
        refit = True)

    gscv_clf.fit(X_train, Y_train)
    Y_test_pred = gscv_clf.predict(X_test)
    report = classification_report(Y_test, Y_test_pred, digits=4)
    return report, gscv_clf.best_estimator_

### Librosa

In [7]:
with open('../../../audio_features/trill_features.pickle', 'rb') as f:
    librosa_audio_features = pickle.load(f, encoding='latin1')
    
with open('../../../audio_features/trill_context_features.pickle', 'rb') as f:
    librosa_context_audio_features = pickle.load(f, encoding='latin1')
    
model_data = get_model_data(librosa_audio_features, librosa_context_audio_features)

In [8]:
le = preprocessing.LabelEncoder()
model_data['speaker_encode'] = le.fit_transform(model_data['speaker'])
model_data.head(5)

Unnamed: 0,audio_feature,sarcasm,sarcasm_type,speaker,context_audio_feature,speaker_encode
0,"[-1.406311, -0.46280488, -1.1319538, -1.194219...",0.0,NONE,SHELDON,"[-1.3758289, -0.44799396, -1.1103612, -1.20222...",25
1,"[-1.4183334, -0.36521277, -1.1331908, -1.17071...",0.0,NONE,PENNY,"[-1.4331173, -0.39332274, -1.1685593, -1.18357...",15
2,"[-1.3609562, -0.27142158, -0.63655925, -1.0188...",0.0,NONE,RAJ,"[-1.4360552, -0.3508314, -0.7310817, -1.081333...",21
3,"[-1.4286865, -0.31481665, -0.67340577, -0.9725...",1.0,PRO,HOWARD,"[-1.3828517, -0.2948188, -0.68553615, -1.03285...",7
4,"[-1.3821282, -0.4007631, -1.1102539, -1.178829...",0.0,NONE,SHELDON,"[-1.3755091, -0.40953887, -1.064101, -1.183665...",25


### Speaker InDependent

In [9]:
X_train, X_test, Y_train, Y_test = get_train_test_split(model_data, ['context_audio_feature', 'audio_feature'], 'sarcasm', 'sarcasm_type')
X_train_mean, X_test_mean = process_dataframes_pool(X_train), process_dataframes_pool(X_test)

Train:  (961, 2) (961,) Test:  ((241, 2), (241,))
<class 'pandas.core.frame.DataFrame'>


In [10]:
X_train_mean

Unnamed: 0,feat_0_x,feat_1_x,feat_2_x,feat_3_x,feat_4_x,feat_5_x,feat_6_x,feat_7_x,feat_8_x,feat_9_x,...,feat_1014_y,feat_1015_y,feat_1016_y,feat_1017_y,feat_1018_y,feat_1019_y,feat_1020_y,feat_1021_y,feat_1022_y,feat_1023_y
424,-1.460673,-0.388124,-1.198663,-1.172316,2.503312,2.108518,1.060398,0.578683,1.630594,0.908577,...,-1.162612,-0.139947,-0.305690,-0.247433,0.739481,-0.989511,-2.269561,3.742936,0.263359,-2.695333
190,-1.425038,-0.402647,-1.134631,-1.175803,2.469460,2.352583,1.019048,0.553464,1.669619,0.851985,...,-1.166308,-0.075299,-0.292138,-0.233188,0.755889,-1.025950,-2.254203,3.758036,0.267499,-2.720352
1080,-1.427363,-0.368090,-1.115730,-1.161536,2.483415,2.243965,1.025378,0.573571,1.674619,0.853325,...,-1.165422,-0.067976,-0.282917,-0.231348,0.737755,-0.991106,-2.226220,3.681908,0.262515,-2.692864
973,-1.447276,-0.404969,-1.155175,-1.178029,2.490339,2.111906,1.045165,0.564328,1.657525,0.881496,...,-1.182927,-0.139150,-0.314663,-0.204058,0.741997,-1.019110,-2.296838,3.781696,0.251019,-2.740161
410,-1.473973,-0.371872,-1.209209,-1.169344,2.506720,2.008169,1.081889,0.601725,1.604805,0.936224,...,-1.122036,-0.441546,-0.385793,-0.146994,0.739058,-1.033458,-2.524935,3.921366,0.253506,-2.771898
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
738,-1.529125,-0.310616,-1.230099,-1.165104,2.557058,1.905361,1.111075,0.628417,1.588291,0.980805,...,-1.210930,-0.447864,-0.393628,-0.083342,0.711424,-0.996131,-2.516047,3.951467,0.213092,-2.845674
647,-1.480675,-0.329713,-1.213819,-1.167793,2.510993,1.963868,1.093477,0.599524,1.593545,0.946480,...,-1.203671,-0.350470,-0.369357,-0.122106,0.717571,-1.002789,-2.454391,3.894517,0.211688,-2.814557
711,-1.452896,-0.309939,-1.208318,-1.158181,2.498697,2.065699,1.039881,0.595853,1.598895,0.901927,...,-1.184283,-0.248647,-0.349260,-0.195685,0.707577,-0.954186,-2.337197,3.788609,0.229639,-2.754420
48,-1.450521,-0.391397,-1.191595,-1.180369,2.498699,2.088746,1.057705,0.565362,1.642491,0.907640,...,-1.158164,-0.163340,-0.320960,-0.211799,0.728438,-0.980004,-2.272155,3.734425,0.251807,-2.738416


In [11]:
mean_report, mean_best_est = svm_classifier(X_train_mean, X_test_mean, Y_train, Y_test)

In [12]:
print("********************************mean report********************************")
print(mean_report)
print(mean_best_est)

********************************mean report********************************
              precision    recall  f1-score   support

         0.0     0.5000    0.7500    0.6000       120
         1.0     0.5082    0.2562    0.3407       121

    accuracy                         0.5021       241
   macro avg     0.5041    0.5031    0.4703       241
weighted avg     0.5041    0.5021    0.4698       241

SVC(C=5, class_weight='balanced', random_state=0)


### Speaker Dependent

In [13]:
X_train, X_test, Y_train, Y_test = get_train_test_split(model_data, ['context_audio_feature', 'audio_feature', 'speaker_encode'], 'sarcasm', 'sarcasm')
X_train_mean, X_test_mean = process_dataframes_pool(X_train), process_dataframes_pool(X_test)

Train:  (961, 3) (961,) Test:  ((241, 3), (241,))
<class 'pandas.core.frame.DataFrame'>


In [14]:
X_train_mean

Unnamed: 0,feat_speaker_encode_x,feat_0_x,feat_1_x,feat_2_x,feat_3_x,feat_4_x,feat_5_x,feat_6_x,feat_7_x,feat_8_x,...,feat_1014_y,feat_1015_y,feat_1016_y,feat_1017_y,feat_1018_y,feat_1019_y,feat_1020_y,feat_1021_y,feat_1022_y,feat_1023_y
324,25,-1.435899,-0.356071,-1.173630,-1.172852,2.495012,2.111707,1.045857,0.566400,1.638244,...,-1.150176,-0.332603,-0.376076,-0.170648,0.729075,-0.998647,-2.412177,3.862189,0.233891,-2.757186
642,1,-1.495978,-0.357585,-1.255854,-1.176874,2.519893,1.795417,1.080924,0.581245,1.591160,...,-1.146680,-0.230358,-0.330342,-0.201567,0.749877,-1.018977,-2.371366,3.831006,0.251335,-2.729712
799,16,-1.499909,-0.317617,-1.255241,-1.171323,2.522731,1.800133,1.102057,0.606150,1.567258,...,-1.194996,-0.489715,-0.399532,-0.070543,0.697940,-0.987779,-2.536886,3.943640,0.201161,-2.827163
581,0,-1.489429,-0.358854,-1.246603,-1.182041,2.521551,1.811041,1.085306,0.572120,1.596421,...,-1.158856,-0.351114,-0.373615,-0.150376,0.713772,-0.962083,-2.401637,3.832661,0.229754,-2.785010
718,2,-1.469169,-0.420366,-1.208700,-1.188269,2.512614,1.951101,1.064685,0.546292,1.626176,...,-1.180912,-0.248936,-0.353331,-0.169784,0.734094,-1.005434,-2.379762,3.852985,0.242589,-2.783767
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
472,7,-1.455992,-0.363240,-1.184412,-1.170477,2.497103,2.143296,1.058765,0.583927,1.622526,...,-1.183382,-0.286069,-0.352684,-0.149890,0.736478,-1.020625,-2.432813,3.885089,0.225404,-2.776237
939,24,-1.513859,-0.311104,-1.241584,-1.170220,2.526661,1.782769,1.110188,0.604483,1.568482,...,-1.199218,-0.374180,-0.385631,-0.093995,0.702022,-0.989769,-2.472220,3.904171,0.195215,-2.811480
1201,3,-1.498174,-0.301732,-1.221516,-1.151396,2.511181,1.897816,1.101122,0.644424,1.586013,...,-1.229984,-0.198861,-0.351575,-0.130121,0.669315,-0.979705,-2.326248,3.787921,0.183100,-2.771972
618,15,-1.463509,-0.370534,-1.212894,-1.170065,2.512630,1.992508,1.055953,0.580780,1.624395,...,-1.140780,-0.248772,-0.353479,-0.196242,0.752512,-1.017570,-2.370615,3.823453,0.258205,-2.737882


In [15]:
mean_report, mean_best_est = svm_classifier(X_train_mean, X_test_mean, Y_train, Y_test)

In [16]:
print("********************************mean report********************************")
print(mean_report)
print(mean_best_est)
print("\n")

********************************mean report********************************
              precision    recall  f1-score   support

         0.0     0.6260    0.6364    0.6311       121
         1.0     0.6271    0.6167    0.6218       120

    accuracy                         0.6266       241
   macro avg     0.6266    0.6265    0.6265       241
weighted avg     0.6266    0.6266    0.6265       241

SVC(C=0.3, class_weight='balanced', random_state=0)


