In [1]:
import pandas as pd 
import pickle
import warnings
warnings.filterwarnings('ignore')
import numpy as np

from sklearn.svm import SVC
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing

In [2]:
# Load the scene label table; get_model_data() reads this frame through the
# notebook-level name `labels`, so it must be defined before that function runs.
labels = pd.read_csv("../../data/scene_labels.csv")

In [3]:
# Preview the first five label rows to check the columns that were read.
labels.head(5)

Unnamed: 0,SCENE,KEY,SPEAKER,SHOW,Sarcasm,Sarcasm_Type
0,1_10004,1_10004_u,SHELDON,BBT,0.0,NONE
1,1_10009,1_10009_u,PENNY,BBT,0.0,NONE
2,1_1001,1_1001_u,RAJ,BBT,0.0,NONE
3,1_1003,1_1003_u,HOWARD,BBT,1.0,PRO
4,1_10190,1_10190_u,SHELDON,BBT,0.0,NONE


#### Perform mean, median, max, min and sum pooling on audio feature data

In [4]:
def get_audio_mean_pool(audio) -> np.ndarray:
    """Mean-pool each clip's feature matrix along axis 1.

    Args:
        audio: Iterable of array-likes, one per clip; each is reduced over
            axis 1 (presumably the time/frame axis -- confirm against the
            feature extractor).

    Returns:
        Array whose i-th entry is the axis-1 mean of the i-th clip.
    """
    pooled_clips = []
    for clip_features in audio:
        pooled_clips.append(np.mean(clip_features, axis=1))
    return np.array(pooled_clips)
    
def get_audio_median_pool(audio) -> np.ndarray:
    """Median-pool each clip's feature matrix along axis 1.

    Args:
        audio: Iterable of array-likes, one per clip; each is reduced over
            axis 1.

    Returns:
        Array whose i-th entry is the axis-1 median of the i-th clip.
    """
    pooled_clips = []
    for clip_features in audio:
        pooled_clips.append(np.median(clip_features, axis=1))
    return np.array(pooled_clips)
    
def get_audio_max_pool(audio) -> np.ndarray:
    """Max-pool each clip's feature matrix along axis 1.

    Args:
        audio: Iterable of array-likes, one per clip; each is reduced over
            axis 1.

    Returns:
        Array whose i-th entry is the axis-1 maximum of the i-th clip.
    """
    pooled_clips = []
    for clip_features in audio:
        pooled_clips.append(np.max(clip_features, axis=1))
    return np.array(pooled_clips)

def get_audio_min_pool(audio) -> np.ndarray:
    """Min-pool each clip's feature matrix along axis 1.

    Args:
        audio: Iterable of array-likes, one per clip; each is reduced over
            axis 1.

    Returns:
        Array whose i-th entry is the axis-1 minimum of the i-th clip.
    """
    pooled_clips = []
    for clip_features in audio:
        pooled_clips.append(np.min(clip_features, axis=1))
    return np.array(pooled_clips)

def get_audio_sum_pool(audio) -> np.ndarray:
    """Sum-pool each clip's feature matrix along axis 1.

    Args:
        audio: Iterable of array-likes, one per clip; each is reduced over
            axis 1.

    Returns:
        Array whose i-th entry is the axis-1 sum of the i-th clip.
    """
    pooled_clips = []
    for clip_features in audio:
        pooled_clips.append(np.sum(clip_features, axis=1))
    return np.array(pooled_clips)

In [5]:
def get_model_data(audio_features, labels_df=None):
    """Build the modelling table by pairing each label row with its audio features.

    The frame is assembled from a list of records in a single constructor call.
    Growing it row by row with ``DataFrame.append`` is quadratic, and that
    method no longer exists in pandas 2.x.

    Args:
        audio_features: Mapping indexed by ``"<SCENE>_u.wav"`` keys whose values
            are the audio feature data for that scene's utterance.
        labels_df: Optional label table providing the "SCENE", "Sarcasm",
            "Sarcasm_Type" and "SPEAKER" columns. When omitted, the
            notebook-level ``labels`` frame is used, as before.

    Returns:
        DataFrame with columns ``audio_feature``, ``sarcasm``,
        ``sarcasm_type`` and ``speaker``; one row per label row, in label
        order, with a fresh 0..n-1 index.

    Raises:
        KeyError: If a scene's ``"<SCENE>_u.wav"`` key is missing from
            ``audio_features``.
    """
    label_rows = labels if labels_df is None else labels_df
    output_columns = ['audio_feature', 'sarcasm', 'sarcasm_type', 'speaker']

    records = [
        {
            'audio_feature': audio_features[scene + "_u.wav"],
            'sarcasm': sarcasm,
            'sarcasm_type': sarcasm_type,
            'speaker': speaker,
        }
        for scene, sarcasm, sarcasm_type, speaker in zip(
            label_rows["SCENE"],
            label_rows["Sarcasm"],
            label_rows["Sarcasm_Type"],
            label_rows["SPEAKER"],
        )
    ]
    # Passing `columns` keeps the expected schema even when there are no rows.
    return pd.DataFrame(records, columns=output_columns)

In [6]:
def get_train_test_split(model_data, x_columns, y_column, stratify_column):
    """Split ``model_data`` 80/20 into train and test partitions.

    The split is shuffled with a fixed ``random_state`` of 42 and stratified
    on ``stratify_column``. The partition shapes and the type of the training
    feature container are printed as a quick sanity check.

    Args:
        model_data: Frame holding the feature, target and stratification columns.
        x_columns: Column selector for the model inputs.
        y_column: Column selector for the target.
        stratify_column: Column whose values are used for stratification.

    Returns:
        Tuple ``(X_train, X_test, Y_train, Y_test)``.
    """
    features = model_data[x_columns]
    target = model_data[y_column]
    strata = model_data[stratify_column]

    partitions = train_test_split(
        features,
        target,
        train_size=0.8,
        test_size=0.2,
        random_state=42,
        shuffle=True,
        stratify=strata,
    )
    train_x, test_x, train_y, test_y = partitions

    test_shapes = (test_x.shape, test_y.shape)
    print("Train: ", train_x.shape, train_y.shape, "Test: ", test_shapes)
    print(type(train_x))
    return train_x, test_x, train_y, test_y

def process_dataframes_pool(data):
    """Flatten the pooled ``audio_feature`` column into one column per value.

    Each entry of ``data.audio_feature`` is expanded into its own row of a
    wide frame, which is placed to the right of the existing columns. The
    original ``audio_feature`` column is then removed and every remaining
    column name is prefixed with ``feat_``. The input frame is not modified.

    Args:
        data: Frame with an ``audio_feature`` column of sequence values.

    Returns:
        New frame with the same index as ``data`` and ``feat_``-prefixed columns.
    """
    expanded_features = data.audio_feature.apply(pd.Series)
    widened = pd.concat([data, expanded_features], axis=1)
    return widened.drop(columns=['audio_feature']).add_prefix('feat_')

def get_pooled_data(X_train, X_test, pool_type):
    """Pool the ``audio_feature`` column of both partitions and flatten it.

    For each partition, a copy is made, its ``audio_feature`` column is
    replaced by the pooled vectors (as lists), and the result is passed
    through ``process_dataframes_pool``. The input frames are left untouched.

    Args:
        X_train: Training frame containing an ``audio_feature`` column.
        X_test: Test frame containing an ``audio_feature`` column.
        pool_type: One of "mean", "median", "max", "min" or "sum".

    Returns:
        Tuple ``(train_frame, test_frame)`` of processed frames.

    Raises:
        ValueError: If ``pool_type`` is not a supported name. Previously this
            case returned ``None`` silently, so the caller failed later with
            an unrelated tuple-unpacking error.
    """
    pool_functions = {
        "mean": get_audio_mean_pool,
        "median": get_audio_median_pool,
        "max": get_audio_max_pool,
        "min": get_audio_min_pool,
        "sum": get_audio_sum_pool,
    }
    if pool_type not in pool_functions:
        raise ValueError(
            "Unknown pool_type {!r}; expected one of {}".format(
                pool_type, sorted(pool_functions)
            )
        )
    pool = pool_functions[pool_type]

    def _pool_partition(partition):
        # Work on a copy so the caller's frame keeps its raw feature matrices
        # and can be pooled again with a different pool_type.
        pooled = partition.copy()
        pooled["audio_feature"] = pool(partition["audio_feature"]).tolist()
        return process_dataframes_pool(pooled)

    return _pool_partition(X_train), _pool_partition(X_test)
    

In [7]:
def svm_classifier(X_train, X_test, Y_train, Y_test):
    """Grid-search the ``C`` of an RBF-kernel SVM and evaluate it on the test set.

    The search uses ``cv=10`` with ``f1_micro`` scoring on the training data,
    refits the best setting on the whole training set, and then predicts the
    test set.

    Args:
        X_train: Training features.
        X_test: Test features.
        Y_train: Training targets.
        Y_test: Test targets.

    Returns:
        Tuple ``(report, best_estimator)``: the text classification report for
        the test predictions (4 digits) and the refitted best estimator.
    """
    # NOTE(review): features reach the RBF kernel without any scaling inside
    # this function -- confirm that is intended for these inputs.
    base_svm = svm.SVC(random_state=0, kernel="rbf", gamma="scale", class_weight="balanced")
    c_grid = [0.0001, 0.0003, 0.0005, 0.001, 0.003, 0.005, 0.01, 0.03, 0.05, 0.1, 0.3, 0.5, 1, 3, 5, 10]
    search = GridSearchCV(
        estimator=base_svm,
        param_grid={"C": c_grid},
        n_jobs=-1,
        cv=10,
        scoring='f1_micro',
        refit=True,
    )

    search.fit(X_train, Y_train)
    test_predictions = search.predict(X_test)
    report = classification_report(Y_test, test_predictions, digits=4)
    return report, search.best_estimator_

### Librosa

In [8]:
# Load the pickled librosa feature dictionary and join it with the labels.
# NOTE(review): pickle.load can execute arbitrary code from the file; only use
# it on feature files produced by this project.
# NOTE(review): encoding='latin1' is presumably needed because the pickle was
# written under Python 2 -- confirm.
with open('../../audio_features/feat_dict_librosa_lld.pickle', 'rb') as f:
    librosa_audio_features = pickle.load(f, encoding='latin1')
    
model_data = get_model_data(librosa_audio_features)

In [9]:
# Add an integer code for each speaker name as a new `speaker_encode` column
# (this modifies `model_data` in place).
# NOTE(review): the codes are arbitrary integers; if they are used as a numeric
# model input, a one-hot encoding may be more appropriate -- confirm.
le = preprocessing.LabelEncoder()
model_data['speaker_encode'] = le.fit_transform(model_data['speaker'])
model_data.head(5)

Unnamed: 0,audio_feature,sarcasm,sarcasm_type,speaker,speaker_encode
0,"[[-386.6164855957031, -649.6673512776692, -633...",0.0,NONE,SHELDON,25
1,"[[-255.5221405029297, -484.69307309105284, -52...",0.0,NONE,PENNY,15
2,"[[-569.0548095703125, -381.4147456242488, -221...",0.0,NONE,RAJ,21
3,"[[-237.61074829101562, -211.002773845897, -382...",1.0,PRO,HOWARD,7
4,"[[-530.5701293945312, -374.83951552370763, -42...",0.0,NONE,SHELDON,25


### Speaker Independent

In [10]:
# Speaker-independent setup: only the audio features are model inputs, the
# target is `sarcasm`, and the split is stratified on `sarcasm_type`.
# NOTE(review): the speaker-dependent section stratifies on `sarcasm` instead;
# confirm the difference is intentional.
X_train, X_test, Y_train, Y_test = get_train_test_split(model_data, ['audio_feature'], 'sarcasm', 'sarcasm_type')
# Build one flattened train/test pair per pooling strategy from the same split.
X_train_mean, X_test_mean = get_pooled_data(X_train, X_test, "mean")
X_train_median, X_test_median = get_pooled_data(X_train, X_test, "median")
X_train_max, X_test_max = get_pooled_data(X_train, X_test, "max")
X_train_min, X_test_min = get_pooled_data(X_train, X_test, "min")
X_train_sum, X_test_sum = get_pooled_data(X_train, X_test, "sum")

Train:  (961, 1) (961,) Test:  ((241, 1), (241,))
<class 'pandas.core.frame.DataFrame'>


In [11]:
# Display the mean-pooled training features (one `feat_<i>` column per pooled value).
X_train_mean

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_680,feat_681,feat_682,feat_683,feat_684,feat_685,feat_686,feat_687,feat_688,feat_689
424,-475.214443,22.230255,-30.389343,9.356083,-25.193953,-4.742029,-18.190674,3.859700,-5.164784,-0.245572,...,1.125484e-06,-8.350791e-06,-2.448649e-05,-2.444950e-06,1.773051e-06,-5.933391e-06,5.577799e-06,2.768175e-06,-2.865543e-08,2457.913572
98,-383.700522,44.330383,-1.143299,31.240711,2.512837,-4.263235,-0.377138,-9.900404,-29.065777,-8.333339,...,-7.951677e-07,-3.734744e-06,-8.925929e-06,-4.285647e-06,-2.841774e-06,-1.403442e-06,-8.258487e-07,-3.735531e-06,-9.697663e-06,2281.819354
96,-408.116363,34.196676,-17.684775,10.452450,-8.696273,-4.746235,-4.298050,-0.913800,-11.862584,-2.972479,...,-9.458136e-04,-1.064999e-04,-1.579285e-06,-5.513255e-04,-9.186812e-05,-1.953621e-05,-5.345652e-05,-2.036902e-06,-1.151531e-06,2683.551936
648,-549.895570,52.015751,-17.792252,42.007593,-15.245116,6.626003,-6.957200,0.831902,-7.603333,-3.962928,...,-1.512497e-07,-1.164477e-07,-6.910331e-08,-3.674868e-08,-3.853535e-08,-4.781354e-08,-2.258065e-08,-2.754303e-08,-6.079017e-09,1542.658204
498,-484.033065,13.740776,-30.143910,11.784698,-32.461205,-12.111294,-4.164078,-10.356448,-19.124904,-11.152544,...,-1.259957e-05,-1.605140e-06,-3.267080e-05,-1.177216e-05,-4.930996e-05,-5.374148e-05,-5.310571e-05,-6.045012e-06,7.113467e-06,3213.760963
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,-503.934620,45.219435,-17.596261,18.482775,-10.065928,1.210883,-2.318956,-4.566677,-17.875577,4.333945,...,1.467485e-05,6.855907e-06,9.488974e-06,9.887191e-06,1.343416e-05,1.595856e-05,2.279976e-05,6.814564e-06,-1.620970e-06,2098.028743
504,-508.937384,66.037407,-13.812483,18.062318,-6.897021,1.604473,-8.603562,-6.208298,-15.863127,1.427535,...,8.296602e-07,-2.717651e-07,1.876983e-06,4.021389e-07,1.804765e-06,2.381020e-06,2.987660e-06,3.182019e-06,2.412659e-06,2108.397842
599,-502.236101,9.123266,-32.910932,-4.696678,-25.462556,-7.457479,-8.051113,-5.733243,-7.124014,-5.500740,...,-2.502448e-04,-1.317079e-04,-2.153736e-04,-1.838440e-04,-1.509853e-04,-6.235810e-05,-9.545638e-05,-5.277569e-05,-1.071856e-05,2467.338950
666,-536.851896,40.682628,-25.600732,8.490287,-6.783008,-20.551486,-14.880793,-1.451396,-15.454609,-8.472469,...,-5.961599e-07,-2.506771e-07,-5.134845e-07,-6.879201e-07,-2.686137e-07,-3.054581e-07,-4.152518e-07,-6.974888e-08,-1.511985e-07,2421.520774


In [12]:
# Tune and evaluate one SVM per pooling strategy; each call returns the test-set
# classification report and the best estimator found by the grid search.
mean_report, mean_best_est = svm_classifier(X_train_mean, X_test_mean, Y_train, Y_test)
median_report, median_best_est = svm_classifier(X_train_median, X_test_median, Y_train, Y_test)
max_report, max_best_est = svm_classifier(X_train_max, X_test_max, Y_train, Y_test)
min_report, min_best_est = svm_classifier(X_train_min, X_test_min, Y_train, Y_test)
sum_report, sum_best_est = svm_classifier(X_train_sum, X_test_sum, Y_train, Y_test)

In [13]:
# Print, for every pooling strategy, a banner followed by its classification
# report and the selected estimator; sections are separated by print("\n").
_report_sections = [
    ("mean", mean_report, mean_best_est),
    ("median", median_report, median_best_est),
    ("max", max_report, max_best_est),
    ("min", min_report, min_best_est),
    ("sum", sum_report, sum_best_est),
]
for _position, (_pool_name, _pool_report, _pool_estimator) in enumerate(_report_sections):
    if _position > 0:
        print("\n")
    print("********************************{} report********************************".format(_pool_name))
    print(_pool_report)
    print(_pool_estimator)

********************************mean report********************************
              precision    recall  f1-score   support

         0.0       0.65      0.62      0.63       120
         1.0       0.64      0.67      0.65       121

    accuracy                           0.64       241
   macro avg       0.64      0.64      0.64       241
weighted avg       0.64      0.64      0.64       241

SVC(C=3, class_weight='balanced', random_state=0)


********************************median report********************************
              precision    recall  f1-score   support

         0.0       0.66      0.61      0.63       120
         1.0       0.64      0.69      0.67       121

    accuracy                           0.65       241
   macro avg       0.65      0.65      0.65       241
weighted avg       0.65      0.65      0.65       241

SVC(C=5, class_weight='balanced', random_state=0)


********************************max report********************************
             

### Speaker Dependent

In [14]:
# Speaker-dependent setup: the integer speaker code is a model input alongside
# the audio features; the split is stratified on the `sarcasm` target.
# This rebinds X_train/X_test/Y_train/Y_test and the pooled frames from the
# speaker-independent section.
# NOTE(review): the speaker-independent section stratifies on `sarcasm_type`;
# confirm the difference is intentional.
X_train, X_test, Y_train, Y_test = get_train_test_split(model_data, ['audio_feature', 'speaker_encode'], 'sarcasm', 'sarcasm')
# Build one flattened train/test pair per pooling strategy from the same split.
X_train_mean, X_test_mean = get_pooled_data(X_train, X_test, "mean")
X_train_median, X_test_median = get_pooled_data(X_train, X_test, "median")
X_train_max, X_test_max = get_pooled_data(X_train, X_test, "max")
X_train_min, X_test_min = get_pooled_data(X_train, X_test, "min")
X_train_sum, X_test_sum = get_pooled_data(X_train, X_test, "sum")

Train:  (961, 2) (961,) Test:  ((241, 2), (241,))


In [15]:
# Display the mean-pooled training features, now with a leading
# `feat_speaker_encode` column.
X_train_mean

Unnamed: 0,feat_speaker_encode,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,...,feat_680,feat_681,feat_682,feat_683,feat_684,feat_685,feat_686,feat_687,feat_688,feat_689
972,13,-417.934630,25.649501,-23.124400,7.151210,-33.514285,1.015226,-10.072275,-1.772847,-22.075682,...,3.125001e-05,3.363074e-04,-1.859900e-04,5.771899e-04,-3.505927e-05,2.355367e-04,-6.211684e-05,-2.271472e-05,-3.628723e-05,2224.894796
226,7,-542.199661,63.753865,-17.823581,31.076114,-12.121194,-10.728873,0.987807,2.401540,-30.571569,...,1.916038e-06,1.976086e-06,6.936923e-07,5.992640e-07,5.200017e-07,5.180726e-07,2.188896e-06,6.610668e-06,1.807837e-05,2212.197218
971,2,-554.604940,91.562807,-18.606515,3.485788,-12.768727,-8.257111,-5.398056,-11.684904,-4.700527,...,9.532620e-09,1.629619e-08,1.944741e-08,2.692590e-08,1.544791e-08,3.824641e-08,3.338188e-08,5.296624e-09,2.503845e-09,1580.497854
121,25,-385.906356,55.885129,-18.641027,24.667320,-5.517603,-13.886243,-6.189629,0.752990,-13.800856,...,-2.295678e-06,-7.863721e-06,-1.177124e-05,-1.388764e-05,-1.749519e-06,-7.899977e-06,-2.108979e-06,-2.006001e-06,-3.428370e-06,2039.398631
989,2,-596.360484,88.436462,-19.024528,24.429874,-9.664282,-14.308451,-4.053717,-13.884195,-16.080202,...,1.860968e-07,-5.192429e-08,-1.236025e-08,4.932657e-09,-3.912226e-09,-1.020064e-07,-5.402432e-08,-1.545107e-08,-1.335328e-08,1815.053709
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,2,-442.224633,59.950655,-19.250685,15.311796,-15.094549,-5.456449,2.997406,0.561126,-3.082117,...,-1.976777e-05,-2.113976e-05,7.821691e-06,-6.673777e-06,-3.541251e-05,-1.159624e-06,-1.837707e-06,-2.205160e-06,-4.564579e-06,1897.248526
1132,6,-487.702368,63.768049,-14.794437,-2.686840,9.832649,-6.248889,-13.965803,-8.218028,-10.715918,...,-6.406918e-05,-1.725976e-04,-7.241511e-05,-6.703549e-05,-7.335419e-05,-5.783737e-05,-3.032331e-05,-4.453566e-05,-1.577433e-05,2178.411268
196,25,-465.177863,56.005337,-37.486298,9.183127,-16.623523,-0.363870,0.132237,-15.156362,-23.317768,...,7.376517e-08,-6.329895e-07,-1.188611e-06,-3.852324e-06,2.982007e-07,-5.830455e-07,5.253016e-08,5.692598e-08,-5.445352e-08,1935.028193
307,1,-553.055326,20.481338,-24.308689,-5.458126,-16.244791,-8.686555,-2.590983,-6.526678,-12.343914,...,-9.871221e-09,-8.811425e-09,-7.197934e-09,-8.138662e-09,-7.735329e-09,-8.187274e-09,-6.085815e-09,-1.330235e-09,-6.351616e-10,3260.421903


In [16]:
# Tune and evaluate one SVM per pooling strategy on the speaker-dependent
# features; this overwrites the report/estimator names from the earlier section.
mean_report, mean_best_est = svm_classifier(X_train_mean, X_test_mean, Y_train, Y_test)
median_report, median_best_est = svm_classifier(X_train_median, X_test_median, Y_train, Y_test)
max_report, max_best_est = svm_classifier(X_train_max, X_test_max, Y_train, Y_test)
min_report, min_best_est = svm_classifier(X_train_min, X_test_min, Y_train, Y_test)
sum_report, sum_best_est = svm_classifier(X_train_sum, X_test_sum, Y_train, Y_test)

In [17]:
# Print, for every pooling strategy, a banner followed by its classification
# report and the selected estimator; sections are separated by print("\n").
_report_sections = [
    ("mean", mean_report, mean_best_est),
    ("median", median_report, median_best_est),
    ("max", max_report, max_best_est),
    ("min", min_report, min_best_est),
    ("sum", sum_report, sum_best_est),
]
for _position, (_pool_name, _pool_report, _pool_estimator) in enumerate(_report_sections):
    if _position > 0:
        print("\n")
    print("********************************{} report********************************".format(_pool_name))
    print(_pool_report)
    print(_pool_estimator)

********************************mean report********************************
              precision    recall  f1-score   support

         0.0       0.57      0.55      0.56       121
         1.0       0.56      0.59      0.58       120

    accuracy                           0.57       241
   macro avg       0.57      0.57      0.57       241
weighted avg       0.57      0.57      0.57       241

SVC(C=3, class_weight='balanced', random_state=0)


********************************median report********************************
              precision    recall  f1-score   support

         0.0       0.56      0.54      0.55       121
         1.0       0.55      0.57      0.56       120

    accuracy                           0.56       241
   macro avg       0.56      0.56      0.56       241
weighted avg       0.56      0.56      0.56       241

SVC(C=3, class_weight='balanced', random_state=0)


********************************max report********************************
             