In [1]:
import pandas as pd 
import pickle
import warnings
warnings.filterwarnings('ignore')
import numpy as np

from sklearn.svm import SVC
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, cross_val_score

In [2]:
# Load the scene-level label table; get_model_data() reads this module-level
# frame (columns SCENE, Sarcasm, Sarcasm_Type) when building the model data.
labels = pd.read_csv("../../data/scene_labels.csv")

In [3]:
# Preview the first five label rows.
labels.head(5)

Unnamed: 0,SCENE,KEY,SPEAKER,SHOW,Sarcasm,Sarcasm_Type
0,1_10004,1_10004_u,SHELDON,BBT,0.0,NONE
1,1_10009,1_10009_u,PENNY,BBT,0.0,NONE
2,1_1001,1_1001_u,RAJ,BBT,0.0,NONE
3,1_1003,1_1003_u,HOWARD,BBT,1.0,PRO
4,1_10190,1_10190_u,SHELDON,BBT,0.0,NONE


#### Perform mean, median, max, min and sum pooling on audio feature data

In [4]:
def get_audio_mean_pool(audio) -> np.ndarray:
    """Mean-pool every item of ``audio`` along axis 1.

    Args:
        audio: Iterable of 2-D array-likes (presumably features x frames --
            TODO confirm the axis meaning against the feature extractor).

    Returns:
        Array with one row per item, holding that item's per-row mean.
    """
    pooled_rows = []
    for feature_matrix in audio:
        pooled_rows.append(np.mean(feature_matrix, axis=1))
    return np.array(pooled_rows)
    
def get_audio_median_pool(audio) -> np.ndarray:
    """Median-pool every item of ``audio`` along axis 1.

    Args:
        audio: Iterable of 2-D array-likes (presumably features x frames --
            TODO confirm the axis meaning against the feature extractor).

    Returns:
        Array with one row per item, holding that item's per-row median.
    """
    pooled_rows = []
    for feature_matrix in audio:
        pooled_rows.append(np.median(feature_matrix, axis=1))
    return np.array(pooled_rows)
    
def get_audio_max_pool(audio) -> np.ndarray:
    """Max-pool every item of ``audio`` along axis 1.

    Args:
        audio: Iterable of 2-D array-likes (presumably features x frames --
            TODO confirm the axis meaning against the feature extractor).

    Returns:
        Array with one row per item, holding that item's per-row maximum.
    """
    pooled_rows = []
    for feature_matrix in audio:
        pooled_rows.append(np.max(feature_matrix, axis=1))
    return np.array(pooled_rows)

def get_audio_min_pool(audio) -> np.ndarray:
    """Min-pool every item of ``audio`` along axis 1.

    Args:
        audio: Iterable of 2-D array-likes (presumably features x frames --
            TODO confirm the axis meaning against the feature extractor).

    Returns:
        Array with one row per item, holding that item's per-row minimum.
    """
    pooled_rows = []
    for feature_matrix in audio:
        pooled_rows.append(np.min(feature_matrix, axis=1))
    return np.array(pooled_rows)

def get_audio_sum_pool(audio) -> np.ndarray:
    """Sum-pool every item of ``audio`` along axis 1.

    Args:
        audio: Iterable of 2-D array-likes (presumably features x frames --
            TODO confirm the axis meaning against the feature extractor).

    Returns:
        Array with one row per item, holding that item's per-row sum.
    """
    pooled_rows = []
    for feature_matrix in audio:
        pooled_rows.append(np.sum(feature_matrix, axis=1))
    return np.array(pooled_rows)

In [5]:
def get_model_data(audio_features, scene_labels=None):
    """Join per-utterance audio features with their sarcasm labels.

    Args:
        audio_features: Mapping from audio file key (``"<SCENE>_u.wav"``) to
            the feature data stored for that utterance.
        scene_labels: DataFrame with ``SCENE``, ``Sarcasm`` and
            ``Sarcasm_Type`` columns. Defaults to the notebook-level
            ``labels`` frame, which keeps existing one-argument calls working.

    Returns:
        DataFrame with columns ``audio_feature``, ``sarcasm`` and
        ``sarcasm_type``: one row per label row, in label order, with a fresh
        0..n-1 index.

    Raises:
        KeyError: If a scene has no matching entry in ``audio_features``.
    """
    if scene_labels is None:
        scene_labels = labels

    # Collect plain records and build the frame once. DataFrame.append was
    # removed in pandas 2.0, and appending row by row copies the whole frame
    # on every iteration.
    records = [
        {
            'audio_feature': audio_features[scene + "_u.wav"],
            'sarcasm': sarcasm,
            'sarcasm_type': sarcasm_type,
        }
        for scene, sarcasm, sarcasm_type in zip(
            scene_labels["SCENE"],
            scene_labels["Sarcasm"],
            scene_labels["Sarcasm_Type"],
        )
    ]
    # Passing columns explicitly keeps the schema stable even with no rows.
    return pd.DataFrame(records, columns=['audio_feature', 'sarcasm', 'sarcasm_type'])

In [6]:
def get_train_test_split(model_data, x_column, y_column, stratify_column):
    """Split ``model_data`` 80/20 into train and test sets, stratified.

    Args:
        model_data: DataFrame holding the feature, target and stratification
            columns.
        x_column: Column selector for the features.
        y_column: Column selector for the target.
        stratify_column: Column whose values drive the stratified split.

    Returns:
        ``(X_train, X_test, Y_train, Y_test)`` as returned by
        ``train_test_split``. The shapes of all four are printed first.
    """
    features = model_data[x_column]
    targets = model_data[y_column]
    strata = model_data[stratify_column]

    # Fixed seed so that re-running the notebook gives the same partition.
    split = train_test_split(
        features,
        targets,
        train_size=0.8,
        test_size=0.2,
        random_state=0,
        shuffle=True,
        stratify=strata,
    )
    X_train, X_test, Y_train, Y_test = split

    test_shapes = (X_test.shape, Y_test.shape)
    print("Train: ", X_train.shape, Y_train.shape, "Test: ", test_shapes)
    return X_train, X_test, Y_train, Y_test

def process_dataframes_pool(data):
    """Expand the ``audio_feature`` column into one column per feature.

    Each ``audio_feature`` entry is turned into a row of separate columns,
    the original ``audio_feature`` column is dropped, and every resulting
    column name gets the ``feat_`` prefix. ``data`` itself is not modified.
    """
    expanded = data["audio_feature"].apply(pd.Series)
    combined = pd.concat([data, expanded], axis=1)
    without_raw = combined.drop(columns=['audio_feature'])
    return without_raw.add_prefix('feat_')

def get_pooled_data(X_train, X_test, pool_type):
    """Pool the ``audio_feature`` column of both splits and expand it.

    Args:
        X_train: Training frame with an ``audio_feature`` column.
        X_test: Test frame with an ``audio_feature`` column.
        pool_type: One of ``"mean"``, ``"median"``, ``"max"``, ``"min"`` or
            ``"sum"``.

    Returns:
        ``(train, test)``: copies of the inputs whose ``audio_feature`` column
        has been pooled and then expanded by ``process_dataframes_pool``. The
        input frames are left untouched.

    Raises:
        ValueError: If ``pool_type`` is not a supported name. Previously the
            function fell through and returned ``None``, which only surfaced
            later as an unpacking error at the call site.
    """
    # One table in place of five copy-pasted branches.
    poolers = {
        "mean": get_audio_mean_pool,
        "median": get_audio_median_pool,
        "max": get_audio_max_pool,
        "min": get_audio_min_pool,
        "sum": get_audio_sum_pool,
    }
    if pool_type not in poolers:
        raise ValueError(
            "Unknown pool_type {!r}; expected one of {}".format(pool_type, sorted(poolers))
        )
    pool = poolers[pool_type]

    def _pooled_copy(frame):
        # Work on a copy so the caller's split can be reused for other pool types.
        pooled = frame.copy()
        pooled["audio_feature"] = pool(frame["audio_feature"]).tolist()
        return pooled

    train_pooled = _pooled_copy(X_train)
    test_pooled = _pooled_copy(X_test)
    return process_dataframes_pool(train_pooled), process_dataframes_pool(test_pooled)

In [7]:
def svm_classifier(X_train, X_test, Y_train, Y_test):
    """Tune an RBF-kernel SVC over ``C`` and evaluate it on the test split.

    A grid search with ``cv=10`` and ``f1_micro`` scoring picks ``C`` from
    ``[0.001, 0.01, 0.1, 1, 10]``. The winning configuration is refit on the
    full training data and used to predict ``X_test``.

    Returns:
        ``(report, best_estimator)``: the text produced by
        ``classification_report`` for the test predictions, and the refit
        best estimator.
    """
    # NOTE(review): features are passed to the SVC without scaling; confirm
    # this is intended, since RBF kernels are sensitive to feature magnitude.
    base_model = svm.SVC(
        kernel="rbf",
        gamma="scale",
        class_weight="balanced",
        random_state=0,
    )
    search = GridSearchCV(
        estimator=base_model,
        param_grid={"C": [0.001, 0.01, 0.1, 1, 10]},
        scoring='f1_micro',
        cv=10,
        n_jobs=-1,
        refit=True,
    )
    search.fit(X_train, Y_train)

    predictions = search.predict(X_test)
    report = classification_report(Y_test, predictions)
    return report, search.best_estimator_

### OpenSmile

In [8]:
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load this file if it comes from a trusted source.
# encoding='latin1' is passed through to the unpickler -- presumably the file
# was written under Python 2; TODO confirm.
with open('../../audio_features/feat_dict_opensmile_lld.pickle', 'rb') as f:
    opensmile_audio_features = pickle.load(f, encoding='latin1')
    
# Pair every labelled scene with its loaded audio features.
model_data = get_model_data(opensmile_audio_features)

In [9]:
# Split once (target: 'sarcasm', stratified on 'sarcasm_type'), then derive
# one pooled train/test pair per pooling strategy from that same split.
X_train, X_test, Y_train, Y_test = get_train_test_split(model_data, ['audio_feature'], 'sarcasm', 'sarcasm_type')
X_train_mean, X_test_mean = get_pooled_data(X_train, X_test, "mean")
X_train_median, X_test_median = get_pooled_data(X_train, X_test, "median")
X_train_max, X_test_max = get_pooled_data(X_train, X_test, "max")
X_train_min, X_test_min = get_pooled_data(X_train, X_test, "min")
X_train_sum, X_test_sum = get_pooled_data(X_train, X_test, "sum")

Train:  (961, 1) (961,) Test:  ((241, 1), (241,))


In [10]:
# Inspect the mean-pooled training features (one feat_<i> column per feature).
X_train_mean

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_15,feat_16,feat_17,feat_18,feat_19,feat_20,feat_21,feat_22,feat_23,feat_24
424,1.491213,-2.384312,11.865175,0.034428,-0.008572,0.472744,-1.601864,-9.782475,-13.237856,-19.647501,...,3.225135,868.375854,1177.199463,-23.723825,1848.764526,1023.078369,-30.769655,2843.863525,992.648376,-31.036385
98,2.776465,1.257948,7.120889,0.032892,-0.001380,1.320131,5.244477,0.839845,18.091770,-5.875186,...,4.692653,835.097534,1133.149170,-70.327477,1776.199707,1089.311035,-69.854668,2787.739502,1004.648682,-69.044617
96,1.842652,4.424860,6.345959,0.027093,-0.007225,0.575260,3.494483,-8.549600,7.381177,-6.600612,...,-1.885522,830.398865,1076.222168,-47.320107,1811.624146,1035.461548,-45.879559,2777.460205,1026.260620,-46.149082
648,0.855282,-7.657495,19.482576,0.016400,-0.010407,0.259895,11.910596,5.150325,8.070842,-15.966461,...,7.475876,727.954712,1318.186523,-101.488045,1747.598022,1038.176025,-104.434929,2709.564453,924.384277,-102.959740
498,1.539908,-1.259865,12.114417,0.057048,-0.002199,0.531530,2.442333,-14.394490,-1.858237,-33.073872,...,6.306717,839.031128,1236.971924,-51.545940,1806.361450,1022.592957,-56.403263,2808.417725,905.699219,-57.031853
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,1.170683,-0.018558,9.858979,0.029423,-0.005392,0.348891,6.774230,-3.156608,9.328506,-10.381284,...,4.061554,762.469421,1208.681519,-68.211494,1734.914429,1159.418823,-69.616028,2715.551514,901.365295,-69.640709
504,0.841725,0.602596,10.666152,0.027452,-0.004144,0.215543,11.593579,-2.820777,9.249925,-9.564174,...,6.431040,714.353516,1239.544434,-56.036236,1675.160400,1075.469116,-53.917629,2678.739014,932.215759,-60.244041
599,1.279698,4.213064,12.500858,0.046630,0.002767,0.436650,-5.776570,-21.415548,-14.877404,-16.120607,...,5.134602,890.250854,1163.025024,-49.319115,1839.877075,1097.528076,-49.490597,2823.563232,951.373413,-50.758400
666,0.747540,-3.741484,14.360613,0.040962,-0.009081,0.188456,9.221932,-8.742914,-6.652271,-17.292669,...,3.996136,860.950500,1219.432861,-66.943810,1862.543213,993.043945,-70.166367,2841.324219,991.494629,-72.706589


In [11]:
# Train and evaluate one grid-searched SVM per pooling strategy; every run
# uses the same Y_train / Y_test labels.
mean_report, mean_best_est = svm_classifier(X_train_mean, X_test_mean, Y_train, Y_test)
median_report, median_best_est = svm_classifier(X_train_median, X_test_median, Y_train, Y_test)
max_report, max_best_est = svm_classifier(X_train_max, X_test_max, Y_train, Y_test)
min_report, min_best_est = svm_classifier(X_train_min, X_test_min, Y_train, Y_test)
sum_report, sum_best_est = svm_classifier(X_train_sum, X_test_sum, Y_train, Y_test)

In [12]:
# Print the classification report and the selected estimator for each pooling
# strategy, with a blank-line separator between consecutive sections.
pooling_results = [
    ("mean", mean_report, mean_best_est),
    ("median", median_report, median_best_est),
    ("max", max_report, max_best_est),
    ("min", min_report, min_best_est),
    ("sum", sum_report, sum_best_est),
]
for position, (pool_name, report, best_est) in enumerate(pooling_results):
    if position > 0:
        # Separator goes between sections only, never after the last one.
        print("\n")
    print(f"********************************{pool_name} report********************************")
    print(report)
    print(best_est)

********************************mean report********************************
              precision    recall  f1-score   support

         0.0       0.59      0.57      0.58       120
         1.0       0.59      0.61      0.60       121

    accuracy                           0.59       241
   macro avg       0.59      0.59      0.59       241
weighted avg       0.59      0.59      0.59       241

SVC(C=10, class_weight='balanced', random_state=0)


********************************median report********************************
              precision    recall  f1-score   support

         0.0       0.65      0.39      0.49       120
         1.0       0.57      0.79      0.66       121

    accuracy                           0.59       241
   macro avg       0.61      0.59      0.58       241
weighted avg       0.61      0.59      0.58       241

SVC(C=10, class_weight='balanced', random_state=0)


********************************max report********************************
           