In [1]:
import pandas as pd 
import pickle
import warnings
warnings.filterwarnings('ignore')
import numpy as np

from sklearn.svm import SVC
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing

In [2]:
# Load the scene label table. get_model_data() reads this module-level
# `labels` frame, so this cell must run before any feature table is built.
labels = pd.read_csv("../../../data/scene_labels.csv")

In [3]:
# Preview the first five label rows.
labels.head(5)

Unnamed: 0,SCENE,KEY,SPEAKER,SHOW,Sarcasm,Sarcasm_Type
0,1_10004,1_10004_u,SHELDON,BBT,0.0,NONE
1,1_10009,1_10009_u,PENNY,BBT,0.0,NONE
2,1_1001,1_1001_u,RAJ,BBT,0.0,NONE
3,1_1003,1_1003_u,HOWARD,BBT,1.0,PRO
4,1_10190,1_10190_u,SHELDON,BBT,0.0,NONE


#### Perform mean, median, max, min and sum pooling on audio feature data

In [4]:
def get_audio_mean_pool(audio) -> np.ndarray:
    """Mean-pool every item of ``audio`` along axis 1.

    Each item is reduced with ``np.mean(..., axis=1)`` and the reduced items
    are gathered, in order, into one array.
    Presumably every item is a 2-D feature matrix of a single clip -- confirm
    against the caller.
    """
    pooled = []
    for clip_matrix in audio:
        pooled.append(np.mean(clip_matrix, axis=1))
    return np.array(pooled)
    
def get_audio_median_pool(audio) -> np.ndarray:
    """Median-pool every item of ``audio`` along axis 1.

    Each item is reduced with ``np.median(..., axis=1)`` and the reduced items
    are gathered, in order, into one array.
    Presumably every item is a 2-D feature matrix of a single clip -- confirm
    against the caller.
    """
    per_clip = map(lambda clip_matrix: np.median(clip_matrix, axis=1), audio)
    return np.array(list(per_clip))
    
def get_audio_max_pool(audio) -> np.ndarray:
    """Max-pool every item of ``audio`` along axis 1.

    Each item is reduced with ``np.max(..., axis=1)`` and the reduced items
    are gathered, in order, into one array.
    Presumably every item is a 2-D feature matrix of a single clip -- confirm
    against the caller.
    """
    def _row_max(clip_matrix):
        # Largest value of each row of one item.
        return np.max(clip_matrix, axis=1)

    return np.array([_row_max(clip_matrix) for clip_matrix in audio])

def get_audio_min_pool(audio) -> np.ndarray:
    """Min-pool every item of ``audio`` along axis 1.

    Each item is reduced with ``np.min(..., axis=1)`` and the reduced items
    are gathered, in order, into one array.
    Presumably every item is a 2-D feature matrix of a single clip -- confirm
    against the caller.
    """
    row_minima = list(np.min(clip_matrix, axis=1) for clip_matrix in audio)
    return np.array(row_minima)

def get_audio_sum_pool(audio) -> np.ndarray:
    """Sum-pool every item of ``audio`` along axis 1.

    Each item is reduced with ``np.sum(..., axis=1)`` and the reduced items
    are gathered, in order, into one array.
    Presumably every item is a 2-D feature matrix of a single clip -- confirm
    against the caller.
    """
    totals = []
    for clip_matrix in audio:
        row_totals = np.sum(clip_matrix, axis=1)
        totals.append(row_totals)
    return np.array(totals)

In [5]:
def get_model_data(audio_features, labels_df=None):
    """Pair every labelled scene with its audio feature.

    Parameters
    ----------
    audio_features : mapping
        Looked up with the key ``"<SCENE>_u.wav"`` for every label row.
    labels_df : pandas.DataFrame, optional
        Label table providing the ``SCENE``, ``Sarcasm``, ``Sarcasm_Type`` and
        ``SPEAKER`` columns. When omitted, the notebook-level ``labels`` frame
        is used, which is what the original implementation always did.

    Returns
    -------
    pandas.DataFrame
        One row per label row, indexed 0..n-1, with the columns
        ``audio_feature``, ``sarcasm``, ``sarcasm_type`` and ``speaker``.

    Raises
    ------
    KeyError
        If a scene has no matching entry in ``audio_features``.
    """
    if labels_df is None:
        labels_df = labels

    columns = ['audio_feature', 'sarcasm', 'sarcasm_type', 'speaker']

    # Collect plain records and build the frame once. The previous
    # row-by-row ``DataFrame.append`` copied the whole frame on every
    # iteration and no longer exists in pandas >= 2.0.
    records = [
        {'audio_feature': audio_features[scene + "_u.wav"],
         'sarcasm': sarcasm,
         'sarcasm_type': sarcasm_type,
         'speaker': speaker}
        for scene, sarcasm, sarcasm_type, speaker in zip(
            labels_df["SCENE"],
            labels_df["Sarcasm"],
            labels_df["Sarcasm_Type"],
            labels_df["SPEAKER"])
    ]
    # Passing ``columns`` keeps the schema stable even when there are no rows.
    return pd.DataFrame(records, columns=columns)

In [6]:
def get_train_test_split(model_data, x_columns, y_column, stratify_column):
    """Split ``model_data`` into shuffled, stratified 80/20 train and test parts.

    Parameters
    ----------
    model_data : pandas.DataFrame
        Table holding the feature, target and stratification columns.
    x_columns : list of str
        Columns used as model inputs.
    y_column : str
        Column used as the prediction target.
    stratify_column : str
        Column whose class proportions are preserved in both parts.

    Returns
    -------
    tuple
        ``(X_train, X_test, Y_train, Y_test)``.

    The shapes of the four parts and the type of the training inputs are
    printed as a side effect.
    """
    inputs = model_data[x_columns]
    target = model_data[y_column]

    # Fixed seed so every run of the notebook yields the same partition.
    split_options = dict(
        train_size=0.8,
        test_size=0.2,
        random_state=42,
        shuffle=True,
        stratify=model_data[stratify_column],
    )
    train_x, test_x, train_y, test_y = train_test_split(inputs, target, **split_options)

    test_shapes = (test_x.shape, test_y.shape)
    print("Train: ", train_x.shape, train_y.shape, "Test: ", test_shapes)
    print(type(train_x))
    return train_x, test_x, train_y, test_y

def process_dataframes_pool(data):
    """Expand the ``audio_feature`` column into one column per vector element.

    Every value of ``audio_feature`` is turned into a row of its own columns
    (named by element position). Those columns are placed after the remaining
    columns of ``data``, and every resulting column name is prefixed with
    ``feat_``. ``data`` itself is left unmodified.
    """
    expanded = data.audio_feature.apply(pd.Series)
    remaining = data.drop(columns=['audio_feature'])
    widened = pd.concat([remaining, expanded], axis=1)
    return widened.add_prefix('feat_')
    

In [7]:
def svm_classifier(X_train, X_test, Y_train, Y_test,
                   c_values=None, cv=10, scoring='f1_micro'):
    """Grid-search an RBF SVM over ``C`` and report its test-set performance.

    Parameters
    ----------
    X_train, Y_train
        Training inputs and labels used for the cross-validated grid search.
    X_test, Y_test
        Held-out inputs and labels used only for the final report.
    c_values : iterable of float, optional
        Candidate values of the regularisation parameter ``C``. Defaults to
        the 16-value grid from 0.0001 to 10 that this notebook has always used.
    cv : int, optional
        Number of cross-validation folds (default 10).
    scoring : str, optional
        Metric used to pick the best ``C`` (default ``'f1_micro'``).

    Returns
    -------
    tuple
        ``(report, best_estimator)``: the ``classification_report`` output for
        the test predictions (4 digits) and the refitted best estimator.
    """
    if c_values is None:
        # The original wrote this as ``C = C = [...]``; the doubled
        # assignment was a typo with no effect.
        c_values = [0.0001, 0.0003, 0.0005, 0.001, 0.003, 0.005, 0.01, 0.03,
                    0.05, 0.1, 0.3, 0.5, 1, 3, 5, 10]

    svm_clf = svm.SVC(random_state=0, kernel="rbf", gamma="scale", class_weight="balanced")
    gscv_clf = GridSearchCV(
        estimator=svm_clf,
        param_grid=dict(C=list(c_values)),
        n_jobs=-1,
        cv=cv,
        scoring=scoring,
        refit=True)

    gscv_clf.fit(X_train, Y_train)
    # With refit=True the search object predicts with the best estimator.
    Y_test_pred = gscv_clf.predict(X_test)
    report = classification_report(Y_test, Y_test_pred, digits=4)
    return report, gscv_clf.best_estimator_

### TRILL audio features

In [9]:
# Load the pre-computed audio features from disk.
# NOTE(review): pickle.load can execute arbitrary code -- only load feature
# files that come from a trusted source.
# NOTE(review): the variable is named `librosa_audio_features`, but the file
# read is trill_features.pickle -- confirm which feature set is intended.
with open('../../../audio_features/trill_features.pickle', 'rb') as f:
    librosa_audio_features = pickle.load(f, encoding='latin1')
    
# Pair every labelled scene with its audio feature.
model_data = get_model_data(librosa_audio_features)

In [10]:
# Map each speaker name to an integer id and store it as a new column.
# The encoder is fitted on the full table, before any train/test split.
le = preprocessing.LabelEncoder()
model_data['speaker_encode'] = le.fit_transform(model_data['speaker'])
model_data.head(5)

Unnamed: 0,audio_feature,sarcasm,sarcasm_type,speaker,speaker_encode
0,"[-1.406311, -0.46280488, -1.1319538, -1.194219...",0.0,NONE,SHELDON,25
1,"[-1.4183334, -0.36521277, -1.1331908, -1.17071...",0.0,NONE,PENNY,15
2,"[-1.3609562, -0.27142158, -0.63655925, -1.0188...",0.0,NONE,RAJ,21
3,"[-1.4286865, -0.31481665, -0.67340577, -0.9725...",1.0,PRO,HOWARD,7
4,"[-1.3821282, -0.4007631, -1.1102539, -1.178829...",0.0,NONE,SHELDON,25


### Speaker Independent

In [11]:
# Split with the audio feature as the only input, stratified on sarcasm_type.
# NOTE(review): rows are split at random, so one speaker can appear in both
# train and test. "Speaker independent" here only means the speaker id is not
# a feature -- confirm that this is the intended protocol.
X_train, X_test, Y_train, Y_test = get_train_test_split(model_data, ['audio_feature'], 'sarcasm', 'sarcasm_type')
# Expand the feature vectors into one feat_<i> column per element.
# NOTE(review): the `_mean` suffix suggests mean pooling, but no pooling
# function is called in this cell.
X_train_mean, X_test_mean = process_dataframes_pool(X_train), process_dataframes_pool(X_test)

Train:  (961, 1) (961,) Test:  ((241, 1), (241,))
<class 'pandas.core.frame.DataFrame'>


In [12]:
# Inspect the expanded training features of the speaker-independent setup.
X_train_mean

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_1014,feat_1015,feat_1016,feat_1017,feat_1018,feat_1019,feat_1020,feat_1021,feat_1022,feat_1023
424,-1.423094,-0.404519,-1.131059,-1.184439,2.471947,2.290250,1.031759,0.568991,1.652935,0.869289,...,-1.162612,-0.139947,-0.305690,-0.247433,0.739481,-0.989511,-2.269561,3.742936,0.263359,-2.695333
190,-1.430729,-0.405363,-1.129719,-1.177521,2.475554,2.166654,1.032673,0.554429,1.670064,0.846416,...,-1.166308,-0.075299,-0.292138,-0.233188,0.755889,-1.025950,-2.254203,3.758036,0.267499,-2.720352
1080,-1.422743,-0.365815,-1.104770,-1.171733,2.473014,2.210454,1.025295,0.567170,1.671288,0.842855,...,-1.165422,-0.067976,-0.282917,-0.231348,0.737755,-0.991106,-2.226220,3.681908,0.262515,-2.692864
973,-1.445915,-0.393820,-1.150608,-1.170797,2.476055,2.105887,1.051193,0.582195,1.642733,0.871222,...,-1.182927,-0.139150,-0.314663,-0.204058,0.741997,-1.019110,-2.296838,3.781696,0.251019,-2.740161
410,-1.527617,-0.417854,-1.243504,-1.188529,2.545276,1.658862,1.073739,0.559732,1.624065,0.995140,...,-1.122036,-0.441546,-0.385793,-0.146994,0.739058,-1.033458,-2.524935,3.921366,0.253506,-2.771898
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
738,-1.535696,-0.289886,-1.241670,-1.159724,2.535906,1.872112,1.115768,0.630956,1.565267,0.990527,...,-1.210930,-0.447864,-0.393628,-0.083342,0.711424,-0.996131,-2.516047,3.951467,0.213092,-2.845674
647,-1.502283,-0.319235,-1.221090,-1.165204,2.520394,1.928990,1.100912,0.615718,1.587839,0.953703,...,-1.203671,-0.350470,-0.369357,-0.122106,0.717571,-1.002789,-2.454391,3.894517,0.211688,-2.814557
711,-1.440577,-0.315460,-1.173051,-1.163077,2.489651,2.200766,1.060644,0.600175,1.629129,0.908617,...,-1.184283,-0.248647,-0.349260,-0.195685,0.707577,-0.954186,-2.337197,3.788609,0.229639,-2.754420
48,-1.421200,-0.357135,-1.149428,-1.167977,2.490263,2.065675,1.036413,0.576298,1.652899,0.869402,...,-1.158164,-0.163340,-0.320960,-0.211799,0.728438,-0.980004,-2.272155,3.734425,0.251807,-2.738416


In [13]:
# Grid-search the SVM on the training part and score it on the held-out part.
mean_report, mean_best_est = svm_classifier(X_train_mean, X_test_mean, Y_train, Y_test)

In [14]:
# Show the test-set classification report and the selected estimator.
print("********************************mean report********************************")
print(mean_report)
print(mean_best_est)

********************************mean report********************************
              precision    recall  f1-score   support

         0.0     0.5079    0.8083    0.6238       120
         1.0     0.5400    0.2231    0.3158       121

    accuracy                         0.5145       241
   macro avg     0.5239    0.5157    0.4698       241
weighted avg     0.5240    0.5145    0.4692       241

SVC(C=3, class_weight='balanced', random_state=0)


### Speaker Dependent

In [15]:
# Split again, this time with the integer speaker id as an additional input.
# NOTE(review): speaker_encode is an arbitrary LabelEncoder integer, so the
# RBF SVM treats it as a numeric magnitude; one-hot encoding may be more
# appropriate -- confirm.
# NOTE(review): this split stratifies on `sarcasm`, whereas the
# speaker-independent split stratifies on `sarcasm_type`, so the two runs are
# evaluated on different test rows.
X_train, X_test, Y_train, Y_test = get_train_test_split(model_data, ['audio_feature', 'speaker_encode'], 'sarcasm', 'sarcasm')
# Expand the feature vectors; the speaker id becomes feat_speaker_encode.
X_train_mean, X_test_mean = process_dataframes_pool(X_train), process_dataframes_pool(X_test)

Train:  (961, 2) (961,) Test:  ((241, 2), (241,))
<class 'pandas.core.frame.DataFrame'>


In [16]:
# Inspect the expanded training features of the speaker-dependent setup.
X_train_mean

Unnamed: 0,feat_speaker_encode,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,...,feat_1014,feat_1015,feat_1016,feat_1017,feat_1018,feat_1019,feat_1020,feat_1021,feat_1022,feat_1023
324,25,-1.492354,-0.381521,-1.186158,-1.180722,2.516167,1.899118,1.075207,0.578896,1.611228,...,-1.150176,-0.332603,-0.376076,-0.170648,0.729075,-0.998647,-2.412177,3.862189,0.233891,-2.757186
642,1,-1.461178,-0.422459,-1.168064,-1.184170,2.498249,2.082854,1.058678,0.556468,1.636716,...,-1.146680,-0.230358,-0.330342,-0.201567,0.749877,-1.018977,-2.371366,3.831006,0.251335,-2.729712
799,16,-1.555756,-0.295611,-1.228475,-1.160219,2.555374,1.746646,1.115522,0.646803,1.577754,...,-1.194996,-0.489715,-0.399532,-0.070543,0.697940,-0.987779,-2.536886,3.943640,0.201161,-2.827163
581,0,-1.479637,-0.330484,-1.193856,-1.166560,2.513616,1.932635,1.065653,0.582459,1.608197,...,-1.158856,-0.351114,-0.373615,-0.150376,0.713772,-0.962083,-2.401637,3.832661,0.229754,-2.785010
718,2,-1.462858,-0.359653,-1.198722,-1.168545,2.499120,2.030795,1.072693,0.583170,1.614221,...,-1.180912,-0.248936,-0.353331,-0.169784,0.734094,-1.005434,-2.379762,3.852985,0.242589,-2.783767
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
472,7,-1.494839,-0.369031,-1.196056,-1.164966,2.514503,1.979364,1.093544,0.602628,1.610850,...,-1.183382,-0.286069,-0.352684,-0.149890,0.736478,-1.020625,-2.432813,3.885089,0.225404,-2.776237
939,24,-1.526209,-0.316863,-1.205737,-1.154204,2.531477,1.879628,1.111348,0.644424,1.584402,...,-1.199218,-0.374180,-0.385631,-0.093995,0.702022,-0.989769,-2.472220,3.904171,0.195215,-2.811480
1201,3,-1.504954,-0.293425,-1.135379,-1.144920,2.508777,2.054394,1.087241,0.673250,1.616296,...,-1.229984,-0.198861,-0.351575,-0.130121,0.669315,-0.979705,-2.326248,3.787921,0.183100,-2.771972
618,15,-1.466969,-0.434521,-1.186376,-1.181080,2.502289,2.022657,1.046404,0.558106,1.628828,...,-1.140780,-0.248772,-0.353479,-0.196242,0.752512,-1.017570,-2.370615,3.823453,0.258205,-2.737882


In [17]:
# Grid-search the SVM on the speaker-dependent features.
# This overwrites the report and estimator of the speaker-independent run.
mean_report, mean_best_est = svm_classifier(X_train_mean, X_test_mean, Y_train, Y_test)

In [18]:
# Show the test-set classification report and the selected estimator.
print("********************************mean report********************************")
print(mean_report)
print(mean_best_est)
print("\n")

********************************mean report********************************
              precision    recall  f1-score   support

         0.0     0.6396    0.5868    0.6121       121
         1.0     0.6154    0.6667    0.6400       120

    accuracy                         0.6266       241
   macro avg     0.6275    0.6267    0.6260       241
weighted avg     0.6276    0.6266    0.6260       241

SVC(C=10, class_weight='balanced', random_state=0)


