In [1]:
import pandas as pd
import numpy as np
import os
import warnings
from sklearn import preprocessing
import pickle

from sklearn.svm import SVC
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [2]:
# Load the label table; the preview below shows SCENE, KEY, SPEAKER, SHOW,
# Sarcasm and Sarcasm_Type columns.
labels = pd.read_csv("scene_labels.csv")

In [3]:
# Preview the first five label rows.
labels.head(5)

Unnamed: 0,SCENE,KEY,SPEAKER,SHOW,Sarcasm,Sarcasm_Type
0,1_10004,1_10004_u,SHELDON,BBT,0.0,NONE
1,1_10009,1_10009_u,PENNY,BBT,0.0,NONE
2,1_1001,1_1001_u,RAJ,BBT,0.0,NONE
3,1_1003,1_1003_u,HOWARD,BBT,1.0,PRO
4,1_10190,1_10190_u,SHELDON,BBT,0.0,NONE


In [4]:
# Distinct scene identifiers taken from the label table.
scenes = list(labels["SCENE"].unique())

In [5]:
# Number of distinct scenes.
len(scenes)

1202

In [6]:
# Load the audio embeddings (indexed by scene id further down).
# NOTE: pickle.load can execute arbitrary code -- only load files from a trusted source.
# The context manager guarantees the handle is closed even if unpickling fails.
with open('audio_embed_final.pickle', 'rb') as file:
    audio_data = pickle.load(file)

In [7]:
# Load the text embeddings (indexed by scene id further down).
# NOTE: pickle.load can execute arbitrary code -- only load files from a trusted source.
# The context manager guarantees the handle is closed even if unpickling fails.
with open('text_embed_pca_final.pickle', 'rb') as file:
    text_data = pickle.load(file)

In [8]:
# Load the visual embeddings (indexed by scene id further down).
# NOTE: pickle.load can execute arbitrary code -- only load files from a trusted source.
# The context manager guarantees the handle is closed even if unpickling fails.
with open('visual_embed_padded_final.pickle', 'rb') as file:
    video_data = pickle.load(file)

In [9]:
# Fuse the three modalities per scene by joining their arrays along axis 1
# (audio first, then text, then video).
atv_data = {}
for scene in scenes:
    per_modality = [source[scene] for source in (audio_data, text_data, video_data)]
    atv_data[scene] = np.concatenate(per_modality, axis=1)


In [10]:
def get_model_data(atv_data, label_frame=None):
    """Build the modelling table: one row per row of the label table.

    Parameters
    ----------
    atv_data : mapping
        Maps a scene id (values of the ``SCENE`` column) to its feature array.
    label_frame : pandas.DataFrame, optional
        Table with ``SCENE``, ``Sarcasm``, ``Sarcasm_Type`` and ``SPEAKER``
        columns. Defaults to the notebook-level ``labels`` frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``atv_feature``, ``sarcasm``, ``sarcasm_type``, ``speaker``
        with a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If a scene id in the label table is missing from ``atv_data``.
    """
    if label_frame is None:
        label_frame = labels

    # Collect plain records and build the frame once: DataFrame.append copied
    # the whole frame on every row and no longer exists in pandas >= 2.0.
    records = [
        {
            'atv_feature': atv_data[scene],
            'sarcasm': sarcasm,
            'sarcasm_type': sarcasm_type,
            'speaker': speaker,
        }
        for scene, sarcasm, sarcasm_type, speaker in zip(
            label_frame["SCENE"],
            label_frame["Sarcasm"],
            label_frame["Sarcasm_Type"],
            label_frame["SPEAKER"],
        )
    ]
    return pd.DataFrame(
        records, columns=['atv_feature', 'sarcasm', 'sarcasm_type', 'speaker'])

In [11]:
def get_mean_pool(video) -> np.ndarray:
    """Average every element of ``video`` along its first axis.

    ``video`` is any iterable of array-likes; each one is reduced with
    ``np.mean(..., axis=0)`` and the results are stacked into a single array,
    one entry per input element.
    """
    pooled = []
    for feature_matrix in video:
        pooled.append(np.mean(feature_matrix, axis=0))
    return np.array(pooled)

In [12]:
def get_train_test_split(model_data, x_columns, y_column, stratify_column):
    """Split ``model_data`` 80/20 into train and test parts.

    The split is shuffled with a fixed seed (42) and stratified on
    ``model_data[stratify_column]``. Shapes of the resulting pieces and the
    type of the training features are printed before returning.

    Returns
    -------
    tuple
        ``(X_train, X_test, Y_train, Y_test)``.
    """
    features = model_data[x_columns]
    target = model_data[y_column]
    strata = model_data[stratify_column]

    X_train, X_test, Y_train, Y_test = train_test_split(
        features,
        target,
        stratify=strata,
        shuffle=True,
        random_state=42,
        train_size=0.8,
        test_size=0.2,
    )

    test_shapes = (X_test.shape, Y_test.shape)
    print("Train: ", X_train.shape, Y_train.shape, "Test: ", test_shapes)
    print(type(X_train))
    return X_train, X_test, Y_train, Y_test

def process_dataframes_pool(data):
    """Expand the sequence-valued ``atv_feature`` column into one column per entry.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``atv_feature`` column whose cells are sequences
        (e.g. lists of numbers). The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        The remaining columns of ``data`` followed by the expanded feature
        columns (numbered from 0), all prefixed with ``feat_``; the index of
        ``data`` is preserved.
    """
    # Build the wide block in one constructor call; apply(pd.Series) creates a
    # Series object per row, which is very slow for long feature vectors.
    expanded = pd.DataFrame(data["atv_feature"].tolist(), index=data.index)
    remaining = data.drop(columns=["atv_feature"])
    return pd.concat([remaining, expanded], axis=1).add_prefix("feat_")

def get_pooled_data(X_train, X_test, pool_type = "mean"):
    """Pool the ``atv_feature`` column of both splits and flatten it into columns.

    Parameters
    ----------
    X_train, X_test : pandas.DataFrame
        Frames with an ``atv_feature`` column; neither input is modified.
    pool_type : str
        Pooling strategy. Only ``"mean"`` is implemented.

    Returns
    -------
    tuple
        ``(train_frame, test_frame)`` as produced by ``process_dataframes_pool``
        after each ``atv_feature`` cell has been reduced by ``get_mean_pool``.

    Raises
    ------
    ValueError
        If ``pool_type`` is not supported. Previously the function fell through
        and returned ``None``, which made the caller's tuple unpacking fail
        with an unrelated-looking error.
    """
    if pool_type != "mean":
        raise ValueError(
            f"Unsupported pool_type {pool_type!r}; only 'mean' is implemented")

    pooled_frames = []
    for split in (X_train, X_test):
        pooled = split.copy()
        pooled["atv_feature"] = get_mean_pool(split["atv_feature"]).tolist()
        pooled_frames.append(process_dataframes_pool(pooled))
    return tuple(pooled_frames)


In [13]:
def svm_classifier(X_train, X_test, Y_train, Y_test):
    """Tune an RBF-kernel SVM over ``C`` and report its test-set performance.

    A grid search with ``cv = 10`` and ``f1_micro`` scoring picks ``C`` on the
    training data; the search refits the best configuration, which is then
    used to predict ``X_test``.

    Returns
    -------
    tuple
        ``(report, best_estimator)`` where ``report`` is the text produced by
        ``classification_report`` (4 digits) for the test predictions.
    """
    c_candidates = [0.0001, 0.0003, 0.0005, 0.001, 0.003, 0.005, 0.01, 0.03,
                    0.05, 0.1, 0.3, 0.5, 1, 3, 5, 10]
    base_svc = svm.SVC(
        kernel="rbf",
        gamma="scale",
        class_weight="balanced",
        random_state=0,
    )
    search = GridSearchCV(
        estimator=base_svc,
        param_grid={"C": c_candidates},
        scoring='f1_micro',
        cv=10,
        n_jobs=-1,
        refit=True,
    )

    search.fit(X_train, Y_train)
    predictions = search.predict(X_test)
    return classification_report(Y_test, predictions, digits=4), search.best_estimator_

In [14]:
# NOTE(review): this silences every warning from here on, not just a specific
# category -- consider narrowing the filter so real problems stay visible.
warnings.filterwarnings("ignore")

# Build the modelling table from the fused per-scene features and the labels.
model_data = get_model_data(atv_data)
# Label Encode Speaker
le = preprocessing.LabelEncoder()
model_data['speaker_encode'] = le.fit_transform(model_data['speaker'])
model_data.head(5)

Unnamed: 0,atv_feature,sarcasm,sarcasm_type,speaker,speaker_encode
0,"[[-637.1869506835938, 10.25528335571289, -3.98...",0.0,NONE,SHELDON,25
1,"[[-625.8624267578125, 51.68547058105469, 40.30...",0.0,NONE,PENNY,15
2,"[[-500.3988952636719, 21.715717315673828, 18.9...",0.0,NONE,RAJ,21
3,"[[-313.1777038574219, 97.45339965820312, -58.0...",1.0,PRO,HOWARD,7
4,"[[-337.88116455078125, 107.24081420898438, -49...",0.0,NONE,SHELDON,25


### Speaker Dependent

In [15]:
# Speaker-dependent setup: the encoded speaker id is kept as an input column
# next to the fused features; the target and the stratification key are both
# the sarcasm label.
X_train, X_test, Y_train, Y_test = get_train_test_split(model_data, ['atv_feature', 'speaker_encode'], 'sarcasm', 'sarcasm')
# Mean-pool each sample's feature array and expand it into feat_* columns.
X_train_mean, X_test_mean = get_pooled_data(X_train, X_test, "mean")

Train:  (961, 2) (961,) Test:  ((241, 2), (241,))
<class 'pandas.core.frame.DataFrame'>


In [16]:
# Preview the pooled training frame.
X_train_mean.head()

Unnamed: 0,feat_speaker_encode,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,...,feat_3496,feat_3497,feat_3498,feat_3499,feat_3500,feat_3501,feat_3502,feat_3503,feat_3504,feat_3505
324,25,-309.633377,28.205383,-23.962814,8.264493,0.502298,-10.360812,-7.408038,-4.478196,-5.369362,...,0.041239,0.053222,0.047092,0.044052,0.057916,0.020851,0.053538,0.035359,0.050763,0.039219
642,1,-377.257815,22.165556,-7.688077,1.793058,-3.076399,-4.935691,-7.138889,-5.397475,-7.337884,...,0.023595,0.027295,0.025616,0.022845,0.022523,0.008976,0.025631,0.017353,0.025114,0.01931
799,16,-412.10007,38.079424,-19.959661,4.498839,-6.36041,-10.015628,-2.164258,-6.037595,-4.609024,...,0.037934,0.049429,0.045749,0.042964,0.048225,0.021547,0.051708,0.036421,0.047238,0.037284
581,0,-377.630487,40.764981,-15.825529,3.544501,5.060644,-1.968106,-8.223052,0.048321,-7.808522,...,0.048638,0.047141,0.051199,0.043347,0.052052,0.019797,0.048663,0.038394,0.047618,0.039271
718,2,-392.259138,31.09035,-9.662526,4.076527,-7.194349,-4.926465,-4.253204,-3.178074,-4.550088,...,0.183438,0.19778,0.196845,0.159446,0.204896,0.079009,0.200967,0.143274,0.203651,0.140484


In [17]:
# Run the grid-searched SVM on the mean-pooled features; keeps the text report
# and the refit best estimator.
print("Processing Mean-pooled data")
mean_report, mean_best_est = svm_classifier(X_train_mean, X_test_mean, Y_train, Y_test)

Processing Mean-pooled data


In [18]:
# Show the test-set classification report and the selected estimator.
print("********************************mean report********************************")
print(mean_report)
print(mean_best_est)

********************************mean report********************************
              precision    recall  f1-score   support

         0.0     0.6557    0.6612    0.6584       121
         1.0     0.6555    0.6500    0.6527       120

    accuracy                         0.6556       241
   macro avg     0.6556    0.6556    0.6556       241
weighted avg     0.6556    0.6556    0.6556       241

SVC(C=10, class_weight='balanced', random_state=0)
