In [1]:
import pandas as pd
import numpy as np
import os
import warnings
from sklearn import preprocessing
import pickle

from sklearn.svm import SVC
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [2]:
# Scene-level annotation table (columns shown in the preview output:
# SCENE, KEY, SPEAKER, SHOW, Sarcasm, Sarcasm_Type).
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact location exists.
labels = pd.read_csv(
    filepath_or_buffer="/Users/yoshithaakunuri/Documents/CSCI535/Project/Final/data/scene_labels.csv"
)

In [3]:
# Preview the first five annotation rows to sanity-check the loaded columns.
labels.head(5)

Unnamed: 0,SCENE,KEY,SPEAKER,SHOW,Sarcasm,Sarcasm_Type
0,1_10004,1_10004_u,SHELDON,BBT,0.0,NONE
1,1_10009,1_10009_u,PENNY,BBT,0.0,NONE
2,1_1001,1_1001_u,RAJ,BBT,0.0,NONE
3,1_1003,1_1003_u,HOWARD,BBT,1.0,PRO
4,1_10190,1_10190_u,SHELDON,BBT,0.0,NONE


In [4]:
# Distinct scene identifiers, in order of first appearance in the label table.
scenes = list(labels["SCENE"].unique())

In [5]:
# Number of distinct scenes found in the label table.
len(scenes)

1202

In [6]:
# Load the text embeddings; the object is indexed by scene id further down.
# (Presumably PCA-reduced, judging by the file name -- TODO confirm.)
# NOTE(review): pickle.load can execute arbitrary code; only load files from a
# trusted source.
# The context manager guarantees the handle is closed even if unpickling fails.
with open('/Users/yoshithaakunuri/Documents/CSCI535/Project/Final/MultiModal/Early Fusion/Data/text_embed_pca_final.pickle', 'rb') as file:
    text_data = pickle.load(file)

In [7]:
# Load the visual embeddings; the object is indexed by scene id further down.
# (Presumably padded to a common length, judging by the file name -- TODO confirm.)
# NOTE(review): pickle.load can execute arbitrary code; only load files from a
# trusted source.
# The context manager guarantees the handle is closed even if unpickling fails.
with open('/Users/yoshithaakunuri/Documents/CSCI535/Project/Final/MultiModal/Early Fusion/Data/visual_embed_padded_final.pickle', 'rb') as file:
    video_data = pickle.load(file)

In [8]:
# Early fusion: for every scene, join the text and visual arrays along axis 1
# so each scene maps to a single combined text+video array.
tv_data = {
    scene_id: np.concatenate((text_data[scene_id], video_data[scene_id]), axis=1)
    for scene_id in scenes
}


In [9]:
def get_model_data(tv_data, label_frame=None):
    """Assemble the modelling table: one row per label row, with fused features.

    Parameters
    ----------
    tv_data : mapping
        Maps a scene id (the ``SCENE`` column value) to its fused feature array.
    label_frame : pandas.DataFrame, optional
        Table with ``SCENE``, ``Sarcasm``, ``Sarcasm_Type`` and ``SPEAKER``
        columns. Defaults to the notebook-level ``labels`` frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``tv_feature``, ``sarcasm``, ``sarcasm_type``, ``speaker`` with a
        fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If a scene id in the label table has no entry in ``tv_data``.
    """
    source = labels if label_frame is None else label_frame
    out_columns = ['tv_feature', 'sarcasm', 'sarcasm_type', 'speaker']

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame on every row.
    records = [
        {'tv_feature': tv_data[scene_id],
         'sarcasm': sarcasm,
         'sarcasm_type': sarcasm_type,
         'speaker': speaker}
        for scene_id, sarcasm, sarcasm_type, speaker in zip(
            source["SCENE"], source["Sarcasm"], source["Sarcasm_Type"], source["SPEAKER"])
    ]
    # Passing `columns` keeps the expected schema even when there are no rows.
    return pd.DataFrame(records, columns=out_columns)

In [10]:
def get_mean_pool(video) -> np.ndarray:
    """Mean-pool every item of ``video`` along its first axis.

    Each element of ``video`` is averaged over axis 0 and the per-element
    results are gathered, in order, into a single NumPy array.
    """
    pooled_rows = []
    for feature_matrix in video:
        pooled_rows.append(np.mean(feature_matrix, axis=0))
    return np.array(pooled_rows)

In [11]:
def get_train_test_split(model_data, x_columns, y_column, stratify_column):
    """Make a fixed-seed, stratified 80/20 split of ``model_data``.

    ``x_columns`` selects the feature columns, ``y_column`` the target and
    ``stratify_column`` the column used for stratification. The shapes of the
    resulting pieces and the type of the training features are printed.

    Returns ``(X_train, X_test, Y_train, Y_test)``.
    """
    features = model_data[x_columns]
    target = model_data[y_column]
    strata = model_data[stratify_column]

    pieces = train_test_split(
        features,
        target,
        train_size=0.8,
        test_size=0.2,
        random_state=42,
        shuffle=True,
        stratify=strata,
    )
    X_train, X_test, Y_train, Y_test = pieces

    test_shapes = (X_test.shape, Y_test.shape)
    print("Train: ", X_train.shape, Y_train.shape, "Test: ", test_shapes)
    print(type(X_train))
    return X_train, X_test, Y_train, Y_test

def process_dataframes_pool(data):
    """Expand the list-valued ``tv_feature`` column into one column per element.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``tv_feature`` column whose cells are equal-length
        sequences of numbers, plus any other columns to carry through.

    Returns
    -------
    pandas.DataFrame
        The other columns of ``data`` followed by the expanded feature columns
        (labelled 0..k-1), with every column name prefixed by ``feat_``.
        ``data`` itself is not modified.
    """
    # Build the wide block in one constructor call; `.apply(pd.Series)` creates
    # a Series object per row, which is very slow for long feature vectors.
    expanded = pd.DataFrame(data["tv_feature"].tolist(), index=data.index)
    widened = pd.concat([data.drop(columns=["tv_feature"]), expanded], axis=1)
    return widened.add_prefix('feat_')

def get_pooled_data(X_train, X_test, pool_type = "mean"):
    """Pool the ``tv_feature`` column of both splits and flatten it to columns.

    Parameters
    ----------
    X_train, X_test : pandas.DataFrame
        Feature frames containing a ``tv_feature`` column.
    pool_type : str
        Pooling strategy; only ``"mean"`` is implemented.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, test)`` frames produced by ``process_dataframes_pool``.

    Raises
    ------
    ValueError
        If ``pool_type`` is not supported. Previously the function fell through
        and returned ``None``, which only failed later when the caller tried to
        unpack the result.
    """
    if pool_type != "mean":
        raise ValueError(
            f"Unsupported pool_type {pool_type!r}; only 'mean' is implemented")

    def _pool(frame):
        # Work on a copy so the caller's split is left untouched.
        pooled = frame.copy()
        pooled["tv_feature"] = get_mean_pool(frame["tv_feature"]).tolist()
        return process_dataframes_pool(pooled)

    return _pool(X_train), _pool(X_test)


In [12]:
def svm_classifier(X_train, X_test, Y_train, Y_test, c_values=None, cv=10):
    """Tune an RBF-kernel SVM over ``C`` with cross-validation and score it.

    Parameters
    ----------
    X_train, Y_train
        Training features and labels used for the grid search.
    X_test, Y_test
        Held-out features and labels used for the final report.
    c_values : sequence of float, optional
        Candidate values for the SVM ``C`` parameter. Defaults to the grid
        originally hard-coded here (0.0001 ... 10).
    cv : int
        Number of cross-validation folds (default 10, as before).

    Returns
    -------
    tuple
        ``(report, best_estimator)``: the ``classification_report`` for the
        test predictions (4 digits) and the refitted best ``SVC``.
    """
    if c_values is None:
        c_values = [0.0001, 0.0003, 0.0005, 0.001, 0.003, 0.005, 0.01, 0.03,
                    0.05, 0.1, 0.3, 0.5, 1, 3, 5, 10]

    svm_clf = svm.SVC(random_state=0, kernel="rbf", gamma="scale", class_weight="balanced")
    gscv_clf = GridSearchCV(
        estimator=svm_clf,
        param_grid={"C": list(c_values)},
        n_jobs=-1,  # use every available core for the search
        cv=cv,
        scoring='f1_micro',
        refit=True)  # refit on the full training set with the best C

    gscv_clf.fit(X_train, Y_train)
    Y_test_pred = gscv_clf.predict(X_test)
    report = classification_report(Y_test, Y_test_pred, digits=4)
    return report, gscv_clf.best_estimator_

In [13]:
# NOTE(review): this silences every warning for the rest of the session.
warnings.filterwarnings("ignore")

# One row per label row, carrying the fused text+video features.
model_data = get_model_data(tv_data)

# Label Encode Speaker: learn the speaker vocabulary, then map each name to
# its integer code.
le = preprocessing.LabelEncoder()
le.fit(model_data['speaker'])
model_data['speaker_encode'] = le.transform(model_data['speaker'])
model_data.head(5)

Unnamed: 0,tv_feature,sarcasm,sarcasm_type,speaker,speaker_encode
0,"[[-5.775393, -0.55747294, -5.9735856, 0.882094...",0.0,NONE,SHELDON,25
1,"[[-9.726368, -4.6866755, -11.944455, -6.425826...",0.0,NONE,PENNY,15
2,"[[-14.0798025, 4.135473, -8.056498, -4.865928,...",0.0,NONE,RAJ,21
3,"[[-5.3839507, 2.0448134, -13.128815, -6.819113...",1.0,PRO,HOWARD,7
4,"[[-3.3831522, 5.3072224, -5.980673, -1.8846271...",0.0,NONE,SHELDON,25


### Speaker Dependent

In [14]:
# Speaker-dependent setup: the encoded speaker id is kept as a feature next to
# the fused embeddings; the split is stratified on the sarcasm label.
X_train, X_test, Y_train, Y_test = get_train_test_split(
    model_data,
    x_columns=['tv_feature', 'speaker_encode'],
    y_column='sarcasm',
    stratify_column='sarcasm',
)
X_train_mean, X_test_mean = get_pooled_data(X_train, X_test, pool_type="mean")

Train:  (961, 2) (961,) Test:  ((241, 2), (241,))
<class 'pandas.core.frame.DataFrame'>


In [15]:
# Preview the mean-pooled training features; every column name carries the
# 'feat_' prefix added by process_dataframes_pool.
X_train_mean.head()

Unnamed: 0,feat_speaker_encode,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,...,feat_2806,feat_2807,feat_2808,feat_2809,feat_2810,feat_2811,feat_2812,feat_2813,feat_2814,feat_2815
324,25,0.576967,0.925955,0.138869,0.062862,-0.95181,0.109291,-0.421197,-0.642044,-0.35887,...,0.041239,0.053222,0.047092,0.044052,0.057916,0.020851,0.053538,0.035359,0.050763,0.039219
642,1,-1.008488,0.143696,-1.130845,-1.068172,-1.17399,0.626055,-0.669882,-0.846938,0.555073,...,0.023595,0.027295,0.025616,0.022845,0.022523,0.008976,0.025631,0.017353,0.025114,0.01931
799,16,0.341749,-0.131565,-0.958209,-0.000862,-0.916453,0.030041,-0.140461,-0.346623,0.114331,...,0.037934,0.049429,0.045749,0.042964,0.048225,0.021547,0.051708,0.036421,0.047238,0.037284
581,0,0.425735,-0.161134,-0.923989,-0.114258,1.326604,-0.112728,-0.061326,-0.600252,-0.109899,...,0.048638,0.047141,0.051199,0.043347,0.052052,0.019797,0.048663,0.038394,0.047618,0.039271
718,2,0.311526,0.434988,-1.715193,0.574301,0.753508,0.528136,0.283772,-0.856657,-0.141398,...,0.183438,0.19778,0.196845,0.159446,0.204896,0.079009,0.200967,0.143274,0.203651,0.140484


In [16]:
# Grid-search the SVM on the mean-pooled features and score it on the test split.
print("Processing Mean-pooled data")
mean_report, mean_best_est = svm_classifier(
    X_train=X_train_mean,
    X_test=X_test_mean,
    Y_train=Y_train,
    Y_test=Y_test,
)

Processing Mean-pooled data


In [17]:
# Show the banner, the test-set classification report and the selected model,
# one per line.
print(
    "********************************mean report********************************",
    mean_report,
    mean_best_est,
    sep="\n",
)

********************************mean report********************************
              precision    recall  f1-score   support

         0.0     0.6455    0.5868    0.6147       121
         1.0     0.6183    0.6750    0.6454       120

    accuracy                         0.6307       241
   macro avg     0.6319    0.6309    0.6301       241
weighted avg     0.6319    0.6307    0.6300       241

SVC(C=1, class_weight='balanced', random_state=0)
