In [1]:
import pandas as pd
import numpy as np
import os
import warnings
from sklearn import preprocessing

from sklearn.svm import SVC
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [2]:
# Root of the project data. Override it by setting the SARCASM_DATA_DIR
# environment variable so the notebook can run on another machine; the
# default keeps the original location.
DATA_DIR = os.environ.get(
    "SARCASM_DATA_DIR",
    "/Users/yoshithaakunuri/Documents/CSCI535/Project/Final/data",
)
# Label table: the later cells read its SCENE, SPEAKER, Sarcasm and
# Sarcasm_Type columns.
labels = pd.read_csv(os.path.join(DATA_DIR, "scene_labels.csv"))

In [3]:
# Preview the first five rows to check that the label table loaded with the expected columns.
labels.head(5)

Unnamed: 0,SCENE,KEY,SPEAKER,SHOW,Sarcasm,Sarcasm_Type
0,1_10004,1_10004_u,SHELDON,BBT,0.0,NONE
1,1_10009,1_10009_u,PENNY,BBT,0.0,NONE
2,1_1001,1_1001_u,RAJ,BBT,0.0,NONE
3,1_1003,1_1003_u,HOWARD,BBT,1.0,PRO
4,1_10190,1_10190_u,SHELDON,BBT,0.0,NONE


In [4]:
# Distinct scene ids, in order of first appearance in the label table.
scenes = list(labels["SCENE"].unique())

In [5]:
# Number of distinct scene ids found in the label table.
len(scenes)

1202

In [6]:
parent_dir = "/Users/yoshithaakunuri/Documents/CSCI535/Project/Final/data/features/utterances_final"

# Shape of the all-zero placeholder stored when a scene's features cannot be
# loaded or pooled.
FALLBACK_FRAMES = 15
FEATURE_DIM = 2048

visual_features = {}
# Scenes that ended up with the zero placeholder, kept for inspection.
missing_visual_scenes = []
for scene in scenes:
    # Visual Features
    try:
        vf = np.load(os.path.join(parent_dir, "resnet_pool5_" + scene + ".npy"))
        # Global average pooling: average over axes 2 and 3, then flatten
        # to one FEATURE_DIM-long vector per entry along axis 0.
        vf_p = np.apply_over_axes(np.mean, vf, [2, 3])
        vf_p = np.reshape(vf_p, (vf_p.shape[0], FEATURE_DIM))
    except Exception:
        # Best-effort fallback, as before, but no longer a bare `except:`:
        # KeyboardInterrupt / SystemExit now propagate, and the affected
        # scene is recorded instead of being zero-filled silently.
        missing_visual_scenes.append(scene)
        vf_p = np.zeros((FALLBACK_FRAMES, FEATURE_DIM))

    visual_features[scene] = vf_p

if missing_visual_scenes:
    # Zero-filled rows end up in the training data, so make them visible.
    warnings.warn(
        "%d of %d scenes have no usable visual features and were zero-filled; "
        "see `missing_visual_scenes`." % (len(missing_visual_scenes), len(scenes))
    )

In [7]:
def get_model_data(video_features, labels_df=None):
    """Build the modelling table with one row per row of the label table.

    Args:
        video_features: mapping from scene id (the ``SCENE`` column) to the
            feature array of that scene.
        labels_df: label table providing the ``SCENE``, ``Sarcasm``,
            ``Sarcasm_Type`` and ``SPEAKER`` columns. When omitted, the
            notebook-level ``labels`` frame is used, as before.

    Returns:
        A DataFrame with the columns ``video_feature``, ``sarcasm``,
        ``sarcasm_type`` and ``speaker``, in label-table order with a fresh
        0..n-1 index.

    Raises:
        KeyError: if a scene of the label table is missing from
            ``video_features``.
    """
    if labels_df is None:
        labels_df = labels

    columns = ['video_feature', 'sarcasm', 'sarcasm_type', 'speaker']
    # Collect all rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and growing a frame row by row is quadratic.
    records = [
        {
            'video_feature': video_features[scene],
            'sarcasm': sarcasm,
            'sarcasm_type': sarcasm_type,
            'speaker': speaker,
        }
        for scene, sarcasm, sarcasm_type, speaker in zip(
            labels_df["SCENE"],
            labels_df["Sarcasm"],
            labels_df["Sarcasm_Type"],
            labels_df["SPEAKER"],
        )
    ]
    return pd.DataFrame(records, columns=columns)

In [8]:
def get_video_mean_pool(video) -> np.ndarray:
    """Average every element of ``video`` along its first axis.

    ``video`` is an iterable of array-likes (in this notebook, presumably one
    per-frame feature matrix per utterance). The result stacks one reduced
    vector per element.
    """
    pooled = []
    for frames in video:
        pooled.append(np.mean(frames, axis=0))
    return np.array(pooled)
    
def get_video_median_pool(video) -> np.ndarray:
    """Take the median of every element of ``video`` along its first axis.

    Returns one reduced vector per element, stacked into a single array.
    """
    def _median_over_first_axis(frames):
        return np.median(frames, axis=0)

    return np.array(list(map(_median_over_first_axis, video)))
    
def get_video_max_pool(video) -> np.ndarray:
    """Take the element-wise maximum of every element of ``video`` along its first axis.

    Returns one reduced vector per element, stacked into a single array.
    """
    maxima = []
    for frames in video:
        column_max = np.max(frames, axis=0)
        maxima.append(column_max)
    return np.array(maxima)

def get_video_min_pool(video) -> np.ndarray:
    """Take the element-wise minimum of every element of ``video`` along its first axis.

    Returns one reduced vector per element, stacked into a single array.
    """
    minima = []
    for frames in video:
        minima.append(np.min(frames, axis=0))
    stacked = np.array(minima)
    return stacked

def get_video_sum_pool(video) -> np.ndarray:
    """Sum every element of ``video`` along its first axis.

    Returns one reduced vector per element, stacked into a single array.
    """
    def _sum_over_first_axis(frames):
        return np.sum(frames, axis=0)

    totals = [_sum_over_first_axis(frames) for frames in video]
    return np.array(totals)

In [9]:
def get_train_test_split(model_data, x_columns, y_column, stratify_column,
                         train_size=0.8, test_size=0.2, random_state=42):
    """Shuffle and split ``model_data`` into stratified train and test parts.

    Args:
        model_data: DataFrame holding features, target and stratification column.
        x_columns: list of column names used as features.
        y_column: name of the target column.
        stratify_column: name of the column whose class proportions are
            preserved in both parts.
        train_size: forwarded to ``train_test_split`` (default 0.8).
        test_size: forwarded to ``train_test_split`` (default 0.2).
        random_state: seed forwarded to ``train_test_split`` (default 42) so
            the split is reproducible.

    Returns:
        ``(X_train, X_test, Y_train, Y_test)``. The shapes of the four parts
        and the type of ``X_train`` are printed as a side effect.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(
        model_data[x_columns],
        model_data[y_column],
        train_size=train_size,
        test_size=test_size,
        random_state=random_state,
        shuffle=True,
        stratify=model_data[stratify_column],
    )

    print("Train: ",X_train.shape, Y_train.shape,
      "Test: ",(X_test.shape, Y_test.shape))
    print(type(X_train))
    return X_train, X_test, Y_train, Y_test

def process_dataframes_pool(data):
    """Expand the ``video_feature`` column into one column per feature value.

    Args:
        data: DataFrame with a ``video_feature`` column whose cells are
            equal-length sequences (the pooled feature vectors), plus any
            other feature columns.

    Returns:
        A new DataFrame with the same index: the remaining columns of
        ``data`` first, then one column per vector position, every column
        name prefixed with ``feat_``. ``data`` itself is not modified.
    """
    # One DataFrame construction instead of `.apply(pd.Series)`, which builds
    # a Series per row and is very slow on wide vectors.
    expanded = pd.DataFrame(data["video_feature"].tolist(), index=data.index)
    remaining = data.drop(columns=['video_feature'])
    return pd.concat([remaining, expanded], axis=1).add_prefix('feat_')

def get_pooled_data(X_train, X_test, pool_type):
    """Pool the ``video_feature`` column of both splits and flatten the result.

    Args:
        X_train: training features; must contain a ``video_feature`` column.
        X_test: test features; must contain a ``video_feature`` column.
        pool_type: one of ``"mean"``, ``"median"``, ``"max"``, ``"min"``,
            ``"sum"``.

    Returns:
        ``(train_frame, test_frame)``: copies of the inputs in which
        ``video_feature`` has been pooled and expanded into ``feat_*``
        columns by ``process_dataframes_pool``. The inputs are not modified.

    Raises:
        ValueError: if ``pool_type`` is not a supported name. Previously the
            function silently returned ``None`` in that case.
    """
    poolers = {
        "mean": get_video_mean_pool,
        "median": get_video_median_pool,
        "max": get_video_max_pool,
        "min": get_video_min_pool,
        "sum": get_video_sum_pool,
    }
    if pool_type not in poolers:
        raise ValueError(
            "Unknown pool_type %r; expected one of %s" % (pool_type, sorted(poolers))
        )
    pool = poolers[pool_type]

    def _pool_split(split):
        # Work on a copy so the caller's frame keeps its raw features.
        pooled = split.copy()
        pooled["video_feature"] = pool(split["video_feature"]).tolist()
        return process_dataframes_pool(pooled)

    return _pool_split(X_train), _pool_split(X_test)
    

In [10]:
def svm_classifier(X_train, X_test, Y_train, Y_test):
    """Grid-search an RBF SVM over ``C`` and evaluate it on the test split.

    The search uses ``cv=10`` with ``f1_micro`` scoring on the training data
    and refits the best setting on the whole training split.

    Returns:
        ``(report, best_estimator)``: the text classification report for the
        test-split predictions (4 digits) and the refitted best estimator.
    """
    c_values = [0.0001, 0.0003, 0.0005, 0.001, 0.003, 0.005, 0.01, 0.03, 0.05, 0.1, 0.3, 0.5, 1, 3, 5, 10]
    base_svc = svm.SVC(
        random_state=0,
        kernel="rbf",
        gamma="scale",
        class_weight="balanced",
    )
    search = GridSearchCV(
        estimator=base_svc,
        param_grid={"C": c_values},
        n_jobs=-1,
        cv=10,
        scoring='f1_micro',
        refit=True,
    )

    search.fit(X_train, Y_train)
    predictions = search.predict(X_test)
    report = classification_report(Y_test, predictions, digits=4)
    return report, search.best_estimator_

In [11]:
# NOTE(review): this silences every warning for the rest of the kernel
# session, including deprecation and convergence warnings raised by later cells.
warnings.filterwarnings("ignore")

# One row per label row: feature array, sarcasm label, sarcasm type, speaker.
model_data = get_model_data(visual_features)
# Label Encode Speaker: map each speaker name to an integer code.
le = preprocessing.LabelEncoder()
model_data['speaker_encode'] = le.fit_transform(model_data['speaker'])
model_data.head(5)

Unnamed: 0,video_feature,sarcasm,sarcasm_type,speaker,speaker_encode
0,"[[0.13835047, 0.2704592, 0.44648886, 0.1415337...",0.0,NONE,SHELDON,25
1,"[[0.46479157, 0.1813915, 0.22123067, 0.5245148...",0.0,NONE,PENNY,15
2,"[[0.253619, 0.25664786, 0.6646118, 0.4821793, ...",0.0,NONE,RAJ,21
3,"[[0.55624646, 0.16990338, 0.62457716, 0.209021...",1.0,PRO,HOWARD,7
4,"[[0.6140023, 0.4846397, 0.79425097, 0.13518682...",0.0,NONE,SHELDON,25


### Speaker Independent

In [12]:
# Speaker-independent setting: only the video features are used as input.
# The split is stratified on 'sarcasm_type'; the target is 'sarcasm'.
X_train, X_test, Y_train, Y_test = get_train_test_split(model_data, ['video_feature'], 'sarcasm', 'sarcasm_type')
# One pooled, flattened train/test pair per pooling strategy.
X_train_mean, X_test_mean = get_pooled_data(X_train, X_test, "mean")
X_train_median, X_test_median = get_pooled_data(X_train, X_test, "median")
X_train_max, X_test_max = get_pooled_data(X_train, X_test, "max")
X_train_min, X_test_min = get_pooled_data(X_train, X_test, "min")
X_train_sum, X_test_sum = get_pooled_data(X_train, X_test, "sum")

Train:  (961, 1) (961,) Test:  ((241, 1), (241,))
<class 'pandas.core.frame.DataFrame'>


In [13]:
# Inspect the mean-pooled training features (one feat_* column per feature dimension).
X_train_mean.head()

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_2038,feat_2039,feat_2040,feat_2041,feat_2042,feat_2043,feat_2044,feat_2045,feat_2046,feat_2047
424,0.381499,0.585135,0.521604,0.376826,0.564878,0.525846,0.528426,0.523864,0.445912,0.396786,...,0.407312,0.451004,0.436169,0.403903,0.458166,0.186549,0.437864,0.346711,0.424549,0.356524
190,0.362125,0.582412,0.489282,0.387754,0.592771,0.514074,0.465398,0.480668,0.462627,0.382084,...,0.393004,0.448692,0.426229,0.393386,0.479226,0.199675,0.470453,0.318255,0.464625,0.323267
1080,0.333978,0.594456,0.476321,0.385883,0.535276,0.477293,0.456089,0.492375,0.43815,0.412621,...,0.361899,0.44532,0.422796,0.388583,0.439197,0.181434,0.459565,0.320772,0.446543,0.327752
973,0.332667,0.546447,0.529252,0.397142,0.50858,0.52637,0.503004,0.500019,0.40419,0.403084,...,0.403726,0.408884,0.413097,0.378066,0.485424,0.18004,0.440938,0.335303,0.41697,0.322297
410,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Fit one grid-searched SVM per pooling strategy on the speaker-independent
# split; each call returns the test-split report and the best estimator.
print("Processing Mean-pooled data")
mean_report, mean_best_est = svm_classifier(X_train_mean, X_test_mean, Y_train, Y_test)
print("Processing Median-pooled data")
median_report, median_best_est = svm_classifier(X_train_median, X_test_median, Y_train, Y_test)
print("Processing Max-pooled data")
max_report, max_best_est = svm_classifier(X_train_max, X_test_max, Y_train, Y_test)
print("Processing Min-pooled data")
min_report, min_best_est = svm_classifier(X_train_min, X_test_min, Y_train, Y_test)
print("Processing Sum-pooled data")
sum_report, sum_best_est = svm_classifier(X_train_sum, X_test_sum, Y_train, Y_test)

Processing Mean-pooled data
Processing Median-pooled data
Processing Max-pooled data
Processing Min-pooled data
Processing Sum-pooled data


In [15]:
# Print, for each pooling strategy, the test-split classification report
# followed by the best estimator selected by the grid search.
print("********************************mean report********************************")
print(mean_report)
print(mean_best_est)
print("\n")
print("********************************median report********************************")
print(median_report)
print(median_best_est)
print("\n")
print("********************************max report********************************")
print(max_report)
print(max_best_est)
print("\n")
print("********************************min report********************************")
print(min_report)
print(min_best_est)
print("\n")
print("********************************sum report********************************")
print(sum_report)
print(sum_best_est)

********************************mean report********************************
              precision    recall  f1-score   support

         0.0     0.5895    0.4667    0.5209       120
         1.0     0.5616    0.6777    0.6142       121

    accuracy                         0.5726       241
   macro avg     0.5756    0.5722    0.5676       241
weighted avg     0.5755    0.5726    0.5678       241

SVC(C=10, class_weight='balanced', random_state=0)


********************************median report********************************
              precision    recall  f1-score   support

         0.0     0.6197    0.7333    0.6718       120
         1.0     0.6768    0.5537    0.6091       121

    accuracy                         0.6432       241
   macro avg     0.6482    0.6435    0.6404       241
weighted avg     0.6484    0.6432    0.6403       241

SVC(C=0.1, class_weight='balanced', random_state=0)


********************************max report********************************
          

### Speaker Dependent

In [16]:
# Speaker-dependent setting: the integer speaker code is added as an input column.
# NOTE(review): this split is stratified on 'sarcasm', whereas the
# speaker-independent split is stratified on 'sarcasm_type', so the two
# experiments are scored on different train/test partitions - confirm this is intended.
# NOTE(review): 'speaker_encode' is an arbitrary integer code passed unscaled
# to an RBF SVM next to the pooled features - consider one-hot encoding or scaling.
# These assignments rebind X_train, X_test, Y_train, Y_test and the pooled
# frames created for the speaker-independent experiment.
X_train, X_test, Y_train, Y_test = get_train_test_split(model_data, ['video_feature', 'speaker_encode'], 'sarcasm', 'sarcasm')
X_train_mean, X_test_mean = get_pooled_data(X_train, X_test, "mean")
X_train_median, X_test_median = get_pooled_data(X_train, X_test, "median")
X_train_max, X_test_max = get_pooled_data(X_train, X_test, "max")
X_train_min, X_test_min = get_pooled_data(X_train, X_test, "min")
X_train_sum, X_test_sum = get_pooled_data(X_train, X_test, "sum")

Train:  (961, 2) (961,) Test:  ((241, 2), (241,))
<class 'pandas.core.frame.DataFrame'>


In [17]:
# Inspect the mean-pooled training features; the speaker code appears as feat_speaker_encode.
X_train_mean.head()

Unnamed: 0,feat_speaker_encode,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,...,feat_2038,feat_2039,feat_2040,feat_2041,feat_2042,feat_2043,feat_2044,feat_2045,feat_2046,feat_2047
324,25,0.366877,0.631909,0.535369,0.430392,0.555703,0.449654,0.50052,0.450372,0.469444,...,0.371151,0.478997,0.423825,0.396466,0.521244,0.187658,0.481839,0.318229,0.456867,0.352971
642,1,0.359215,0.55701,0.540176,0.3872,0.610274,0.559137,0.533949,0.492948,0.455658,...,0.424716,0.491309,0.461088,0.411208,0.405419,0.161563,0.461361,0.312351,0.452058,0.347572
799,16,0.357687,0.532455,0.488337,0.394808,0.514892,0.504754,0.515192,0.482578,0.422621,...,0.341406,0.444857,0.411741,0.386679,0.434025,0.19392,0.465375,0.327785,0.425144,0.335552
581,0,0.34354,0.603617,0.514443,0.367614,0.529612,0.448785,0.481983,0.515264,0.424043,...,0.437738,0.424267,0.460791,0.390121,0.468468,0.178169,0.43797,0.345549,0.428563,0.353435
718,2,0.344098,0.53583,0.508524,0.402521,0.532478,0.518151,0.493107,0.440488,0.411921,...,0.412736,0.445004,0.442901,0.358753,0.461017,0.177771,0.452176,0.322367,0.458215,0.31609


In [18]:
# Fit one grid-searched SVM per pooling strategy on the speaker-dependent split.
# The *_report and *_best_est names are rebound here, replacing the
# speaker-independent results held under the same names.
print("Processing Mean-pooled data")
mean_report, mean_best_est = svm_classifier(X_train_mean, X_test_mean, Y_train, Y_test)
print("Processing Median-pooled data")
median_report, median_best_est = svm_classifier(X_train_median, X_test_median, Y_train, Y_test)
print("Processing Max-pooled data")
max_report, max_best_est = svm_classifier(X_train_max, X_test_max, Y_train, Y_test)
print("Processing Min-pooled data")
min_report, min_best_est = svm_classifier(X_train_min, X_test_min, Y_train, Y_test)
print("Processing Sum-pooled data")
sum_report, sum_best_est = svm_classifier(X_train_sum, X_test_sum, Y_train, Y_test)

Processing Mean-pooled data
Processing Median-pooled data
Processing Max-pooled data
Processing Min-pooled data
Processing Sum-pooled data


In [19]:
# Print, for each pooling strategy, the speaker-dependent test-split
# classification report followed by the best estimator from the grid search.
print("********************************mean report********************************")
print(mean_report)
print(mean_best_est)
print("\n")
print("********************************median report********************************")
print(median_report)
print(median_best_est)
print("\n")
print("********************************max report********************************")
print(max_report)
print(max_best_est)
print("\n")
print("********************************min report********************************")
print(min_report)
print(min_best_est)
print("\n")
print("********************************sum report********************************")
print(sum_report)
print(sum_best_est)

********************************mean report********************************
              precision    recall  f1-score   support

         0.0     0.6230    0.6281    0.6255       121
         1.0     0.6218    0.6167    0.6192       120

    accuracy                         0.6224       241
   macro avg     0.6224    0.6224    0.6224       241
weighted avg     0.6224    0.6224    0.6224       241

SVC(C=10, class_weight='balanced', random_state=0)


********************************median report********************************
              precision    recall  f1-score   support

         0.0     0.5887    0.6033    0.5959       121
         1.0     0.5897    0.5750    0.5823       120

    accuracy                         0.5892       241
   macro avg     0.5892    0.5892    0.5891       241
weighted avg     0.5892    0.5892    0.5891       241

SVC(C=10, class_weight='balanced', random_state=0)


********************************max report********************************
           