In [1]:
import pandas as pd 
import pickle
import warnings
warnings.filterwarnings('ignore')
import numpy as np

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn import preprocessing

import torch
import torch.nn as nn
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [2]:
labels = pd.read_csv("../../data/scene_labels.csv")

In [3]:
labels.head(5)

Unnamed: 0,SCENE,KEY,SPEAKER,SHOW,Sarcasm,Sarcasm_Type
0,1_10004,1_10004_u,SHELDON,BBT,0.0,NONE
1,1_10009,1_10009_u,PENNY,BBT,0.0,NONE
2,1_1001,1_1001_u,RAJ,BBT,0.0,NONE
3,1_1003,1_1003_u,HOWARD,BBT,1.0,PRO
4,1_10190,1_10190_u,SHELDON,BBT,0.0,NONE


#### Perform mean, median, max, min and sum pooling on audio feature data

In [4]:
def get_model_data(audio_features):
    model_data = pd.DataFrame(columns=['audio_feature','sarcasm','sarcasm_type', 'speaker'])
    for index, row in labels.iterrows():
        audio_key = row["SCENE"] + "_u.wav"
        model_data = model_data.append({'audio_feature': audio_features[audio_key],
                                    'sarcasm' : row["Sarcasm"],
                                    'sarcasm_type' : row["Sarcasm_Type"],
                                    'speaker' : row["SPEAKER"]},
                                  ignore_index=True)
    return model_data

In [5]:
def get_train_test_split(model_data, x_column, y_column, stratify_column):
    train_count = 484
    train_data = model_data.groupby(stratify_column).apply(lambda x: x.sample(n=train_count, random_state=42))
    train_index_tuples = train_data.index.values.tolist()
    train_index_list = []
    for tup in train_index_tuples:
        train_index_list.append(tup[1])
    test_data = model_data[~model_data.index.isin(train_index_list)]
    train_data.reset_index(drop=True, inplace = True)
    test_data.reset_index(drop=True, inplace = True)
    return train_data, test_data

In [6]:
class GRUTensorDataset(Dataset):
    def __init__(self, dataframe, speaker):
        self.data = dataframe
        self.speaker = speaker

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        if self.speaker:
            features = self.data.loc[index, 'padded_audio_feature']
            a=np.empty((18,1))
            a.fill(self.data.loc[index, 'speaker_encode'])
            final_features = np.hstack((features, a))
            label = self.data.loc[index, 'sarcasm']
            return torch.from_numpy(final_features).float(), label
        else:
            features = self.data.loc[index, 'padded_audio_feature']
            label = self.data.loc[index, 'sarcasm']
            return torch.from_numpy(features).float(), label

    def __getindexlist__(self):
        return list(self.data.index.values)
    
class GRUNetSD(nn.Module):
    def __init__(self, input_dim, hidden_dim, 
                 output_dim, n_layers):
        super(GRUNetSD, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.gru = nn.GRU(input_dim, hidden_dim, 
                          n_layers, batch_first = True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, x, h):
        out, h = self.gru(x, h)
        out = self.softmax(self.fc(out[:,-1]))
        return out, h
    
    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = weight.new(self.n_layers, batch_size, 
                            self.hidden_dim).zero_()
        return hidden
    
class GRUNetSID(nn.Module):
    def __init__(self, input_dim, hidden_dim, 
                 output_dim, n_layers):
        super(GRUNetSID, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.gru = nn.GRU(input_dim, hidden_dim, 
                          n_layers, batch_first = True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, x, h):
        out, h = self.gru(x, h)
        out = self.softmax(self.fc(out[:,-1]))
        return out, h
    
    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = weight.new(self.n_layers, batch_size, 
                            self.hidden_dim).zero_()
        return hidden
    
def evaluateGRU(gru, review, size):
    hidden = gru.init_hidden(size)
    output, hidden = gru(review, hidden)
    return output

def categoryFromOutput(output):
    top_n, top_i = torch.max(output,dim=1)
    return top_i

def test_accuracy(gru, loader, size):
    actuals = []
    predictions = []
    for data, target in loader:
        output = evaluateGRU(gru, data, size)
        prediction_index = categoryFromOutput(output)
        predictions = prediction_index.tolist()
        actuals = target.tolist()
    return predictions, actuals
    
hidden_size = 18
output_size = 2
input_size = 690
input_size_sid = 690
input_size_sd = 691
n_layers = 1

In [7]:
with open('../../audio_features/feat_dict_librosa_lld.pickle', 'rb') as f:
    librosa_audio_features = pickle.load(f, encoding='latin1')
    
model_data = get_model_data(librosa_audio_features)
le = preprocessing.LabelEncoder()
model_data['speaker_encode'] = le.fit_transform(model_data['speaker'])
model_data.head(5)

Unnamed: 0,audio_feature,sarcasm,sarcasm_type,speaker,speaker_encode
0,"[[-386.6164855957031, -649.6673512776692, -633...",0.0,NONE,SHELDON,25
1,"[[-255.5221405029297, -484.69307309105284, -52...",0.0,NONE,PENNY,15
2,"[[-569.0548095703125, -381.4147456242488, -221...",0.0,NONE,RAJ,21
3,"[[-237.61074829101562, -211.002773845897, -382...",1.0,PRO,HOWARD,7
4,"[[-530.5701293945312, -374.83951552370763, -42...",0.0,NONE,SHELDON,25


In [8]:
desired_length = 18

train_data, test_data = get_train_test_split(model_data, 'audio_feature', 'sarcasm', 'sarcasm')
fnn_train = train_data.copy()
fnn_test = test_data.copy()
fnn_train.reset_index(drop=True, inplace = True)
fnn_test.reset_index(drop=True, inplace = True)

        
fnn_train['padded_audio_feature'] = fnn_train.loc[:, 'audio_feature']
for index, row in fnn_train.iterrows():
    data_array = row['padded_audio_feature']
    new_array = []
    for arr in data_array:
        if arr.shape[0] < desired_length:
            arr = np.pad(arr, (0, desired_length - arr.shape[0]), 'constant')
            new_array.append(arr)
        else:
            new_array.append(arr)
    fnn_train.at[index, "padded_audio_feature"] = np.transpose(np.array(new_array))

fnn_test['padded_audio_feature'] = fnn_test.loc[:, 'audio_feature']
for index, row in fnn_test.iterrows():
    data_array = row['padded_audio_feature']
    new_array = []
    for arr in data_array:
        if arr.shape[0] < desired_length:
            arr = np.pad(arr, (0, desired_length - arr.shape[0]), 'constant')
            new_array.append(arr)
        else:
            new_array.append(arr)
    fnn_test.at[index, "padded_audio_feature"] = np.transpose(np.array(new_array))

fnn_train["sarcasm"] = fnn_train["sarcasm"].astype('int').to_numpy()
fnn_test["sarcasm"] = fnn_test["sarcasm"].astype('int').to_numpy()

### Speaker Independent

In [9]:
fnn_train_tensor = GRUTensorDataset(fnn_train, False)
fnn_test_tensor = GRUTensorDataset(fnn_test, False)

num_of_workers = 0
batch_size = 44
valid_size = 0.1

train_indices = list(range(len(fnn_train_tensor)))
np.random.shuffle(train_indices)

test_indices = list(range(len(fnn_test_tensor)))
np.random.shuffle(test_indices)

train_loader = torch.utils.data.DataLoader(
    fnn_train_tensor, 
    batch_size=batch_size, 
    sampler=SubsetRandomSampler(train_indices)
)

test_loader = torch.utils.data.DataLoader(
    fnn_test_tensor, 
    batch_size=batch_size, 
    sampler=SubsetRandomSampler(test_indices)
)

In [10]:
gru = GRUNetSID(input_size_sid, hidden_size, output_size, n_layers)
print(gru)
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(gru.parameters(), lr=0.001)

GRUNetSID(
  (gru): GRU(690, 18, batch_first=True)
  (fc): Linear(in_features=18, out_features=2, bias=True)
  (softmax): LogSoftmax(dim=1)
)


In [11]:
n_epochs = 1001
    
test_min_loss = np.inf

for epoch in range(n_epochs):
    torch.manual_seed(42)
    train_loss = 0.0
    test_loss = 0.0
    gru.train()
    for data, target in train_loader:
        h = gru.init_hidden(batch_size)
        optimizer.zero_grad()
        output, h = gru(data, h.data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*data.size(0)

    gru.eval()
    for data, target in test_loader:
        if data.shape[1] < 44:
            continue
        h = gru.init_hidden(batch_size)
        output, h = gru(data, h.data)
        loss = criterion(output, target)
        test_loss += loss.item()*data.size(0)

    train_loss = train_loss / len(train_loader.dataset)
    test_loss = test_loss / len(test_loader.dataset)
    
#     if(epoch%20 == 0):
# #         print(f"Epoch: {epoch+1:02}")
# #         print("\tTraining Loss: {:.6f} \Test Loss: {:.6f}".format(train_loss, test_loss))
#     if test_loss <= test_min_loss:
# #         print("Test loss decreased ({:.6f} --> {:.6f}). Saving model...".format(test_min_loss, test_loss))
# #         torch.save(gru.state_dict(), "fnnmodel.pt")
#         test_min_loss = test_loss
    if(epoch%50 == 0):
        torch.save(gru.state_dict(), "gru/gru_" + str(epoch) + "_.pt")
        print("Epoch: " + str(epoch))
        test_loader = torch.utils.data.DataLoader(fnn_test_tensor, batch_size=fnn_test_tensor.__len__())
        predictions, actuals = test_accuracy(gru, test_loader, fnn_test_tensor.__len__())
        print(pd.DataFrame(classification_report(actuals, predictions, output_dict=True)).T)
        test_loader = torch.utils.data.DataLoader(fnn_test_tensor, batch_size=batch_size, sampler=SubsetRandomSampler(test_indices))

Epoch: 0
              precision  recall  f1-score  support
0                  0.50     1.0  0.666667    117.0
1                  0.00     0.0  0.000000    117.0
accuracy           0.50     0.5  0.500000      0.5
macro avg          0.25     0.5  0.333333    234.0
weighted avg       0.25     0.5  0.333333    234.0
Epoch: 50
              precision    recall  f1-score     support
0              0.546154  0.606838  0.574899  117.000000
1              0.557692  0.495726  0.524887  117.000000
accuracy       0.551282  0.551282  0.551282    0.551282
macro avg      0.551923  0.551282  0.549893  234.000000
weighted avg   0.551923  0.551282  0.549893  234.000000
Epoch: 100
              precision    recall  f1-score     support
0              0.592308  0.658120  0.623482  117.000000
1              0.615385  0.547009  0.579186  117.000000
accuracy       0.602564  0.602564  0.602564    0.602564
macro avg      0.603846  0.602564  0.601334  234.000000
weighted avg   0.603846  0.602564  0.601334  234

### Speaker Dependent

In [15]:
fnn_train_tensor = GRUTensorDataset(fnn_train, True)
fnn_test_tensor = GRUTensorDataset(fnn_test, True)

num_of_workers = 0
batch_size = 48
valid_size = 0.1

train_indices = list(range(len(fnn_train_tensor)))
np.random.shuffle(train_indices)

test_indices = list(range(len(fnn_test_tensor)))
np.random.shuffle(test_indices)

train_loader = torch.utils.data.DataLoader(
    fnn_train_tensor, 
    batch_size=batch_size, 
    sampler=SubsetRandomSampler(train_indices),
    drop_last=True
)

test_loader = torch.utils.data.DataLoader(
    fnn_test_tensor, 
    batch_size=batch_size, 
    sampler=SubsetRandomSampler(test_indices),
    drop_last=True
)

In [16]:
gru = GRUNetSD(input_size_sd, hidden_size, output_size, n_layers)
print(gru)
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(gru.parameters(), lr=0.001)

GRUNetSD(
  (gru): GRU(691, 18, batch_first=True)
  (fc): Linear(in_features=18, out_features=2, bias=True)
  (softmax): LogSoftmax(dim=1)
)


In [17]:
n_epochs = 1001
    
test_min_loss = np.inf

for epoch in range(n_epochs):
    torch.manual_seed(42)
    train_loss = 0.0
    test_loss = 0.0
    gru.train()
    for data, target in train_loader:
        h = gru.init_hidden(batch_size)
        optimizer.zero_grad()
        output, h = gru(data, h.data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*data.size(0)

    gru.eval()
    for data, target in test_loader:
        if data.shape[1] < 44:
            continue
        h = gru.init_hidden(batch_size)
        output, h = gru(data, h.data)
        loss = criterion(output, target)
        test_loss += loss.item()*data.size(0)

    train_loss = train_loss / len(train_loader.dataset)
    test_loss = test_loss / len(test_loader.dataset)
    
#     if(epoch%20 == 0):
# #         print(f"Epoch: {epoch+1:02}")
# #         print("\tTraining Loss: {:.6f} \Test Loss: {:.6f}".format(train_loss, test_loss))
#     if test_loss <= test_min_loss:
# #         print("Test loss decreased ({:.6f} --> {:.6f}). Saving model...".format(test_min_loss, test_loss))
# #         torch.save(gru.state_dict(), "fnnmodel.pt")
#         test_min_loss = test_loss
    if(epoch%50 == 0):
        torch.save(gru.state_dict(), "gru/gru_" + str(epoch) + "_.pt")
        print("Epoch: " + str(epoch))
        test_loader = torch.utils.data.DataLoader(fnn_test_tensor, batch_size=fnn_test_tensor.__len__())
        predictions, actuals = test_accuracy(gru, test_loader, fnn_test_tensor.__len__())
        print(pd.DataFrame(classification_report(actuals, predictions, output_dict=True)).T)
        test_loader = torch.utils.data.DataLoader(fnn_test_tensor, batch_size=batch_size, sampler=SubsetRandomSampler(test_indices))

Epoch: 0
              precision    recall  f1-score     support
0              0.583333  0.059829  0.108527  117.000000
1              0.504505  0.957265  0.660767  117.000000
accuracy       0.508547  0.508547  0.508547    0.508547
macro avg      0.543919  0.508547  0.384647  234.000000
weighted avg   0.543919  0.508547  0.384647  234.000000
Epoch: 50
              precision    recall  f1-score     support
0              0.632000  0.675214  0.652893  117.000000
1              0.651376  0.606838  0.628319  117.000000
accuracy       0.641026  0.641026  0.641026    0.641026
macro avg      0.641688  0.641026  0.640606  234.000000
weighted avg   0.641688  0.641026  0.640606  234.000000
Epoch: 100
              precision    recall  f1-score     support
0              0.631579  0.717949  0.672000  117.000000
1              0.673267  0.581197  0.623853  117.000000
accuracy       0.649573  0.649573  0.649573    0.649573
macro avg      0.652423  0.649573  0.647927  234.000000
weighted avg   0.6