In [1]:
import pandas as pd 
import pickle
import warnings
warnings.filterwarnings('ignore')
import numpy as np

from sklearn.metrics import classification_report
from sklearn import preprocessing
import torch
import torch.nn as nn
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [2]:
y_train = pd.read_csv("../../text_features/embeddings/train_labels.csv")
y_test = pd.read_csv("../../text_features/embeddings/test_labels.csv")

In [3]:
with open('../../text_features/embeddings/train_bert_embeddings_target_.pkl', 'rb') as f:
    x_train = pickle.load(f, encoding='latin1')

with open('../../text_features/embeddings/test_bert_embeddings_target_.pkl', 'rb') as f:
    x_test = pickle.load(f, encoding='latin1')

In [4]:
x_train_vals = []
for sample in x_train["embeddings"]:
    x_train_vals.append(sample[0].tolist())
    
x_test_vals = []
for sample in x_test["embeddings"]:
    x_test_vals.append(sample[0].tolist())

x_train_df = pd.DataFrame({'embeddings':x_train_vals})
x_test_df = pd.DataFrame({'embeddings':x_test_vals})

x_train_df["sarcasm"] = y_train["sarcasm"]
x_test_df["sarcasm"] = y_test["sarcasm"]
x_train_df["sarcasm"] = x_train_df["sarcasm"].astype('int').to_numpy()
x_test_df["sarcasm"] = x_test_df["sarcasm"].astype('int').to_numpy()

In [39]:
class FNNTensorDataset(Dataset):
    def __init__(self, dataframe, speaker):
        self.data = dataframe
        self.speaker = speaker

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        if self.speaker:
            features = self.data.loc[index, 'embeddings']
            features = np.array(features)
            a=np.empty((1,1))
            a.fill(self.data.loc[index, 'speaker'])
            a = a.reshape((-1,))
            final_features = np.hstack((features, a))
            label = self.data.loc[index, 'sarcasm']
            return torch.Tensor(final_features).float(), label
        else:
            features = self.data.loc[index, 'embeddings']
            label = self.data.loc[index, 'sarcasm']
            return torch.Tensor(features).float(), label
    
    def __getindexlist__(self):
        return list(self.data.index.values)

class FNNNetSID(nn.Module):
    def __init__(self):
        super(FNNNetSID, self).__init__()
        hidden_1 = 100
        hidden_2 = 10
        self.fc1 = nn.Linear(768, hidden_1)
        self.fc2 = nn.Linear(hidden_1, hidden_2)
        self.fc3 = nn.Linear(hidden_2, 2)

    def forward(self, x):
        x = x.view(-1, 768)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.softmax(self.fc3(x))
        return x
    
class FNNNetSD(nn.Module):
    def __init__(self):
        super(FNNNetSD, self).__init__()
        hidden_1 = 100
        hidden_2 = 10
        self.fc1 = nn.Linear(769, hidden_1)
        self.fc2 = nn.Linear(hidden_1, hidden_2)
        self.fc3 = nn.Linear(hidden_2, 2)

    def forward(self, x):
        x = x.view(-1, 769)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.softmax(self.fc3(x))
        return x
    
def predict_fnn(fnn_model, dataloader):
    prediction_list = []
    actual_list = []
    for data, target in dataloader:
        outputs = fnn_model(data)
        _, predicted = torch.max(outputs.data, 1) 
        prediction_list.append(predicted.cpu())
        actual_list.append(target)
    return prediction_list, actual_list

### Speaker Independent and Context Independent

In [6]:
fnn_train_tensor = FNNTensorDataset(x_train_df[['embeddings', 'sarcasm']], False)
fnn_test_tensor = FNNTensorDataset(x_test_df[['embeddings', 'sarcasm']], False)

num_of_workers = 0
batch_size = 100
valid_size = 0.2

train_indices = list(range(len(fnn_train_tensor)))
np.random.shuffle(train_indices)

test_indices = list(range(len(fnn_test_tensor)))
np.random.shuffle(test_indices)

train_loader = torch.utils.data.DataLoader(
    fnn_train_tensor, 
    batch_size=batch_size, 
    sampler=SubsetRandomSampler(train_indices)
)

test_loader = torch.utils.data.DataLoader(
    fnn_test_tensor, 
    batch_size=batch_size, 
    sampler=SubsetRandomSampler(test_indices)
)

In [7]:
model = FNNNetSID()
print(model)

FNNNetSID(
  (fc1): Linear(in_features=768, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=2, bias=True)
)


In [8]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
test_min_loss = np.inf

for epoch in range(101):
    model.train()
    train_loss = 0.0
    test_loss = 0.0
    for inputs, target in train_loader:
        optimizer.zero_grad()
        output = model(inputs)
        loss = loss_fn(output, target.long())
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*inputs.size(0)

    model.eval()
    for inputs, target in test_loader:
        inputs, target = inputs, target
        output = model(inputs)
        loss = loss_fn(output, target)
        test_loss += loss.item()*inputs.size(0)

    train_loss = train_loss / len(train_loader.dataset)
    test_loss = test_loss / len(test_loader.dataset)
    
    if(epoch%20 == 0):
        print("Epoch: " + str(epoch))
        test_loader_test = torch.utils.data.DataLoader(fnn_test_tensor, batch_size=fnn_test_tensor.__len__())
        predictions, actuals = predict_fnn(model, test_loader_test)
        print(classification_report(actuals[0].tolist(), predictions[0].tolist(), digits=4))

Epoch: 0
              precision    recall  f1-score   support

           0     0.7143    0.4132    0.5236       121
           1     0.5848    0.8333    0.6873       120

    accuracy                         0.6224       241
   macro avg     0.6495    0.6233    0.6054       241
weighted avg     0.6498    0.6224    0.6051       241

Epoch: 20
              precision    recall  f1-score   support

           0     0.6118    0.8595    0.7148       121
           1     0.7606    0.4500    0.5654       120

    accuracy                         0.6556       241
   macro avg     0.6862    0.6548    0.6401       241
weighted avg     0.6859    0.6556    0.6404       241

Epoch: 40
              precision    recall  f1-score   support

           0     0.6818    0.6198    0.6494       121
           1     0.6489    0.7083    0.6773       120

    accuracy                         0.6639       241
   macro avg     0.6653    0.6641    0.6633       241
weighted avg     0.6654    0.6639    0.6633  

### Speaker Dependent and Context Independent

In [9]:
x_train_speakers = pd.read_csv("../../text_features/embeddings/train_data.csv")
x_test_speakers = pd.read_csv("../../text_features/embeddings/test_data.csv")
x_train_speakers

Unnamed: 0,target_,target_context,speaker
0,[CLS] I've been told it's a good way to move o...,[CLS] I've been told it's a good way to move o...,25
1,"[CLS] Yeah, sure. You slept with your husband....","[CLS] Yeah, sure. You slept with your husband....",1
2,[CLS] When are you coming home? [SEP],[CLS] When are you coming home? Okay. Alright....,16
3,[CLS] Riveting. [SEP],[CLS] Riveting. Bingo. Then I lifted the cushi...,0
4,"[CLS] No, this is just part of a daredevil gam...","[CLS] No, this is just part of a daredevil gam...",2
...,...,...,...
956,"[CLS] Oh, that's sweet, but today is all about...","[CLS] Oh, that's sweet, but today is all about...",7
957,[CLS] If you wanna put a label on it. [SEP],[CLS] If you wanna put a label on it. You mean...,24
958,[CLS] That you're an alcoholic? [SEP],[CLS] That you're an alcoholic? I realized som...,3
959,[CLS] All I see is a yellow smudge. [SEP],[CLS] All I see is a yellow smudge. Now go bac...,15


In [10]:
x_train_df["speaker"] = x_train_speakers["speaker"]
x_test_df["speaker"] = x_test_speakers["speaker"]
x_train_df

Unnamed: 0,embeddings,sarcasm,speaker
0,"[0.5315735936164856, -0.0026773586869239807, -...",0,25
1,"[0.2304050773382187, -0.5002979636192322, -0.5...",0,1
2,"[0.19821715354919434, -0.065682552754879, -0.4...",0,16
3,"[-0.574364423751831, 0.16689100861549377, -0.3...",1,0
4,"[0.36247628927230835, -0.24091212451457977, -0...",1,2
...,...,...,...
956,"[0.007841311395168304, -0.3694342374801636, -0...",0,7
957,"[0.14679118990898132, -0.5178409218788147, -0....",1,24
958,"[0.2303091436624527, 0.0991823822259903, -1.18...",1,3
959,"[-0.0032869046553969383, -0.021031878888607025...",0,15


In [40]:
fnn_train_tensor = FNNTensorDataset(x_train_df[['embeddings', 'speaker', 'sarcasm']], True)
fnn_test_tensor = FNNTensorDataset(x_test_df[['embeddings', 'speaker', 'sarcasm']], True)

num_of_workers = 0
batch_size = 100
valid_size = 0.2

train_indices = list(range(len(fnn_train_tensor)))
np.random.shuffle(train_indices)

test_indices = list(range(len(fnn_test_tensor)))
np.random.shuffle(test_indices)

train_loader = torch.utils.data.DataLoader(
    fnn_train_tensor, 
    batch_size=batch_size, 
    sampler=SubsetRandomSampler(train_indices)
)

test_loader = torch.utils.data.DataLoader(
    fnn_test_tensor, 
    batch_size=batch_size, 
    sampler=SubsetRandomSampler(test_indices)
)

In [41]:
model = FNNNetSD()
print(model)

FNNNetSD(
  (fc1): Linear(in_features=769, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=2, bias=True)
)


In [42]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
test_min_loss = np.inf

for epoch in range(101):
    model.train()
    train_loss = 0.0
    test_loss = 0.0
    for inputs, target in train_loader:
        optimizer.zero_grad()
        output = model(inputs)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*inputs.size(0)

    model.eval()
    for inputs, target in test_loader:
        inputs, target = inputs, target
        output = model(inputs)
        loss = loss_fn(output, target)
        test_loss += loss.item()*inputs.size(0)

    train_loss = train_loss / len(train_loader.dataset)
    test_loss = test_loss / len(test_loader.dataset)
    
    if(epoch%20 == 0):
        print("Epoch: " + str(epoch))
        test_loader_test = torch.utils.data.DataLoader(fnn_test_tensor, batch_size=fnn_test_tensor.__len__())
        predictions, actuals = predict_fnn(model, test_loader_test)
        print(classification_report(actuals[0].tolist(), predictions[0].tolist(), digits=4))

Epoch: 0
              precision    recall  f1-score   support

           0     0.6235    0.8347    0.7138       121
           1     0.7468    0.4917    0.5930       120

    accuracy                         0.6639       241
   macro avg     0.6851    0.6632    0.6534       241
weighted avg     0.6849    0.6639    0.6536       241

Epoch: 20
              precision    recall  f1-score   support

           0     0.7353    0.6198    0.6726       121
           1     0.6691    0.7750    0.7181       120

    accuracy                         0.6971       241
   macro avg     0.7022    0.6974    0.6954       241
weighted avg     0.7023    0.6971    0.6953       241

Epoch: 40
              precision    recall  f1-score   support

           0     0.6905    0.7190    0.7045       121
           1     0.7043    0.6750    0.6894       120

    accuracy                         0.6971       241
   macro avg     0.6974    0.6970    0.6969       241
weighted avg     0.6974    0.6971    0.6969  