In [1]:
import pandas as pd 
import pickle
import warnings
warnings.filterwarnings('ignore')
import numpy as np

from sklearn.metrics import classification_report
from sklearn import preprocessing
import torch
import torch.nn as nn
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.autograd import Variable

In [2]:
y_train = pd.read_csv("../../../text_features/bert_emo_embeddings/train_labels_bert_emo.csv")
y_test = pd.read_csv("../../../text_features/bert_emo_embeddings/train_labels_bert_emo.csv")

In [3]:
with open('../../../text_features/bert_emo_embeddings/train_bert_emo_embeddings_target_.pkl', 'rb') as f:
    x_train = pickle.load(f, encoding='latin1')

with open('../../../text_features/bert_emo_embeddings/test_bert_emo_embeddings_target_.pkl', 'rb') as f:
    x_test = pickle.load(f, encoding='latin1')

In [4]:
x_train_vals = []
for sample in x_train["embeddings"]:
    x_train_vals.append(torch.stack(sample))
    
x_test_vals = []
for sample in x_test["embeddings"]:
    x_test_vals.append(torch.stack(sample))

x_train_df = pd.DataFrame({'embeddings':x_train_vals})
x_test_df = pd.DataFrame({'embeddings':x_test_vals})

x_train_df["sarcasm"] = y_train["sarcasm"]
x_test_df["sarcasm"] = y_test["sarcasm"]
x_train_df["sarcasm"] = x_train_df["sarcasm"].astype('int').to_numpy()
x_test_df["sarcasm"] = x_test_df["sarcasm"].astype('int').to_numpy()

In [5]:
class RNNTensorDataset(Dataset):
    def __init__(self, dataframe, speaker):
        self.data = dataframe
        self.speaker = speaker

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        if self.speaker:
            features = self.data.loc[index, 'embeddings']
            a=np.empty((512,1))
            a.fill(self.data.loc[index, 'speaker_encode'])
            final_features = np.hstack((features, a))
            label = self.data.loc[index, 'sarcasm']
            return torch.from_numpy(final_features).float(), label
        else:
            features = self.data.loc[index, 'embeddings']
            label = self.data.loc[index, 'sarcasm']
            return features.float(), label
    
    def __getindexlist__(self):
        return list(self.data.index.values)
    
class RNNetSD(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, layers):
        super(RNNetSD, self).__init__()
        self.hidden_dim = hidden_dim
        self.layers = layers
        self.rnn = nn.RNN(input_dim, hidden_dim, num_layers=layers, batch_first=True, nonlinearity="relu")
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = Variable(torch.zeros(self.layers, x.size(0), self.hidden_dim))
        out, hn = self.rnn(x, h0)
        out = F.softmax(self.fc(out[:, -1, :]))
        return out
    
class RNNetSID(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, layers):
        super(RNNetSID, self).__init__()
        self.hidden_dim = hidden_dim
        self.layers = layers
        self.rnn = nn.RNN(input_dim, hidden_dim, num_layers=layers, batch_first=True, nonlinearity="relu")
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = Variable(torch.zeros(self.layers, x.size(0), self.hidden_dim))
        out, hn = self.rnn(x, h0)
        out = F.softmax(self.fc(out[:, -1, :]))
        return out
    
def evaluateRNN(rnn, review, size):
    output = rnn(review)
    return output

def categoryFromOutput(output):
    top_n, top_i = torch.max(output,dim=1)
    return top_i

def test_accuracy(rnn, loader, size):
    actuals = []
    predictions = []
    for data, target in loader:
        output = evaluateRNN(rnn, data, size)
        prediction_index = categoryFromOutput(output)
        predictions = prediction_index.tolist()
        actuals = target.tolist()
    return predictions, actuals

In [6]:
EMBEDDING_DIM_sid = 768
EMBEDDING_DIM_sd = 769
HIDDEN_DIM = 20
OUTPUT_DIM = 2
layers = 2
criterion = nn.NLLLoss()

### Speaker Independent and Context Independent

In [7]:
rnn_train_tensor = RNNTensorDataset(x_train_df[['embeddings', 'sarcasm']], False)
rnn_test_tensor = RNNTensorDataset(x_test_df[['embeddings', 'sarcasm']], False)

num_of_workers = 0
batch_size = 44
valid_size = 0.1

train_indices = list(range(len(rnn_train_tensor)))
np.random.shuffle(train_indices)

test_indices = list(range(len(rnn_test_tensor)))
np.random.shuffle(test_indices)

train_loader = torch.utils.data.DataLoader(
    rnn_train_tensor, 
    batch_size=batch_size, 
    sampler=SubsetRandomSampler(train_indices)
)

test_loader = torch.utils.data.DataLoader(
    rnn_test_tensor, 
    batch_size=batch_size, 
    sampler=SubsetRandomSampler(test_indices)
)

test_loader_epoch = torch.utils.data.DataLoader(
    rnn_test_tensor, batch_size=rnn_test_tensor.__len__())

In [8]:
rnn = RNNetSID(EMBEDDING_DIM_sid, HIDDEN_DIM, OUTPUT_DIM, layers)
print(rnn)

optimizer = torch.optim.Adam(rnn.parameters(), lr=0.001)

n_epochs = 501
    
test_min_loss = np.inf

for epoch in range(n_epochs):
    torch.manual_seed(42)
    train_loss = 0.0
    test_loss = 0.0
    rnn.train()
    for data, target in train_loader:
        optimizer.zero_grad()
        output = rnn(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*data.size(0)

    rnn.eval()
    for data, target in test_loader:
        if data.shape[1] < 44:
            continue
        output = rnn(data)
        loss = criterion(output, target)
        test_loss += loss.item()*data.size(0)

    train_loss = train_loss / len(train_loader.dataset)
    test_loss = test_loss / len(test_loader.dataset)

    if(epoch%20 == 0):
        print("Epoch: " + str(epoch))
        predictions, actuals = test_accuracy(rnn, test_loader_epoch, rnn_test_tensor.__len__())
        print(classification_report(actuals, predictions, digits=4))

RNNetSID(
  (rnn): RNN(768, 20, num_layers=2, batch_first=True)
  (fc): Linear(in_features=20, out_features=2, bias=True)
)
Epoch: 0
              precision    recall  f1-score   support

           0     0.4688    0.3689    0.4128       122
           1     0.4690    0.5714    0.5152       119

    accuracy                         0.4689       241
   macro avg     0.4689    0.4701    0.4640       241
weighted avg     0.4689    0.4689    0.4634       241

Epoch: 20
              precision    recall  f1-score   support

           0     0.5213    0.4016    0.4537       122
           1     0.5034    0.6218    0.5564       119

    accuracy                         0.5104       241
   macro avg     0.5123    0.5117    0.5050       241
weighted avg     0.5125    0.5104    0.5044       241

Epoch: 40
              precision    recall  f1-score   support

           0     0.4969    0.6557    0.5654       122
           1     0.4750    0.3193    0.3819       119

    accuracy                 

Epoch: 480
              precision    recall  f1-score   support

           0     0.5510    0.4426    0.4909       122
           1     0.5245    0.6303    0.5725       119

    accuracy                         0.5353       241
   macro avg     0.5377    0.5364    0.5317       241
weighted avg     0.5379    0.5353    0.5312       241

Epoch: 500
              precision    recall  f1-score   support

           0     0.4795    0.5738    0.5224       122
           1     0.4526    0.3613    0.4019       119

    accuracy                         0.4689       241
   macro avg     0.4660    0.4676    0.4621       241
weighted avg     0.4662    0.4689    0.4629       241



### Speaker Dependent and Context Independent

In [9]:
x_train_speakers = pd.read_csv("../../../text_features/bert_emo_embeddings/train_data_bert_emo.csv")
x_test_speakers = pd.read_csv("../../../text_features/bert_emo_embeddings/test_data_bert_emo.csv")
x_train_speakers

Unnamed: 0,target_,target_context,speaker
0,[CLS] I've been told it's a good way to move o...,[CLS] I've been told it's a good way to move o...,25
1,"[CLS] Yeah, sure. You slept with your husband....","[CLS] Yeah, sure. You slept with your husband....",1
2,[CLS] When are you coming home? [SEP],[CLS] When are you coming home? Okay. Alright....,16
3,[CLS] Riveting. [SEP],[CLS] Riveting. Bingo. Then I lifted the cushi...,0
4,"[CLS] No, this is just part of a daredevil gam...","[CLS] No, this is just part of a daredevil gam...",2
...,...,...,...
956,"[CLS] Oh, that's sweet, but today is all about...","[CLS] Oh, that's sweet, but today is all about...",7
957,[CLS] If you wanna put a label on it. [SEP],[CLS] If you wanna put a label on it. You mean...,24
958,[CLS] That you're an alcoholic? [SEP],[CLS] That you're an alcoholic? I realized som...,3
959,[CLS] All I see is a yellow smudge. [SEP],[CLS] All I see is a yellow smudge. Now go bac...,15


In [10]:
x_train_df["speaker_encode"] = x_train_speakers["speaker"]
x_test_df["speaker_encode"] = x_test_speakers["speaker"]
x_train_df

Unnamed: 0,embeddings,sarcasm,speaker_encode
0,"[[tensor(0.4818), tensor(0.3962), tensor(-0.10...",0,25
1,"[[tensor(0.1864), tensor(-0.0127), tensor(-0.2...",0,1
2,"[[tensor(0.3204), tensor(0.1967), tensor(-0.41...",0,16
3,"[[tensor(-0.2869), tensor(0.3007), tensor(0.05...",1,0
4,"[[tensor(-0.0119), tensor(-0.0834), tensor(-0....",1,2
...,...,...,...
956,"[[tensor(-0.2093), tensor(-0.3899), tensor(0.0...",0,7
957,"[[tensor(0.0286), tensor(0.0007), tensor(-0.29...",1,24
958,"[[tensor(0.2307), tensor(1.1027), tensor(-0.85...",1,3
959,"[[tensor(-0.0889), tensor(0.5095), tensor(0.10...",0,15


In [11]:
rnn_train_tensor = RNNTensorDataset(x_train_df[['embeddings', 'sarcasm', 'speaker_encode']], True)
rnn_test_tensor = RNNTensorDataset(x_test_df[['embeddings', 'sarcasm', 'speaker_encode']], True)

num_of_workers = 0
batch_size = 44
valid_size = 0.1

train_indices = list(range(len(rnn_train_tensor)))
np.random.shuffle(train_indices)

test_indices = list(range(len(rnn_test_tensor)))
np.random.shuffle(test_indices)

train_loader = torch.utils.data.DataLoader(
    rnn_train_tensor, 
    batch_size=batch_size, 
    sampler=SubsetRandomSampler(train_indices)
)

test_loader = torch.utils.data.DataLoader(
    rnn_test_tensor, 
    batch_size=batch_size, 
    sampler=SubsetRandomSampler(test_indices)
)

test_loader_epoch = torch.utils.data.DataLoader(
    rnn_test_tensor, batch_size=rnn_test_tensor.__len__())

In [12]:
rnn = RNNetSD(EMBEDDING_DIM_sd, HIDDEN_DIM, OUTPUT_DIM, layers)
print(rnn)

optimizer = torch.optim.Adam(rnn.parameters(), lr=0.001)

n_epochs = 501
    
test_min_loss = np.inf

for epoch in range(n_epochs):
    torch.manual_seed(42)
    train_loss = 0.0
    test_loss = 0.0
    rnn.train()
    for data, target in train_loader:
        optimizer.zero_grad()
        output = rnn(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*data.size(0)

    rnn.eval()
    for data, target in test_loader:
        if data.shape[1] < 44:
            continue
        output = rnn(data)
        loss = criterion(output, target)
        test_loss += loss.item()*data.size(0)

    train_loss = train_loss / len(train_loader.dataset)
    test_loss = test_loss / len(test_loader.dataset)

    if(epoch%20 == 0):
        print("Epoch: " + str(epoch))
        predictions, actuals = test_accuracy(rnn, test_loader_epoch, rnn_test_tensor.__len__())
        print(classification_report(actuals, predictions, digits=4))

RNNetSD(
  (rnn): RNN(769, 20, num_layers=2, batch_first=True)
  (fc): Linear(in_features=20, out_features=2, bias=True)
)
Epoch: 0
              precision    recall  f1-score   support

           0     0.5062    1.0000    0.6722       122
           1     0.0000    0.0000    0.0000       119

    accuracy                         0.5062       241
   macro avg     0.2531    0.5000    0.3361       241
weighted avg     0.2563    0.5062    0.3403       241

Epoch: 20
              precision    recall  f1-score   support

           0     0.4867    0.5984    0.5368       122
           1     0.4615    0.3529    0.4000       119

    accuracy                         0.4772       241
   macro avg     0.4741    0.4757    0.4684       241
weighted avg     0.4743    0.4772    0.4692       241

Epoch: 40
              precision    recall  f1-score   support

           0     0.5126    0.5000    0.5062       122
           1     0.5000    0.5126    0.5062       119

    accuracy                  

Epoch: 480
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       122
           1     0.4938    1.0000    0.6611       119

    accuracy                         0.4938       241
   macro avg     0.2469    0.5000    0.3306       241
weighted avg     0.2438    0.4938    0.3264       241

Epoch: 500
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       122
           1     0.4938    1.0000    0.6611       119

    accuracy                         0.4938       241
   macro avg     0.2469    0.5000    0.3306       241
weighted avg     0.2438    0.4938    0.3264       241

