In [1]:
import pandas as pd
import torch
import pickle
import warnings
warnings.filterwarnings('ignore')
import numpy as np

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn import preprocessing

import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn.functional as F

In [2]:
y_test = pd.read_csv("../text_features/bert_embeddings/test_labels.csv")

In [3]:
with open('../text_features/bert_embeddings/test_bert_embeddings_target_.pkl', 'rb') as f:
    x_test = pickle.load(f, encoding='latin1')
    
x_test_speakers = pd.read_csv("../text_features/bert_embeddings/test_data.csv")

test_labels = pd.read_csv("final_train_test/y_test_final.csv")
    
x_test_vals = []
for sample in x_test["embeddings"]:
    x_test_vals.append(sample[0].tolist())
x_test_df = pd.DataFrame({'embeddings':x_test_vals})
x_test_df["sarcasm"] = y_test["sarcasm"]
x_test_df["sarcasm"] = x_test_df["sarcasm"].astype('int').to_numpy()
x_test_df["speaker"] = x_test_speakers["speaker"]

In [4]:
x_test_speakers

Unnamed: 0,target_,target_context,speaker
0,"[CLS] Yeah, she couldn't live without the Chan...","[CLS] Yeah, she couldn't live without the Chan...",2
1,[CLS] An entire dinner to talk about your rese...,[CLS] An entire dinner to talk about your rese...,25
2,[CLS] Is it your teen years? [SEP],"[CLS] Is it your teen years? No, there's somet...",7
3,[CLS] That's funny. I always thought Howard wa...,[CLS] That's funny. I always thought Howard wa...,21
4,"[CLS] I'm sorry, what? [SEP]","[CLS] I'm sorry, what? You could have a baby, ...",15
...,...,...,...
236,[CLS] Provided he has already read and is fami...,[CLS] Provided he has already read and is fami...,25
237,[CLS] Are you still enjoying your nap? [SEP],[CLS] Are you still enjoying your nap? Hi Emma...,2
238,[CLS] Did I wake you? [SEP],[CLS] Did I wake you? Are you kidding me? Joey...,2
239,[CLS] Hey. [SEP],[CLS] Hey. I didn't know there were docks. Wen...,8


In [5]:
x_test_df

Unnamed: 0,embeddings,sarcasm,speaker
0,"[-0.05679185688495636, -0.35763344168663025, -...",0,2
1,"[0.3534734845161438, -0.37012195587158203, -0....",1,25
2,"[0.4724458158016205, -0.40140026807785034, -0....",1,7
3,"[0.20552656054496765, -0.378557950258255, -0.1...",1,21
4,"[0.17461548745632172, 0.03321833908557892, -0....",0,15
...,...,...,...
236,"[-0.09152349829673767, -0.6926221251487732, -0...",0,25
237,"[0.4652370810508728, -0.27492570877075195, -0....",1,2
238,"[0.3032662570476532, -0.21665722131729126, -0....",1,2
239,"[-0.3526242971420288, 0.08399864286184311, -0....",0,8


In [6]:
class FNNTensorDataset(Dataset):
    def __init__(self, dataframe, speaker):
        self.data = dataframe
        self.speaker = speaker

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        if self.speaker:
            features = self.data.loc[index, 'embeddings']
            features = np.array(features)
            a=np.empty((1,1))
            a.fill(self.data.loc[index, 'speaker'])
            a = a.reshape((-1,))
            final_features = np.hstack((features, a))
            label = self.data.loc[index, 'sarcasm']
            return torch.Tensor(final_features).float(), label
        else:
            features = self.data.loc[index, 'embeddings']
            label = self.data.loc[index, 'sarcasm']
            return torch.Tensor(features).float(), label
    
    def __getindexlist__(self):
        return list(self.data.index.values)
    
class FNNNetSD(nn.Module):
    def __init__(self):
        super(FNNNetSD, self).__init__()
        hidden_1 = 100
        hidden_2 = 10
        self.fc1 = nn.Linear(769, hidden_1)
        self.fc2 = nn.Linear(hidden_1, hidden_2)
        self.fc3 = nn.Linear(hidden_2, 2)

    def forward(self, x):
        x = x.view(-1, 769)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.softmax(self.fc3(x))
        return x
    
fnn_test_tensor = FNNTensorDataset(x_test_df[['embeddings', 'speaker', 'sarcasm']], True)
test_loader = torch.utils.data.DataLoader(fnn_test_tensor, batch_size=fnn_test_tensor.__len__())

In [7]:
model = FNNNetSD()
print(model)
audio_best_model = model.load_state_dict(torch.load("fnn_lexical_best_model20.pt"))
print(audio_best_model)

FNNNetSD(
  (fc1): Linear(in_features=769, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=2, bias=True)
)
<All keys matched successfully>


In [8]:
def predict_fnn(fnn_model, dataloader):
    prediction_list = []
    actual_list = []
    for data, target in dataloader:
        outputs = fnn_model(data)
        _, predicted = torch.max(outputs.data, 1) 
        prediction_list.append(predicted.cpu())
        actual_list.append(target)
    return prediction_list, actual_list
predictions, actuals = predict_fnn(model, test_loader)

In [9]:
predictions

[tensor([1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1,
         1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0,
         0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0,
         0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
         0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0,
         0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1,
         1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1,
         0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,
         0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0,
         1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0,
         0])]

In [11]:
lexical_predictions = pd.DataFrame(test_labels['scene'].tolist(), list(zip(predictions[0].tolist(), actuals[0].tolist())), columns = ['scene', 'lexical_predictions', 'actuals'])

ValueError: Shape of passed values is (241, 1), indices imply (241, 3)

In [None]:
lexical_predictions

In [None]:
lexical_predictions.to_csv("lexical_predictions.csv", index = False)