In [1]:
import pandas as pd 
import pickle
import warnings
warnings.filterwarnings('ignore')
import numpy as np

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

import torch
import torch.nn as nn
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [2]:
labels = pd.read_csv("../../data/scene_labels.csv")

In [3]:
labels.head(5)

Unnamed: 0,SCENE,KEY,SHOW,Sarcasm,Sarcasm_Type
0,1_10004,1_10004_u,BBT,0.0,NONE
1,1_10009,1_10009_u,BBT,0.0,NONE
2,1_1001,1_1001_u,BBT,0.0,NONE
3,1_1003,1_1003_u,BBT,1.0,PRO
4,1_10190,1_10190_u,BBT,0.0,NONE


#### Perform mean, median, max, min and sum pooling on audio feature data

In [4]:
def get_model_data(audio_features):
    model_data = pd.DataFrame(columns=['audio_feature','sarcasm','sarcasm_type'])
    for index, row in labels.iterrows():
        audio_key = row["SCENE"] + "_u.wav"
        model_data = model_data.append({'audio_feature': audio_features[audio_key],
                                    'sarcasm' : row["Sarcasm"],
                                    'sarcasm_type' : row["Sarcasm_Type"]},
                                  ignore_index=True)
    return model_data

In [5]:
def get_train_test_split(model_data, x_column, y_column, stratify_column):
    train_count = 301
    train_data = model_data.groupby(stratify_column).apply(lambda x: x.sample(n=train_count, random_state=42))
    train_index_list = train_data.index.values.tolist()
    test_data = model_data[~model_data.index.isin(train_index_list)]
    train_data.reset_index(drop=True, inplace = True)
    test_data.reset_index(drop=True, inplace = True)
    return train_data, test_data

In [6]:
class FNNTensorDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        features = self.data.loc[index, 'padded_audio_feature']
        label = self.data.loc[index, 'sarcasm']
        return torch.from_numpy(features).float(), label
    
    def __getindexlist__(self):
        return list(self.data.index.values)
    
class FNNNet(nn.Module):
    def __init__(self):
        super(FNNNet, self).__init__()
        hidden_1 = 100
        self.fc1 = nn.Linear(18*283, hidden_1)
        self.fc2 = nn.Linear(hidden_1, 2)

    def forward(self, x):
        x = x.view(-1, 18*283)
        x = F.relu(self.fc1(x))
        x = F.softmax(self.fc2(x))
        return x
    
def predict_fnn(fnn_model, dataloader):
    prediction_list = []
    actual_list = []
    for data, target in dataloader:
        outputs = fnn_model(data)
        _, predicted = torch.max(outputs.data, 1) 
        prediction_list.append(predicted.cpu())
        actual_list.append(target)
    return prediction_list, actual_list

### Librosa

In [7]:
with open('../../audio_features/feat_dict_librosa_lld_2.pickle', 'rb') as f:
    librosa_audio_features = pickle.load(f, encoding='latin1')
    
model_data = get_model_data(librosa_audio_features)

desired_length = 18

train_data, test_data = get_train_test_split(model_data, 'audio_feature', 'sarcasm', 'sarcasm')
fnn_train = train_data.copy()
fnn_test = test_data.copy()
fnn_train.reset_index(drop=True, inplace = True)
fnn_test.reset_index(drop=True, inplace = True)

        
fnn_train['padded_audio_feature'] = fnn_train.loc[:, 'audio_feature']
for index, row in fnn_train.iterrows():
    data_array = row['padded_audio_feature']
    new_array = []
    for arr in data_array:
        if arr.shape[0] < desired_length:
            arr = np.pad(arr, (0, desired_length - arr.shape[0]), 'constant')
            new_array.append(arr)
        else:
            new_array.append(arr)
    fnn_train.at[index, "padded_audio_feature"] = np.array(new_array)

fnn_test['padded_audio_feature'] = fnn_test.loc[:, 'audio_feature']
for index, row in fnn_test.iterrows():
    data_array = row['padded_audio_feature']
    new_array = []
    for arr in data_array:
        if arr.shape[0] < desired_length:
            arr = np.pad(arr, (0, desired_length - arr.shape[0]), 'constant')
            new_array.append(arr)
        else:
            new_array.append(arr)
    fnn_test.at[index, "padded_audio_feature"] = np.array(new_array)

fnn_train["sarcasm"] = fnn_train["sarcasm"].astype('int').to_numpy()
fnn_test["sarcasm"] = fnn_test["sarcasm"].astype('int').to_numpy()

In [8]:
fnn_train_tensor = FNNTensorDataset(fnn_train)
fnn_test_tensor = FNNTensorDataset(fnn_test)

num_of_workers = 0
batch_size = 30
valid_size = 0.1

train_indices = list(range(len(fnn_train_tensor)))
np.random.shuffle(train_indices)

test_indices = list(range(len(fnn_test_tensor)))
np.random.shuffle(test_indices)

train_loader = torch.utils.data.DataLoader(
    fnn_train_tensor, 
    batch_size=batch_size, 
    sampler=SubsetRandomSampler(train_indices)
)

test_loader = torch.utils.data.DataLoader(
    fnn_test_tensor, 
    batch_size=batch_size, 
    sampler=SubsetRandomSampler(test_indices)
)

In [9]:
model = FNNNet()
print(model)

FNNNet(
  (fc1): Linear(in_features=5094, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=2, bias=True)
)


In [10]:
loss_fn = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
test_min_loss = np.inf

for epoch in range(501):
    model.train()
    train_loss = 0.0
    test_loss = 0.0
    for inputs, target in train_loader:
        optimizer.zero_grad()
        output = model(inputs)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*inputs.size(0)

    model.eval()
    for inputs, target in test_loader:
        inputs, target = inputs, target
        output = model(inputs)
        loss = loss_fn(output, target)
        test_loss += loss.item()*inputs.size(0)

    train_loss = train_loss / len(train_loader.dataset)
    test_loss = test_loss / len(test_loader.dataset)
    
    if(epoch%50 == 0):
        torch.save(model.state_dict(), "fnn/fnn_" + str(epoch) + "_.pt")
        print("Epoch: " + str(epoch))
        test_loader = torch.utils.data.DataLoader(fnn_test_tensor, batch_size=fnn_test_tensor.__len__())
        predictions, actuals = predict_fnn(model, test_loader)
        print(classification_report(actuals[0].tolist(), predictions[0].tolist()))
        test_loader = torch.utils.data.DataLoader(fnn_test_tensor, batch_size=batch_size, sampler=SubsetRandomSampler(test_indices))

Epoch: 0
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       601
           1       0.50      1.00      0.67       601

    accuracy                           0.50      1202
   macro avg       0.25      0.50      0.33      1202
weighted avg       0.25      0.50      0.33      1202

Epoch: 50
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       601
           1       0.50      1.00      0.67       601

    accuracy                           0.50      1202
   macro avg       0.25      0.50      0.33      1202
weighted avg       0.25      0.50      0.33      1202

Epoch: 100
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       601
           1       0.50      1.00      0.67       601

    accuracy                           0.50      1202
   macro avg       0.25      0.50      0.33      1202
weighted avg       0.25      0.50      0.33 