In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence
from torch.nn.utils.rnn import pad_packed_sequence
import matplotlib.pyplot as plt
import pandas as pd
import pickle

# Compute device. It is pinned to the CPU: there is no CUDA auto-detection here,
# so `dev` has to be edited by hand to target a different device.
dev = "cpu"
device = torch.device(dev)

In [None]:
# Hyperparameters

# Data handling: fraction of rows used for training, and mini-batch size.
train_proportion = 0.8
batch_size = 64

# Model shape: recurrent hidden width, number of stacked recurrent layers,
# and dropout probability.
hidden_layer_size = 70
n_hidden_layers = 1
drop_prob = 0.5

# Optimisation: Adam step size and number of passes over the training set.
learning_rate = 1e-4
epochs = 10_000

In [None]:
# Load the pre-encoded dataset.
# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only load "encoded_dataframe" if it was produced by a trusted source.
with open("encoded_dataframe", "rb") as encoded_dataframe:
    encoded_df = pickle.load(encoded_dataframe)

# Size of one timestep vector, read from the first timestep of the first row of
# column 0. The row is taken by position (.iloc) so this still works when the
# frame's index does not contain the label 0.
embedding_dim = encoded_df[0].iloc[0][0].size(0)

In [None]:
# Shuffle data and split into training and testing data
def _to_tensor_dataset(frame):
    """Stack the per-row tensors of `frame` into a TensorDataset on `device`.

    Column 0 is cast to float32 and column 2 to long; column 1 keeps its dtype.
    The training loop unpacks the three columns as (data, label, lengths).
    Both splits go through this one helper so their dtypes always agree
    (previously only the test lengths were cast to long).
    """
    sequences = torch.stack(tuple(frame[0])).type(torch.float32).to(device)
    labels = torch.stack(tuple(frame[1])).to(device)
    lengths = torch.stack(tuple(frame[2])).type(torch.long).to(device)
    return torch.utils.data.TensorDataset(sequences, labels, lengths)


# NOTE(review): the sample is unseeded, so the split differs between runs.
# Pass random_state=... to .sample() if a reproducible split is wanted.
train_dataset = encoded_df.sample(frac = train_proportion)
# Everything that was not sampled for training becomes the test set.
test_dataset = encoded_df.drop(train_dataset.index)

train_size = train_dataset.shape[0]
test_size = test_dataset.shape[0]

train_tensor = _to_tensor_dataset(train_dataset)
test_tensor = _to_tensor_dataset(test_dataset)

# Training batches are reshuffled every epoch; the test set is served one row at a time.
train_loader = torch.utils.data.DataLoader(dataset = train_tensor, batch_size = batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test_tensor, batch_size = 1)

In [None]:
class SRN_model(nn.Module):
    """Simple recurrent network (nn.RNN) classifier over padded, variable-length sequences.

    Each sequence is summarised by the top-layer hidden state at its last valid
    timestep; that state goes through dropout and a linear layer to produce
    one score per output class.

    Args:
        embedding_dim: size of each timestep's input vector.
        hidden_dim: size of the recurrent hidden state.
        n_hidden_layers: number of stacked recurrent layers.
        output_size: number of scores produced per sequence.
        drop_prob: dropout probability, applied before the linear layer and,
            when there is more than one recurrent layer, between those layers.
    """

    def __init__(self, embedding_dim, hidden_dim, n_hidden_layers, output_size, drop_prob):

        super().__init__()
        # nn.RNN only applies dropout between stacked layers. With one layer a
        # non-zero value does nothing except trigger a UserWarning, so pass 0.
        inter_layer_dropout = drop_prob if n_hidden_layers > 1 else 0.0
        self.srn = nn.RNN(embedding_dim, hidden_dim, n_hidden_layers, dropout=inter_layer_dropout, batch_first=True)
        self.dropout = nn.Dropout(drop_prob)
        self.full_connected = nn.Linear(hidden_dim, output_size)

    def forward(self, batched_sequences, sentence_lengths):
        """Return unnormalised class scores of shape (batch, output_size).

        Args:
            batched_sequences: padded, batch-first input of shape
                (batch, max_len, embedding_dim).
            sentence_lengths: 1-D collection holding the number of valid
                timesteps of each sequence; every entry must be >= 1.
        """
        # pack_padded_sequence requires the lengths on the CPU, whatever device
        # the inputs live on.
        lengths_cpu = torch.as_tensor(sentence_lengths, dtype=torch.int64).cpu()
        packed = pack_padded_sequence(batched_sequences, lengths_cpu, batch_first=True, enforce_sorted=False)

        # For packed input the returned hidden state is taken at each sequence's
        # own last valid timestep (in the original batch order), so there is no
        # need to unpack the outputs and gather them by length.
        _, final_hidden = self.srn(packed)
        last_state = final_hidden[-1]  # top recurrent layer: (batch, hidden_dim)

        return self.full_connected(self.dropout(last_state))

In [None]:
# Two output units: the network emits one score per class.
net = SRN_model(embedding_dim, hidden_layer_size, n_hidden_layers, 2, drop_prob)
net.to(device)

# Class counts used to derive the per-class loss weights.
num_positive = 3685
num_negative = 2106

# Index 0 is weighted by the positive/negative ratio; index 1 keeps unit weight.
class_ratios = [num_positive / num_negative, num_positive / num_positive]
weight = torch.tensor(class_ratios, dtype=torch.float32).to(device)

# Negative log likelihood loss. Suited for classification tasks.
criterion = F.nll_loss

optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=1e-5)

In [None]:
def train(net, criterion, optimizer, data, label, lengths):
    """Run one optimisation step on a single mini-batch and return its loss.

    The caller is responsible for zeroing the gradients beforehand; this
    function only does the forward pass, the backward pass and the update.

    Args:
        net: model called as ``net(data, lengths)``, returning raw class scores.
        criterion: loss callable invoked as ``criterion(log_probs, target, weight=...)``.
        optimizer: optimizer whose ``step()`` applies the update.
        data: input batch passed to ``net``.
        label: targets with a singleton dimension 1, which is squeezed away.
        lengths: per-sequence lengths passed to ``net``.

    Returns:
        The batch loss as a Python float.

    Note:
        The class weights are read from the module-level ``weight`` tensor.
    """
    # Forward
    scores = net(data, lengths)

    # Apply output nonlinearity. Log_softmax chosen as it is suited for classification tasks
    log_probs = F.log_softmax(scores, dim=1)

    loss = criterion(log_probs, torch.squeeze(label, 1), weight=weight)

    loss.backward()

    optimizer.step()

    return loss.item()

In [None]:
plot_loss = []
plot_correct = []

# Count batches from the loader itself: it also yields the final partial batch,
# which train_size//batch_size would leave out of the average.
num_batches = len(train_loader)

for e in range(epochs):
    loss = 0.

    # One optimisation step per mini-batch, with dropout enabled.
    net.train()
    for data, label, lengths in train_loader:
        optimizer.zero_grad()
        loss += train(net, criterion, optimizer, data, label, lengths)

    # Evaluate proportion of the test set correctly predicted.
    # eval() switches dropout off so predictions are deterministic, and
    # no_grad() skips building the autograd graph that is never used here.
    correct = 0
    net.eval()
    with torch.no_grad():
        for data, label, lengths in test_loader:
            output = net(data, lengths)
            # Compare the highest-scoring class of each row with its label.
            correct += (output.argmax(dim=1) == label[:, 0]).sum().item()
    accuracy = correct/test_size*100

    # Append loss and accuracy results to lists for later plotting.
    plot_loss.append(loss/num_batches)
    plot_correct.append(accuracy)
    
    # Print loss and accuracy every epoch.
    print("Epoch %02d, loss = %f, accuracy = %.2f%%" % (e+1, loss / num_batches, accuracy))

In [None]:
# Plot results: one figure per recorded series, each against the epoch number.
curves = (
    (plot_loss, 'Avg. Loss per Epoch (on Training Set)'),
    (plot_correct, 'Accuracy per Epoch (on Test Set)'),
)

for series, y_label in curves:
    plt.plot(series)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.show()