In [50]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from random import shuffle
import pickle

# if torch.cuda.is_available(): dev = "cuda"
# else: dev = "cpu"
dev = "cpu"
device = torch.device(dev)

In [51]:
# Hyperparameters
train_proportion = 0.80
hidden_layer_size = 50
learning_rate = 0.001
batch_size = 64
epochs = 1000

In [52]:
# Hyperparameters - LSTM
embedding_dim = 400
hidden_dim = 50
n_layers = 2
vocab_size = 10215 # need to check

In [53]:
with open("encoded_dataframe", "rb") as encoded_dataframe:
    encoded_df = pickle.load(encoded_dataframe)

In [54]:
# Shuffle data and split into training and testing data
train_dataset = encoded_df.sample(frac = train_proportion)
test_dataset = encoded_df.drop(train_dataset.index)

train_size = train_dataset.shape[0]
test_size = test_dataset.shape[0]

train_tensor = torch.utils.data.TensorDataset(torch.stack(tuple(train_dataset[0])).type(torch.float32).to(device), torch.stack(tuple(train_dataset[1])).to(device))
test_tensor = torch.utils.data.TensorDataset(torch.stack(tuple(test_dataset[0])).type(torch.float32).to(device), torch.stack(tuple(test_dataset[1])).to(device))

train_loader = torch.utils.data.DataLoader(dataset = train_tensor, batch_size = batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test_tensor, batch_size = 1)

In [55]:
class SRN_model(nn.Module):
    def __init__(self, num_input, num_hid, num_out):
        super().__init__()
        self.num_hid = num_hid
        self.batch_size = 1
        self.H0= nn.Parameter(torch.Tensor(num_hid))
        self.W = nn.Parameter(torch.Tensor(num_input, num_hid))
        self.U = nn.Parameter(torch.Tensor(num_hid, num_hid))
        self.hid_bias = nn.Parameter(torch.Tensor(num_hid))
        self.V = nn.Parameter(torch.Tensor(num_hid, num_out))
        self.out_bias = nn.Parameter(torch.Tensor(num_out))

        # Various initialisation schemes. Initialisation is important.
        nn.init.zeros_(self.H0)
        nn.init.xavier_normal_(self.W)
        nn.init.xavier_normal_(self.U)
        nn.init.zeros_(self.hid_bias)
        nn.init.xavier_normal_(self.V)
        nn.init.zeros_(self.out_bias)

    def init_hidden(self):
        H0 = torch.tanh(self.H0)
        return(H0.unsqueeze(0))
 
    def forward(self, seq):
        seq_size, _ = seq.size()
        h_t = self.init_hidden()
        for t in range(seq_size):
            x_t = seq[t]
            c_t = x_t @ self.W + h_t @ self.U + self.hid_bias
            h_t = torch.tanh(c_t)
        output = h_t @ self.V + self.out_bias
        return output

In [56]:
class LSTM_Classifier(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, n_layers, vocab_size, output_size, drop_prob=0.5):
     
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)
        self.output_size = output_size
        self.dropout = nn.Dropout(drop_prob)
        self.full_connected = nn.Linear(hidden_dim, output_size)
        self.sig = nn.Sigmoid()

        
    def init_hidden(self):

        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                      weight.new(self.n_layers, batch_size, self.hidden_dim).zero_()
                 )
        return hidden
        
        
    def forward(self, sentence):
        hidden = self.init_hidden()
        embeds = self.embedding(sentence)
        batch_size = sentence.size(0)
        lstm_out, hidden = self.lstm(embeds, hidden)
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
        output = self.dropout(lstm_out)
        output = self.full_connected(output)
        output = self.sig(output)
        output = output.view(batch_size, -1)
        output = output[:, -1]

        return output
    

In [57]:
# net = SRN_model(len(encoded_df[0][0][0]),hidden_layer_size,2)
net = LSTM_Classifier(embedding_dim, hidden_dim, n_layers, vocab_size, 2)

num_positive = 3685
num_negative = 2106

net.to(device)
weight = torch.FloatTensor([num_positive/num_negative, num_positive/num_positive]).to(device)

# Negative log likelihood loss. Suited for classification tasks.
criterion = F.nll_loss

optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=0.00001)

In [58]:
net

LSTM_Classifier(
  (embedding): Embedding(10215, 400)
  (lstm): LSTM(400, 50, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (full_connected): Linear(in_features=50, out_features=2, bias=True)
  (sig): Sigmoid()
)

In [59]:
def train(net, criterion, optimizer, data, label):

    loss = 0
    outputs = []

    for i in range(data.shape[0]):

        # Forward
        output, hidden = net(data[i])

        # Apply output nonlinearity. Log_softmax chosen as it is suited for classification tasks
        outputs.append(F.log_softmax(output, dim=1))
    
    loss = criterion(torch.cat(outputs, dim=0), torch.squeeze(label,1), weight=weight)
    
    loss.backward()

    optimizer.step()

    return loss.data.item()

In [60]:
plot_loss = []
plot_correct = []

num_batches = train_size//batch_size

for e in range(epochs):
    loss = 0.

    # Trains on every training data item individually each epoch
    for data, label in train_loader:
        optimizer.zero_grad()
        loss += train(net, criterion, optimizer, data, label)

    if (loss <= 0.001): break

    # Evaluate proportion of the test set correctly predicted.
    correct = 0
    for data, label in test_loader:
        output = net(data[0])
        if (torch.argmax(output.data) == label[0][0]): correct += 1
    accuracy = correct/test_size*100

    # Append loss and accuracy results to lists for later plotting.
    plot_loss.append(loss/num_batches)
    plot_correct.append(accuracy)
    
    # Print loss and accuracy every epoch.
    print("Epoch %02d, loss = %f, accuracy = %.2f%%" % (e+1, loss / num_batches, accuracy))

RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.FloatTensor instead (while checking arguments for embedding)

In [None]:
# Plot results
plt.plot(plot_loss)
plt.xlabel('Epoch')
plt.ylabel('Avg. Loss per Epoch (on Training Set)')
plt.show()

plt.plot(plot_correct)
plt.xlabel('Epoch')
plt.ylabel('Accuracy per Epoch (on Test Set)')
plt.show()