In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pandas as pd
import pickle

dev = "cpu"
#if torch.cuda.is_available(): dev = "cuda"

device = torch.device(dev)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hyperparameters
train_proportion = 0.80
hidden_layer_size = 70
learning_rate = 0.0001
batch_size = 64
epochs = 10000

In [4]:
with open("encoded_dataframe", "rb") as encoded_dataframe:
    encoded_df = pickle.load(encoded_dataframe)

                                                      0            1
0     [[tensor(0.0123), tensor(0.0055), tensor(0.010...  [tensor(1)]
1     [[tensor(0.2849), tensor(0.1150), tensor(0.226...  [tensor(1)]
2     [[tensor(0.2849), tensor(0.1150), tensor(0.226...  [tensor(1)]
3     [[tensor(0.0073), tensor(0.0030), tensor(0.006...  [tensor(1)]
4     [[tensor(0.0637), tensor(0.0265), tensor(0.050...  [tensor(1)]
...                                                 ...          ...
5786  [[tensor(0.1038), tensor(0.0417), tensor(0.081...  [tensor(0)]
5787  [[tensor(0.0803), tensor(0.0315), tensor(0.064...  [tensor(0)]
5788  [[tensor(0.0366), tensor(0.0157), tensor(0.028...  [tensor(1)]
5789  [[tensor(0.0015), tensor(0.0019), tensor(0.001...  [tensor(1)]
5790  [[tensor(0.2135), tensor(0.0904), tensor(0.178...  [tensor(1)]

[5791 rows x 2 columns]


In [4]:
# Shuffle data and split into training and testing data
train_dataset = encoded_df.sample(frac = train_proportion)
test_dataset = encoded_df.drop(train_dataset.index)

train_size = train_dataset.shape[0]
test_size = test_dataset.shape[0]

train_tensor = torch.utils.data.TensorDataset(torch.stack(tuple(train_dataset[0])).type(torch.float32).to(device), torch.stack(tuple(train_dataset[1])).to(device))
test_tensor = torch.utils.data.TensorDataset(torch.stack(tuple(test_dataset[0])).type(torch.float32).to(device), torch.stack(tuple(test_dataset[1])).to(device))

train_loader = torch.utils.data.DataLoader(dataset = train_tensor, batch_size = batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test_tensor, batch_size = 1)

In [5]:
class SRN_model(nn.Module):
    def __init__(self, num_input, num_hid, num_out):
        super().__init__()
        self.num_hid = num_hid
        self.batch_size = 1
        self.H0= nn.Parameter(torch.Tensor(num_hid))
        self.W = nn.Parameter(torch.Tensor(num_input, num_hid))
        self.U = nn.Parameter(torch.Tensor(num_hid, num_hid))
        self.hid_bias = nn.Parameter(torch.Tensor(num_hid))
        self.V = nn.Parameter(torch.Tensor(num_hid, num_out))
        self.out_bias = nn.Parameter(torch.Tensor(num_out))

        # Various initialisation schemes. Initialisation is important.
        nn.init.zeros_(self.H0)
        nn.init.xavier_normal_(self.W)
        nn.init.xavier_normal_(self.U)
        nn.init.zeros_(self.hid_bias)
        nn.init.xavier_normal_(self.V)
        nn.init.zeros_(self.out_bias)

    def init_hidden(self):
        H0 = torch.tanh(self.H0)
        return(H0.unsqueeze(0))
 
    def forward(self, seq):
        seq_size, _ = seq.size()
        h_t = self.init_hidden()
        for t in range(seq_size):
            x_t = seq[t]
            c_t = x_t @ self.W + h_t @ self.U + self.hid_bias
            h_t = torch.tanh(c_t)
        output = h_t @ self.V + self.out_bias
        return output

In [6]:
net = SRN_model(len(encoded_df[0][0][0]),hidden_layer_size,2)

# Yes, I hard coded these values. Yes, this is because I'm lazy
num_positive = 3685
num_negative = 2106

net.to(device)
weight = torch.FloatTensor([num_positive/num_negative, num_positive/num_positive]).to(device)

# Negative log likelihood loss. Suited for classification tasks.
criterion = F.nll_loss

optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=0.00001)

In [7]:
def train(net, criterion, optimizer, data, label):

    loss = 0
    outputs = []

    for i in range(data.shape[0]):

        # Forward
        output = net(data[i])

        # Apply output nonlinearity. Log_softmax chosen as it is suited for classification tasks
        outputs.append(F.log_softmax(output, dim=1))
    
    loss = criterion(torch.cat(outputs, dim=0), torch.squeeze(label,1), weight=weight)
    
    loss.backward()

    optimizer.step()

    return loss.data.item()

In [8]:
plot_loss = []
plot_correct = []

num_batches = train_size//batch_size

for e in range(epochs):
    loss = 0.

    # Trains on every training data item individually each epoch
    for data, label in train_loader:
        optimizer.zero_grad()
        loss += train(net, criterion, optimizer, data, label)

    if (loss <= 0.001): break

    # Evaluate proportion of the test set correctly predicted.
    correct = 0
    for data, label in test_loader:
        output = net(data[0])
        if (torch.argmax(output.data) == label[0][0]): correct += 1
    accuracy = correct/test_size*100

    # Append loss and accuracy results to lists for later plotting.
    plot_loss.append(loss/num_batches)
    plot_correct.append(accuracy)
    
    # Print loss and accuracy every epoch.
    print("Epoch %02d, loss = %f, accuracy = %.2f%%" % (e+1, loss / num_batches, accuracy))

Epoch 01, loss = 0.702170, accuracy = 55.01%
Epoch 02, loss = 0.697316, accuracy = 52.33%


KeyboardInterrupt: 

In [None]:
# Plot results
plt.plot(plot_loss)
plt.xlabel('Epoch')
plt.ylabel('Avg. Loss per Epoch (on Training Set)')
plt.show()

plt.plot(plot_correct)
plt.xlabel('Epoch')
plt.ylabel('Accuracy per Epoch (on Test Set)')
plt.show()