# Imports

In [144]:
import pandas as pd
import torch.nn as nn
import numpy as np
import torch
import random
from torchvision import transforms
import torchvision
import torch.nn.functional as F

# Preparations for GPU

In [145]:
torch.cuda.set_device(0)
device = torch.device('cuda')

# Seeding

In [146]:
SEED = 42
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f7b2bd895d0>

# Getting data sets

In [147]:
from torch.nn.utils.rnn import pad_sequence

# Number of classes, we take for granted
CLASSES = 5

data = pd.read_pickle('./data/train.pkl')

# Sort of one-hot-encode labels ( 2 -> [0, 1, 0, 0, 0])
yy = [[1. if i == y else 0. for i in range(CLASSES)] for _, y in data]
# Transform x into tensor
xx = [torch.Tensor(x) for x, _ in data]
# Zip the padded x sequence, and encoded y
data = [[x, y] for x, y in zip(xx, yy)]



# Getting the test data



In [148]:
test_data = pd.read_pickle('./data/test_no_target.pkl')
test_data = [torch.Tensor(x) for x in test_data]

# Data padding


In [149]:
from torch.nn.utils.rnn import pad_sequence

def pad_collate(batch, pad_value=0):
    xx, yy = zip(*batch)
    x_lengths = [len(single_x) for single_x in xx]

    xx_pad = pad_sequence(xx, padding_value=pad_value)

    return xx_pad, torch.tensor(yy), x_lengths, 1

# Getting the dataloaders

In [150]:
from torch.utils.data import WeightedRandomSampler

# Sets params
batch_size = 32
train_to_valid = 0.8

# Get the index for splitting data into train and valid sets
valid_index = int(train_to_valid * len(data))

trainset, validset = torch.utils.data.random_split(data, [valid_index, len(data) - valid_index], generator=torch.Generator().manual_seed(SEED))

#Data sampling
classes = {}

# Proportional to classcounts
# for _, y in trainset:
#   y = np.argmax(y)
#   if y not in classes.keys():
#     classes[y] = 1
#   else:
#     classes[y] += 1
# for key in classes.keys():
#   classes[key] = 1./classes[key]

# Custom
classes = {0: 0.3, 1: 0.5, 2: 0.8, 3: 0.9, 4: 0.8}

y_classified = [np.argmax(y) for _, y in trainset]
example_weights = [classes[e] for e in y_classified]

sampler = WeightedRandomSampler(example_weights, len(trainset))

trainloader = torch.utils.data.DataLoader(trainset, sampler=sampler, batch_size=batch_size,
                                          num_workers=2, collate_fn=pad_collate)

validloader = torch.utils.data.DataLoader(validset, batch_size=batch_size,
                                          num_workers=2, collate_fn=pad_collate)

Quick note: After testing, proportional to class counts example_weights were performing amazingly well for all classes expect the biggest one - 0, achieving accuracy of about 13%. I've decided to skip mathematical formulas and just eyebal some 'custom' values and it went well enough.

In [151]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_hidden, num_layers, out_size):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.num_hidden = num_hidden
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
        
        # Linear layers
        self.fc1 = nn.Linear(hidden_size, num_hidden)
        self.act = nn.ReLU()
        self.fc2 = nn.Linear(num_hidden, out_size)

    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        state = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        return hidden, state

    def forward(self, x, hidden):
        # x dimensions should be [sequence_len, batch_size, output_size]

        all_outputs, hidden = self.lstm(x, hidden)
        all_outputs = all_outputs[-1] # Take the last prediction
        x = self.act(self.fc1(all_outputs))
        x = self.fc2(x)

        return x, hidden


In [152]:
import torch.optim as optim

net = LSTM(input_size=1, hidden_size=50, num_hidden=500, num_layers=1, out_size=5).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.003)


In [153]:
net.train()

epochs = 301
stateful = False

# Training loop
for epoch in range(epochs):
    running_loss = 0.0
    for x, targets, x_len, y_len in trainloader:
        # Get inputs (x) and change the dimension to fit the designated one
        x = x.to(device).unsqueeze(2)
        targets = targets.to(device)

        # Initialize hidden and state
        hidden, state = net.init_hidden(x.size(1))
        hidden, state = hidden.to(device), state.to(device)

        # Get predictions ( X -> [batch_size, sequence_length, output_size])
        preds, _ = net(x, (hidden, state))
        preds = preds.squeeze(1)

        # Back-propagate
        optimizer.zero_grad()
        loss = criterion(preds, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, loss: {running_loss/batch_size:.3}")

Epoch: 0, loss: 3.43
Epoch: 10, loss: 3.42
Epoch: 20, loss: 3.35
Epoch: 30, loss: 3.36
Epoch: 40, loss: 3.36
Epoch: 50, loss: 3.34
Epoch: 60, loss: 3.32
Epoch: 70, loss: 3.31
Epoch: 80, loss: 3.14
Epoch: 90, loss: 3.0
Epoch: 100, loss: 2.7
Epoch: 110, loss: 2.61
Epoch: 120, loss: 2.43
Epoch: 130, loss: 2.18
Epoch: 140, loss: 2.27
Epoch: 150, loss: 2.02
Epoch: 160, loss: 1.86
Epoch: 170, loss: 1.81
Epoch: 180, loss: 1.97
Epoch: 190, loss: 2.84
Epoch: 200, loss: 2.55
Epoch: 210, loss: 2.16
Epoch: 220, loss: 2.03
Epoch: 230, loss: 1.93
Epoch: 240, loss: 1.81
Epoch: 250, loss: 1.65
Epoch: 260, loss: 1.57
Epoch: 270, loss: 1.49
Epoch: 280, loss: 1.99
Epoch: 290, loss: 1.36
Epoch: 300, loss: 1.22


# Testing on trainset

In [157]:
net.eval()

true_preds = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
all_preds = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
# Training loop
for x, targets, _, __ in trainloader:        
    x = x.to(device).unsqueeze(2)
    targets = targets.to(device)
    hidden, state = net.init_hidden(x.size(1))
    hidden, state = hidden.to(device), state.to(device)
    preds, _ = net(x, (hidden, state))
    preds = preds.squeeze(1)
    _, preds = torch.max(preds, 1)
    _, targets = torch.max(targets, 1)
    for pred, target in zip(preds, targets):
      pred, target = pred.item(), target.item()
      all_preds[target] += 1
      if pred == target:
        true_preds[target] += 1
acc = {}
for classname in range(5):
  acc = true_preds[classname]/all_preds[classname]
  print(f"Accuracy of {classname}: {100.0*acc:4.2f}%")

correct_total = 0
total_total = 0
for classsname in range(5):
  correct_total += true_preds[classname]
  total_total += all_preds[classname]
acc = correct_total/total_total
print(f"Accuracy of model: {100.0*acc:4.2f}%")

Accuracy of 0: 83.65%
Accuracy of 1: 78.70%
Accuracy of 2: 21.78%
Accuracy of 3: 98.26%
Accuracy of 4: 90.00%
Accuracy of model: 90.00%


## Testing on validset

In [158]:
net.eval()

true_preds = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
all_preds = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
# Training loop
for x, targets, _, __ in validloader:        
    x = x.to(device).unsqueeze(2)
    targets = targets.to(device)

    hidden, state = net.init_hidden(x.size(1))
    hidden, state = hidden.to(device), state.to(device)
    preds, _ = net(x, (hidden, state))
    preds = preds.squeeze(1)
    _, preds = torch.max(preds, 1)
    _, targets = torch.max(targets, 1)
    for pred, target in zip(preds, targets):
      pred, target = pred.item(), target.item()
      all_preds[target] += 1
      if pred == target:
        true_preds[target] += 1
acc = {}
for classname in range(5):
  acc = true_preds[classname]/all_preds[classname]
  print(f"Accuracy of {classname}: {100.0*acc:4.2f}%")

correct_total = 0
total_total = 0
for classsname in range(5):
  correct_total += true_preds[classname]
  total_total += all_preds[classname]
acc = correct_total/total_total
print(f"Accuracy of model: {100.0*acc:4.2f}%")

Accuracy of 0: 70.25%
Accuracy of 1: 66.32%
Accuracy of 2: 13.79%
Accuracy of 3: 74.16%
Accuracy of 4: 71.43%
Accuracy of model: 71.43%


# Getting the testset predictions

In [159]:
net.eval()

final_predictions = []

for input in test_data:
    input = input.to(device).unsqueeze(1).unsqueeze(2)
    hidden, state = net.init_hidden(input.size(1))
    hidden, state = hidden.to(device), state.to(device)
    preds, _ = net(input, (hidden, state))
    preds = preds.squeeze()
    preds = np.argmax(preds.cpu().detach().numpy())
    final_predictions.append(preds)
  
pd.DataFrame(final_predictions).to_csv("output.csv", index=False, header=None)