## Imports



In [1]:
import pandas as pd
import numpy as np

import random
import pickle

import torch
import torch.nn as nn
from torch import Tensor
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence
from torch.nn.utils.rnn import pack_padded_sequence
from torch.nn.utils.rnn import pad_packed_sequence

import matplotlib.pyplot as plt

##Seeds

In [2]:
torch.manual_seed(42)  # Sets up seed for both devices
np.random.seed(42)
random.seed(42)

## Device

In [69]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(device)

cuda


## Properties

In [34]:
batch_size = 32
epochs = 50
vocab_size = 200
embedding_dim = 10

hidden_size = 128
n_layers = 3
out_size = 5
proj_size = 3

## Load data

In [35]:
train_file = open('train.pkl', 'rb')
dataset = pickle.load(train_file)
dataset = [(i[0]+1, i[1]) for i in dataset]

random.shuffle(dataset)

train_dataset = dataset[round(len(dataset)*0.3):]
test_dataset = dataset[:round(len(dataset)*0.3)]

In [36]:
def pad_collate(batch):
  (xx, yy) = zip(*batch)
  x_lens = Tensor([len(x) for x in xx])

  xx = [Tensor(i) for i in xx]
  xx_pad = pad_sequence(xx, batch_first=True, padding_value=0)
  yy = Tensor(yy)

  return xx_pad, x_lens, yy

In [37]:
data_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, collate_fn=pad_collate)

## Model


In [38]:
class LSTM_Seq_Classifier(nn.Module):

    def __init__(self, embedding_dim, hidden_size, num_layers, out_size):
        super().__init__()

        #params
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.out_size = out_size

        #embedding
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        #lstm
        self.lstm = lstm = nn.LSTM(input_size = embedding_dim, hidden_size = hidden_size, num_layers = n_layers, batch_first=True, bidirectional=True)

        #dense layer
        self.dense1 = nn.Linear(2*hidden_size, out_size)
        self.act = nn.ReLU()
        self.soft = nn.Softmax(dim=1)

    def init_hidden(self, batch_size):
        hidden = torch.zeros(2*self.num_layers, batch_size, self.hidden_size)
        state = torch.zeros(2*self.num_layers, batch_size, self.hidden_size)
        return hidden, state
    
    def forward(self, x, x_lens, hidden):
        embeddings = self.embedding(x)
        packed_embeddings = pack_padded_sequence(embeddings, x_lens, batch_first=True, enforce_sorted=False)
        packed_out, hidden = self.lstm(packed_embeddings, hidden)
        out, _ = pad_packed_sequence(packed_out, batch_first=True)
        out_forward = out[range(len(out)), x_lens - 1, :self.hidden_size]
        out_reverse = out[:, 0, self.hidden_size:]
        out_reduced = torch.cat((out_forward, out_reverse), 1)
        lin1 = self.act(self.dense1(out_reduced))
        # soft = self.soft(lin1)
        return lin1, hidden

## Training Loop

In [39]:
model = LSTM_Seq_Classifier(embedding_dim, hidden_size, n_layers, out_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(epochs):
  model.train()

  for i, (x_padded, x_lens, y) in enumerate(data_loader):
    x_padded, x_lens, y = x_padded.to(torch.int64), x_lens.to(torch.int64), y.to(torch.int64)
    x_padded, y = x_padded.to(device), y.to(device)
    optimizer.zero_grad()

    hidden, state = model.init_hidden(x_padded.size(1))
    hidden, state = hidden.to(device), state.to(device)
    
    out, last_hidden = model(x_padded, x_lens, (hidden, state))
    

    loss = criterion(out, y)
    loss.backward()
    optimizer.step()

    if i%20 == 0:
      model.eval()

      test_loss = 0.0
      test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True, collate_fn=pad_collate)
      for j, (x_padded, x_lens, y) in enumerate(test_loader):
        x_padded, x_lens, y = x_padded.to(torch.int64), x_lens.to(torch.int64), y.to(torch.int64)
        x_padded, y = x_padded.to(device), y.to(device)
        hidden, state = model.init_hidden(x_padded.size(0))
        hidden, state = hidden.to(device), state.to(device)
        out, last_hidden = model(x_padded, x_lens, (hidden,state))
        test_loss = test_loss + criterion(out, y)
      
      print(f'Epoch: {epoch}, i: {i}, train_loss: {loss.item():.3}, test_loss: {test_loss.item()/j+1:.3}')

      model.train()

Epoch: 0, i: 0, train_loss: 1.6, test_loss: 2.64
Epoch: 0, i: 20, train_loss: 1.29, test_loss: 2.36
Epoch: 0, i: 40, train_loss: 1.02, test_loss: 2.22
Epoch: 0, i: 60, train_loss: 1.41, test_loss: 2.33
Epoch: 1, i: 0, train_loss: 1.15, test_loss: 2.25
Epoch: 1, i: 20, train_loss: 0.998, test_loss: 2.2
Epoch: 1, i: 40, train_loss: 1.03, test_loss: 2.19
Epoch: 1, i: 60, train_loss: 1.08, test_loss: 2.21
Epoch: 2, i: 0, train_loss: 1.19, test_loss: 2.19
Epoch: 2, i: 20, train_loss: 0.899, test_loss: 2.12
Epoch: 2, i: 40, train_loss: 0.942, test_loss: 2.11
Epoch: 2, i: 60, train_loss: 1.21, test_loss: 2.21
Epoch: 3, i: 0, train_loss: 0.934, test_loss: 2.12
Epoch: 3, i: 20, train_loss: 0.855, test_loss: 2.05
Epoch: 3, i: 40, train_loss: 0.868, test_loss: 2.09
Epoch: 3, i: 60, train_loss: 1.13, test_loss: 2.29
Epoch: 4, i: 0, train_loss: 1.06, test_loss: 2.28
Epoch: 4, i: 20, train_loss: 1.36, test_loss: 2.23
Epoch: 4, i: 40, train_loss: 1.23, test_loss: 2.19
Epoch: 4, i: 60, train_loss: 1.1

KeyboardInterrupt: ignored

In [40]:
#zapis
import pickle
from google.colab import files

torch.save(model.state_dict(), 'model.pk')

files.download('model.pk')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## TEST

In [28]:
def measure_accuracy(test_loader, model, device):
    model.eval()
    correct = 0
    total = 0

    # with torch.no_grad():
    #     for x, labels in test_loader:
    #         x = x.to(device).unsqueeze(2)
    #         hidden = model.init_hidden(x.size(0))
    #         hidden = hidden.to(device)
    #         out, _ = model(x.float(), hidden)
    #         out = out.cpu()
    #         _, predicted = torch.max(out.data, 1)
    #         total += labels.size(0)
    #         correct += (predicted == labels).sum().item()

    with torch.no_grad():
        for x_padded, x_lens, y in test_loader:
            x_padded, x_lens, y = x_padded.to(torch.int64), x_lens.to(torch.int64), y.to(torch.int64)
            x_padded, y = x_padded.to(device), y.to(device)
            hidden, state = model.init_hidden(x_padded.size(0))
            hidden, state = hidden.to(device), state.to(device)

            out, last_hidden = model(x_padded, x_lens, (hidden, state))
            out = [i == max(i) for i in out]

            for i in range(len(y)):
                total += 1
                if out[i][y[i]]:
                    correct += 1

    return 100 * correct / total

In [41]:
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True, collate_fn=pad_collate)
measure_accuracy(test_loader, model, device)

81.40589569160997

## Model answers

In [54]:
def test_pad_collate(batch):
  xx = batch
  x_lens = Tensor([len(x) for x in xx])

  xx = [Tensor(i) for i in xx]
  xx_pad = pad_sequence(xx, batch_first=True, padding_value=0)

  return xx_pad, x_lens

In [3]:
test_file = open('test_no_target.pkl', 'rb')
test_dataset = pickle.load(test_file)
test_dataset = [(i+1) for i in test_dataset]

In [55]:
test_not_data_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, collate_fn=test_pad_collate)

In [87]:
outs = list()
with torch.no_grad():
        for x_padded, x_lens in test_not_data_loader:
            x_padded, x_lens = x_padded.to(torch.int64), x_lens.to(torch.int64)
            x_padded = x_padded.to(device)
            hidden, state = model.init_hidden(x_padded.size(0))
            hidden, state = hidden.to(device), state.to(device)
            out, last_hidden = model(x_padded, x_lens, (hidden,state))

            for i in out:
              outs.append(i.tolist())

In [97]:
outs = Tensor(outs).argmax(1)


In [101]:
outs_n = outs.numpy()
outs_n = pd.DataFrame(outs_n)
outs_n.to_csv('sol.csv', index=False, header=False)

In [100]:
outs_n

Unnamed: 0,0
0,0
1,1
2,3
3,3
4,0
...,...
1098,2
1099,0
1100,1
1101,0
