In [1]:
from data_reader import DataReader
import datasets
from tweet_to_vec import TweetToVec
import utils

import torch
from tqdm import tqdm
import numpy as np
from torch import nn, optim
from LSTM import LSTMModel
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
method = 'fixed_length_2d'
L = 12
batch_size = 32

In [3]:
dr = DataReader('nlkt')
dr.read_dataset(datasets.binary_classes)
dr.read_dataset(datasets.ternary_classes)
embeddings = dr.read_embeddings('embeddings/kraby.txt')
t2v = TweetToVec(embeddings, method, L)
batched_binary = t2v.vectorize_and_batch_dataset(dr.get_dataset('binary'), batch_size, True)
batched_trenary = t2v.vectorize_and_batch_dataset(dr.get_dataset('ternary'), batch_size, True)

  0%|          | 0/10041 [00:00<?, ?it/s]

100%|██████████| 10041/10041 [00:01<00:00, 5483.16it/s]
100%|██████████| 10041/10041 [00:00<00:00, 2267783.45it/s]
100%|██████████| 1000/1000 [00:00<00:00, 4889.96it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1748355.15it/s]
100%|██████████| 10041/10041 [00:01<00:00, 5384.59it/s]
100%|██████████| 10041/10041 [00:00<00:00, 2241352.13it/s]
100%|██████████| 1000/1000 [00:00<00:00, 5767.02it/s]
100%|██████████| 1000/1000 [00:00<00:00, 906484.55it/s]
100%|██████████| 20067/20067 [00:00<00:00, 35771.45it/s]


In [8]:
# first comment to the model: it learned that returning 0 is a good solution and kept doing that all the time. 
# Thus, we altered the dataset to contain the same amount of results for 0, 1 and 2
class LSTMModel(nn.Module):
    def __init__(self, size = 32, embedding_dim = 100, num_layers = 3, number_of_output_classes = 2, device='cpu'):
        super(LSTMModel, self).__init__()
        self.lstm_size = size
        self.embedding_dim = embedding_dim
        self.num_layers = num_layers
        self.device = device
        
        self.lstm = nn.LSTM(
            input_size=self.embedding_dim,
            hidden_size=self.lstm_size,
            num_layers=self.num_layers,
            dropout=0.2,
        )
        self.fc = nn.Sequential(nn.Linear(self.lstm_size, number_of_output_classes))
        self.init_params()

    def forward(self, x, prev_state):
        output, state = self.lstm(x, prev_state)
        logits = self.fc(output)
        return logits, state

    def init_state(self, sequence_length):
        return (torch.zeros(self.num_layers, sequence_length, self.lstm_size).to(self.device),
                torch.zeros(self.num_layers, sequence_length, self.lstm_size).to(self.device))

    def init_params(self):

        with torch.no_grad():
            for name, p in self.named_parameters():
                if "weight" in name:
                    p.normal_(0, np.sqrt(1 / (2 * p.size(dim = 1))))
                elif "bias" in name:
                    p.zero_()
                    
    def predict(self, x):
        state_h, state_c = self.init_state(L)
        predictions, (_, _) = self(x, (state_h, state_c))
        return torch.argmax(predictions[:, -1, :], dim = 1)
    
    def train(self, training_data, training_classes, epochs):
        optimizer = optim.Adam(self.parameters(), lr=0.0001)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.9)

        criterion = nn.CrossEntropyLoss()
    
        for epoch in tqdm(range(epochs)):
            tot_loss = 0
            losses = []
            for (input_batch, true_classes) in zip(training_data, training_classes):
                optimizer.zero_grad()
                state_h, state_c = self.init_state(L)
                y_pred, (state_h, state_c) = self(input_batch, (state_h, state_c))
                loss = criterion(y_pred[:, -1, :], true_classes)
                tot_loss += loss
                losses.append(float(loss))

                state_h = state_h.detach()
                state_c = state_c.detach()            

                loss.backward()
                optimizer.step()

            print(f'After epoch {epoch} tot_loss = {tot_loss}')
            # scheduler.step()
                
        

In [9]:
device = 'cpu'
model = LSTMModel()
model.to(device)
model.train(batched_binary['training tweets'], batched_binary['training tags'], 30)

  3%|▎         | 1/30 [00:12<05:57, 12.34s/it]

After epoch 0 tot_loss = 427.68035888671875


  7%|▋         | 2/30 [00:25<05:52, 12.57s/it]

After epoch 1 tot_loss = 388.9908142089844


 10%|█         | 3/30 [00:38<05:45, 12.79s/it]

After epoch 2 tot_loss = 376.66302490234375


 13%|█▎        | 4/30 [00:51<05:41, 13.15s/it]

After epoch 3 tot_loss = 320.18218994140625


 17%|█▋        | 5/30 [01:05<05:32, 13.29s/it]

After epoch 4 tot_loss = 244.76834106445312


 20%|██        | 6/30 [01:19<05:22, 13.43s/it]

After epoch 5 tot_loss = 190.25254821777344


 23%|██▎       | 7/30 [01:32<05:06, 13.31s/it]

After epoch 6 tot_loss = 160.84951782226562


 27%|██▋       | 8/30 [01:44<04:48, 13.11s/it]

After epoch 7 tot_loss = 142.71217346191406


 30%|███       | 9/30 [01:57<04:35, 13.13s/it]

After epoch 8 tot_loss = 123.35386657714844


 33%|███▎      | 10/30 [02:10<04:20, 13.01s/it]

After epoch 9 tot_loss = 115.07255554199219


 37%|███▋      | 11/30 [02:24<04:08, 13.10s/it]

After epoch 10 tot_loss = 107.65995025634766


 40%|████      | 12/30 [02:37<03:55, 13.09s/it]

After epoch 11 tot_loss = 101.9082260131836


 40%|████      | 12/30 [02:44<04:06, 13.71s/it]


KeyboardInterrupt: 

In [None]:
binary_predictions = model.predict(batched_binary['test tweets'])
utils.save_results(binary_predictions, 'results/binary_LSTM.txt')

In [None]:
!perl graders/evaluate1.pl results/binary_LSTM.txt

Precision = 25.00%
Recall = 0.75%
Balanced F-score = 1.45%
Accuracy = 86.40%
