In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
from data_reader import *
from tweet_to_vec import TweetToVec
from utils import str_to_vector
from abstract_classifier import AbstractClassifier

In [None]:
# One reader instance is shared by every split; it is configured with the
# `tokenize_with_nlkt` callable (presumably provided by the star import from
# data_reader -- verify).
dr = DataReader(tokenize_with_nlkt)

# Training splits: each tweet-text file is read together with its tags file.
train_1, tags_train_1 = (
    dr.read_data('data/train_text_1.txt'),
    dr.read_tags('data/train_tags_1.txt'),
)
train_2, tags_train_2 = (
    dr.read_data('data/train_text_2.txt'),
    dr.read_tags('data/train_tags_2.txt'),
)

# Test splits, loaded the same way.
test_1, tags_test_1 = (
    dr.read_data('data/test_text_1.txt'),
    dr.read_tags('data/test_tags_1.txt'),
)
test_2, tags_test_2 = (
    dr.read_data('data/test_text_2.txt'),
    dr.read_tags('data/test_tags_2.txt'),
)

In [None]:
# Tweet -> vector translator in 'fixed_length' mode with L=8.
# NOTE(review): presumably L is the number of tokens kept per tweet
# (pad/truncate) -- confirm against TweetToVec.
t2v = TweetToVec('fixed_length', L=8)
# Load the embeddings the translator uses from disk; must run before
# translate_to_vec is called in the cells below.
t2v.read_embeddings_from_file('embeddings/emb1.txt')

In [None]:
# Translate every tweet of the first training split into its vector form.
vectors = [t2v.translate_to_vec(tweet) for tweet in train_1]

# Sanity check: show the distinct vector lengths once instead of printing one
# line per tweet (the old per-tweet print flooded the cell output).
print(sorted({len(vec) for vec in vectors}))

In [None]:
def group_into_batches(vecs, batch_size = 32):
    """Split an iterable of vectors into consecutive batches.

    Args:
        vecs: iterable of equally-shaped items (numpy arrays or nested lists).
        batch_size: maximum number of items per batch; must be >= 1.

    Returns:
        A list of numpy arrays. Every array stacks ``batch_size`` items along
        a new leading axis, except possibly the last one, which holds the
        remainder. An empty input yields an empty list.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A non-positive size used to be silently ignored, producing one
        # giant batch; fail loudly instead.
        raise ValueError(f'batch_size must be >= 1, got {batch_size}')

    # Materialise once so that generators and other one-shot iterables work.
    items = list(vecs)

    return [
        np.array(items[start:start + batch_size])
        for start in range(0, len(items), batch_size)
    ]

In [None]:
def to_tensor(data, dtype=None, device=None):
    """Convert array-like data to a torch tensor.

    Accepts numpy arrays, existing tensors and (nested) Python sequences.
    With the default arguments a numpy array is wrapped without copying
    (the tensor shares the array's memory), and a tensor is returned as is.

    Args:
        data: numpy array, torch tensor or nested sequence of numbers.
        dtype: optional torch dtype to convert to; ``None`` keeps the
            dtype of ``data``.
        device: optional target device (e.g. ``'cpu'`` or ``'cuda'``);
            ``None`` leaves the placement to torch's default behaviour.

    Returns:
        A ``torch.Tensor`` holding the values of ``data``.
    """
    return torch.as_tensor(data, dtype=dtype, device=device)

In [None]:
# Group the tweet vectors into numpy batches (default batch size).
batched_vectors = group_into_batches(vectors)

# Report each batch's shape and build the tensor version of the batch list.
train_data = []
for np_batch in batched_vectors:
    print(np_batch.shape)
    train_data.append(to_tensor(np_batch))

# All training tags of split 1 as a single (unbatched) tensor.
train_data_tags = to_tensor(tags_train_1)

In [None]:
class SimpleNetwork(nn.Module):
    """Pair a model with a loss function and a minimal training loop.

    Args:
        model: module that maps an input batch to predictions.
        loss: callable ``loss(preds, targets)`` returning a scalar tensor.
    """

    def __init__(self, model, loss):
        super().__init__()
        self.model = model
        self.loss = loss

    def forward(self, x):
        """Run ``x`` through the wrapped model and return its output."""
        return self.model(x)

    def train(self, training_data=True, training_classes=None, epochs=None,
              optimizer=None, lr=0.1):
        """Fit the model, or switch training mode when given a bool.

        This method shadows ``nn.Module.train(mode)``, which ``eval()`` and
        parent modules call with a single boolean. To keep those calls
        working, a boolean first argument (or no argument) is forwarded to
        ``nn.Module.train``; any other first argument starts a training run
        via :meth:`fit`.

        Args:
            training_data: iterable of input batches, or a bool training mode.
            training_classes: iterable of target batches aligned with
                ``training_data``.
            epochs: number of passes over the data.
            optimizer: optimizer class (e.g. ``optim.Adam``); instantiated
                here with the wrapped model's parameters.
            lr: learning rate given to the optimizer.

        Returns:
            ``self`` in the mode-switch case (like ``nn.Module.train``),
            ``None`` after a training run.

        Raises:
            TypeError: if a training run is requested without
                ``training_classes``, ``epochs`` or ``optimizer``.
        """
        if isinstance(training_data, bool):
            return super().train(training_data)

        if training_classes is None or epochs is None or optimizer is None:
            raise TypeError(
                'train() needs training_classes, epochs and optimizer '
                'when called with training data'
            )

        self.fit(training_data, training_classes, epochs, optimizer, lr=lr)

    def fit(self, training_data, training_classes, epochs, optimizer, lr=0.1):
        """Train the wrapped model and print the mean loss after each epoch.

        Batches may be numpy arrays, tensors or nested sequences. Floating
        point inputs are cast to the dtype of the model parameters, because
        numpy produces float64 by default while torch layers default to
        float32. Inputs and targets are moved to the parameters' device.

        Args:
            training_data: iterable of input batches.
            training_classes: iterable of target batches, paired with
                ``training_data`` by position.
            epochs: number of passes over the data.
            optimizer: optimizer class called as
                ``optimizer(parameters, lr=lr)``.
            lr: learning rate given to the optimizer.
        """
        optimizer = optimizer(self.model.parameters(), lr=lr)
        reference = next(self.model.parameters())
        device = reference.device

        # Make sure layers such as dropout are in training mode.
        super().train(True)

        for epoch in range(epochs):
            total_loss = 0.0
            batch_count = 0

            for input_batch, true_classes in zip(training_data, training_classes):
                inputs = torch.as_tensor(input_batch, device=device)
                if inputs.is_floating_point():
                    inputs = inputs.to(reference.dtype)
                targets = torch.as_tensor(true_classes, device=device)

                preds = self.model(inputs)
                loss = self.loss(preds, targets)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                total_loss += loss.item()
                batch_count += 1

            # Report once per epoch (not once per batch); nothing to report
            # when no batch was seen.
            if batch_count:
                print(f'After epoch {epoch} loss = {total_loss / batch_count}')
        
        

In [None]:
# Single linear layer mapping 800 input features to 2 output scores.
# NOTE(review): 800 presumably equals L=8 tokens x 100-dim embeddings,
# flattened -- confirm against the vectors produced by TweetToVec.
simple_model = nn.Sequential(nn.Linear(in_features=800, out_features=2))

# The optimizer is passed around as a class; SimpleNetwork.train instantiates
# it with the model parameters.
adam_opt = optim.Adam
ce_loss = nn.CrossEntropyLoss()

In [None]:
simple_network = SimpleNetwork(simple_model, ce_loss)

# Train on the numeric batches prepared above, not on the raw tweets.
# The tags are split into chunks of 32 so that they line up with the batches
# built by group_into_batches (default batch size 32).
# NOTE(review): .long() assumes the tags are integer class indices, as
# CrossEntropyLoss expects for index targets -- confirm against read_tags.
tag_batches = train_data_tags.long().split(32)
assert len(tag_batches) == len(batched_vectors), 'tweets and tags are out of sync'

simple_network.train(batched_vectors, tag_batches, 10, adam_opt)