In [1]:
from data_reader import DataReader
import datasets
from tweet_to_vec import TweetToVec
import utils

In [2]:
# Reader object used by this notebook to load the datasets and the embeddings.
# NOTE(review): 'nlkt' looks like it may be a typo for 'nltk' — confirm against
# the option names DataReader actually accepts before changing the literal.
dr = DataReader('nlkt')

# Register both task variants with the reader: binary first, then ternary.
for dataset_spec in (datasets.binary_classes, datasets.ternary_classes):
    dr.read_dataset(dataset_spec)



100%|██████████████████████████████████| 10041/10041 [00:00<00:00, 11464.96it/s]
100%|████████████████████████████████| 10041/10041 [00:00<00:00, 3242109.81it/s]
100%|████████████████████████████████████| 1000/1000 [00:00<00:00, 11016.54it/s]
100%|██████████████████████████████████| 1000/1000 [00:00<00:00, 1844460.86it/s]
100%|██████████████████████████████████| 10041/10041 [00:00<00:00, 10916.32it/s]
100%|████████████████████████████████| 10041/10041 [00:00<00:00, 2918775.14it/s]
100%|████████████████████████████████████| 1000/1000 [00:00<00:00, 10742.67it/s]
100%|██████████████████████████████████| 1000/1000 [00:00<00:00, 3711773.45it/s]


In [3]:
# Vectorisation settings handed to TweetToVec below.
# NOTE(review): L is presumably the fixed tweet length (in tokens) used by the
# 'fixed_length' method — confirm in TweetToVec.
method, L = 'fixed_length', 12

# Word embeddings read from disk through the shared reader.
embeddings = dr.read_embeddings('embeddings/kraby.txt')

# Tweet vectoriser configured with the embeddings, the method name and L.
t2v = TweetToVec(embeddings, method, L)

100%|██████████████████████████████████| 20067/20067 [00:00<00:00, 60998.56it/s]


In [4]:
# Number of examples passed to t2v.batch_dataset for every batch.
batch_size = 32

# Every stage below processes the binary dataset first and the ternary one second.

# Stage 1: fetch the datasets previously loaded into the reader.
binary_dataset, ternary_dataset = [
    dr.get_dataset(task) for task in ('binary', 'ternary')
]

# Stage 2: vectorise each dataset with the configured TweetToVec instance.
vectorized_binary, vectorized_ternary = [
    t2v.vectorize_dataset(dataset)
    for dataset in (binary_dataset, ternary_dataset)
]

# Stage 3: group the vectorised datasets into batches of `batch_size`.
batched_binary, batched_ternary = [
    t2v.batch_dataset(vectors, batch_size)
    for vectors in (vectorized_binary, vectorized_ternary)
]

In [5]:
from simple_network import SimpleNetwork

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim

# Seed PyTorch's global RNG before any weights are created so that the layer
# initialisation below is repeatable after a kernel restart.
torch.manual_seed(0)

# Fully connected classifier for the binary task:
# 1200 -> 512 -> 256 -> 512 -> 2, with a ReLU after every hidden layer.
# NOTE(review): the 1200 input width presumably equals L (12) times the
# embedding dimension — confirm against TweetToVec / the embeddings file.
simple_model = nn.Sequential(
    nn.Linear(1200, 512),
    nn.ReLU(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, 512),
    nn.ReLU(),
    nn.Linear(512, 2),  # one output per class; no activation on the last layer
)

# Loss function handed to SimpleNetwork together with the model.
ce_loss = torch.nn.functional.cross_entropy

def my_optimizer_sgd(model_parameters, lr=0.001, momentum=0.9):
    """Build an SGD optimizer over the given parameters.

    Args:
        model_parameters: iterable of parameters (or parameter groups) to
            optimize, passed straight through to ``optim.SGD``.
        lr: learning rate; defaults to 0.001.
        momentum: momentum factor; defaults to 0.9.

    Returns:
        An ``optim.SGD`` instance configured with ``lr`` and ``momentum``.
    """
    return optim.SGD(model_parameters, lr=lr, momentum=momentum)

In [7]:
# Pair the binary classifier with its loss, then fit it on the binary
# training batches.
network = SimpleNetwork(simple_model, ce_loss)
network.train(
    batched_binary['training tweets'],  # training inputs
    batched_binary['training tags'],    # training labels
    20,                                 # epoch count (the log below reports epochs 0-19)
    my_optimizer_sgd,                   # callable that builds the optimizer from model parameters
)

After epoch 0 tot_loss = 96.83110809326172
After epoch 1 tot_loss = 85.69334411621094
After epoch 2 tot_loss = 79.9252700805664
After epoch 3 tot_loss = 73.58946990966797
After epoch 4 tot_loss = 66.43695831298828
After epoch 5 tot_loss = 58.78273391723633
After epoch 6 tot_loss = 50.78483200073242
After epoch 7 tot_loss = 42.92909622192383
After epoch 8 tot_loss = 34.87005615234375
After epoch 9 tot_loss = 27.406208038330078
After epoch 10 tot_loss = 20.51115608215332
After epoch 11 tot_loss = 15.039844512939453
After epoch 12 tot_loss = 10.889418601989746
After epoch 13 tot_loss = 8.036567687988281
After epoch 14 tot_loss = 6.148322582244873
After epoch 15 tot_loss = 4.833540439605713
After epoch 16 tot_loss = 3.94535756111145
After epoch 17 tot_loss = 3.2826082706451416
After epoch 18 tot_loss = 2.8556222915649414
After epoch 19 tot_loss = 2.5095582008361816


In [8]:
# Run the trained binary network on the test batches and write the
# predictions to the file that the grader cell reads.
binary_test_batches = batched_binary['test tweets']
binary_predictions = network.predict(binary_test_batches)
utils.save_results(binary_predictions, 'results/binary_simple_network.txt')

In [9]:
# Score the saved binary predictions with the Perl grader script; it prints
# precision, recall, balanced F-score and accuracy.
!perl graders/evaluate1.pl results/binary_simple_network.txt

Precision = 41.67%
Recall = 37.31%
Balanced F-score = 39.37%
Accuracy = 84.60%


In [35]:
# Re-seed PyTorch's global RNG so this cell yields the same initial weights
# every time it is run, including when it is re-executed on its own.
torch.manual_seed(0)

# Fully connected classifier for the ternary task:
# 1200 -> 512 -> 256 -> 512 -> 3, with a ReLU after every hidden layer.
# Same layout as the binary model except for the three-way output layer.
simple_model2 = nn.Sequential(
    nn.Linear(1200, 512),
    nn.ReLU(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, 512),
    nn.ReLU(),
    nn.Linear(512, 3),  # one output per class; no activation on the last layer
)

# Pair the ternary classifier with the shared loss, then fit it on the
# ternary training batches.
network2 = SimpleNetwork(simple_model2, ce_loss)
network2.train(
    batched_ternary['training tweets'],  # training inputs
    batched_ternary['training tags'],    # training labels
    70,                                  # epoch count
    my_optimizer_sgd,                    # callable that builds the optimizer from model parameters
)

After epoch 0 tot_loss = 120.25999450683594
After epoch 1 tot_loss = 101.39816284179688
After epoch 2 tot_loss = 94.51297760009766
After epoch 3 tot_loss = 87.89990234375
After epoch 4 tot_loss = 81.0728988647461
After epoch 5 tot_loss = 74.15080261230469
After epoch 6 tot_loss = 66.83546447753906
After epoch 7 tot_loss = 59.522762298583984
After epoch 8 tot_loss = 51.969539642333984
After epoch 9 tot_loss = 44.788692474365234
After epoch 10 tot_loss = 37.91055679321289
After epoch 11 tot_loss = 32.117454528808594
After epoch 12 tot_loss = 27.287281036376953
After epoch 13 tot_loss = 23.647613525390625
After epoch 14 tot_loss = 20.8614501953125
After epoch 15 tot_loss = 18.774599075317383
After epoch 16 tot_loss = 17.155044555664062
After epoch 17 tot_loss = 15.850600242614746
After epoch 18 tot_loss = 14.792484283447266
After epoch 19 tot_loss = 13.864879608154297
After epoch 20 tot_loss = 13.072214126586914
After epoch 21 tot_loss = 12.316146850585938
After epoch 22 tot_loss = 11.654

In [36]:
# Run the trained ternary network on the test batches and write the
# predictions to the file that the grader cell reads.
ternary_test_batches = batched_ternary['test tweets']
ternary_predictions = network2.predict(ternary_test_batches)
utils.save_results(ternary_predictions, 'results/ternary_simple_network.txt')

In [37]:
# Score the saved ternary predictions with the Perl grader script; it prints
# the micro- and macro-averaged F-scores.
!perl graders/evaluate2.pl results/ternary_simple_network.txt

Micro-Average F-score = 86.80%
Macro-Average F-score = 46.37%
