In [1]:
# Notebook-wide settings.
#   MAX_LEN - forwarded as `max_len` to the data generators and to the model.
#   TASK    - index of the sub-task; "abc"[TASK] picks the test/label files and
#             TASK == 2 switches the model to three output classes.
MAX_LEN, TASK = 80, 0

In [2]:
# File locations, all relative to `datadir`.
# The test set and its label file are chosen by the sub-task letter "abc"[TASK].
datadir = "../data/"
trainpath = "".join((datadir, "olid-training-v1.0.tsv"))
testpath, labelpath = (
    datadir + template.format("abc"[TASK])
    for template in ("testset-level{}.tsv", "labels-level{}.csv")
)

### Prerequisites

In [3]:
# One-time environment setup, kept commented out so it does not run on every
# execution of the notebook; uncomment when setting up a fresh environment.
# NOTE(review): presumably these are needed by the project-local `dataset`
# module (word splitting / POS tagging) — confirm against that module.
# !pip install --user wordninja
# import nltk
# nltk.download('averaged_perceptron_tagger')

In [4]:
from dataset import make_data_generator
from models import SimpleLSTM as IsaV1, Criterion
import torch
import matplotlib.pyplot as plt
# NOTE(review): no figure has been created at this point in the notebook, so
# this call appears to display nothing — confirm whether it is still needed.
plt.show()

In [5]:
# Build the training dataset and its shuffled batch generator from `trainpath`.
# `training_set` is kept because its vocabularies are reused for the test data.
training_set, training_gen = make_data_generator(
    trainpath,
    task=TASK,
    batch_size=32,
    n_workers=4,
    max_len=MAX_LEN,
    shuffle=True,
)

[info] 13240 data.


HBox(children=(IntProgress(value=0, max=13240), HTML(value='')))


[info] vocab size = 16375


In [6]:
# (token vocabulary, POS-tag vocabulary) taken from the training set; the same
# pair is later handed to the test-set generator via `load_vocabs`.
VOCAB = (training_set.vocab, training_set.pos_vocab)
# Vocabulary sizes, in the same order as VOCAB, used to size the embeddings.
VOC_SIZE, POS_SIZE = map(len, VOCAB)

In [7]:
# Instantiate the classifier (SimpleLSTM imported as IsaV1).
# The output layer has three classes when TASK == 2 and two classes otherwise.
model = IsaV1(
    demb=100,
    demb_pos=128,
    dmodel=120,
    dff=150,
    voc_sz=VOC_SIZE,
    pos_voc_sz=POS_SIZE,
    max_len=MAX_LEN,
    out_class=2 if TASK != 2 else 3,
    dropout=0.5,
)

# Put the model in training mode, then move it to the GPU.
# The last expression is left bare so the notebook displays the module summary.
model.train()
model.cuda()

SimpleLSTM(
  (encoder): Sequential(
    (0): Embedding(16375, 100)
    (1): LSTM(100, 120, batch_first=True, bidirectional=True)
  )
  (pos_encoder): Sequential(
    (0): Embedding(38, 128)
    (1): LSTM(128, 120, batch_first=True, bidirectional=True)
  )
  (dropout): Dropout(p=0.5)
  (FFN): Sequential(
    (0): Linear(in_features=480, out_features=150, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5)
    (3): Linear(in_features=150, out_features=2, bias=True)
  )
)

In [8]:
# Loss/metric helper for the selected sub-task, configured for GPU tensors.
criterion = Criterion(TASK, cuda=True)
# Adam over all model parameters with non-default betas and eps.
optim = torch.optim.Adam(
    model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.98),
    eps=1e-9,
)

In [9]:
# Number of passes over the training data. With EPOCHS = 0 the loop body never
# runs and the model is left exactly as it was before this cell.
EPOCHS = 0

for epoch in range(EPOCHS):
    # Re-assert training mode every epoch: the evaluation cell calls
    # model.eval(), so re-running this cell afterwards would otherwise train
    # with dropout disabled.
    model.train()

    # Per-epoch accumulators for the mean loss / accuracy report.
    running_loss = 0
    running_accu = 0
    running_step = 0

    for x, pos, y in training_gen:
        # Move the batch to the GPU, where the model lives.
        x = x.cuda()
        pos = pos.cuda()
        y = y.cuda()

        # Call the module itself rather than .forward() so that any registered
        # hooks are honoured.
        logits = model(x, pos)
        loss = criterion.loss_compute(logits, y)
        accu = criterion.accu_compute(logits, y)

        # Standard optimisation step: clear old gradients, backprop, update.
        optim.zero_grad()
        loss.backward()
        optim.step()

        running_loss += loss.item()
        running_accu += accu
        running_step += 1
        # Live per-batch loss; '\r' makes each value overwrite the previous one.
        print(loss.item(), end='\r')

    # max(..., 1) avoids a ZeroDivisionError if the generator yielded no batches.
    steps = max(running_step, 1)
    print("Epoch", epoch, "Loss:", running_loss/steps, "Accu:", running_accu/steps)
# torch.save(model, "tmp.pkt")

In [10]:
# NOTE(review): this replaces the in-memory `model` with the contents of
# "tmp.pkt". The matching torch.save call in the training cell is commented
# out, so the file must already exist from an earlier session.
# torch.load unpickles the file, so only load checkpoints you trust.
model = torch.load("tmp.pkt")

# Test-set generator: reuses the training vocabularies, reads gold labels from
# `labelpath`, and keeps the original example order (no shuffling).
testing_set, testing_gen = make_data_generator(
    testpath,
    task=TASK,
    testlabel=labelpath,
    batch_size=1024,
    n_workers=4,
    load_vocabs=VOCAB,
    max_len=MAX_LEN,
    shuffle=False,
)

[info] 860 data.


HBox(children=(IntProgress(value=0, max=860), HTML(value='')))


[info] vocab size = 16375


In [11]:
# Evaluate the model on the test set.
#
# Logits and labels from every batch are gathered first and each metric is
# computed once over the whole set. Averaging per-batch values is only exact
# when all batches have the same size, and a mean of per-batch F1 scores is
# not the F1 of the full set. With a single batch the result is unchanged.
model.eval()  # affects dropout layer
batch_logits = []
batch_labels = []
with torch.no_grad():  # speeds up the evaluation
    for x, pos, y in testing_gen:
        # Call the module itself rather than .forward() so hooks are honoured.
        batch_logits.append(model(x.cuda(), pos.cuda()))
        batch_labels.append(y.cuda())

if not batch_logits:
    raise ValueError("testing_gen yielded no batches; nothing to evaluate")

with torch.no_grad():
    # Assumes the batch dimension is dim 0 for both logits and labels —
    # NOTE(review): confirm against the dataset/model definitions.
    all_logits = torch.cat(batch_logits, dim=0)
    all_labels = torch.cat(batch_labels, dim=0)

    eval_loss = criterion.loss_compute(all_logits, all_labels).item()
    eval_accu = criterion.accu_compute(all_logits, all_labels)
    eval_f1 = criterion.f1_compute(all_logits, all_labels)

print("Evaludation Loss:", eval_loss,
      "F1:", eval_f1,
      "Accu:", eval_accu)

Evaludation Loss: 0.8978897929191589 F1: 0.6975351313516669 Accu: 0.7639535069465637


In [12]:
# Reference result recorded from an earlier run of the evaluation cell:
# Evaludation Loss: 0.8978897929191589 F1: 0.6975351313516669 Accu: 0.7639535069465637