In [1]:
# Index into "abc" that selects which task level's test/label files are used
# (0 -> a, 1 -> b, 2 -> c); a value of 2 also switches the model to 3 output classes.
TASK = 0
# Value passed as `max_len` to both the data generators and the model.
MAX_LEN = 80

In [2]:
# All dataset files are resolved relative to this directory.
datadir = "../data/"

# Letter of the selected task level; it picks the matching test-set and label files.
task_level = "abc"[TASK]

trainpath = datadir + "olid-training-v1.0.tsv"
testpath = datadir + "testset-level{}.tsv".format(task_level)
labelpath = datadir + "labels-level{}.csv".format(task_level)

### Prerequisites

In [3]:
# !pip install --user wordninja
# import nltk
# nltk.download('averaged_perceptron_tagger')

In [4]:
from dataset import make_data_generator
from models import SimpleLSTM as IsaV1, SelfAttnLSTM as IsaV2, Criterion
import torch
import matplotlib.pyplot as plt
# NOTE(review): no figure is created anywhere before this call in the visible
# notebook, so it looks like a no-op here — confirm before removing.
plt.show()

In [5]:
# Build the datasets and batch generators for training and validation from the
# training file; `val_split=0.1` is the validation share requested from the helper.
train_val_splits = make_data_generator(
    trainpath,
    task=TASK,
    val_split=0.1,
    batch_size=32,
    n_workers=4,
    max_len=MAX_LEN,
    shuffle=True,
)
(training_set, training_gen), (validation_set, validation_gen) = train_val_splits

[info] 13240 data.


HBox(children=(IntProgress(value=0, max=13240), HTML(value='')))


[info] vocab size = 16375
[info] 11916 train. 1324 valid.


In [6]:
# Vocabularies built on the training set; they are reused later when the test
# set is loaded (passed as `load_vocabs`) and their sizes parameterize the model.
word_vocab, pos_vocab = training_set.vocab, training_set.pos_vocab
VOCAB = (word_vocab, pos_vocab)
VOC_SIZE, POS_SIZE = len(word_vocab), len(pos_vocab)

In [7]:
# Alternative (larger) configuration kept for reference:
#   demb=100, demb_pos=128, dmodel=120, dff=150 (all other arguments as below).

# Three output classes when TASK == 2, two otherwise.
N_CLASSES = 3 if TASK == 2 else 2

model_kwargs = dict(
    demb=50,
    demb_pos=50,
    dmodel=60,
    dff=75,
    voc_sz=VOC_SIZE,
    pos_voc_sz=POS_SIZE,
    max_len=MAX_LEN,
    out_class=N_CLASSES,
    dropout=0.5,
)
model = IsaV2(**model_kwargs)

model.train()
# Kept as the last expression so the cell still displays the module summary.
model.cuda()

SelfAttnLSTM(
  (encoder): Sequential(
    (0): Embedding(16375, 50)
    (1): LSTM(50, 60, batch_first=True, bidirectional=True)
  )
  (pos_encoder): Sequential(
    (0): Embedding(38, 50)
    (1): LSTM(50, 60, batch_first=True, bidirectional=True)
  )
  (attention_proj): Linear(in_features=240, out_features=240, bias=True)
  (attention_softmax): Softmax()
  (dropout): Dropout(p=0.5)
  (FFN): Sequential(
    (0): Linear(in_features=240, out_features=75, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5)
    (3): Linear(in_features=75, out_features=2, bias=True)
  )
)

In [8]:
# Adam over every model parameter, with L2 weight decay.
optim = torch.optim.Adam(
    model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.98),
    eps=1e-9,
    weight_decay=1e-4,
)
# Loss/metric helper for the selected task, configured for CUDA.
criterion = Criterion(TASK, cuda=True)

In [9]:
def validation(model, val_gen, criterion):
    """Evaluate ``model`` on every batch of ``val_gen`` and score the pooled predictions.

    Logits and labels of all batches are concatenated first, so loss, accuracy
    and F1 are each computed once over the whole set instead of being averaged
    per batch.

    Args:
        model: ``torch.nn.Module`` called as ``model(x, pos)`` to produce logits.
        val_gen: iterable yielding ``(x, pos, y)`` tensor batches.
        criterion: object providing ``loss_compute``, ``accu_compute`` and
            ``f1_compute``, each called with ``(logits, y)``.

    Returns:
        Tuple ``(loss, accu, f1)``: ``loss`` is a Python float (``loss.item()``);
        ``accu`` and ``f1`` are returned exactly as the criterion produced them.

    Raises:
        ValueError: if ``val_gen`` yields no batches.
    """
    # Run on whatever device the model lives on; a model without parameters
    # falls back to CUDA, which is what this function always used before.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            logit_chunks = []
            label_chunks = []
            for x, pos, y in val_gen:
                x = x.to(device)
                pos = pos.to(device)
                y = y.to(device)
                logit_chunks.append(model(x, pos))
                label_chunks.append(y)

            if not logit_chunks:
                raise ValueError("validation generator produced no batches")

            # One concatenation at the end instead of re-concatenating per batch.
            all_logits = torch.cat(logit_chunks, dim=0)
            all_y = torch.cat(label_chunks, dim=0)

            loss = criterion.loss_compute(all_logits, all_y)
            accu = criterion.accu_compute(all_logits, all_y)
            f1 = criterion.f1_compute(all_logits, all_y)
    finally:
        # Restore the mode the caller had, even if evaluation raised.
        model.train(was_training)
    return loss.item(), accu, f1

In [10]:
# Train for EPOCHS passes over the training generator. After each epoch, print
# the mean training loss/accuracy next to the validation loss/accuracy/F1.
# NOTE(review): the recorded output under this cell lists only epochs 0-9, so it
# was presumably produced by an interrupted run or a different EPOCHS value — confirm.
EPOCHS = 20
print("Epoch Loss Accu ValLoss ValAccu ValF1")
for epoch in range(EPOCHS):
    running_loss = 0
    running_accu = 0
    running_step = 0

    for x, pos, y in training_gen:
        x = x.cuda()
        pos = pos.cuda()
        y = y.cuda()

        # Call the module itself rather than .forward() so registered hooks run.
        logits = model(x, pos)
        loss = criterion.loss_compute(logits, y)
        accu = criterion.accu_compute(logits, y)

        optim.zero_grad()
        loss.backward()
        optim.step()

        # Read the scalar once; it is used for both the running sum and the
        # in-place progress line.
        batch_loss = loss.item()
        running_loss += batch_loss
        running_accu += accu
        running_step += 1
        print(batch_loss, end='\r')

    vloss, vaccu, vf1 = validation(model, validation_gen, criterion)
    print("{} {:.5f} {:.5f} {:.5f} {:.5f} {:.5f}".format(epoch, running_loss/running_step, \
                                                   running_accu/running_step, vloss, vaccu, vf1))
# Saves the whole model object (not just a state_dict); the evaluation cell
# reloads it from this same path with torch.load.
torch.save(model, "tmp.pkt")

Epoch Loss Accu ValLoss ValAccu ValF1
0 0.37278 0.67814 0.36109 0.70317 0.52208
1 0.34884 0.71509 0.35252 0.72054 0.59259
2 0.34034 0.72369 0.34892 0.71979 0.59692
3 0.32928 0.73112 0.33757 0.73036 0.62699
4 0.31397 0.74899 0.32460 0.73489 0.66551
5 0.29435 0.77304 0.31953 0.74622 0.68440
6 0.27825 0.78857 0.31227 0.74924 0.69317
7 0.26580 0.80052 0.30968 0.75604 0.69112
8 0.25581 0.81133 0.30731 0.75680 0.69904
9 0.24189 0.82364 0.31122 0.75982 0.71521


In [11]:
# Reload the model object that the training cell wrote to "tmp.pkt".
# torch.load unpickles the file, so only use it on checkpoints you created yourself.
model = torch.load("tmp.pkt")

# Test data is encoded with the vocabularies built on the training set and is
# loaded unshuffled.
test_loader_kwargs = dict(
    task=TASK,
    testlabel=labelpath,
    batch_size=1024,
    n_workers=4,
    load_vocabs=VOCAB,
    max_len=MAX_LEN,
    shuffle=False,
)
testing_set, testing_gen = make_data_generator(testpath, **test_loader_kwargs)

[info] 860 data.


HBox(children=(IntProgress(value=0, max=860), HTML(value='')))


[info] vocab size = 16375


In [12]:
# Score the test set with the same routine used for validation. It pools the
# logits and labels of all batches before computing the metrics; averaging
# per-batch F1 (or unweighted per-batch loss/accuracy) would not equal the
# whole-set value once the test set spans more than one batch.
test_loss, test_accu, test_f1 = validation(model, testing_gen, criterion)

# validation() may switch the model back to training mode when it returns;
# leave it in eval mode, as this cell always did.
model.eval()

print("Evaludation Loss:", test_loss, \
      "F1:", test_f1,\
      "Accu:", test_accu)

Evaludation Loss: 0.2653390169143677 F1: 0.7695749907788241 Accu: 0.8209302425384521


In [13]:
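# Recorded test-set results (text copied verbatim from the evaluation cell's output):
# no proj   (presumably without the attention projection layer — confirm):
#   Evaludation Loss: 0.25462111830711365 F1: 0.758146574424895 Accu: 0.8081395030021667
# with proj (the "ya proj" run; identical to the output of the evaluation cell above):
#   Evaludation Loss: 0.2653390169143677 F1: 0.7695749907788241 Accu: 0.8209302425384521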