In [1]:
# Sequence-length cap; handed to both the data generator and the model as `max_len`.
MAX_LEN = 80
# Sub-task selector: indexes "abc" to pick the test/label file suffix (1 -> "b")
# and is forwarded to the data generator and the criterion.
TASK = 1

In [2]:
# File locations for the OLID data, all relative to `datadir`.
datadir = "../data/"
level = "abc"[TASK]  # letter of the selected sub-task, used in the file names
trainpath = "{}olid-training-v1.0.tsv".format(datadir)
testpath = "{}testset-level{}.tsv".format(datadir, level)
labelpath = "{}labels-level{}.csv".format(datadir, level)

### Prerequisites

In [3]:
# !pip install --user wordninja
# import nltk
# nltk.download('averaged_perceptron_tagger')

In [4]:
from dataset import make_data_generator
from models import SimpleLSTM as IsaV1, SelfAttnLSTM as IsaV2, Criterion
import torch
import matplotlib.pyplot as plt
# NOTE(review): no figure has been created at this point in the notebook, so
# this call presumably displays nothing — confirm and consider removing it.
plt.show()

In [5]:
# Build the training and validation datasets together with their batch generators;
# 10% of the training file is held out for validation.
(training_set, training_gen), (validation_set, validation_gen) = make_data_generator(
    trainpath,
    task=TASK,
    val_split=0.1,
    batch_size=32,
    n_workers=4,
    max_len=MAX_LEN,
    shuffle=True,
)

[info] 13240 data.


HBox(children=(IntProgress(value=0, max=13240), HTML(value='')))


[info] vocab size = 16375
[info] 11916 train. 1324 valid.


In [6]:
# Word and POS-tag vocabularies of the training set; their sizes determine the
# embedding table sizes of the model, and the pair is reused when loading the test set.
VOCAB = (training_set.vocab, training_set.pos_vocab)
VOC_SIZE, POS_SIZE = map(len, VOCAB)

In [7]:
# Previously tried hyper-parameter settings, kept for reference:
# model = IsaV2(demb=100, demb_pos=128, dmodel=120, dff=150,
#               voc_sz=VOC_SIZE, pos_voc_sz=POS_SIZE, max_len=MAX_LEN,
#               out_class=(3 if TASK==2 else 2), dropout=0.5)
# model = IsaV2(demb=50, demb_pos=50, dmodel=60, dff=75,
#               voc_sz=VOC_SIZE, pos_voc_sz=POS_SIZE, max_len=MAX_LEN,
#               out_class=(3 if TASK==2 else 2), dropout=0.5)
model = IsaV2(
    demb=100,
    demb_pos=100,
    dmodel=120,
    dff=150,
    voc_sz=VOC_SIZE,
    pos_voc_sz=POS_SIZE,
    max_len=MAX_LEN,
    # Three output classes when TASK == 2, two otherwise.
    out_class=(3 if TASK == 2 else 2),
    dropout=0.5,
)

# Switch to training mode, then move to the GPU; the last expression is kept
# bare so the notebook displays the module structure.
model.train()
model.cuda()

SelfAttnLSTM(
  (encoder): Sequential(
    (0): Embedding(16375, 100)
    (1): LSTM(100, 120, batch_first=True, bidirectional=True)
  )
  (pos_encoder): Sequential(
    (0): Embedding(38, 100)
    (1): LSTM(100, 120, batch_first=True, bidirectional=True)
  )
  (attention_proj): Linear(in_features=480, out_features=480, bias=True)
  (attention_softmax): Softmax()
  (dropout): Dropout(p=0.5)
  (FFN): Sequential(
    (0): Linear(in_features=480, out_features=150, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5)
    (3): Linear(in_features=150, out_features=2, bias=True)
  )
)

In [8]:
# Loss/metric helper for the selected sub-task, and the Adam optimizer
# (with weight decay) that updates every model parameter.
criterion = Criterion(TASK, cuda=True)
optim = torch.optim.Adam(
    model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.98),
    eps=1e-9,
    weight_decay=1e-4,
)

In [9]:
def validation(model, val_gen, criterion):
    """Evaluate `model` on every batch of `val_gen` and score the pooled predictions.

    The logits and labels of all batches are gathered first and each metric is
    computed once over the whole set, so the result does not depend on how the
    data happens to be split into batches.

    Args:
        model: network invoked as `model(x, pos)`, returning the logits of a batch.
        val_gen: iterable yielding `(x, pos, y)` tensor triples.
        criterion: object providing `loss_compute`, `accu_compute` and
            `f1_compute`, each called with `(logits, y)`.

    Returns:
        Tuple `(loss, accu, f1)`. `loss` is a Python float (via `.item()`); the
        other two are returned exactly as the criterion produced them.

    Raises:
        ValueError: if `val_gen` yields no batches.
    """
    # Run on whatever device the model lives on; fall back to CUDA (the
    # historical behaviour) for a model without parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    was_training = model.training
    model.eval()  # disables dropout while scoring
    try:
        with torch.no_grad():
            logit_chunks = []
            label_chunks = []
            for x, pos, y in val_gen:
                x = x.to(device)
                pos = pos.to(device)
                y = y.to(device)
                # Call the module itself (not .forward) so registered hooks run.
                logit_chunks.append(model(x, pos))
                label_chunks.append(y)

            if not logit_chunks:
                raise ValueError("validation generator yielded no batches")

            # Concatenate once instead of re-copying the running tensor per batch.
            all_logits = torch.cat(logit_chunks, dim=0)
            all_y = torch.cat(label_chunks, dim=0)

            loss = criterion.loss_compute(all_logits, all_y)
            accu = criterion.accu_compute(all_logits, all_y)
            f1 = criterion.f1_compute(all_logits, all_y)
    finally:
        # Hand the model back in the mode the caller had it in, even on error.
        model.train(was_training)
    return loss.item(), accu, f1

In [10]:
EPOCHS = 7 # 10 for task A, 7 for task B
print("Epoch Loss Accu ValLoss ValAccu ValF1")
for epoch in range(EPOCHS):
    # Make sure dropout is active for this epoch, independent of the mode the
    # previous validation pass left the model in.
    model.train()

    # Per-epoch accumulators for the averaged training loss / accuracy.
    running_loss = 0
    running_accu = 0
    running_step = 0

    for x, pos, y in training_gen:
        x = x.cuda()
        pos = pos.cuda()
        y = y.cuda()

        # Call the module itself (not .forward) so registered hooks run.
        logits = model(x, pos)
        loss = criterion.loss_compute(logits, y)
        accu = criterion.accu_compute(logits, y)

        optim.zero_grad()
        loss.backward()
        optim.step()

        # Read the scalar once; it is used for both the running sum and the
        # in-place progress line.
        batch_loss = loss.item()
        running_loss += batch_loss
        running_accu += accu
        running_step += 1
        print(batch_loss, end='\r')

    # One summary row per epoch: training averages, then validation metrics.
    vloss, vaccu, vf1 = validation(model, validation_gen, criterion)
    print("{} {:.5f} {:.5f} {:.5f} {:.5f} {:.5f}".format(
        epoch, running_loss/running_step, running_accu/running_step, vloss, vaccu, vf1))

# Pickles the whole module object; the evaluation cell reloads it with torch.load.
torch.save(model, "tmp.pkt")

Epoch Loss Accu ValLoss ValAccu ValF1
0 0.36511 0.68440 0.34725 0.71601 0.55777
1 0.33500 0.72618 0.34395 0.72054 0.57316
2 0.32241 0.74629 0.32808 0.73338 0.61555
3 0.31047 0.75536 0.32701 0.74773 0.65140
4 0.29512 0.77195 0.32401 0.73792 0.67803
5 0.28103 0.78301 0.31962 0.74622 0.69513
6 0.26802 0.79591 0.30984 0.74622 0.69736


In [11]:
# Reload the model written at the end of training and build the test generator,
# reusing the training vocabularies so token ids line up with the embeddings.
model = torch.load("tmp.pkt")
testing_set, testing_gen = make_data_generator(
    testpath,
    task=TASK,
    testlabel=labelpath,
    batch_size=1024,
    n_workers=4,
    load_vocabs=VOCAB,
    max_len=MAX_LEN,
    shuffle=False,
)

[info] 860 data.


HBox(children=(IntProgress(value=0, max=860), HTML(value='')))


[info] vocab size = 16375


In [12]:
# Score the test set on the pooled predictions of all batches. Averaging
# per-batch F1/accuracy/loss (one equal-weight term per batch) is only correct
# when everything fits in a single batch; `validation` concatenates the logits
# and labels first and computes each metric once.
test_loss, test_accu, test_f1 = validation(model, testing_gen, criterion)

# `validation` may hand the model back in training mode; keep it in inference
# mode after this cell, as before.
model.eval()

print("Evaludation Loss:", test_loss, \
      "F1:", test_f1,\
      "Accu:", test_accu)

Evaludation Loss: 0.2724898159503937 F1: 0.7388543823326432 Accu: 0.7953488230705261


In [13]:
# no proj Evaludation Loss: 0.25462111830711365 F1: 0.758146574424895 Accu: 0.8081395030021667
# ya proj Evaludation Loss: 0.2653390169143677 F1: 0.7695749907788241 Accu: 0.8209302425384521