In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
import numpy as np
from tqdm.notebook import tqdm
import random

In [2]:
from kobert.utils import get_tokenizer
from kobert.pytorch_kobert import get_pytorch_kobert_model

In [3]:
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

In [4]:
##GPU 사용 시
device = torch.device("cuda:0")

In [5]:
bertmodel, vocab = get_pytorch_kobert_model()

using cached model
using cached model


In [6]:
#!wget https://www.dropbox.com/s/374ftkec978br3d/ratings_train.txt?dl=1
#!wget https://www.dropbox.com/s/977gbwh542gdy94/ratings_test.txt?dl=1

In [7]:
dataset_train = nlp.data.TSVDataset("train.tsv", field_indices=[1,2], num_discard_samples=1)
dataset_test = nlp.data.TSVDataset("val.tsv", field_indices=[1,2], num_discard_samples=1)

In [8]:
tokenizer = get_tokenizer()
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

using cached model


In [9]:
class BERTDataset(Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, max_len,
                 pad, pair):
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, pad=pad, pair=pair)

        self.sentences = [transform([i[sent_idx]]) for i in dataset]
        self.labels = [transform([i[label_idx]]) for i in dataset]

    def __getitem__(self, i):
        return (self.sentences[i] + self.labels[i])

    def __len__(self):
        return (len(self.labels))

In [10]:
## Setting parameters
max_len = 64
batch_size = 32
warmup_ratio = 0.1
num_epochs = 300
max_grad_norm = 1
log_interval = 50
learning_rate =  5e-5

In [11]:
data_train = BERTDataset(dataset_train, 0, 1, tok, max_len, True, False)
data_test = BERTDataset(dataset_test, 0, 1, tok, max_len, True, False)

In [12]:
train_dataloader = torch.utils.data.DataLoader(data_train, batch_size=batch_size, num_workers=5)
test_dataloader = torch.utils.data.DataLoader(data_test, batch_size=batch_size, num_workers=5)

In [13]:
class Attention(nn.Module):
    def __init__(self, enc_hid_dim, dec_hid_dim):
        super().__init__()
        
        self.attn = nn.Linear((enc_hid_dim) + dec_hid_dim, dec_hid_dim)
        self.v = nn.Linear(dec_hid_dim, 1, bias = False)
        
    def forward(self, hidden, encoder_outputs):
        
        #hidden = [batch size, dec hid dim]
        #encoder_outputs = [src len, batch size, enc hid dim * 2]
        
        batch_size = encoder_outputs.shape[1]
        src_len = encoder_outputs.shape[0]
        
        #repeat decoder hidden state src_len times
        hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)
        
        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        
        #hidden = [batch size, src len, dec hid dim]
        #encoder_outputs = [batch size, src len, enc hid dim * 2]
        
        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim = 2))) 
        
        #energy = [batch size, src len, dec hid dim]

        attention = self.v(energy).squeeze(2)
        
        #attention= [batch size, src len]
        
        return F.softmax(attention, dim=1)

In [131]:
class BERTSeq2Seq(nn.Module):
    def __init__(self,
                 bert, attention,
                 hidden_size = 768,
                 num_classes=2,
                 dr_rate=None,
                 params=None):
        super(BERTSeq2Seq, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate
        
        self.embed_size = 768                 
        self.embedding = nn.Embedding(len(vocab), self.embed_size)
        #self.classifier = nn.Linear(hidden_size , num_classes)
        
        self.attention = attention
        
        self.decoder = nn.GRU(self.embed_size + hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size * 2 + self.embed_size, len(vocab))
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)
    
    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids, output_ids, output_valid_lengths, teacher_forcing_ratio=0.0):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)
        
        sequence_output, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device), return_dict=False)
        if self.dr_rate:
            hidden = self.dropout(pooler)
            
        hidden = hidden                      
            
        output_id = output_ids[0,:]        
        trg_len = output_ids.shape[0]
        batch_size = output_ids.shape[1]
        trg_vocab_size = len(vocab)
        
        outputs = torch.zeros(trg_len, batch_size, trg_vocab_size).to(device)
        
        for t in range(1, trg_len):
            
            sequence_output = sequence_output.permute(1, 0, 2)
            
            #insert input token embedding, previous hidden and previous cell states
            #receive output tensor (predictions) and new hidden and cell states
            
            output_id_embedded = self.bert.embeddings.word_embeddings(output_id.unsqueeze(0))
            
            a = self.attention(hidden, sequence_output)
            
            a = a.unsqueeze(1)
        
            #a = [batch size, 1, src len]

            sequence_output = sequence_output.permute(1, 0, 2)

            #encoder_outputs = [batch size, src len, enc hid dim * 2]

            weighted = torch.bmm(a, sequence_output)

            #weighted = [batch size, 1, enc hid dim * 2]

            weighted = weighted.permute(1, 0, 2)

            #weighted = [1, batch size, enc hid dim * 2]

            rnn_input = torch.cat((output_id_embedded, weighted), dim = 2)
            
            
            output, hidden = self.decoder(rnn_input, hidden.unsqueeze(0))
            
            assert (output == hidden).all()
        
            output_id_embedded = output_id_embedded.squeeze(0)
            
            hidden = hidden.squeeze(0)
            output = output.squeeze(0)
            weighted = weighted.squeeze(0)
            

            pred = self.out(torch.cat((output, weighted, output_id_embedded), dim = 1))
            
            #place predictions in a tensor holding predictions for each token
            outputs[t] = pred
            
            #decide if we are going to use teacher forcing or not
            teacher_force = random.random() < teacher_forcing_ratio
            
            #get the highest predicted token from our predictions
            top1 = pred.argmax(1) 
            
            #if teacher forcing, use actual next token as next input
            #if not, use predicted token
            output_id = output_ids[t] if teacher_force else top1
        
        return outputs

In [132]:
attn = Attention(768, 768)
model = BERTSeq2Seq(bertmodel, attn, dr_rate=0.5).to(device)

In [133]:
# Prepare optimizer and schedule (linear warmup and decay)
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]

In [134]:
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()

In [135]:
t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)

In [136]:
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)

In [137]:
def calc_accuracy(X,Y):
    
    flatten_X = X.view(-1, out.shape[2])
    flatten_Y = Y.flatten()
    
    max_vals, max_indices = torch.max(flatten_X, 1)    
    
    token_acc = (((max_indices == flatten_Y) * (flatten_Y.flatten() != 1)).sum().data.cpu().numpy())/ (flatten_Y.flatten() != 1).sum().data.cpu()
    sentence_acc = ((X.transpose(1, 0).argmax(-1)[:,1:] != Y.transpose(1, 0)[:,1:]).sum(-1) == 0).float().mean()
    
    return token_acc, sentence_acc

In [138]:
for e in range(num_epochs):
    train_token_acc = 0.0
    train_sentence_acc = 0.0
    test_token_acc = 0.0
    test_sentence_acc = 0.0
    
    model.train()
    
    for batch_id, (token_ids, valid_length, segment_ids, output_ids, output_valid_lengths, _) in enumerate(tqdm(train_dataloader)):
        optimizer.zero_grad()
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
                       
        label_ids = output_ids.transpose(1, 0).long().to(device)
        
        output_valid_lengths = output_valid_lengths
        
        out = model(token_ids, valid_length, segment_ids, label_ids, output_valid_lengths)        
                
        loss = loss_fn(out.view(-1, out.shape[2]), label_ids.flatten())
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        
        optimizer.step()
        scheduler.step()  # Update learning rate schedule
        
        token_acc, sentence_acc = calc_accuracy(out, label_ids)
        
        train_token_acc += token_acc
        train_sentence_acc += sentence_acc
        
        if batch_id % log_interval == 0:
            print("epoch {} batch id {} loss {} train token acc {} train sentence acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_token_acc / (batch_id+1), train_sentence_acc / (batch_id+1)))
    print("epoch {} train token acc {} train sentence acc {}".format(e+1, train_token_acc / (batch_id+1), train_sentence_acc / (batch_id+1)))
    model.eval()
    for batch_id, (token_ids, valid_length, segment_ids, output_ids, output_valid_lengths, _) in enumerate(tqdm(test_dataloader)):
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
                       
        label_ids = output_ids.transpose(1, 0).long().to(device)
        
        output_valid_lengths = output_valid_lengths
        
        out = model(token_ids, valid_length, segment_ids, label_ids, output_valid_lengths)        
        
        token_acc, sentence_acc = calc_accuracy(out, label_ids)
        
        test_token_acc += token_acc
        test_sentence_acc += sentence_acc
        
    print("epoch {} test token acc {} test sentence acc {}".format(e+1, test_token_acc / (batch_id+1), test_sentence_acc / (batch_id+1)))

HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 1 batch id 1 loss 9.065226554870605 train token acc 0.0 train sentence acc 0.0
epoch 1 batch id 51 loss 8.966920852661133 train token acc 0.0 train sentence acc 0.0
epoch 1 batch id 101 loss 8.607077598571777 train token acc 0.0 train sentence acc 0.0
epoch 1 batch id 151 loss 8.054262161254883 train token acc 0.0 train sentence acc 0.0
epoch 1 batch id 201 loss 7.271828651428223 train token acc 0.0 train sentence acc 0.0

epoch 1 train token acc 0.0 train sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 1 test token acc 0.0 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 2 batch id 1 loss 6.795969486236572 train token acc 0.0 train sentence acc 0.0
epoch 2 batch id 51 loss 5.531278133392334 train token acc 0.0 train sentence acc 0.0
epoch 2 batch id 101 loss 5.11005163192749 train token acc 0.0 train sentence acc 0.0
epoch 2 batch id 151 loss 4.835213661193848 train token acc 0.0 train sentence acc 0.0
epoch 2 batch id 201 loss 4.316561698913574 train token acc 0.0 train sentence acc 0.0

epoch 2 train token acc 0.0 train sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 2 test token acc 0.0 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 3 batch id 1 loss 3.795297145843506 train token acc 0.0 train sentence acc 0.0
epoch 3 batch id 51 loss 2.3523166179656982 train token acc 0.0 train sentence acc 0.0
epoch 3 batch id 101 loss 2.2688632011413574 train token acc 0.0006508568398617577 train sentence acc 0.0
epoch 3 batch id 151 loss 2.3723011016845703 train token acc 0.003325147239725716 train sentence acc 0.0
epoch 3 batch id 201 loss 2.2746994495391846 train token acc 0.007910253641089026 train sentence acc 0.0

epoch 3 train token acc 0.011916998313999306 train sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 3 test token acc 0.1027179891928373 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 4 batch id 1 loss 1.9442696571350098 train token acc 0.03924646880477667 train sentence acc 0.0
epoch 4 batch id 51 loss 1.2680113315582275 train token acc 0.09050311838441036 train sentence acc 0.0
epoch 4 batch id 101 loss 1.4375646114349365 train token acc 0.10969339512025772 train sentence acc 0.0
epoch 4 batch id 151 loss 1.6619046926498413 train token acc 0.11860968709988087 train sentence acc 0.0
epoch 4 batch id 201 loss 1.7143751382827759 train token acc 0.1253262784694604 train sentence acc 0.0

epoch 4 train token acc 0.12822583035223578 train sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 4 test token acc 0.16234588899362065 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 5 batch id 1 loss 1.5108869075775146 train token acc 0.12558870017528534 train sentence acc 0.0
epoch 5 batch id 51 loss 1.0266401767730713 train token acc 0.15625883335037632 train sentence acc 0.0
epoch 5 batch id 101 loss 1.2279949188232422 train token acc 0.1646097377772526 train sentence acc 0.0
epoch 5 batch id 151 loss 1.4758574962615967 train token acc 0.1680787198950185 train sentence acc 0.0
epoch 5 batch id 201 loss 1.5309274196624756 train token acc 0.17096146267597837 train sentence acc 0.0

epoch 5 train token acc 0.17253439082523817 train sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 5 test token acc 0.19258744201129852 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 6 batch id 1 loss 1.356757640838623 train token acc 0.16640502773225307 train sentence acc 0.0
epoch 6 batch id 51 loss 0.9212541580200195 train token acc 0.1890292037209934 train sentence acc 0.0
epoch 6 batch id 101 loss 1.121208906173706 train token acc 0.1946726965724887 train sentence acc 0.0
epoch 6 batch id 151 loss 1.3677513599395752 train token acc 0.19744114170316607 train sentence acc 0.0
epoch 6 batch id 201 loss 1.4245549440383911 train token acc 0.19907444045156128 train sentence acc 0.0

epoch 6 train token acc 0.19983363114658861 train sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 6 test token acc 0.21540292358943536 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 7 batch id 1 loss 1.2889200448989868 train token acc 0.20565149653702974 train sentence acc 0.0
epoch 7 batch id 51 loss 0.8822018504142761 train token acc 0.2075894726554443 train sentence acc 0.0
epoch 7 batch id 101 loss 1.0492933988571167 train token acc 0.21193620129280935 train sentence acc 0.0
epoch 7 batch id 151 loss 1.3123912811279297 train token acc 0.21397568681046045 train sentence acc 0.0
epoch 7 batch id 201 loss 1.3604505062103271 train token acc 0.21483127060010382 train sentence acc 0.0

epoch 7 train token acc 0.21547586560625684 train sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 7 test token acc 0.21187324490507736 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 8 batch id 1 loss 1.2482857704162598 train token acc 0.1930926265195012 train sentence acc 0.0
epoch 8 batch id 51 loss 0.8447011709213257 train token acc 0.2187932768366391 train sentence acc 0.0
epoch 8 batch id 101 loss 1.0094032287597656 train token acc 0.22217617617772506 train sentence acc 0.0
epoch 8 batch id 151 loss 1.2691508531570435 train token acc 0.2238700928587454 train sentence acc 0.0
epoch 8 batch id 201 loss 1.3210158348083496 train token acc 0.22450384622108452 train sentence acc 0.0

epoch 8 train token acc 0.22483895999723919 train sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 8 test token acc 0.2277528242317631 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 9 batch id 1 loss 1.2166693210601807 train token acc 0.22762951906770468 train sentence acc 0.0
epoch 9 batch id 51 loss 0.7922804951667786 train token acc 0.22754561663924844 train sentence acc 0.0
epoch 9 batch id 101 loss 0.9726096987724304 train token acc 0.23141014417010997 train sentence acc 0.0
epoch 9 batch id 151 loss 1.229044795036316 train token acc 0.2318006676031178 train sentence acc 0.0
epoch 9 batch id 201 loss 1.2655280828475952 train token acc 0.2318166376998537 train sentence acc 0.0

epoch 9 train token acc 0.23231684420253554 train sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 9 test token acc 0.21531198800692117 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 10 batch id 1 loss 1.2147129774093628 train token acc 0.20094192028045654 train sentence acc 0.0
epoch 10 batch id 51 loss 0.7803065776824951 train token acc 0.23321878110907754 train sentence acc 0.0
epoch 10 batch id 101 loss 0.955424964427948 train token acc 0.23810688658193271 train sentence acc 0.0
epoch 10 batch id 151 loss 1.1977331638336182 train token acc 0.23887245526339934 train sentence acc 0.0
epoch 10 batch id 201 loss 1.2251269817352295 train token acc 0.23941262257725593 train sentence acc 0.0

epoch 10 train token acc 0.23995767918906544 train sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 10 test token acc 0.2285404018852312 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 11 batch id 1 loss 1.1117349863052368 train token acc 0.2307692365720868 train sentence acc 0.0
epoch 11 batch id 51 loss 0.7446746826171875 train token acc 0.24476060980483524 train sentence acc 0.0
epoch 11 batch id 101 loss 0.9222123026847839 train token acc 0.24764216608151166 train sentence acc 0.0
epoch 11 batch id 151 loss 1.1654942035675049 train token acc 0.24879091330773093 train sentence acc 0.0
epoch 11 batch id 201 loss 1.2560385465621948 train token acc 0.24933993519429784 train sentence acc 0.0

epoch 11 train token acc 0.2501066940221042 train sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 11 test token acc 0.23953089070068123 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 12 batch id 1 loss 1.0877605676651 train token acc 0.259026694111526 train sentence acc 0.0
epoch 12 batch id 51 loss 0.7238097786903381 train token acc 0.2582063151377381 train sentence acc 0.0
epoch 12 batch id 101 loss 0.9036523699760437 train token acc 0.26184180276099556 train sentence acc 0.0
epoch 12 batch id 151 loss 1.1494909524917603 train token acc 0.263581682691992 train sentence acc 0.0
epoch 12 batch id 201 loss 1.1728354692459106 train token acc 0.2646344223057397 train sentence acc 0.0

epoch 12 train token acc 0.2657396808547312 train sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 12 test token acc 0.257184183509226 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 13 batch id 1 loss 1.0472400188446045 train token acc 0.2700157053768635 train sentence acc 0.0
epoch 13 batch id 51 loss 0.701695442199707 train token acc 0.2775418465071376 train sentence acc 0.0
epoch 13 batch id 101 loss 0.8756144046783447 train token acc 0.2795937868678887 train sentence acc 0.0
epoch 13 batch id 151 loss 1.1134283542633057 train token acc 0.28193997223522294 train sentence acc 0.00020695364219136536
epoch 13 batch id 201 loss 1.1657183170318604 train token acc 0.28310982700071263 train sentence acc 0.000155472633196041

epoch 13 train token acc 0.2841958800212748 train sentence acc 0.0001426940580131486


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 13 test token acc 0.2672429156715653 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 14 batch id 1 loss 1.030996322631836 train token acc 0.28100471664220095 train sentence acc 0.0
epoch 14 batch id 51 loss 0.6733645796775818 train token acc 0.29420537612510517 train sentence acc 0.0
epoch 14 batch id 101 loss 0.8628202676773071 train token acc 0.29583079973581516 train sentence acc 0.0
epoch 14 batch id 151 loss 1.102765679359436 train token acc 0.29706010934264576 train sentence acc 0.0
epoch 14 batch id 201 loss 1.1032427549362183 train token acc 0.2975410945905808 train sentence acc 0.0

epoch 14 train token acc 0.29762399699655895 train sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 14 test token acc 0.2954136134152684 test sentence acc 0.0009191176504828036


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 15 batch id 1 loss 1.019868016242981 train token acc 0.3265306204557419 train sentence acc 0.0
epoch 15 batch id 51 loss 0.6719481945037842 train token acc 0.30240928103654263 train sentence acc 0.0
epoch 15 batch id 101 loss 0.8365396857261658 train token acc 0.3032771548300078 train sentence acc 0.0003094059356953949
epoch 15 batch id 151 loss 1.0775705575942993 train token acc 0.30454945757935775 train sentence acc 0.00020695364219136536
epoch 15 batch id 201 loss 1.0729990005493164 train token acc 0.30467117321040404 train sentence acc 0.000310945266392082

epoch 15 train token acc 0.3057769221279209 train sentence acc 0.0002853881160262972


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 15 test token acc 0.2883871435393196 test sentence acc 0.0009191176504828036


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 16 batch id 1 loss 0.9759472012519836 train token acc 0.29042386915534735 train sentence acc 0.0
epoch 16 batch id 51 loss 0.6530511975288391 train token acc 0.31149458681808456 train sentence acc 0.0
epoch 16 batch id 101 loss 0.8137127757072449 train token acc 0.31395272413005887 train sentence acc 0.0003094059356953949
epoch 16 batch id 151 loss 1.0532920360565186 train token acc 0.3146729794668576 train sentence acc 0.00020695364219136536
epoch 16 batch id 201 loss 1.0805652141571045 train token acc 0.31450409736004725 train sentence acc 0.000310945266392082

epoch 16 train token acc 0.3153175984203101 train sentence acc 0.0002853881160262972


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 16 test token acc 0.2907458619105027 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 17 batch id 1 loss 0.9733313322067261 train token acc 0.3029827391728759 train sentence acc 0.0
epoch 17 batch id 51 loss 0.6247093677520752 train token acc 0.3217567998856562 train sentence acc 0.0006127451197244227
epoch 17 batch id 101 loss 0.7745358347892761 train token acc 0.3232068886904341 train sentence acc 0.0006188118713907897
epoch 17 batch id 151 loss 1.037583351135254 train token acc 0.3230961768823176 train sentence acc 0.0004139072843827307
epoch 17 batch id 201 loss 1.028419852256775 train token acc 0.32294114354756936 train sentence acc 0.000310945266392082

epoch 17 train token acc 0.32369651916100894 train sentence acc 0.0002853881160262972


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 17 test token acc 0.29051429607138474 test sentence acc 0.0018382353009656072


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 18 batch id 1 loss 0.9620262384414673 train token acc 0.3124018916860223 train sentence acc 0.0
epoch 18 batch id 51 loss 0.6082865595817566 train token acc 0.32771927015353725 train sentence acc 0.0006127451197244227
epoch 18 batch id 101 loss 0.7719690799713135 train token acc 0.3291255716709328 train sentence acc 0.0003094059356953949
epoch 18 batch id 151 loss 1.0232226848602295 train token acc 0.3300288640932544 train sentence acc 0.00020695364219136536
epoch 18 batch id 201 loss 1.0391405820846558 train token acc 0.32918747530355863 train sentence acc 0.00046641789958812296

epoch 18 train token acc 0.3300814705562765 train sentence acc 0.00042808218859136105


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 18 test token acc 0.3103977021357209 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 19 batch id 1 loss 0.9513389468193054 train token acc 0.329670337960124 train sentence acc 0.0
epoch 19 batch id 51 loss 0.5974893569946289 train token acc 0.3397042330322495 train sentence acc 0.001838235417380929
epoch 19 batch id 101 loss 0.7420901656150818 train token acc 0.3403417591598014 train sentence acc 0.0012376237427815795
epoch 19 batch id 151 loss 0.9956182241439819 train token acc 0.3409305550211991 train sentence acc 0.0010347681818529963
epoch 19 batch id 201 loss 0.9890186786651611 train token acc 0.3407643104524616 train sentence acc 0.0009328357991762459

epoch 19 train token acc 0.3413271926158452 train sentence acc 0.0009988583624362946


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 19 test token acc 0.3177245295681881 test sentence acc 0.0009191176504828036


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 20 batch id 1 loss 0.92276930809021 train token acc 0.3485086429864168 train sentence acc 0.0
epoch 20 batch id 51 loss 0.5960212349891663 train token acc 0.3476344631943742 train sentence acc 0.0006127451197244227
epoch 20 batch id 101 loss 0.745288074016571 train token acc 0.3489642937651531 train sentence acc 0.0006188118713907897
epoch 20 batch id 151 loss 0.9650366306304932 train token acc 0.3505589421406783 train sentence acc 0.0004139072843827307
epoch 20 batch id 201 loss 0.9751166105270386 train token acc 0.3503414093984745 train sentence acc 0.000310945266392082

epoch 20 train token acc 0.3508307389669401 train sentence acc 0.00042808218859136105


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 20 test token acc 0.33788874824120496 test sentence acc 0.0


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 21 batch id 1 loss 0.9114807844161987 train token acc 0.3406593492254615 train sentence acc 0.0
epoch 21 batch id 51 loss 0.5607390403747559 train token acc 0.36465661894466145 train sentence acc 0.0012254902394488454
epoch 21 batch id 101 loss 0.7115365862846375 train token acc 0.36434146750598484 train sentence acc 0.0009282177779823542
epoch 21 batch id 151 loss 0.9740627408027649 train token acc 0.36547723356696055 train sentence acc 0.0008278145687654614
epoch 21 batch id 201 loss 0.9761787056922913 train token acc 0.36349310374364663 train sentence acc 0.0009328357991762459

epoch 21 train token acc 0.36405654544367977 train sentence acc 0.0008561643771827221


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 21 test token acc 0.3427150470396394 test sentence acc 0.0009191176504828036


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 22 batch id 1 loss 0.8917921781539917 train token acc 0.3437990667298436 train sentence acc 0.0
epoch 22 batch id 51 loss 0.5596989393234253 train token acc 0.37809164597260236 train sentence acc 0.0012254902394488454
epoch 22 batch id 101 loss 0.73955899477005 train token acc 0.37672392787102904 train sentence acc 0.0018564355559647083
epoch 22 batch id 151 loss 0.9608380794525146 train token acc 0.37783630607674357 train sentence acc 0.001448675524443388
epoch 22 batch id 201 loss 0.9378842115402222 train token acc 0.37791895080677496 train sentence acc 0.0015547263901680708

epoch 22 train token acc 0.37902765349988915 train sentence acc 0.0014269405510276556


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 22 test token acc 0.3488092871608815 test sentence acc 0.0009191176504828036


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 23 batch id 1 loss 0.8777759671211243 train token acc 0.3390894904732704 train sentence acc 0.0
epoch 23 batch id 51 loss 0.5566834211349487 train token acc 0.3877584770203148 train sentence acc 0.001838235417380929
epoch 23 batch id 101 loss 0.6822925806045532 train token acc 0.3886842254680664 train sentence acc 0.0015470297075808048
epoch 23 batch id 151 loss 0.9212672710418701 train token acc 0.3900325272258507 train sentence acc 0.0016556291375309229
epoch 23 batch id 201 loss 0.9032666683197021 train token acc 0.3885486504840271 train sentence acc 0.0015547263901680708

epoch 23 train token acc 0.3897211428739122 train sentence acc 0.0017123287543654442


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 23 test token acc 0.36510747613604455 test sentence acc 0.0009191176504828036


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 24 batch id 1 loss 0.8762887716293335 train token acc 0.376766100525856 train sentence acc 0.0
epoch 24 batch id 51 loss 0.5264749526977539 train token acc 0.4051745060452388 train sentence acc 0.0012254902394488454
epoch 24 batch id 101 loss 0.659697949886322 train token acc 0.40211057530181243 train sentence acc 0.0015470297075808048
epoch 24 batch id 151 loss 0.9095542430877686 train token acc 0.403074059959205 train sentence acc 0.0016556291375309229
epoch 24 batch id 201 loss 0.8860249519348145 train token acc 0.40196428843078885 train sentence acc 0.0023320894688367844

epoch 24 train token acc 0.4031958348478178 train sentence acc 0.00242579891346395


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 24 test token acc 0.3861577119370101 test sentence acc 0.0018382353009656072


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 25 batch id 1 loss 0.8802366852760315 train token acc 0.3579277954995632 train sentence acc 0.0
epoch 25 batch id 51 loss 0.5077252984046936 train token acc 0.40884866279677723 train sentence acc 0.003676470834761858
epoch 25 batch id 101 loss 0.6377174258232117 train token acc 0.4117180918629217 train sentence acc 0.004331683274358511
epoch 25 batch id 151 loss 0.9114893674850464 train token acc 0.41514761609392276 train sentence acc 0.004552979953587055
epoch 25 batch id 201 loss 0.8726415634155273 train token acc 0.41380768378765614 train sentence acc 0.004975124262273312

epoch 25 train token acc 0.4146241908877042 train sentence acc 0.0048515978269279


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 25 test token acc 0.40059531759128303 test sentence acc 0.014705882407724857


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 26 batch id 1 loss 0.8525104522705078 train token acc 0.3626373717561364 train sentence acc 0.0
epoch 26 batch id 51 loss 0.4954870045185089 train token acc 0.43038363443851907 train sentence acc 0.00857843179255724
epoch 26 batch id 101 loss 0.6220730543136597 train token acc 0.43134780664292005 train sentence acc 0.01113861333578825
epoch 26 batch id 151 loss 0.8993209600448608 train token acc 0.4300246906485285 train sentence acc 0.008692053146660328
epoch 26 batch id 201 loss 0.8763247728347778 train token acc 0.4272336115775658 train sentence acc 0.00777363171800971

epoch 26 train token acc 0.4283783312065136 train sentence acc 0.007610349450260401


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 26 test token acc 0.40415760247817484 test sentence acc 0.011029412038624287


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 27 batch id 1 loss 0.8309921026229858 train token acc 0.400313981808722 train sentence acc 0.0
epoch 27 batch id 51 loss 0.47384336590766907 train token acc 0.4414325930365343 train sentence acc 0.011642157100141048
epoch 27 batch id 101 loss 0.6125805974006653 train token acc 0.44156424341709494 train sentence acc 0.015160891227424145
epoch 27 batch id 151 loss 0.8695425987243652 train token acc 0.4437769766463682 train sentence acc 0.013038079254329205
epoch 27 batch id 201 loss 0.8274043202400208 train token acc 0.443075741053355 train sentence acc 0.014303482137620449

epoch 27 train token acc 0.44367369159803083 train sentence acc 0.013746194541454315


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 27 test token acc 0.41932525651817043 test sentence acc 0.014705882407724857


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 28 batch id 1 loss 0.841300368309021 train token acc 0.37519624177366495 train sentence acc 0.0
epoch 28 batch id 51 loss 0.4606000483036041 train token acc 0.45803840827041614 train sentence acc 0.01776960864663124
epoch 28 batch id 101 loss 0.5841841697692871 train token acc 0.4585754462497051 train sentence acc 0.024443069472908974
epoch 28 batch id 151 loss 0.8551368117332458 train token acc 0.4601833044037852 train sentence acc 0.021937085315585136
epoch 28 batch id 201 loss 0.8193954229354858 train token acc 0.459341340240749 train sentence acc 0.02363184094429016

epoch 28 train token acc 0.4605306607865639 train sentence acc 0.022735919803380966


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 28 test token acc 0.4312418991295785 test sentence acc 0.021139705553650856


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 29 batch id 1 loss 0.8150801658630371 train token acc 0.4207221455872059 train sentence acc 0.0
epoch 29 batch id 51 loss 0.4413900375366211 train token acc 0.4756827874282631 train sentence acc 0.02083333395421505
epoch 29 batch id 101 loss 0.5496856570243835 train token acc 0.4743297300589649 train sentence acc 0.02939356304705143
epoch 29 batch id 151 loss 0.8283250331878662 train token acc 0.47508574097065737 train sentence acc 0.02773178741335869
epoch 29 batch id 201 loss 0.8082219362258911 train token acc 0.47235293282643054 train sentence acc 0.029073381796479225

epoch 29 train token acc 0.4727237396033642 train sentence acc 0.02744482271373272


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 29 test token acc 0.43487699513378386 test sentence acc 0.02389705926179886


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 30 batch id 1 loss 0.8247953653335571 train token acc 0.3642072305083275 train sentence acc 0.0
epoch 30 batch id 51 loss 0.4326833486557007 train token acc 0.4863648668374391 train sentence acc 0.02818627469241619
epoch 30 batch id 101 loss 0.5506258010864258 train token acc 0.482882440876271 train sentence acc 0.032487623393535614
epoch 30 batch id 151 loss 0.8235305547714233 train token acc 0.48365611164824457 train sentence acc 0.030215231701731682
epoch 30 batch id 201 loss 0.7852671146392822 train token acc 0.48150651250439996 train sentence acc 0.03156094625592232

epoch 30 train token acc 0.4824363682908112 train sentence acc 0.030441397801041603


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 30 test token acc 0.4501114686170374 test sentence acc 0.024816175922751427


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 31 batch id 1 loss 0.7821338772773743 train token acc 0.4065934168174863 train sentence acc 0.0
epoch 31 batch id 51 loss 0.4233091473579407 train token acc 0.5005332175824865 train sentence acc 0.03125
epoch 31 batch id 101 loss 0.5211725831031799 train token acc 0.49497305113582474 train sentence acc 0.0358910895884037
epoch 31 batch id 151 loss 0.8037993907928467 train token acc 0.49509835241991557 train sentence acc 0.0339403972029686
epoch 31 batch id 201 loss 0.7564810514450073 train token acc 0.4916221422047137 train sentence acc 0.033271145075559616

epoch 31 train token acc 0.4918774907837609 train sentence acc 0.0318683385848999


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 31 test token acc 0.45452647023450804 test sentence acc 0.025735294446349144


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 32 batch id 1 loss 0.7545213103294373 train token acc 0.43171115685254335 train sentence acc 0.0
epoch 32 batch id 51 loss 0.4088675081729889 train token acc 0.4996357921752896 train sentence acc 0.03125
epoch 32 batch id 101 loss 0.511702299118042 train token acc 0.5003680658393014 train sentence acc 0.03650990128517151
epoch 32 batch id 151 loss 0.8069686889648438 train token acc 0.5009832404845314 train sentence acc 0.03414735198020935
epoch 32 batch id 201 loss 0.7240219116210938 train token acc 0.49912093837272875 train sentence acc 0.035136815160512924

epoch 32 train token acc 0.4993934687280168 train sentence acc 0.03443683311343193


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 32 test token acc 0.46692496062229005 test sentence acc 0.024816175922751427


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 33 batch id 1 loss 0.7948117256164551 train token acc 0.4379905918613076 train sentence acc 0.0
epoch 33 batch id 51 loss 0.4095352590084076 train token acc 0.5063454864852532 train sentence acc 0.03492647036910057
epoch 33 batch id 101 loss 0.5187591910362244 train token acc 0.5062149738740767 train sentence acc 0.04022277146577835
epoch 33 batch id 151 loss 0.7961127161979675 train token acc 0.5095817309924531 train sentence acc 0.03745860978960991
epoch 33 batch id 201 loss 0.699302077293396 train token acc 0.5099520216445759 train sentence acc 0.0399564653635025

epoch 33 train token acc 0.5113192071891539 train sentence acc 0.03876521810889244


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 33 test token acc 0.47673896157442974 test sentence acc 0.028492646291851997


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 34 batch id 1 loss 0.7417526245117188 train token acc 0.45996861439198256 train sentence acc 0.0
epoch 34 batch id 51 loss 0.4231113791465759 train token acc 0.528373462606824 train sentence acc 0.03737745061516762
epoch 34 batch id 101 loss 0.5312628746032715 train token acc 0.5259514585826638 train sentence acc 0.04517326503992081
epoch 34 batch id 151 loss 0.7923459410667419 train token acc 0.5250378895052925 train sentence acc 0.04139072820544243
epoch 34 batch id 201 loss 0.7066652774810791 train token acc 0.5229883074102488 train sentence acc 0.0399564653635025

epoch 34 train token acc 0.5239586117400971 train sentence acc 0.03933599218726158


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 34 test token acc 0.4839684408301871 test sentence acc 0.028492646291851997


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 35 batch id 1 loss 0.7528502941131592 train token acc 0.4489796031266451 train sentence acc 0.0
epoch 35 batch id 51 loss 0.3893051743507385 train token acc 0.5358089539747429 train sentence acc 0.04473039507865906
epoch 35 batch id 101 loss 0.49458047747612 train token acc 0.5332444105896693 train sentence acc 0.047029703855514526
epoch 35 batch id 151 loss 0.7742671966552734 train token acc 0.5358408870289584 train sentence acc 0.04366721957921982
epoch 35 batch id 201 loss 0.6834273338317871 train token acc 0.5335181173561512 train sentence acc 0.04462064430117607

epoch 35 train token acc 0.5344157229803851 train sentence acc 0.04418759047985077


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 35 test token acc 0.4829144397209508 test sentence acc 0.03216911852359772


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 36 batch id 1 loss 0.7184334993362427 train token acc 0.4583987556397915 train sentence acc 0.0
epoch 36 batch id 51 loss 0.3881373405456543 train token acc 0.5418877661908411 train sentence acc 0.04227941483259201
epoch 36 batch id 101 loss 0.46825772523880005 train token acc 0.5392889479615514 train sentence acc 0.0491955429315567
epoch 36 batch id 151 loss 0.7846871614456177 train token acc 0.5407900541707787 train sentence acc 0.04470198601484299
epoch 36 batch id 201 loss 0.6685759425163269 train token acc 0.5397299745610095 train sentence acc 0.04617537185549736

epoch 36 train token acc 0.5410473101323717 train sentence acc 0.045852359384298325


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 36 test token acc 0.4897346591323559 test sentence acc 0.029411764815449715


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 37 batch id 1 loss 0.7118141651153564 train token acc 0.4678179081529379 train sentence acc 0.03125
epoch 37 batch id 51 loss 0.39449959993362427 train token acc 0.5417914148169004 train sentence acc 0.050245098769664764
epoch 37 batch id 101 loss 0.4602295756340027 train token acc 0.5418287980206551 train sentence acc 0.05445544421672821
epoch 37 batch id 151 loss 0.7785297632217407 train token acc 0.5428619387571928 train sentence acc 0.05070364102721214
epoch 37 batch id 201 loss 0.6523447036743164 train token acc 0.5411481929691601 train sentence acc 0.05223880335688591

epoch 37 train token acc 0.5425970039881136 train sentence acc 0.051607683300971985


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 37 test token acc 0.4917094401624875 test sentence acc 0.030330883339047432


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 38 batch id 1 loss 0.723460853099823 train token acc 0.4615384731441736 train sentence acc 0.0
epoch 38 batch id 51 loss 0.36999163031578064 train token acc 0.5490003271694934 train sentence acc 0.050245098769664764
epoch 38 batch id 101 loss 0.4614155888557434 train token acc 0.5483408116926854 train sentence acc 0.05631187930703163
epoch 38 batch id 151 loss 0.7728212475776672 train token acc 0.550809656564356 train sentence acc 0.053187087178230286
epoch 38 batch id 201 loss 0.6311085820198059 train token acc 0.5486822828942381 train sentence acc 0.05519278347492218

epoch 38 train token acc 0.5499674841154528 train sentence acc 0.054842084646224976


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 38 test token acc 0.4914819538997322 test sentence acc 0.03400735184550285


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 39 batch id 1 loss 0.7097795009613037 train token acc 0.4662480494007468 train sentence acc 0.03125
epoch 39 batch id 51 loss 0.3530501425266266 train token acc 0.5582385382765684 train sentence acc 0.06188725680112839
epoch 39 batch id 101 loss 0.4692060947418213 train token acc 0.5545566981476224 train sentence acc 0.06528465449810028
epoch 39 batch id 151 loss 0.7464849948883057 train token acc 0.5547387031969362 train sentence acc 0.060430463403463364
epoch 39 batch id 201 loss 0.6521381735801697 train token acc 0.5526931287468276 train sentence acc 0.060167908668518066

epoch 39 train token acc 0.5534072235284041 train sentence acc 0.0596461147069931


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 39 test token acc 0.49283082297359426 test sentence acc 0.03584558889269829


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 40 batch id 1 loss 0.6905007362365723 train token acc 0.47409734316170216 train sentence acc 0.0
epoch 40 batch id 51 loss 0.3561279773712158 train token acc 0.5610342238134906 train sentence acc 0.05943627655506134
epoch 40 batch id 101 loss 0.45206379890441895 train token acc 0.5576967823867788 train sentence acc 0.06621287018060684
epoch 40 batch id 151 loss 0.7233769297599792 train token acc 0.5600219063348266 train sentence acc 0.06539735198020935
epoch 40 batch id 201 loss 0.6283316016197205 train token acc 0.5590792182538733 train sentence acc 0.06856343150138855

epoch 40 train token acc 0.5589298303313468 train sentence acc 0.06744672358036041


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 40 test token acc 0.4937951658331953 test sentence acc 0.036764707416296005


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 41 batch id 1 loss 0.6911417245864868 train token acc 0.47880691941827536 train sentence acc 0.0
epoch 41 batch id 51 loss 0.3561770021915436 train token acc 0.5680965097625649 train sentence acc 0.0674019604921341
epoch 41 batch id 101 loss 0.44479435682296753 train token acc 0.5654184131601844 train sentence acc 0.07240098714828491
epoch 41 batch id 151 loss 0.7015592455863953 train token acc 0.5677920581459913 train sentence acc 0.07077814638614655
epoch 41 batch id 201 loss 0.632036030292511 train token acc 0.5652426104489899 train sentence acc 0.07136193662881851

epoch 41 train token acc 0.5652501825705707 train sentence acc 0.07039573788642883


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 41 test token acc 0.501878958534422 test sentence acc 0.03860294073820114


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 42 batch id 1 loss 0.6808648705482483 train token acc 0.48351649567484856 train sentence acc 0.0
epoch 42 batch id 51 loss 0.3459080457687378 train token acc 0.572959799956366 train sentence acc 0.07352941483259201
epoch 42 batch id 101 loss 0.44054895639419556 train token acc 0.5677981148427813 train sentence acc 0.07611385732889175
epoch 42 batch id 151 loss 0.7088806629180908 train token acc 0.5693468113964892 train sentence acc 0.07450331002473831
epoch 42 batch id 201 loss 0.6138821840286255 train token acc 0.5657025884063127 train sentence acc 0.07493780553340912

epoch 42 train token acc 0.566125760444963 train sentence acc 0.0741533488035202


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 42 test token acc 0.4935790365527603 test sentence acc 0.040441177785396576


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 43 batch id 1 loss 0.6733185648918152 train token acc 0.46467819064855576 train sentence acc 0.0
epoch 43 batch id 51 loss 0.3836461305618286 train token acc 0.5664793564435825 train sentence acc 0.06924019753932953
epoch 43 batch id 101 loss 0.43027356266975403 train token acc 0.5655806611285148 train sentence acc 0.07425742596387863
epoch 43 batch id 151 loss 0.6970284581184387 train token acc 0.570187157427041 train sentence acc 0.07264073193073273
epoch 43 batch id 201 loss 0.6184921860694885 train token acc 0.5685562230727695 train sentence acc 0.07649253308773041

epoch 43 train token acc 0.5693008494807926 train sentence acc 0.07643645256757736


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 43 test token acc 0.5037515539785519 test sentence acc 0.045036766678094864


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 44 batch id 1 loss 0.6580211520195007 train token acc 0.4725274844095111 train sentence acc 0.03125
epoch 44 batch id 51 loss 0.3523555397987366 train token acc 0.5739893317564994 train sentence acc 0.0729166716337204
epoch 44 batch id 101 loss 0.41413986682891846 train token acc 0.5739615791820746 train sentence acc 0.0798267349600792
epoch 44 batch id 151 loss 0.6924027800559998 train token acc 0.5764006641485253 train sentence acc 0.08029801398515701
epoch 44 batch id 201 loss 0.6121422052383423 train token acc 0.5730506085294915 train sentence acc 0.0814676582813263

epoch 44 train token acc 0.5735452009431942 train sentence acc 0.07967084646224976


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 44 test token acc 0.5025277739829 test sentence acc 0.04411764815449715


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 45 batch id 1 loss 0.6571794748306274 train token acc 0.49764522444456816 train sentence acc 0.03125
epoch 45 batch id 51 loss 0.3396976590156555 train token acc 0.5766722012297524 train sentence acc 0.07720588147640228
epoch 45 batch id 101 loss 0.43067261576652527 train token acc 0.5782776009637198 train sentence acc 0.08230198174715042
epoch 45 batch id 151 loss 0.686854362487793 train token acc 0.5801565985485221 train sentence acc 0.08091887086629868
epoch 45 batch id 201 loss 0.601203203201294 train token acc 0.5786200619808894 train sentence acc 0.08395522087812424

epoch 45 train token acc 0.5795963733099809 train sentence acc 0.08328577131032944


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 45 test token acc 0.505733357009697 test sentence acc 0.05147058889269829


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 46 batch id 1 loss 0.6677561402320862 train token acc 0.47095762565732 train sentence acc 0.03125
epoch 46 batch id 51 loss 0.3310631215572357 train token acc 0.5835093387754122 train sentence acc 0.09252451360225677
epoch 46 batch id 101 loss 0.4352940618991852 train token acc 0.5852398210444885 train sentence acc 0.09560643136501312
epoch 46 batch id 151 loss 0.7262073755264282 train token acc 0.586972474597783 train sentence acc 0.09271523356437683
epoch 46 batch id 201 loss 0.6356163024902344 train token acc 0.5854970214891928 train sentence acc 0.09530472755432129

epoch 46 train token acc 0.5861129569518061 train sentence acc 0.09479641914367676


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 46 test token acc 0.52164904930396 test sentence acc 0.05514705926179886


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 47 batch id 1 loss 0.6499083042144775 train token acc 0.48822607193142176 train sentence acc 0.0625

epoch 68 train token acc 0.6823941917875229 train sentence acc 0.21556314826011658


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 68 test token acc 0.5665211335255984 test sentence acc 0.12591911852359772


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 69 batch id 1 loss 0.5296856164932251 train token acc 0.5839874558150768 train sentence acc 0.125
epoch 69 batch id 51 loss 0.2747657597064972 train token acc 0.6830735728555524 train sentence acc 0.20098039507865906
epoch 69 batch id 101 loss 0.3166663944721222 train token acc 0.6846536956439347 train sentence acc 0.2147277146577835
epoch 69 batch id 151 loss 0.5476694107055664 train token acc 0.6857444032231063 train sentence acc 0.21502482891082764
epoch 69 batch id 201 loss 0.42471709847450256 train token acc 0.6847694177390192 train sentence acc 0.22092661261558533

epoch 69 train token acc 0.6858784757399311 train sentence acc 0.22070012986660004


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 69 test token acc 0.574942933891982 test sentence acc 0.12683823704719543


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 70 batch id 1 loss 0.5780009031295776 train token acc 0.5510204220190644 train sentence acc 0.25
epoch 70 batch id 51 loss 0.2743583619594574 train token acc 0.6805828927819818 train sentence acc 0.20955882966518402
epoch 70 batch id 101 loss 0.31180539727211 train token acc 0.6834144793499243 train sentence acc 0.21844059228897095

epoch 71 train token acc 0.688553360555019 train sentence acc 0.23268644511699677


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 71 test token acc 0.5681527300381705 test sentence acc 0.1305147111415863


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 72 batch id 1 loss 0.5374723672866821 train token acc 0.5824175970628858 train sentence acc 0.25
epoch 72 batch id 51 loss 0.30181390047073364 train token acc 0.6911212576589748 train sentence acc 0.2346813827753067
epoch 72 batch id 101 loss 0.33322209119796753 train token acc 0.6918940759001134 train sentence acc 0.2413366287946701
epoch 72 batch id 151 loss 0.5148648023605347 train token acc 0.6917450797785727 train sentence acc 0.23965232074260712
epoch 72 batch id 201 loss 0.42241960763931274 train token acc 0.6904662830357796 train sentence acc 0.2431592047214508

epoch 72 train token acc 0.692070244093315 train sentence acc 0.24186642467975616


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 72 test token acc 0.5757833428900031 test sentence acc 0.13419117033481598


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 73 batch id 1 loss 0.5173515677452087 train token acc 0.5777080208063126 train sentence acc 0.25
epoch 73 batch id 51 loss 0.27540484070777893 train token acc 0.6985403877231019 train sentence acc 0.2506127655506134
epoch 73 batch id 101 loss 0.3183104395866394 train token acc 0.6975356528897411 train sentence acc 0.24969059228897095
epoch 73 batch id 151 loss 0.5198671817779541 train token acc 0.7005879082393197 train sentence acc 0.24586093425750732
epoch 73 batch id 201 loss 0.3773671090602875 train token acc 0.6985078476148134 train sentence acc 0.24611318111419678

epoch 73 train token acc 0.6991750110944385 train sentence acc 0.24619482457637787


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 73 test token acc 0.5675301522988936 test sentence acc 0.13694852590560913


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 74 batch id 1 loss 0.5267577767372131 train token acc 0.5808477383106947 train sentence acc 0.25
epoch 74 batch id 51 loss 0.25338155031204224 train token acc 0.7001488162638327 train sentence acc 0.25
epoch 74 batch id 101 loss 0.30570027232170105 train token acc 0.6973791011761283 train sentence acc 0.25309404730796814
epoch 74 batch id 151 loss 0.5125669836997986 train token acc 0.6982751415821952 train sentence acc 0.2512417137622833
epoch 74 batch id 201 loss 0.373234361410141 train token acc 0.6963085542324078 train sentence acc 0.25062188506126404

epoch 74 train token acc 0.6976499020618484 train sentence acc 0.25095129013061523


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 74 test token acc 0.5742879661265761 test sentence acc 0.13878676295280457


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 75 batch id 1 loss 0.5368293523788452 train token acc 0.5510204220190644 train sentence acc 0.25
epoch 75 batch id 51 loss 0.24940690398216248 train token acc 0.7013770093977013 train sentence acc 0.2567402124404907
epoch 75 batch id 101 loss 0.3075416386127472 train token acc 0.7018420346447792 train sentence acc 0.26361384987831116
epoch 75 batch id 151 loss 0.5030395984649658 train token acc 0.7022387181826565 train sentence acc 0.25434601306915283
epoch 97 batch id 151 loss 0.4619581997394562 train token acc 0.7484692807096543 train sentence acc 0.3402317762374878
epoch 97 batch id 201 loss 0.36323288083076477 train token acc 0.7433481585432483 train sentence acc 0.33597636222839355

epoch 97 train token acc 0.7439410749591088 train sentence acc 0.3338565528392792


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 97 test token acc 0.5676618374315748 test sentence acc 0.15808823704719543


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 98 batch id 1 loss 0.48134171962738037 train token acc 0.5981161845847964 train sentence acc 0.3125
epoch 98 batch id 51 loss 0.2233792394399643 train token acc 0.7406488669597927 train sentence acc 0.3327206075191498
epoch 98 batch id 101 loss 0.2716282606124878 train token acc 0.7473946173945073 train sentence acc 0.3477722704410553
epoch 98 batch id 151 loss 0.41856691241264343 train token acc 0.7489205740796192 train sentence acc 0.3406457006931305
epoch 98 batch id 201 loss 0.3030010759830475 train token acc 0.7471896926739221 train sentence acc 0.3417288362979889

epoch 98 train token acc 0.7485714328255224 train sentence acc 0.34132418036460876


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 98 test token acc 0.5670964780044468 test sentence acc 0.15441176295280457


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 99 batch id 1 loss 0.4490870535373688 train token acc 0.6248037833720446 train sentence acc 0.3125
epoch 99 batch id 51 loss 0.2178868055343628 train token acc 0.7524268041757465 train sentence acc 0.34375
epoch 99 batch id 101 loss 0.25919997692108154 train token acc 0.7509124419896283 train sentence acc 0.345915824174881
epoch 99 batch id 151 loss 0.4295506477355957 train token acc 0.7527219130301429 train sentence acc 0.34271523356437683
epoch 99 batch id 201 loss 0.32312989234924316 train token acc 0.7487695157899757 train sentence acc 0.33908581733703613

epoch 99 train token acc 0.7501383994484839 train sentence acc 0.33885082602500916


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 99 test token acc 0.5642279920661274 test sentence acc 0.15349264442920685


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 100 batch id 1 loss 0.46425169706344604 train token acc 0.6232339246198535 train sentence acc 0.375
epoch 100 batch id 51 loss 0.21254107356071472 train token acc 0.7527137682704693 train sentence acc 0.36213237047195435
epoch 100 batch id 101 loss 0.2599426805973053 train token acc 0.7525787546580639 train sentence acc 0.3582920730113983
epoch 100 batch id 151 loss 0.42803841829299927 train token acc 0.754706150519088 train sentence acc 0.3553394079208374
epoch 100 batch id 201 loss 0.32765597105026245 train token acc 0.7523692503481159 train sentence acc 0.3523010015487671

epoch 100 train token acc 0.7542017457839825 train sentence acc 0.3514554500579834


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 100 test token acc 0.5733664900694481 test sentence acc 0.16544117033481598


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 101 batch id 1 loss 0.4444153904914856 train token acc 0.6467818059027195 train sentence acc 0.40625
epoch 101 batch id 51 loss 0.21937309205532074 train token acc 0.7541881884703887 train sentence acc 0.3658088445663452
epoch 101 batch id 101 loss 0.25786063075065613 train token acc 0.758523697028914 train sentence acc 0.36324256658554077
epoch 101 batch id 151 loss 0.4157450795173645 train token acc 0.7603147127842901 train sentence acc 0.36051324009895325
epoch 101 batch id 201 loss 0.30168992280960083 train token acc 0.7580039401374182 train sentence acc 0.3606964945793152

epoch 101 train token acc 0.759069441680554 train sentence acc 0.3591609299182892


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 101 test token acc 0.5788400542258065 test sentence acc 0.16268382966518402


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 102 batch id 1 loss 0.4634250998497009 train token acc 0.635792794637382 train sentence acc 0.34375
epoch 102 batch id 51 loss 0.25046926736831665 train token acc 0.7561832686438791 train sentence acc 0.3584558963775635
epoch 102 batch id 101 loss 0.25624316930770874 train token acc 0.7571896794805358 train sentence acc 0.35674503445625305
epoch 102 batch id 151 loss 0.40818125009536743 train token acc 0.7585133728247432 train sentence acc 0.3495447039604187
epoch 102 batch id 201 loss 0.28771889209747314 train token acc 0.7562022522347863 train sentence acc 0.35090172290802

epoch 102 train token acc 0.7574033044320084 train sentence acc 0.349743127822876


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 102 test token acc 0.5847038694465643 test sentence acc 0.1617647111415863


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 103 batch id 1 loss 0.5094813108444214 train token acc 0.6138147721067071 train sentence acc 0.3125
epoch 103 batch id 51 loss 0.23015214502811432 train token acc 0.7544145923651134 train sentence acc 0.356004923582077
epoch 103 batch id 101 loss 0.25985899567604065 train token acc 0.7505111636600854 train sentence acc 0.3505569398403168
epoch 103 batch id 151 loss 0.4414800703525543 train token acc 0.751495172830903 train sentence acc 0.34850993752479553
epoch 103 batch id 201 loss 0.37101632356643677 train token acc 0.7502084700535492 train sentence acc 0.3490360677242279

epoch 103 train token acc 0.7525057001214807 train sentence acc 0.3498382866382599


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 103 test token acc 0.5867953178650864 test sentence acc 0.1617647111415863


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 104 batch id 1 loss 0.43763306736946106 train token acc 0.6624803934246302 train sentence acc 0.40625
epoch 104 batch id 51 loss 0.2169220745563507 train token acc 0.7573472626721852 train sentence acc 0.3694853186607361
epoch 104 batch id 101 loss 0.2630763649940491 train token acc 0.7570771436313039 train sentence acc 0.37407177686691284
epoch 104 batch id 151 loss 0.41715216636657715 train token acc 0.7589827310263074 train sentence acc 0.3696191906929016
epoch 104 batch id 201 loss 0.32772114872932434 train token acc 0.7557363279108236 train sentence acc 0.3680037260055542

epoch 104 train token acc 0.7553664066480072 train sentence acc 0.3678177297115326


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 104 test token acc 0.5859086184269365 test sentence acc 0.16819852590560913


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 105 batch id 1 loss 0.44686052203178406 train token acc 0.6483516646549106 train sentence acc 0.34375
epoch 105 batch id 51 loss 0.20839206874370575 train token acc 0.7613323353934011 train sentence acc 0.375
epoch 105 batch id 101 loss 0.24705913662910461 train token acc 0.7629204939884862 train sentence acc 0.37283414602279663
epoch 127 batch id 101 loss 0.22367867827415466 train token acc 0.8032352421720891 train sentence acc 0.4699876308441162
epoch 127 batch id 151 loss 0.37992924451828003 train token acc 0.8068116990701351 train sentence acc 0.4753725230693817
epoch 127 batch id 201 loss 0.2618243992328644 train token acc 0.8050309954563257 train sentence acc 0.47481343150138855

epoch 127 train token acc 0.8051029912006858 train sentence acc 0.47379183769226074


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 127 test token acc 0.5864352085524897 test sentence acc 0.18382352590560913


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 128 batch id 1 loss 0.4006378948688507 train token acc 0.6923077097162604 train sentence acc 0.34375
epoch 128 batch id 51 loss 0.1852956861257553 train token acc 0.8022374220890924 train sentence acc 0.48161765933036804
epoch 128 batch id 101 loss 0.2288648784160614 train token acc 0.8056835566075647 train sentence acc 0.4780321717262268
epoch 128 batch id 151 loss 0.37765389680862427 train token acc 0.8089184676580723 train sentence acc 0.47744205594062805
epoch 128 batch id 201 loss 0.24939760565757751 train token acc 0.8067736999324027 train sentence acc 0.47652360796928406

epoch 128 train token acc 0.8074882415917459 train sentence acc 0.47702622413635254


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 128 test token acc 0.5804028059664073 test sentence acc 0.18658088147640228


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 129 batch id 1 loss 0.3911092281341553 train token acc 0.7080062972381711 train sentence acc 0.46875
epoch 129 batch id 51 loss 0.18511497974395752 train token acc 0.8102843925019032 train sentence acc 0.5024510025978088
epoch 129 batch id 101 loss 0.22939306497573853 train token acc 0.8075509357489737 train sentence acc 0.4941212832927704
epoch 129 batch id 151 loss 0.3696902096271515 train token acc 0.8090261286155426 train sentence acc 0.4857201874256134
epoch 129 batch id 201 loss 0.2603270411491394 train token acc 0.8066785934361494 train sentence acc 0.4844527244567871

epoch 129 train token acc 0.8082753183770942 train sentence acc 0.4859684109687805


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 129 test token acc 0.5831325037493919 test sentence acc 0.1930147111415863


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 130 batch id 1 loss 0.3633144199848175 train token acc 0.728414461016655 train sentence acc 0.40625
epoch 130 batch id 51 loss 0.18156059086322784 train token acc 0.8132191914151988 train sentence acc 0.5061274766921997
epoch 130 batch id 101 loss 0.2168983817100525 train token acc 0.8101986291550911 train sentence acc 0.49969059228897095
epoch 130 batch id 151 loss 0.3535321354866028 train token acc 0.8103922119839055 train sentence acc 0.49317052960395813
epoch 130 batch id 201 loss 0.2622918486595154 train token acc 0.8096622523657088 train sentence acc 0.4895833134651184

epoch 130 train token acc 0.8109813690250399 train sentence acc 0.4889649748802185


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 130 test token acc 0.5700705611996133 test sentence acc 0.19025735557079315


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 131 batch id 1 loss 0.3843962848186493 train token acc 0.7017268622294068 train sentence acc 0.40625
epoch 131 batch id 51 loss 0.1803339421749115 train token acc 0.8136271133439104 train sentence acc 0.5128676891326904
epoch 131 batch id 101 loss 0.21995152533054352 train token acc 0.8096186533561068 train sentence acc 0.504641056060791
epoch 131 batch id 151 loss 0.3657452464103699 train token acc 0.8111060932625567 train sentence acc 0.49524006247520447
epoch 131 batch id 201 loss 0.25673139095306396 train token acc 0.8092279155172213 train sentence acc 0.49238184094429016

epoch 131 train token acc 0.8109623166221113 train sentence acc 0.49200910329818726


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 131 test token acc 0.5789600016227375 test sentence acc 0.18106617033481598


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 132 batch id 1 loss 0.3775487542152405 train token acc 0.7080062972381711 train sentence acc 0.4375
epoch 132 batch id 51 loss 0.1932096779346466 train token acc 0.8153745464857339 train sentence acc 0.5128676891326904
epoch 132 batch id 101 loss 0.21998143196105957 train token acc 0.8147562523660629 train sentence acc 0.5139232873916626
epoch 132 batch id 151 loss 0.39603132009506226 train token acc 0.8153825946975598 train sentence acc 0.5051738619804382
epoch 132 batch id 201 loss 0.25284668803215027 train token acc 0.8123167992328567 train sentence acc 0.4998445212841034

epoch 132 train token acc 0.8136067561253472 train sentence acc 0.4990962743759155


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 132 test token acc 0.5847789751235669 test sentence acc 0.18841911852359772


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 133 batch id 1 loss 0.3619110882282257 train token acc 0.7331240372732282 train sentence acc 0.4375
epoch 133 batch id 51 loss 0.1905229389667511 train token acc 0.8161164823983449 train sentence acc 0.5085784196853638
epoch 133 batch id 101 loss 0.21614187955856323 train token acc 0.817864073661737 train sentence acc 0.5111386179924011
epoch 133 batch id 151 loss 0.3910374045372009 train token acc 0.8168266952047063 train sentence acc 0.5053808093070984
epoch 133 batch id 201 loss 0.29745495319366455 train token acc 0.8132615252531166 train sentence acc 0.49751242995262146

epoch 133 train token acc 0.813697832898071 train sentence acc 0.497526615858078


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 133 test token acc 0.5687481578686001 test sentence acc 0.18933823704719543


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 134 batch id 1 loss 0.39162367582321167 train token acc 0.7032967209815979 train sentence acc 0.375
epoch 134 batch id 51 loss 0.18010158836841583 train token acc 0.8075435991357902 train sentence acc 0.5012255311012268
epoch 134 batch id 101 loss 0.21559208631515503 train token acc 0.8142268961343896 train sentence acc 0.510519802570343
epoch 134 batch id 151 loss 0.3864664137363434 train token acc 0.8156876805275431 train sentence acc 0.5039321184158325
epoch 134 batch id 201 loss 0.26658350229263306 train token acc 0.8124265021329459 train sentence acc 0.4968905448913574

epoch 134 train token acc 0.813496668679006 train sentence acc 0.4968131482601166


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


epoch 134 test token acc 0.5851095332798328 test sentence acc 0.20036764442920685


HBox(children=(FloatProgress(value=0.0, max=219.0), HTML(value='')))

epoch 135 batch id 1 loss 0.40523356199264526 train token acc 0.6875981334596872 train sentence acc 0.40625


KeyboardInterrupt: 

In [114]:
bertmodel.embeddings

BertEmbeddings(
  (word_embeddings): Embedding(8002, 768, padding_idx=1)
  (position_embeddings): Embedding(512, 768)
  (token_type_embeddings): Embedding(2, 768)
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

# Restore code

In [139]:
def generate_sentence(input):
    return ' '.join([vocab.idx_to_token[id] for id in input]).replace('[CLS]', '').replace('[PAD]', '').replace('[SEP]', '').replace(' ', '').replace('▁', ' ').strip().replace('= =', '==').replace(' . ', '.')


In [166]:
data_test[problem_id][4]

array(13, dtype=int32)

In [214]:
problem_id = 400
print(generate_sentence(data_test[problem_id][0]))
print(generate_sentence(data_test[problem_id][3]))

out = model(torch.tensor(data_test[problem_id][0]).unsqueeze(0).to(device), torch.tensor(data_test[problem_id][1]).unsqueeze(0).to(device), 
            torch.tensor(data_test[problem_id][2]).unsqueeze(0).to(device), torch.tensor(data_test[problem_id][3]).unsqueeze(1).to(device), 
            torch.tensor(data_test[problem_id][4]).unsqueeze(0).to(device))
predicted_solution = generate_sentence(out.argmax(-1).squeeze(0)[1:])

print(predicted_solution)

석진와 동생의 나이를 합하면 36이고 , 두 사람의 나이 차는 31입니다.석진의 동생은 몇 살일까요 ?
( 36 - 31 ) / 2
( 31 - 31 ) / 2
