In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# import spacy

import random
import math
import time

import collections
import pickle
import os
import tensorflow as tf
from torch.utils.data import DataLoader, Dataset, SequentialSampler, RandomSampler
import torch.nn.utils.rnn as rnn_utils

In [2]:
data_dir = './data/'
proj_dir = './'

In [3]:
class Vocabulary(object):

    def __init__(self):
        self.word2idx = {'<SOS>': 0, '<EOS>': 1, '<PAD>': 2, '<UNK>': 3}
        self.idx2word = {0: '<SOS>', 1: '<EOS>', 2: '<PAD>', 3: '<UNK>'}
        self.num_words = 4
        self.OOV_list = []
        self.OOV=0
#         for i in range(10):
#             word = str(i)
#             self.word2idx[word] = self.num_words
#             self.idx2word[self.num_words] = word
#             self.num_words += 1           

    def build_vocab(self, data_path):
        """Construct the relation between words and indices"""
        i=0
        with open(data_path, 'r', encoding='utf-8') as dataset:
            for words in dataset:
                parts = words.strip('\n').strip().split('\t')
                assert len(parts) == 2, 'Not 2 parts'+words
                words = parts[0].split()
                if i<5:
                    print(words)
                i+=1
                for word in words:
                    if word not in self.word2idx:
                        self.word2idx[word] = self.num_words
                        self.idx2word[self.num_words] = word
                        self.num_words += 1

    def sequence_to_indices(self, sequence, add_eos=False, add_sos=False):
        """Transform a char sequence to index sequence
            :param sequence: a string composed with chars
            :param add_eos: if true, add the <EOS> tag at the end of given sentence
            :param add_sos: if true, add the <SOS> tag at the beginning of given sentence
        """
        index_sequence = [self.word2idx['<SOS>']] if add_sos else []

        for word in sequence:
            if word not in self.word2idx:
                self.OOV+=1
                self.OOV_list.append(word)
                index_sequence.append((self.word2idx['<UNK>']))
            else:
                index_sequence.append(self.word2idx[word])

        if add_eos:
            index_sequence.append(self.word2idx['<EOS>'])

        return index_sequence

    def indices_to_sequence(self, indices,print_signal=False):
        """Transform a list of indices
            :param indices: a list
        """
        sequence = []
        for idx in indices:
            word = self.idx2word[idx]
            if word == "<EOS>" and (not print_signal):
                sequence.append(word)
                break
            elif word == '<PAD>':
                break
            else:
                sequence.append(word)
        return sequence

    def __str__(self):
        str = "Vocab information:\n"
        for idx, word in self.idx2word.items():
            str += "word: %s Index: %d\n" % (word, idx)
        return str

In [4]:
class MyData(Dataset):
    def __init__(self, path,corpus_path=None,vocab=None):
        self.src_indices_seq = []
        self.trg_indices_seq = []
        if vocab == None:
            self.vocab = Vocabulary()
            self.vocab.build_vocab(corpus_path)
        else:
            self.vocab = vocab
            self.vocab.build_vocab(corpus_path)
        self.PAD_ID = self.vocab.word2idx["<PAD>"]
        self.SOS_ID = self.vocab.word2idx["<SOS>"]
        self.vocab_size = self.vocab.num_words
        self.max_length = -1
        self.raw_src_sent_data = [] #src sentence
        self.raw_trg_sent_data = [] #target sentence
#         self.max_length = self.vocab.max_length
        c=0
        file=open(path, 'r', encoding='utf-8')
        for line in file:
            pparts = line.strip('\n').split('\t')
            assert len(pparts) == 2, 'Error!!'
            words,trg_sentence = pparts
            trg_sentence = trg_sentence.split()
            self.raw_trg_sent_data.append(trg_sentence)
            if trg_sentence[0]!='<SOS>':
                print('trg:',trg_sentence)
            if self.max_length < len(trg_sentence):
                self.max_length = len(trg_sentence)
            src_sent = words.strip('\n').split()
            self.raw_src_sent_data.append(src_sent)
            if src_sent[0]!='<SOS>':
                print('src:',src_sent)
            if self.max_length < len(src_sent):
                self.max_length = len(src_sent)
        assert len(self.raw_trg_sent_data)==len(self.raw_src_sent_data),'Error 2!'+line
        for i,trg_sent in enumerate(self.raw_trg_sent_data):
            indices_seq = self.vocab.sequence_to_indices(self.raw_src_sent_data[i], add_eos=False)
            self.src_indices_seq.append(torch.tensor(indices_seq))
            indices_seq = self.vocab.sequence_to_indices(trg_sent, add_eos=False)
            self.trg_indices_seq.append(torch.tensor(indices_seq))
        self.src_indices_seq = rnn_utils.pad_sequence(self.src_indices_seq, batch_first=True, padding_value=self.PAD_ID)
        self.trg_indices_seq = rnn_utils.pad_sequence(self.trg_indices_seq, batch_first=True, padding_value=self.PAD_ID)
        print("## J: Total examples: %d, unique words:%d, Max seq length: %d"%(len(self.src_indices_seq),self.vocab_size,self.max_length))
    # def collate_fn(data):
    #     data = rnn_utils.pad_sequence(data, batch_first=True, padding_value=0)
    #     return data
    def __len__(self):
        return len(self.src_indices_seq)

    def __getitem__(self, idx):
        return torch.tensor(self.src_indices_seq[idx]), torch.tensor(self.trg_indices_seq[idx])


In [5]:
train_set = MyData(data_dir+'final_data/train.txt',corpus_path=data_dir+'final_data/train.txt')
test_set = MyData(data_dir+'final_data/val.txt',vocab=train_set.vocab,corpus_path=data_dir+'final_data/val.txt')
# test_set = TestData('./datatest/'+'hw2.1-1_testing_data.txt',max_length=train_set.max_length,vocab=train_set.vocab)

['<SOS>', 'you', 'ca', "n't", 'have', 'that', '<EOS>', '0', '1', '2']
['<SOS>', 'i', 'told', 'you', 'i', 'did', "n't", 'want', 'to', 'go', 'to', 'boston', '<EOS>', '4', '5', '1']
['<SOS>', 'that', "'s", 'what', 'i', 'said', 'to', 'tom', '<EOS>', '2', '0', '-2']
['<SOS>', 'tom', 'has', 'done', 'well', 'here', '<EOS>', '3', '1', '-1']
['<SOS>', 'tom', 'just', 'told', 'me', 'what', 'to', 'do', '<EOS>', '6', '-1', '-1']
## J: Total examples: 98040, unique words:198, Max seq length: 19
['<SOS>', 'tom', 'said', 'he', 'would', "n't", 'do', 'that', ',', 'so', 'i', 'asked', 'mary', 'to', '<EOS>', '4', '7', '2']
['<SOS>', 'tom', 'said', 'he', 'would', "n't", 'do', 'that', ',', 'so', 'i', 'asked', 'mary', 'to', '<EOS>', '0', '-1', '-2']
['<SOS>', 'tom', 'said', 'he', 'would', "n't", 'do', 'that', ',', 'so', 'i', 'asked', 'mary', 'to', '<EOS>', '11', '5', '-2']
['<SOS>', 'tom', 'said', 'he', 'would', "n't", 'do', 'that', ',', 'so', 'i', 'asked', 'mary', 'to', '<EOS>', '1', '10', '2']
['<SOS>', 'to

In [6]:
dataset=train_set
for i in range(len(dataset.src_indices_seq)):
    print('----',i,'-----')
    print('data:',dataset.vocab.indices_to_sequence(dataset.src_indices_seq[i].numpy(),print_signal=True))
    print('trg:',dataset.vocab.indices_to_sequence(dataset.trg_indices_seq[i].numpy(),print_signal=True))
    if i>100:
        break

---- 0 -----
data: ['<SOS>', 'you', 'ca', "n't", 'have', 'that', '<EOS>', '0', '1', '2']
trg: ['<SOS>', 'you', '<EOS>']
---- 1 -----
data: ['<SOS>', 'i', 'told', 'you', 'i', 'did', "n't", 'want', 'to', 'go', 'to', 'boston', '<EOS>', '4', '5', '1']
trg: ['<SOS>', 'did', '<EOS>']
---- 2 -----
data: ['<SOS>', 'that', "'s", 'what', 'i', 'said', 'to', 'tom', '<EOS>', '2', '0', '-2']
trg: ['<SOS>', 'what', '<EOS>']
---- 3 -----
data: ['<SOS>', 'tom', 'has', 'done', 'well', 'here', '<EOS>', '3', '1', '-1']
trg: ['<SOS>', 'well', 'done', '<EOS>']
---- 4 -----
data: ['<SOS>', 'tom', 'just', 'told', 'me', 'what', 'to', 'do', '<EOS>', '6', '-1', '-1']
trg: ['<SOS>', 'do', 'to', 'what', 'me', 'told', 'just', 'tom', '<EOS>']
---- 5 -----
data: ['<SOS>', 'she', 'told', 'him', 'that', 'he', 'was', 'right', '<EOS>', '1', '5', '1']
trg: ['<SOS>', 'told', 'him', 'that', 'he', '<EOS>']
---- 6 -----
data: ['<SOS>', 'is', "n't", 'tom', 'with', 'you', '<EOS>', '0', '2', '2']
trg: ['<SOS>', 'is', '<EOS>']
--

In [6]:
SEED = 1234

random.seed(SEED)
torch.manual_seed(SEED)
# torch.backends.cudnn.deterministic = True

<torch._C.Generator at 0x7f671e1b3fd0>

In [6]:
device = torch.device('cuda:1')

In [7]:
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(input_dim, emb_dim)
        
        # self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)
        self.gru = nn.GRU(emb_dim, hid_dim, n_layers,batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.layerN = nn.LayerNorm(emb_dim)
        
    def forward(self, src):
        
        #src = [src sent len, batch size]
        
        embedded = self.dropout(self.layerN((self.embedding(src))))
#         embedded = self.dropout(src)
        
        #embedded = [src sent len, batch size, emb dim]
        
        # outputs, (hidden, cell) = self.rnn(embedded)
        outputs, hidden = self.gru(embedded)
        
        #outputs = [src sent len, batch size, hid dim * n directions]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]
        
        #outputs are always from the top hidden layer
        
        return hidden

In [8]:
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        
        self.output_dim = output_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(output_dim, emb_dim)
        
        # self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)
        self.gru = nn.GRU(emb_dim, hid_dim, n_layers,batch_first=True)
        
        self.out = nn.Linear(hid_dim, output_dim)
        
        self.layerN_emb = nn.LayerNorm(emb_dim)
        self.layerN = nn.LayerNorm(hid_dim)
        self.dropout = nn.Dropout(dropout)
        self.dp_dense = nn.Dropout(0.1)
        
    def forward(self, input, hidden):
        
        #input = [batch size]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]
        
        #n directions in the decoder will both always be 1, therefore:
        #hidden = [n layers, batch size, hid dim]
        #context = [n layers, batch size, hid dim]
        
        input = input.unsqueeze(1)
        
#         #input = [1, batch size]
        
        embedded = self.dropout(self.layerN_emb(self.embedding(input)))
#         embedded = self.dropout(input)
        
        #embedded = [1, batch size, emb dim]
                
        # output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        output, hidden = self.gru(embedded,hidden)
        
        #output = [sent len, batch size, hid dim * n directions]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]
        
        #sent len and n directions will always be 1 in the decoder, therefore:
        #output = [1, batch size, hid dim]
        #hidden = [n layers, batch size, hid dim]
        #cell = [n layers, batch size, hid dim]
        prediction = self.layerN(output.squeeze(1))
        prediction = self.out(self.dp_dense(prediction))
        
        #prediction = [batch size, output dim]
        
        return prediction, hidden
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
#         self.embedding = nn.Embedding(output_dim, emb_dim)
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        
        assert encoder.hid_dim == decoder.hid_dim, \
            "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"
        
    def forward(self, src, trg, teacher_forcing_ratio = 0.5):
        
        #src = [src sent len, batch size]
        #trg = [trg sent len, batch size]
        #teacher_forcing_ratio is probability to use teacher forcing
        #e.g. if teacher_forcing_ratio is 0.75 we use ground-truth inputs 75% of the time
        
        batch_size = trg.shape[0]
        max_len = trg.shape[1]
        trg_vocab_size = self.decoder.output_dim
        
        #tensor to store decoder outputs
#         outputs = torch.zeros(max_len, batch_size, trg_vocab_size).to(self.device)
        outputs = torch.zeros(batch_size, max_len, trg_vocab_size).to(self.device)
        outputs_idx = torch.zeros(batch_size, max_len-1)

        #last hidden state of the encoder is used as the initial hidden state of the decoder
        hidden = self.encoder(src)
#         embedded = self.embedding(src)
#         hidden = self.encoder(embedded)
        
        #first input to the decoder is the <sos> tokens
        input = trg[:,0]
        
        for t in range(1, max_len):
            
            #insert input token embedding, previous hidden and previous cell states
            #receive output tensor (predictions) and new hidden and cell states
            output, hidden= self.decoder(input, hidden)
#             input = input.unsqueeze(1)
#             embedded = self.embedding(input)
#             output, hidden= self.decoder(embedded, hidden)
            
            #place predictions in a tensor holding predictions for each token
            outputs[:,t,:] = output
            
            #decide if we are going to use teacher forcing or not
            teacher_force = random.random() < teacher_forcing_ratio
            
            #get the highest predicted token from our predictions
            top1 = output.argmax(1)
            outputs_idx[:,t-1] = top1
            
            #if teacher forcing, use actual next token as next input
            #if not, use predicted token
            input = trg[:,t] if teacher_force else top1
        
        return outputs,outputs_idx

In [82]:
INPUT_DIM = train_set.vocab_size
OUTPUT_DIM = train_set.vocab_size
ENC_EMB_DIM = 64
DEC_EMB_DIM = 64
HID_DIM = 256
N_LAYERS = 1
ENC_DROPOUT = 0.1
DEC_DROPOUT = 0.1

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)
model = Seq2Seq(enc, dec, device).to(device)

# model = torch.load(open('./output_final/dataset2/model_8458.pkl','rb'))
# model = model.to(device)

In [83]:
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        
model.apply(init_weights)

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(198, 64)
    (gru): GRU(64, 256, batch_first=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (layerN): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(198, 64)
    (gru): GRU(64, 256, batch_first=True)
    (out): Linear(in_features=256, out_features=198, bias=True)
    (layerN_emb): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (layerN): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (dp_dense): Dropout(p=0.1, inplace=False)
  )
)

In [84]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 571,590 trainable parameters


In [85]:
optimizer = optim.Adam(model.parameters())

In [9]:
criterion = nn.CrossEntropyLoss(ignore_index = train_set.PAD_ID)

In [10]:
from tqdm import tqdm, trange
def train(model, optimizer, criterion, clip):
    model.train()
    
    epoch_loss = 0
    batch_size = 128 #128
    data_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    trange = tqdm(enumerate(data_loader), total=len(data_loader),desc='Train')
    for step,batch in trange:
        # print(step)
        src = batch[0]
        trg = batch[1]
        src = src.to(device)
        trg = trg.to(device)       
        optimizer.zero_grad()
        # print(trg.size())
        output,output_idx = model(src, trg,teacher_forcing_ratio=0.5)
        
        #trg = [trg sent len, batch size]
        #output = [trg sent len, batch size, output dim]
        
        output = output[:,1:].reshape(-1, output.shape[-1])
        trg = trg[:,1:].reshape(-1)
        
        #trg = [(trg sent len - 1) * batch size]
        #output = [(trg sent len - 1) * batch size, output dim]
        
        loss = criterion(output, trg)
        
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
    return epoch_loss / len(data_loader)



In [11]:
def decode_string(output,dataset):
    results = []
    trg = []
    correct = 0
    for i,seq in enumerate(output):
        results.append(' '.join(['<SOS>']+dataset.vocab.indices_to_sequence(seq)))
        trg.append(' '.join(dataset.raw_trg_sent_data[i]))
        if results[-1] == trg[-1]:
            correct+=1
        
    print('-----exm-----')
    print(results[:10])
    print(trg[:10])
    return results, correct/len(output)

def evaluate(model,criterion,dataset):
    prediction = []
    model.eval()
    
    epoch_loss = 0
    data_loader = DataLoader(dataset, batch_size=512, shuffle=False)
    trange = tqdm(enumerate(data_loader), total=len(data_loader),desc='valid')
    for step, batch in trange:
        src = batch[0]
        trg = batch[1]
        src = src.to(device)
        trg = trg.to(device)
        with torch.no_grad():
            output,output_idx = model(src, trg, 0) #turn off teacher forcing
            prediction.append(output_idx.to('cpu'))

            #trg = [trg sent len, batch size]
            #output = [trg sent len, batch size, output dim]

            output = output[:,1:].reshape(-1, output.shape[-1])
            trg = trg[:,1:].reshape(-1)

            #trg = [(trg sent len - 1) * batch size]
            #output = [(trg sent len - 1) * batch size, output dim]

            loss = criterion(output, trg)

            epoch_loss += loss.item()
    prediction = torch.cat(prediction).detach().numpy().astype(int)
    prediction,acc = decode_string(prediction,dataset)
    return epoch_loss / len(data_loader),prediction,acc

In [89]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [90]:
N_EPOCHS = 40
CLIP = 1

best_valid_loss = float('inf')
best_valid_acc = -1
train_lm_score = -1
history = {'train':[],'val':[]}
for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_loss = train(model, optimizer, criterion, CLIP)
    train_loss_2,predictions,acc = evaluate(model, criterion,train_set)
    history['train'].append({'loss':train_loss,'acc':acc})
    valid_loss,valid_predict,valid_acc = evaluate(model, criterion,test_set)
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        torch.save(model, './output_final/dataset2/model_128_dp5_dense_LN2.pkl') 
    history['val'].append({'loss':valid_loss,'acc':valid_acc})
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
#     print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f} | Train acc: {acc:.3f}| val LM: {train_lm_score:.3f}')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f} | val acc: {valid_acc:.3f} ')

Train: 100%|██████████| 766/766 [00:52<00:00, 14.73it/s]
valid: 100%|██████████| 192/192 [00:07<00:00, 24.56it/s]
valid:   6%|▋         | 3/48 [00:00<00:01, 24.58it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> want <EOS>', '<SOS> i <EOS>', '<SOS> in is <EOS>', "<SOS> that me me me told tom 's <EOS>", '<SOS> was he was that <EOS>', '<SOS> tom <EOS>', "<SOS> n't <EOS>", "<SOS> here got 've 've <EOS>", "<SOS> n't <EOS>"]
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 25.00it/s]
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
Train:   0%|          | 2/766 [00:00<00:57, 13.28it/s]

-----exm-----
["<SOS> he n't <EOS>", '<SOS> tom <EOS>', "<SOS> that n't n't <EOS>", "<SOS> did n't that that <EOS>", "<SOS> he n't <EOS>", "<SOS> that mary he he tom n't <EOS>", "<SOS> 'll get do <EOS>", '<SOS> do <EOS>', '<SOS> i have to <EOS>', '<SOS> do to <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 01 | Time: 1m 2s
	Train Loss: 3.093 | Train PPL:  22.034
	 Val. Loss: 2.142 |  Val. PPL:   8.519 | val acc: 0.144 


Train: 100%|██████████| 766/766 [00:52<00:00, 14.67it/s]
valid: 100%|██████████| 192/192 [00:08<00:00, 22.53it/s]
valid:   6%|▋         | 3/48 [00:00<00:02, 21.87it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> i <EOS>', '<SOS> what <EOS>', '<SOS> done done <EOS>', '<SOS> do to what me me told i <EOS>', '<SOS> told mary that he <EOS>', '<SOS> tom <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:02<00:00, 23.75it/s]
Train:   0%|          | 0/766 [00:00<?, ?it/s]

-----exm-----
['<SOS> that that <EOS>', '<SOS> tom <EOS>', '<SOS> that that , <EOS>', '<SOS> said that do that , <EOS>', '<SOS> that do <EOS>', '<SOS> that that that he would tom <EOS>', '<SOS> much more i <EOS>', '<SOS> i <EOS>', '<SOS> how more do <EOS>', '<SOS> i more <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 02 | Time: 1m 3s
	Train Loss: 1.546 | Train PPL:   4.691
	 Val. Loss: 0.994 |  Val. PPL:   2.701 | val acc: 0.461 


Train: 100%|██████████| 766/766 [00:52<00:00, 14.60it/s]
valid: 100%|██████████| 192/192 [00:08<00:00, 23.93it/s]
valid:   6%|▋         | 3/48 [00:00<00:02, 22.43it/s]

-----exm-----
['<SOS> you <EOS>', "<SOS> n't <EOS>", '<SOS> what <EOS>', '<SOS> done done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told me that he <EOS>', '<SOS> is <EOS>', '<SOS> could <EOS>', "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:02<00:00, 22.18it/s]
Train:   0%|          | 0/766 [00:00<?, ?it/s]

-----exm-----
['<SOS> do that <EOS>', '<SOS> tom <EOS>', '<SOS> , but i <EOS>', '<SOS> said would do , but <EOS>', '<SOS> that , <EOS>', "<SOS> had that that n't he tom <EOS>", '<SOS> much more i <EOS>', '<SOS> more <EOS>', '<SOS> how more i <EOS>', '<SOS> i money <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 03 | Time: 1m 3s
	Train Loss: 0.780 | Train PPL:   2.182
	 Val. Loss: 0.556 |  Val. PPL:   1.744 | val acc: 0.677 


Train: 100%|██████████| 766/766 [00:45<00:00, 16.85it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.10it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 30.52it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told me that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.28it/s]
Train:   0%|          | 0/766 [00:00<?, ?it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked i that <EOS>', '<SOS> said would do , but <EOS>', '<SOS> that , <EOS>', '<SOS> had but that do would tom <EOS>', '<SOS> much more have <EOS>', '<SOS> do <EOS>', '<SOS> how more time <EOS>', '<SOS> i money <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 04 | Time: 0m 53s
	Train Loss: 0.486 | Train PPL:   1.626
	 Val. Loss: 0.371 |  Val. PPL:   1.450 | val acc: 0.770 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.36it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.92it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 33.35it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', '<SOS> could <EOS>', "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 28.83it/s]
Train:   0%|          | 2/766 [00:00<00:42, 18.14it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked i that <EOS>', '<SOS> said would do , so <EOS>', '<SOS> do , <EOS>', "<SOS> he but that n't would tom <EOS>", '<SOS> much more have <EOS>', '<SOS> time <EOS>', '<SOS> how more i <EOS>', '<SOS> i more <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 05 | Time: 0m 45s
	Train Loss: 0.346 | Train PPL:   1.414
	 Val. Loss: 0.294 |  Val. PPL:   1.342 | val acc: 0.819 


Train: 100%|██████████| 766/766 [00:36<00:00, 20.77it/s]
valid: 100%|██████████| 192/192 [00:06<00:00, 31.94it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 33.16it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', '<SOS> could <EOS>', "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.10it/s]
Train:   0%|          | 2/766 [00:00<00:42, 18.09it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked he but <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked he , do would said <EOS>', '<SOS> much more have <EOS>', '<SOS> time <EOS>', '<SOS> how more time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 06 | Time: 0m 44s
	Train Loss: 0.266 | Train PPL:   1.305
	 Val. Loss: 0.245 |  Val. PPL:   1.277 | val acc: 0.844 


Train: 100%|██████████| 766/766 [00:36<00:00, 20.95it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.11it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 31.91it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.09it/s]
Train:   0%|          | 2/766 [00:00<00:41, 18.40it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked he , <EOS>', '<SOS> said would do , so <EOS>', '<SOS> do , <EOS>', '<SOS> asked he , do would tom <EOS>', '<SOS> much more have <EOS>', '<SOS> do <EOS>', '<SOS> how more i <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 07 | Time: 0m 44s
	Train Loss: 0.216 | Train PPL:   1.241
	 Val. Loss: 0.218 |  Val. PPL:   1.244 | val acc: 0.864 


Train: 100%|██████████| 766/766 [00:36<00:00, 20.76it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 33.15it/s]
valid:   0%|          | 0/48 [00:00<?, ?it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 28.39it/s]
Train:   0%|          | 2/766 [00:00<00:42, 17.99it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked i that <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked he , do would tom <EOS>', '<SOS> much money have <EOS>', '<SOS> do <EOS>', '<SOS> how more i <EOS>', '<SOS> i do <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 08 | Time: 0m 44s
	Train Loss: 0.182 | Train PPL:   1.199
	 Val. Loss: 0.199 |  Val. PPL:   1.221 | val acc: 0.873 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.62it/s]
valid: 100%|██████████| 192/192 [00:06<00:00, 31.46it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 30.78it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.22it/s]
Train:   0%|          | 2/766 [00:00<00:41, 18.32it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked i that <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> told but that do would tom <EOS>', '<SOS> much more i <EOS>', '<SOS> do <EOS>', '<SOS> how more i <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 09 | Time: 0m 45s
	Train Loss: 0.157 | Train PPL:   1.170
	 Val. Loss: 0.193 |  Val. PPL:   1.213 | val acc: 0.880 


Train: 100%|██████████| 766/766 [00:36<00:00, 20.77it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.67it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 33.39it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 28.96it/s]
Train:   0%|          | 2/766 [00:00<00:40, 19.09it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked he so <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> told so , do he tom <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more i <EOS>', '<SOS> i more <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 10 | Time: 0m 44s
	Train Loss: 0.135 | Train PPL:   1.145
	 Val. Loss: 0.179 |  Val. PPL:   1.195 | val acc: 0.889 


Train: 100%|██████████| 766/766 [00:38<00:00, 20.13it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.22it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 32.17it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 31.59it/s]
Train:   0%|          | 2/766 [00:00<00:43, 17.44it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> told he so <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> told so , do he tom <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more i <EOS>', '<SOS> i more <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 11 | Time: 0m 46s
	Train Loss: 0.121 | Train PPL:   1.129
	 Val. Loss: 0.166 |  Val. PPL:   1.181 | val acc: 0.898 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.44it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.88it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 33.02it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.79it/s]
Train:   0%|          | 2/766 [00:00<00:40, 19.06it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> told he so <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked he , do he said <EOS>', '<SOS> much time have <EOS>', '<SOS> do <EOS>', '<SOS> how time do <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 12 | Time: 0m 45s
	Train Loss: 0.110 | Train PPL:   1.116
	 Val. Loss: 0.163 |  Val. PPL:   1.177 | val acc: 0.898 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.44it/s]
valid: 100%|██████████| 192/192 [00:06<00:00, 30.75it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 31.76it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 31.57it/s]
Train:   0%|          | 2/766 [00:00<00:44, 17.10it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> to he so <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> told so , do he said <EOS>', '<SOS> much time have <EOS>', '<SOS> what <EOS>', '<SOS> how more i <EOS>', '<SOS> i more <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 13 | Time: 0m 45s
	Train Loss: 0.100 | Train PPL:   1.105
	 Val. Loss: 0.158 |  Val. PPL:   1.171 | val acc: 0.903 


Train: 100%|██████████| 766/766 [00:36<00:00, 21.07it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 33.12it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 30.67it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.09it/s]
Train:   0%|          | 2/766 [00:00<00:41, 18.53it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked he that <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked so , do he tom <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more time <EOS>', '<SOS> i do <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 14 | Time: 0m 44s
	Train Loss: 0.090 | Train PPL:   1.094
	 Val. Loss: 0.162 |  Val. PPL:   1.176 | val acc: 0.902 


Train: 100%|██████████| 766/766 [00:36<00:00, 21.08it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.26it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 33.06it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.74it/s]
Train:   0%|          | 2/766 [00:00<00:39, 19.22it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked asked so <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked asked , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> what <EOS>', '<SOS> how more i <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 15 | Time: 0m 44s
	Train Loss: 0.084 | Train PPL:   1.087
	 Val. Loss: 0.155 |  Val. PPL:   1.168 | val acc: 0.909 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.32it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.09it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 33.29it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.40it/s]
Train:   0%|          | 3/766 [00:00<00:34, 21.95it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked mary so <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do so <EOS>', '<SOS> asked so , do would said <EOS>', '<SOS> much time have <EOS>', '<SOS> do <EOS>', '<SOS> how more i <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 16 | Time: 0m 45s
	Train Loss: 0.077 | Train PPL:   1.080
	 Val. Loss: 0.152 |  Val. PPL:   1.165 | val acc: 0.909 


Train: 100%|██████████| 766/766 [00:36<00:00, 20.83it/s]
valid: 100%|██████████| 192/192 [00:06<00:00, 31.11it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 31.68it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 31.99it/s]
Train:   0%|          | 2/766 [00:00<00:40, 18.64it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked mary , that <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked so , do would tom <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 17 | Time: 0m 44s
	Train Loss: 0.073 | Train PPL:   1.076
	 Val. Loss: 0.164 |  Val. PPL:   1.178 | val acc: 0.906 


Train: 100%|██████████| 766/766 [00:38<00:00, 19.70it/s]
valid: 100%|██████████| 192/192 [00:06<00:00, 31.00it/s]
valid:   6%|▋         | 3/48 [00:00<00:01, 28.80it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 31.42it/s]
Train:   0%|          | 2/766 [00:00<00:38, 19.76it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked mary , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', "<SOS> asked mary , do n't said <EOS>", '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more i <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 18 | Time: 0m 47s
	Train Loss: 0.067 | Train PPL:   1.070
	 Val. Loss: 0.142 |  Val. PPL:   1.152 | val acc: 0.916 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.61it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.26it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 31.79it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 31.89it/s]
Train:   0%|          | 2/766 [00:00<00:43, 17.67it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked i that <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked he , do he tom <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 19 | Time: 0m 45s
	Train Loss: 0.062 | Train PPL:   1.064
	 Val. Loss: 0.155 |  Val. PPL:   1.168 | val acc: 0.913 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.39it/s]
valid: 100%|██████████| 192/192 [00:06<00:00, 30.85it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 32.71it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 34.41it/s]
Train:   0%|          | 2/766 [00:00<00:39, 19.40it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked i that <EOS>', '<SOS> said would do so i <EOS>', '<SOS> do so <EOS>', '<SOS> asked he , do he said <EOS>', '<SOS> much time i <EOS>', '<SOS> what <EOS>', '<SOS> how more time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 20 | Time: 0m 45s
	Train Loss: 0.060 | Train PPL:   1.062
	 Val. Loss: 0.142 |  Val. PPL:   1.152 | val acc: 0.918 


Train: 100%|██████████| 766/766 [00:36<00:00, 20.99it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.49it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 33.93it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.38it/s]
Train:   0%|          | 2/766 [00:00<00:41, 18.27it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked i so <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked mary , do he tom <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 21 | Time: 0m 44s
	Train Loss: 0.056 | Train PPL:   1.058
	 Val. Loss: 0.153 |  Val. PPL:   1.165 | val acc: 0.914 


Train: 100%|██████████| 766/766 [00:35<00:00, 21.61it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 33.64it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 32.10it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.12it/s]
Train:   0%|          | 2/766 [00:00<00:42, 18.00it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked mary so <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked mary , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 22 | Time: 0m 43s
	Train Loss: 0.053 | Train PPL:   1.054
	 Val. Loss: 0.144 |  Val. PPL:   1.154 | val acc: 0.920 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.45it/s]
valid: 100%|██████████| 192/192 [00:06<00:00, 30.65it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 31.01it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.20it/s]
Train:   0%|          | 2/766 [00:00<00:39, 19.38it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked mary , that <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked mary , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> time <EOS>', '<SOS> how more time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 23 | Time: 0m 45s
	Train Loss: 0.052 | Train PPL:   1.053
	 Val. Loss: 0.147 |  Val. PPL:   1.159 | val acc: 0.919 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.66it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.08it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 29.67it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 31.97it/s]
Train:   0%|          | 2/766 [00:00<00:43, 17.55it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked mary , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked mary , do did said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more i <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 24 | Time: 0m 45s
	Train Loss: 0.048 | Train PPL:   1.049
	 Val. Loss: 0.149 |  Val. PPL:   1.161 | val acc: 0.920 


Train: 100%|██████████| 766/766 [00:35<00:00, 21.31it/s]
valid: 100%|██████████| 192/192 [00:06<00:00, 31.10it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 32.93it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 33.05it/s]
Train:   0%|          | 3/766 [00:00<00:34, 21.88it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> to i so <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked i , do he said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 25 | Time: 0m 44s
	Train Loss: 0.047 | Train PPL:   1.048
	 Val. Loss: 0.147 |  Val. PPL:   1.158 | val acc: 0.920 


Train: 100%|██████████| 766/766 [00:35<00:00, 21.54it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.72it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 31.85it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.71it/s]
Train:   0%|          | 2/766 [00:00<00:39, 19.54it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked mary so <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do so <EOS>', '<SOS> asked he , do said said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how time i <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 26 | Time: 0m 43s
	Train Loss: 0.044 | Train PPL:   1.045
	 Val. Loss: 0.153 |  Val. PPL:   1.165 | val acc: 0.919 


Train: 100%|██████████| 766/766 [00:38<00:00, 19.81it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 33.32it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 31.68it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 31.52it/s]
Train:   0%|          | 0/766 [00:00<?, ?it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> to mary , <EOS>', '<SOS> said would do , mary <EOS>', '<SOS> do so <EOS>', '<SOS> to so , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how time i <EOS>', '<SOS> i more <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 27 | Time: 0m 46s
	Train Loss: 0.045 | Train PPL:   1.046
	 Val. Loss: 0.140 |  Val. PPL:   1.151 | val acc: 0.925 


Train: 100%|██████████| 766/766 [00:35<00:00, 21.55it/s]
valid: 100%|██████████| 192/192 [00:06<00:00, 29.96it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 32.94it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.81it/s]
Train:   0%|          | 2/766 [00:00<00:40, 18.81it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> to mary so do <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked mary , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 28 | Time: 0m 43s
	Train Loss: 0.041 | Train PPL:   1.042
	 Val. Loss: 0.142 |  Val. PPL:   1.152 | val acc: 0.920 


Train: 100%|██████████| 766/766 [00:36<00:00, 21.14it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 33.16it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 32.61it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.02it/s]
Train:   0%|          | 2/766 [00:00<00:42, 17.80it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> to asked so , <EOS>', '<SOS> said would do , mary <EOS>', '<SOS> do , <EOS>', '<SOS> asked mary , that would said <EOS>', '<SOS> much time do <EOS>', '<SOS> do <EOS>', '<SOS> how more time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 29 | Time: 0m 44s
	Train Loss: 0.040 | Train PPL:   1.041
	 Val. Loss: 0.137 |  Val. PPL:   1.147 | val acc: 0.927 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.60it/s]
valid: 100%|██████████| 192/192 [00:06<00:00, 31.99it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 31.60it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 31.85it/s]
Train:   0%|          | 3/766 [00:00<00:36, 20.87it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked mary so do <EOS>', '<SOS> said would do , mary <EOS>', '<SOS> do , <EOS>', '<SOS> asked so , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how time time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 30 | Time: 0m 45s
	Train Loss: 0.037 | Train PPL:   1.037
	 Val. Loss: 0.143 |  Val. PPL:   1.153 | val acc: 0.925 


Train: 100%|██████████| 766/766 [00:38<00:00, 19.84it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.77it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 33.68it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.31it/s]
Train:   0%|          | 2/766 [00:00<00:44, 17.20it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> to mary so <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked asked , do would tom <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more i <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 31 | Time: 0m 46s
	Train Loss: 0.039 | Train PPL:   1.040
	 Val. Loss: 0.141 |  Val. PPL:   1.152 | val acc: 0.927 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.44it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.78it/s]
valid:   6%|▋         | 3/48 [00:00<00:01, 29.27it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.05it/s]
Train:   0%|          | 0/766 [00:00<?, ?it/s]

-----exm-----
['<SOS> do that <EOS>', '<SOS> tom <EOS>', '<SOS> to mary so do <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do so <EOS>', '<SOS> asked asked , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 32 | Time: 0m 45s
	Train Loss: 0.035 | Train PPL:   1.035
	 Val. Loss: 0.135 |  Val. PPL:   1.145 | val acc: 0.928 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.26it/s]
valid: 100%|██████████| 192/192 [00:06<00:00, 30.42it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 32.25it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.63it/s]
Train:   0%|          | 2/766 [00:00<00:41, 18.51it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked mary so do <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked asked , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how time what <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 33 | Time: 0m 46s
	Train Loss: 0.036 | Train PPL:   1.037
	 Val. Loss: 0.130 |  Val. PPL:   1.139 | val acc: 0.930 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.63it/s]
valid: 100%|██████████| 192/192 [00:06<00:00, 31.75it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 31.65it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 31.63it/s]
Train:   0%|          | 3/766 [00:00<00:34, 22.10it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> to asked so do <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked so , do would said <EOS>', '<SOS> much time we <EOS>', '<SOS> do <EOS>', '<SOS> how more what <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 34 | Time: 0m 45s
	Train Loss: 0.033 | Train PPL:   1.033
	 Val. Loss: 0.143 |  Val. PPL:   1.154 | val acc: 0.927 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.59it/s]
valid: 100%|██████████| 192/192 [00:06<00:00, 31.22it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 32.85it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.51it/s]
Train:   0%|          | 0/766 [00:00<?, ?it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> me asked so <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do so <EOS>', '<SOS> asked asked , that would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 35 | Time: 0m 45s
	Train Loss: 0.032 | Train PPL:   1.032
	 Val. Loss: 0.131 |  Val. PPL:   1.140 | val acc: 0.931 


Train: 100%|██████████| 766/766 [00:36<00:00, 20.87it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.64it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 32.38it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 31.08it/s]
Train:   0%|          | 2/766 [00:00<00:42, 17.82it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> asked i so do <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do so <EOS>', '<SOS> asked asked , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how time do <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 36 | Time: 0m 44s
	Train Loss: 0.033 | Train PPL:   1.033
	 Val. Loss: 0.141 |  Val. PPL:   1.151 | val acc: 0.925 


Train: 100%|██████████| 766/766 [00:36<00:00, 20.86it/s]
valid: 100%|██████████| 192/192 [00:06<00:00, 30.95it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 31.54it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 27.99it/s]
Train:   0%|          | 2/766 [00:00<00:39, 19.43it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> to mary , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> asked mary , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 37 | Time: 0m 45s
	Train Loss: 0.031 | Train PPL:   1.032
	 Val. Loss: 0.140 |  Val. PPL:   1.151 | val acc: 0.929 


Train: 100%|██████████| 766/766 [00:37<00:00, 20.60it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.78it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 31.97it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 31.88it/s]
Train:   0%|          | 3/766 [00:00<00:34, 22.14it/s]

-----exm-----
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', "<SOS> mary asked so n't <EOS>", '<SOS> said would do , i <EOS>', '<SOS> do so <EOS>', '<SOS> asked mary , do did said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more i <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 38 | Time: 0m 45s
	Train Loss: 0.030 | Train PPL:   1.031
	 Val. Loss: 0.142 |  Val. PPL:   1.153 | val acc: 0.929 


Train: 100%|██████████| 766/766 [00:36<00:00, 21.13it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.49it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 32.19it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 27.70it/s]
Train:   0%|          | 3/766 [00:00<00:35, 21.57it/s]

-----exm-----
['<SOS> do that <EOS>', '<SOS> tom <EOS>', '<SOS> asked i , so <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do so <EOS>', '<SOS> asked asked , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more i <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 39 | Time: 0m 44s
	Train Loss: 0.031 | Train PPL:   1.031
	 Val. Loss: 0.133 |  Val. PPL:   1.142 | val acc: 0.929 


Train: 100%|██████████| 766/766 [00:38<00:00, 20.16it/s]
valid: 100%|██████████| 192/192 [00:05<00:00, 32.99it/s]
valid:   8%|▊         | 4/48 [00:00<00:01, 33.25it/s]

-----exm-----
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']
['<SOS> you <EOS>', '<SOS> did <EOS>', '<SOS> what <EOS>', '<SOS> well done <EOS>', '<SOS> do to what me told just tom <EOS>', '<SOS> told him that he <EOS>', '<SOS> is <EOS>', "<SOS> n't <EOS>", "<SOS> before here been 've <EOS>", '<SOS> know <EOS>']


valid: 100%|██████████| 48/48 [00:01<00:00, 32.57it/s]


-----exm-----
['<SOS> do , <EOS>', '<SOS> tom <EOS>', '<SOS> to mary so <EOS>', '<SOS> said would do , mary <EOS>', '<SOS> do so <EOS>', '<SOS> asked asked , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
Epoch: 40 | Time: 0m 45s
	Train Loss: 0.029 | Train PPL:   1.030
	 Val. Loss: 0.136 |  Val. PPL:   1.145 | val acc: 0.934 


In [91]:
train_set.vocab.OOV

0

In [22]:
import matplotlib.pyplot as plt
%matplotlib inline
def plot_hist(history):
    train_loss = [l['loss'] for l in history['train']]
    valid_loss = [l['loss'] for l in history['val']]
    train_f1 = [l['acc'] for l in history['train']]
    valid_f1 = [l['acc'] for l in history['val']]

    plt.figure(figsize=(7,5))
    plt.title('Loss')
    plt.plot(train_loss, label='train')
    plt.plot(valid_loss, label='valid')
    plt.legend()
    plt.show()

    plt.figure(figsize=(7,5))
    plt.title('acc')
    plt.plot(train_f1, label='train')
    plt.plot(valid_f1, label='valid')
    plt.legend()
    plt.show()

    print('Best acc ', max([[l['acc'], idx] for idx, l in enumerate(history['val'])]))

In [95]:
import pickle
pickle.dump(history,open('./output_final/history_final.pkl','wb'))

In [34]:
history

{'train': [{'loss': 2.331152370588307, 'acc': 0.9393649335244064, 'lm': -1},
  {'loss': 2.0850237941289964, 'acc': 0.9468304958071351, 'lm': -1},
  {'loss': 2.0284336240161442, 'acc': 0.9543234848432904, 'lm': -1},
  {'loss': 1.9979701880218481, 'acc': 0.9506140164423387, 'lm': -1},
  {'loss': 1.9783137987972834, 'acc': 0.9532771542000644, 'lm': -1},
  {'loss': 1.9628462804834823, 'acc': 0.9535212523055615, 'lm': -1},
  {'loss': 1.9527147529584306, 'acc': 0.9531921312644419, 'lm': -1},
  {'loss': 1.941058111458635, 'acc': 0.9540080771788841, 'lm': -1},
  {'loss': 1.9341200902124278, 'acc': 0.9549309874316902, 'lm': -1},
  {'loss': 1.926205194831334, 'acc': 0.9548939613145643, 'lm': -1}],
 'val': [{'loss': 9.203364372253418, 'acc': 0.94, 'lm': 54.71903576136504},
  {'loss': 9.289450645446777, 'acc': 0.96, 'lm': 54.71903576136504},
  {'loss': 9.13467025756836, 'acc': 0.96, 'lm': 54.71903576136504},
  {'loss': 9.402077674865723, 'acc': 0.97, 'lm': 54.71903576136504},
  {'loss': 9.18008804

In [93]:
vocab_save={}
vocab_save['word2idx'] = train_set.vocab.word2idx; vocab_save['idx2word'] = train_set.vocab.idx2word
vocab_save['num_words'] = train_set.vocab.num_words
pickle.dump(vocab_save,open('./output_final/vocab_final.pkl','wb'))

In [12]:
vocab = Vocabulary()
vocab_dic = pickle.load(open('./output_final/vocab_final.pkl','rb'))
vocab.word2idx = vocab_dic['word2idx']; vocab.idx2word = vocab_dic['idx2word']
vocab.num_words = vocab_dic['num_words']
model_load = torch.load(open('./output_final/model_final_9336.pkl','rb'))
# test_set = TestData(data_dir+'hw2.1-1_sample_testing_data.txt',max_length=train_set.max_length,vocab=vocab)
criterion = nn.CrossEntropyLoss(ignore_index = test_set.PAD_ID)
valid_loss,valid_predict,valid_acc = evaluate(model_load, criterion,test_set)
print(len(valid_predict),valid_acc)
# f_out = open('./output/task2_1_1_sample_predictions.txt','w')
# for row in valid_predict:
#     f_out.write(row+'\n')
# f_out.close()

valid: 100%|██████████| 48/48 [00:01<00:00, 27.45it/s]


-----exm-----
['<SOS> do , <EOS>', '<SOS> tom <EOS>', '<SOS> to mary so <EOS>', '<SOS> said would do , mary <EOS>', '<SOS> do so <EOS>', '<SOS> asked asked , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more time <EOS>', '<SOS> i time <EOS>']
["<SOS> n't that <EOS>", '<SOS> tom <EOS>', '<SOS> mary i , <EOS>', '<SOS> said would do , i <EOS>', '<SOS> do , <EOS>', '<SOS> mary i , do would said <EOS>', '<SOS> much time i <EOS>', '<SOS> do <EOS>', '<SOS> how more do <EOS>', '<SOS> i time <EOS>']
24510 0.9336597307221542


In [33]:
print(len(valid_predict))
print(valid_acc)

98040
0.7814973480212158
