In [None]:
# Mount Google Drive into the Colab runtime at /content/gdrive so the dataset
# files stored there can be copied into the runtime by the next cell.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# NOTE(review): every path below is relative, so this cell assumes the working
# directory is /content (Corpus('/content') later reads the copies from there).
!ls "gdrive/MyDrive/wikitext-2" # check that it has successfully connected
# The WikiText-2 files are expected in your Google Drive, inside the folder "wikitext-2".
!cp "gdrive/MyDrive/wikitext-2/wiki.test.tokens.txt" "test.txt" # copy the files to colab runtime
!cp "gdrive/MyDrive/wikitext-2/wiki.train.tokens.txt" "train.txt"
!cp "gdrive/MyDrive/wikitext-2/wiki.valid.tokens.txt" "valid.txt"

wiki.test.tokens      wiki.train.tokens      wiki.valid.tokens
wiki.test.tokens.txt  wiki.train.tokens.txt  wiki.valid.tokens.txt


# Preprocessing

In [None]:
import os
from io import open
import torch

class Dictionary(object):
    """Two-way vocabulary: word -> integer id and integer id -> word.

    Ids are handed out in first-seen order, starting at 0, so ``idx2word[i]``
    is the word whose id is ``i``.
    """

    def __init__(self):
        self.word2idx = {}   # word -> id
        self.idx2word = []   # id -> word (position in the list is the id)

    def add_word(self, word):
        """Return the id of ``word``, registering it first if it is new."""
        try:
            return self.word2idx[word]
        except KeyError:
            pass
        self.idx2word.append(word)
        fresh_id = len(self.idx2word) - 1
        self.word2idx[word] = fresh_id
        return self.word2idx[word]

    def __len__(self):
        """Number of distinct words registered so far."""
        return len(self.idx2word)


class Corpus(object):
    """Tokenized train/valid/test splits that share one vocabulary.

    Args:
        path: directory containing ``train.txt``, ``valid.txt`` and ``test.txt``.

    Attributes:
        dictionary: the shared ``Dictionary`` (word <-> id mapping).
        train, valid, test: 1-D ``torch.int64`` tensors of token ids.
    """

    def __init__(self, path):
        self.dictionary = Dictionary()
        # Order matters: ids are assigned in first-seen order, train first.
        self.train = self.tokenize(os.path.join(path, 'train.txt'))
        self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
        self.test = self.tokenize(os.path.join(path, 'test.txt'))

    def tokenize(self, path):
        """Tokenize a text file into a flat tensor of word ids.

        Each kept line is split on whitespace, lower-cased, and terminated
        with an ``<eos>`` token. Unseen words are added to ``self.dictionary``.

        The file is read once, and the header filter is applied to the very
        same lines that are encoded. (Previously the vocabulary pass skipped
        lines starting with '=' while the encoding pass did not, so a word
        that only occurred on a skipped line raised ``KeyError``.)

        Args:
            path: path of the UTF-8 text file to read.

        Returns:
            1-D ``torch.int64`` tensor of token ids (empty if nothing is kept).

        Raises:
            AssertionError: if ``path`` does not exist.
        """
        assert os.path.exists(path)
        ids = []
        with open(path, 'r', encoding="utf8") as f:
            for line in f:
                # remove the headers e.g.  = = Description = =
                # NOTE(review): this only matches lines whose FIRST character
                # is '='; a line with leading whitespace before the '=' is
                # kept. Confirm against the raw files whether
                # `line.lstrip().startswith('=')` is what is wanted (it would
                # change the vocabulary and the token counts).
                if line.startswith('='):
                    continue
                for word in line.split() + ['<eos>']:
                    # add_word returns the id, for new and known words alike.
                    ids.append(self.dictionary.add_word(word.lower()))

        # Building from a flat list also yields a valid empty tensor, where
        # torch.cat on an empty list of per-line tensors would raise.
        return torch.tensor(ids, dtype=torch.int64)

    @property
    def vocab_size(self):
        """Number of distinct tokens in the shared dictionary."""
        return len(self.dictionary.idx2word)

# Params

In [None]:
#=== params
# Global hyper-parameters; the cells below read these names directly.
# NOTE(review): "paper" presumably refers to Bengio et al. (2003),
# "A Neural Probabilistic Language Model" — confirm.
corpus = Corpus('/content')  # reads train.txt / valid.txt / test.txt from /content
n_class = corpus.vocab_size  # vocabulary size; passed to FNNModel as vocab_size
n_step = 7 # n-1 in paper: number of context words fed to the model (context_size)
n_hidden = 200 # h in paper: hidden-layer width; must equal embed_size when the model ties weights
embed_size = 200       # m in paper: size of each word embedding
batch_size = 128  # number of parallel token streams produced by batchify
order = n_step # order (int): the order of the language model, i.e. length of the history
epochs = 100  # upper bound on training epochs
learning_rate = 0.001  # passed to the RMSprop optimizer
cuda = torch.cuda.is_available()  # move data and model to the GPU when one is present
seed = 42  # seed for torch / numpy random number generators
clip = 2.0  # max gradient norm passed to clip_grad_norm_
#===

# Model

In [None]:
#== MODEL ==#
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

'''
    The neural model jointly learns a distributed representation for each word
    (the embedding matrix C) and the probability function of a word sequence,
    expressed as a function of those distributed representations.
    It has a hidden layer with tanh activation, and the output layer is a
    softmax layer.
    For each input of (n - 1) previous words, the model outputs the
    probabilities over the |V| words in the vocabulary for the next word.
'''
class FNNModel(nn.Module):
    """Feed-forward neural n-gram language model.

    Pipeline: embed each of the ``context_size`` previous words, concatenate
    the embeddings, apply a tanh hidden layer with dropout, then project to
    vocabulary size and take a log-softmax.
    """

    def __init__(self, vocab_size, embed_size, context_size, no_hidden, dropout=0.5, tie_weight=True):
        """
        Args:
            vocab_size (int): number of words in the vocabulary.
            embed_size (int): size of each embedding vector.
            context_size (int): number of previous words used as input
                (n - 1 for an n-gram model; context_size = 1 is a bigram model).
            no_hidden (int): number of hidden units.
            dropout (float): dropout probability applied to the hidden layer.
            tie_weight (bool): share one weight matrix between the input
                embedding and the output projection.

        Raises:
            ValueError: if ``tie_weight`` is set while
                ``embed_size != no_hidden``.
        """
        super(FNNModel, self).__init__()
        # The embedding matrix is (vocab_size, embed_size) and the output
        # projection is (vocab_size, no_hidden); they can only be shared when
        # the two widths agree. Without this check construction succeeds and
        # the mismatch only surfaces as a shape error inside forward().
        if tie_weight and embed_size != no_hidden:
            raise ValueError(
                'tie_weight=True requires embed_size == no_hidden '
                '(got embed_size={}, no_hidden={})'.format(embed_size, no_hidden))
        self.embeddings = nn.Embedding(vocab_size, embed_size)
        self.linear1 = nn.Linear(context_size * embed_size, no_hidden)
        self.dropout = nn.Dropout(p=dropout)
        self.linear2 = nn.Linear(no_hidden, vocab_size)
        self.context_size = context_size
        self.embed_size = embed_size
        if tie_weight:
            # Both modules now reference the same Parameter object.
            self.linear2.weight = self.embeddings.weight

    def forward(self, inputs):
        """Return log-probabilities over the vocabulary for the next word.

        Args:
            inputs: integer word ids, ``context_size`` per example
                (assumed shape ``(batch, context_size)`` — the embeddings are
                flattened to ``context_size * embed_size`` values per row).

        Returns:
            Tensor of shape ``(batch, vocab_size)`` holding log-probabilities.
        """
        # Concatenate the context embeddings of each example into one row.
        embeds = self.embeddings(inputs).view((-1, self.context_size * self.embed_size))
        hidden_output = self.linear1(embeds)
        out = hidden_output.tanh()
        out = self.dropout(out)
        out = self.linear2(out)
        # Normalise over the vocabulary dimension.
        log_probs = F.log_softmax(out, dim=1)
        return log_probs

# Data Loading

In [None]:
def batchify(data, batch_size):
    """Arrange a flat token sequence into ``batch_size`` parallel columns.

    The sequence is cut to a multiple of ``batch_size`` (trailing tokens are
    dropped), split into ``batch_size`` consecutive chunks, and each chunk
    becomes one column of the result. Reading down a column therefore follows
    the original token order.

    Args:
        data: token ids (assumes a 1-D tensor — TODO confirm for other ranks).
        batch_size: number of columns in the result.

    Returns:
        Contiguous tensor of shape ``(len(data) // batch_size, batch_size)``.
    """
    rows_per_column = data.size(0) // batch_size
    usable = rows_per_column * batch_size
    # Drop the remainder that does not fill a complete row.
    trimmed = data[:usable]
    # One chunk per row first, then transpose so that chunks become columns.
    chunks = trimmed.view(batch_size, -1)
    return torch.transpose(chunks, 0, 1).contiguous()
def get_batch(data, i, order):
    """Build one batch of (context, target) pairs starting at row ``i``.

    Args:
        data: batchified token ids; rows are time steps and columns are
            independent streams (as produced by ``batchify``).
        i: index of the first context row.
        order: number of context rows (length of the history).

    Returns:
        ``(x, y)`` where ``x`` is rows ``i .. i+order-1`` transposed to
        ``(n_streams, order)`` and ``y`` is row ``i+order`` flattened to
        ``(n_streams,)``.

    Raises:
        IndexError: if ``i + order`` is past the last row of ``data``.
    """
    # torch.autograd.Variable is deprecated: tensors carry autograd state
    # themselves, so the wrapper is dropped.
    x = torch.t(data[i:i+order])
    y = data[i+order].view(-1)
    return x, y
def evaluate(data, model, criterion):
    """Average the loss of ``model`` over every n-gram window in ``data``.

    Puts the model in eval mode (it is left in eval mode on return) and reads
    the history length from the module-level ``order``.

    Args:
        data: batchified token ids (rows are time steps).
        model: the language model; called as ``model(x)``.
        criterion: loss function; called as ``criterion(output, target)``.

    Returns:
        0-dim tensor with the mean per-step loss. It is kept as a tensor
        because the training cell passes it to ``torch.exp`` and compares
        it with earlier values.

    Raises:
        ValueError: if ``data`` has too few rows to form a single step.
    """
    model.eval()
    # NOTE(review): the last valid start index is size - order - 1, so this
    # range leaves out one final window. Kept as-is to match the training loop.
    n_steps = data.size(0) - order - 1
    if n_steps <= 0:
        raise ValueError(
            'need more than {} rows to evaluate, got {}'.format(order + 1, data.size(0)))
    total_loss = 0
    # No gradients are needed for evaluation; skipping graph construction
    # saves memory and time.
    with torch.no_grad():
        for i in tqdm(range(n_steps)):
            x, y = get_batch(data, i, order)
            out = model(x)
            total_loss += criterion(out, y)
    return total_loss / n_steps

In [None]:
def clock_time(s):
    """Split a duration in seconds into whole ``(hours, minutes, seconds)``.

    Each component is truncated to an int after the split.
    """
    parts = []
    # Peel off hours, then minutes; whatever is left over is the seconds.
    for unit in (3600, 60):
        whole, s = divmod(s, unit)
        parts.append(int(whole))
    parts.append(int(s))
    return tuple(parts)

In [None]:
import numpy as np
import torch.optim as optim
from tqdm import tqdm
import time


# Reshape each split into batch_size parallel token streams (rows = time steps).
train_data = batchify(corpus.train, batch_size)
val_data = batchify(corpus.valid, batch_size)
test_data = batchify(corpus.test, batch_size)
if cuda:
    train_data, val_data, test_data = train_data.cuda(), val_data.cuda(), test_data.cuda()
print('Using cuda: {}'.format(cuda))
print('Size of training set: {:,} tokens'.format(np.prod(train_data.size())))
print('Size of validation set: {:,} tokens'.format(np.prod(val_data.size())))
print('Size of test set: {:,} tokens'.format(np.prod(test_data.size())))
print('Vocabulary size: {:,}'.format(corpus.vocab_size))
print('Example data:')
# Show a few (context -> next word) pairs taken from the first stream.
for k in range(100, 107):
    x = [corpus.dictionary.idx2word[i] for i in train_data[k:order+k, 0]]
    y = [corpus.dictionary.idx2word[train_data[k+order, 0]]]
    print(x, y)
#=== initialise model
# Seed BEFORE building the model: the layers draw their initial weights from
# torch's RNG at construction time, so seeding only in the training cell
# leaves the initialisation unreproducible.
torch.manual_seed(seed)
model = FNNModel(
    n_class,
    embed_size,
    n_step,
    n_hidden,
    tie_weight=True
    )
if cuda:
    # Move the model before creating the optimizer below.
    model.cuda()
# Display the model's architecture
print('Model: \n', model)
# NLLLoss pairs with the log-probabilities returned by FNNModel.forward.
criterion = nn.NLLLoss()
optimizer = optim.RMSprop(model.parameters(),lr=learning_rate)

Using cuda: True
Size of training set: 2,088,576 tokens
Size of validation set: 217,600 tokens
Size of test set: 245,504 tokens
Vocabulary size: 28,912
Example data:
['"', 'nameless', '"', ',', 'a', 'penal', 'military'] ['unit']
['nameless', '"', ',', 'a', 'penal', 'military', 'unit'] ['serving']
['"', ',', 'a', 'penal', 'military', 'unit', 'serving'] ['the']
[',', 'a', 'penal', 'military', 'unit', 'serving', 'the'] ['nation']
['a', 'penal', 'military', 'unit', 'serving', 'the', 'nation'] ['of']
['penal', 'military', 'unit', 'serving', 'the', 'nation', 'of'] ['gallia']
['military', 'unit', 'serving', 'the', 'nation', 'of', 'gallia'] ['during']
Model: 
 FNNModel(
  (embeddings): Embedding(28912, 200)
  (linear1): Linear(in_features=1400, out_features=200, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (linear2): Linear(in_features=200, out_features=28912, bias=True)
)


In [None]:
# Show which GPU the runtime was assigned and its current memory usage.
!nvidia-smi

Sat Nov 28 07:39:06 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.38       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   62C    P0    31W /  70W |   1011MiB / 15079MiB |      1%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Set seed for reproducibility.
torch.manual_seed(seed)
np.random.seed(seed)
parameters = [param for param in model.parameters() if param.requires_grad]
# Training
print('Training...')
losses = dict(train=[], val=[])

# Early stopping: give up after this many consecutive epochs without a new
# best validation loss.
patience = 10
stop_counter = 0

# Report running statistics every `print_every` training steps.
print_every = 10000

lr = learning_rate # so that can alter later if SGD not descending
best_val_loss = None

# NOTE(review): the last valid start index is size - order - 1, so this leaves
# out one final window per epoch. Kept as-is to match evaluate().
num_steps = train_data.size(0) - order - 1
batch_order = np.arange(num_steps)

try:
    for epoch in range(1, epochs+1):
        model.train()
        epoch_start_time = time.time()
        # Restart the throughput timer every epoch; otherwise the first report
        # of an epoch also counts the previous epoch's tail and its validation.
        t0 = epoch_start_time
        # Visit the n-gram windows in a fresh random order each epoch.
        np.random.shuffle(batch_order)

        for step in range(1, num_steps+1):
            idx = batch_order[step-1]
            x, y = get_batch(train_data, idx, order)

            model.zero_grad()
            # Forward pass
            logits = model(x)
            loss = criterion(logits, y)

            # Update parameters
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), clip) # reduce exploding grad
            optimizer.step()

            # Save loss as a plain float so no tensors are kept alive.
            losses['train'].append(loss.item())
            if step % print_every == 0:
                avg_loss = sum(losses['train'][-print_every:]) / print_every
                t1 = time.time()
                steps_per_second = print_every / (t1 - t0)
                print('| epoch {} | step {}/{} | loss {:.4f} | lr {:.5f} | '
                    'ngrams/sec {:.1f} | eta {}h{}m{}s'.format(
                    epoch, step, num_steps, avg_loss, lr,
                    steps_per_second * batch_size,
                    *clock_time((num_steps - step) / steps_per_second)))
                t0 = time.time()

        print('Evaluating on validation set...')
        val_loss = evaluate(val_data, model, criterion)
        losses['val'].append(val_loss)
        print('-' * 89)
        print('| end of epoch {:3d} | time {:5.2f}s | valid loss {:5.2f} | valid ppl {:8.2f}'.format(
            epoch, (time.time() - epoch_start_time), val_loss, torch.exp(val_loss)))
        print('-' * 89)

        # `is None` rather than truthiness: a best loss of exactly 0 must not
        # be mistaken for "no best yet".
        if best_val_loss is None or val_loss < best_val_loss:
            stop_counter = 0 # reset counter
            best_val_loss = val_loss
            print('| saving current state of model ...')
            torch.save(model.state_dict(), 'checkpoint.pth')
            #=== download checkpoint file
            # files.download('checkpoint.pth')
        else:
            # No improvement this epoch. The previous `elif` repeated
            # `val_loss < best_val_loss`, which is always false here, so
            # early stopping could never fire.
            stop_counter += 1
            if stop_counter >= patience:
                print("Early stopping")
                break
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')

# write_losses(losses['train'], args.log_dir, name='train-losses')
# write_losses(losses['val'], args.log_dir, name='val-losses')

# Score the test set with the best validation checkpoint, not with whatever
# weights the final (possibly worse, possibly interrupted) epoch left behind.
if best_val_loss is not None:
    model.load_state_dict(torch.load('checkpoint.pth'))

print('Evaluating on test set...')
test_loss = evaluate(test_data, model, criterion)
print('=' * 89)
print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
    test_loss, torch.exp(test_loss)))
print('=' * 89)

Training...
| epoch 1 | step 10000/16309 | loss 16.0063 | lr 0.00100 | ngrams/sec 23144.1 | eta 0h0m34s


 11%|█▏        | 194/1692 [00:00<00:00, 1937.83it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 995.90it/s]


-----------------------------------------------------------------------------------------
| end of epoch   1 | time 91.49s | valid loss  7.68 | valid ppl  2166.14
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 2 | step 10000/16309 | loss 9.4076 | lr 0.00100 | ngrams/sec 14060.8 | eta 0h0m57s


 12%|█▏        | 196/1692 [00:00<00:00, 1941.06it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1001.42it/s]


-----------------------------------------------------------------------------------------
| end of epoch   2 | time 90.96s | valid loss  7.41 | valid ppl  1655.77
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 3 | step 10000/16309 | loss 9.2057 | lr 0.00100 | ngrams/sec 14005.8 | eta 0h0m57s


 11%|█▏        | 194/1692 [00:00<00:00, 1923.44it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 990.29it/s]


-----------------------------------------------------------------------------------------
| end of epoch   3 | time 91.37s | valid loss  7.34 | valid ppl  1539.20
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 4 | step 10000/16309 | loss 9.1704 | lr 0.00100 | ngrams/sec 13998.9 | eta 0h0m57s


 11%|█▏        | 194/1692 [00:00<00:00, 1920.42it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 991.21it/s]


-----------------------------------------------------------------------------------------
| end of epoch   4 | time 91.00s | valid loss  7.28 | valid ppl  1448.28
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 5 | step 10000/16309 | loss 9.1495 | lr 0.00100 | ngrams/sec 14094.1 | eta 0h0m57s


 11%|█▏        | 194/1692 [00:00<00:00, 1936.87it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 999.33it/s]


-----------------------------------------------------------------------------------------
| end of epoch   5 | time 90.44s | valid loss  7.26 | valid ppl  1428.58
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 6 | step 10000/16309 | loss 9.1404 | lr 0.00100 | ngrams/sec 14153.7 | eta 0h0m57s


 11%|█         | 190/1692 [00:00<00:00, 1893.68it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 987.70it/s]


-----------------------------------------------------------------------------------------
| end of epoch   6 | time 90.38s | valid loss  7.28 | valid ppl  1452.14
-----------------------------------------------------------------------------------------
| epoch 7 | step 10000/16309 | loss 9.1187 | lr 0.00100 | ngrams/sec 14169.3 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1923.52it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 985.41it/s]


-----------------------------------------------------------------------------------------
| end of epoch   7 | time 90.02s | valid loss  7.18 | valid ppl  1317.75
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 8 | step 10000/16309 | loss 9.0913 | lr 0.00100 | ngrams/sec 14207.3 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1927.04it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 991.34it/s]


-----------------------------------------------------------------------------------------
| end of epoch   8 | time 89.85s | valid loss  7.17 | valid ppl  1298.09
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 9 | step 10000/16309 | loss 9.0727 | lr 0.00100 | ngrams/sec 14251.5 | eta 0h0m56s


 11%|█▏        | 192/1692 [00:00<00:00, 1913.97it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 996.53it/s]


-----------------------------------------------------------------------------------------
| end of epoch   9 | time 89.58s | valid loss  7.21 | valid ppl  1357.54
-----------------------------------------------------------------------------------------
| epoch 10 | step 10000/16309 | loss 9.0587 | lr 0.00100 | ngrams/sec 14283.8 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1922.17it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 987.42it/s]


-----------------------------------------------------------------------------------------
| end of epoch  10 | time 89.51s | valid loss  7.19 | valid ppl  1322.50
-----------------------------------------------------------------------------------------
| epoch 11 | step 10000/16309 | loss 9.0433 | lr 0.00100 | ngrams/sec 14288.9 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1921.18it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 993.50it/s]


-----------------------------------------------------------------------------------------
| end of epoch  11 | time 89.47s | valid loss  7.18 | valid ppl  1310.63
-----------------------------------------------------------------------------------------
| epoch 12 | step 10000/16309 | loss 9.0255 | lr 0.00100 | ngrams/sec 14275.0 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1911.64it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 998.63it/s]


-----------------------------------------------------------------------------------------
| end of epoch  12 | time 89.49s | valid loss  7.18 | valid ppl  1314.00
-----------------------------------------------------------------------------------------
| epoch 13 | step 10000/16309 | loss 9.0149 | lr 0.00100 | ngrams/sec 14308.7 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1918.32it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 989.52it/s]


-----------------------------------------------------------------------------------------
| end of epoch  13 | time 89.41s | valid loss  7.19 | valid ppl  1326.78
-----------------------------------------------------------------------------------------
| epoch 14 | step 10000/16309 | loss 9.0027 | lr 0.00100 | ngrams/sec 14301.4 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1929.69it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 996.01it/s]


-----------------------------------------------------------------------------------------
| end of epoch  14 | time 89.34s | valid loss  7.13 | valid ppl  1254.31
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 15 | step 10000/16309 | loss 9.0011 | lr 0.00100 | ngrams/sec 14312.1 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1917.58it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1003.20it/s]


-----------------------------------------------------------------------------------------
| end of epoch  15 | time 89.21s | valid loss  7.13 | valid ppl  1254.26
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 16 | step 10000/16309 | loss 8.9920 | lr 0.00100 | ngrams/sec 14319.4 | eta 0h0m56s


 11%|█▏        | 192/1692 [00:00<00:00, 1903.04it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 993.16it/s]


-----------------------------------------------------------------------------------------
| end of epoch  16 | time 89.30s | valid loss  7.11 | valid ppl  1220.46
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 17 | step 10000/16309 | loss 8.9915 | lr 0.00100 | ngrams/sec 14296.1 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1940.82it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 989.39it/s]


-----------------------------------------------------------------------------------------
| end of epoch  17 | time 89.44s | valid loss  7.16 | valid ppl  1287.88
-----------------------------------------------------------------------------------------
| epoch 18 | step 10000/16309 | loss 8.9896 | lr 0.00100 | ngrams/sec 14286.7 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1908.84it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 993.27it/s]


-----------------------------------------------------------------------------------------
| end of epoch  18 | time 89.38s | valid loss  7.16 | valid ppl  1288.31
-----------------------------------------------------------------------------------------
| epoch 19 | step 10000/16309 | loss 8.9843 | lr 0.00100 | ngrams/sec 14336.1 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1917.18it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 997.77it/s]


-----------------------------------------------------------------------------------------
| end of epoch  19 | time 89.28s | valid loss  7.16 | valid ppl  1287.87
-----------------------------------------------------------------------------------------
| epoch 20 | step 10000/16309 | loss 8.9887 | lr 0.00100 | ngrams/sec 14322.7 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1924.03it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1001.62it/s]


-----------------------------------------------------------------------------------------
| end of epoch  20 | time 89.14s | valid loss  7.15 | valid ppl  1275.72
-----------------------------------------------------------------------------------------
| epoch 21 | step 10000/16309 | loss 8.9812 | lr 0.00100 | ngrams/sec 14344.1 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1926.52it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 993.94it/s]


-----------------------------------------------------------------------------------------
| end of epoch  21 | time 89.15s | valid loss  7.13 | valid ppl  1248.17
-----------------------------------------------------------------------------------------
| epoch 22 | step 10000/16309 | loss 8.9865 | lr 0.00100 | ngrams/sec 14340.8 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1948.30it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 992.19it/s]


-----------------------------------------------------------------------------------------
| end of epoch  22 | time 89.16s | valid loss  7.11 | valid ppl  1219.14
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 23 | step 10000/16309 | loss 8.9837 | lr 0.00100 | ngrams/sec 14327.1 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1924.70it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 996.55it/s]


-----------------------------------------------------------------------------------------
| end of epoch  23 | time 89.16s | valid loss  7.14 | valid ppl  1258.62
-----------------------------------------------------------------------------------------
| epoch 24 | step 10000/16309 | loss 8.9874 | lr 0.00100 | ngrams/sec 14339.1 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1929.56it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 998.41it/s]


-----------------------------------------------------------------------------------------
| end of epoch  24 | time 89.32s | valid loss  7.13 | valid ppl  1253.94
-----------------------------------------------------------------------------------------
| epoch 25 | step 10000/16309 | loss 8.9900 | lr 0.00100 | ngrams/sec 14320.6 | eta 0h0m56s


 12%|█▏        | 196/1692 [00:00<00:00, 1948.54it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 997.87it/s]


-----------------------------------------------------------------------------------------
| end of epoch  25 | time 89.15s | valid loss  7.15 | valid ppl  1270.78
-----------------------------------------------------------------------------------------
| epoch 26 | step 10000/16309 | loss 8.9875 | lr 0.00100 | ngrams/sec 14347.7 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1940.43it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1000.16it/s]


-----------------------------------------------------------------------------------------
| end of epoch  26 | time 89.12s | valid loss  7.16 | valid ppl  1280.54
-----------------------------------------------------------------------------------------
| epoch 27 | step 10000/16309 | loss 8.9853 | lr 0.00100 | ngrams/sec 14344.4 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1938.69it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1001.25it/s]


-----------------------------------------------------------------------------------------
| end of epoch  27 | time 89.17s | valid loss  7.12 | valid ppl  1235.26
-----------------------------------------------------------------------------------------
| epoch 28 | step 10000/16309 | loss 8.9868 | lr 0.00100 | ngrams/sec 14352.9 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1927.92it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 995.36it/s]


-----------------------------------------------------------------------------------------
| end of epoch  28 | time 89.17s | valid loss  7.13 | valid ppl  1245.04
-----------------------------------------------------------------------------------------
| epoch 29 | step 10000/16309 | loss 8.9902 | lr 0.00100 | ngrams/sec 14317.0 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1925.25it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 999.21it/s]


-----------------------------------------------------------------------------------------
| end of epoch  29 | time 89.21s | valid loss  7.10 | valid ppl  1208.90
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 30 | step 10000/16309 | loss 8.9820 | lr 0.00100 | ngrams/sec 14313.7 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1943.96it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 993.31it/s]


-----------------------------------------------------------------------------------------
| end of epoch  30 | time 89.28s | valid loss  7.10 | valid ppl  1208.84
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 31 | step 10000/16309 | loss 8.9791 | lr 0.00100 | ngrams/sec 14305.9 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1936.32it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 991.91it/s]


-----------------------------------------------------------------------------------------
| end of epoch  31 | time 89.36s | valid loss  7.06 | valid ppl  1164.74
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 32 | step 10000/16309 | loss 8.9764 | lr 0.00100 | ngrams/sec 14286.6 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1927.04it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 996.33it/s]


-----------------------------------------------------------------------------------------
| end of epoch  32 | time 89.51s | valid loss  7.12 | valid ppl  1231.04
-----------------------------------------------------------------------------------------
| epoch 33 | step 10000/16309 | loss 8.9696 | lr 0.00100 | ngrams/sec 14300.2 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1935.49it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1001.14it/s]


-----------------------------------------------------------------------------------------
| end of epoch  33 | time 89.24s | valid loss  7.07 | valid ppl  1173.53
-----------------------------------------------------------------------------------------
| epoch 34 | step 10000/16309 | loss 8.9735 | lr 0.00100 | ngrams/sec 14346.6 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1935.84it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1002.50it/s]


-----------------------------------------------------------------------------------------
| end of epoch  34 | time 89.01s | valid loss  7.13 | valid ppl  1251.78
-----------------------------------------------------------------------------------------
| epoch 35 | step 10000/16309 | loss 8.9709 | lr 0.00100 | ngrams/sec 14381.3 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1932.53it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 994.16it/s]


-----------------------------------------------------------------------------------------
| end of epoch  35 | time 88.94s | valid loss  7.10 | valid ppl  1213.05
-----------------------------------------------------------------------------------------
| epoch 36 | step 10000/16309 | loss 8.9688 | lr 0.00100 | ngrams/sec 14376.5 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1928.08it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 998.47it/s]


-----------------------------------------------------------------------------------------
| end of epoch  36 | time 88.84s | valid loss  7.09 | valid ppl  1199.15
-----------------------------------------------------------------------------------------
| epoch 37 | step 10000/16309 | loss 8.9632 | lr 0.00100 | ngrams/sec 14416.1 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1923.67it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 999.12it/s]


-----------------------------------------------------------------------------------------
| end of epoch  37 | time 88.69s | valid loss  7.11 | valid ppl  1227.11
-----------------------------------------------------------------------------------------
| epoch 38 | step 10000/16309 | loss 8.9654 | lr 0.00100 | ngrams/sec 14375.9 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1929.66it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 996.04it/s]


-----------------------------------------------------------------------------------------
| end of epoch  38 | time 89.11s | valid loss  7.07 | valid ppl  1170.69
-----------------------------------------------------------------------------------------
| epoch 39 | step 10000/16309 | loss 8.9588 | lr 0.00100 | ngrams/sec 14366.2 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1936.69it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 996.34it/s]


-----------------------------------------------------------------------------------------
| end of epoch  39 | time 88.92s | valid loss  7.11 | valid ppl  1219.21
-----------------------------------------------------------------------------------------
| epoch 40 | step 10000/16309 | loss 8.9615 | lr 0.00100 | ngrams/sec 14375.1 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1928.82it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1001.37it/s]


-----------------------------------------------------------------------------------------
| end of epoch  40 | time 88.89s | valid loss  7.10 | valid ppl  1207.63
-----------------------------------------------------------------------------------------
| epoch 41 | step 10000/16309 | loss 8.9720 | lr 0.00100 | ngrams/sec 14390.1 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1949.70it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1004.56it/s]


-----------------------------------------------------------------------------------------
| end of epoch  41 | time 88.85s | valid loss  7.11 | valid ppl  1225.50
-----------------------------------------------------------------------------------------
| epoch 42 | step 10000/16309 | loss 8.9595 | lr 0.00100 | ngrams/sec 14393.0 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1921.72it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 992.81it/s]


-----------------------------------------------------------------------------------------
| end of epoch  42 | time 89.11s | valid loss  7.09 | valid ppl  1195.15
-----------------------------------------------------------------------------------------
| epoch 43 | step 10000/16309 | loss 8.9567 | lr 0.00100 | ngrams/sec 14334.0 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1948.53it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 997.16it/s]


-----------------------------------------------------------------------------------------
| end of epoch  43 | time 89.01s | valid loss  7.09 | valid ppl  1196.59
-----------------------------------------------------------------------------------------
| epoch 44 | step 10000/16309 | loss 8.9524 | lr 0.00100 | ngrams/sec 14365.0 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1930.76it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1001.66it/s]


-----------------------------------------------------------------------------------------
| end of epoch  44 | time 88.96s | valid loss  7.05 | valid ppl  1156.45
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 45 | step 10000/16309 | loss 8.9554 | lr 0.00100 | ngrams/sec 14364.2 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1909.11it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 994.39it/s]


-----------------------------------------------------------------------------------------
| end of epoch  45 | time 89.04s | valid loss  7.07 | valid ppl  1176.41
-----------------------------------------------------------------------------------------
| epoch 46 | step 10000/16309 | loss 8.9598 | lr 0.00100 | ngrams/sec 14345.2 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1925.30it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 996.33it/s]


-----------------------------------------------------------------------------------------
| end of epoch  46 | time 89.06s | valid loss  7.06 | valid ppl  1165.34
-----------------------------------------------------------------------------------------
| epoch 47 | step 10000/16309 | loss 8.9502 | lr 0.00100 | ngrams/sec 14383.7 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1921.97it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1002.63it/s]


-----------------------------------------------------------------------------------------
| end of epoch  47 | time 88.84s | valid loss  7.12 | valid ppl  1235.59
-----------------------------------------------------------------------------------------
| epoch 48 | step 10000/16309 | loss 8.9478 | lr 0.00100 | ngrams/sec 14379.6 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1933.48it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1002.61it/s]


-----------------------------------------------------------------------------------------
| end of epoch  48 | time 88.94s | valid loss  7.09 | valid ppl  1194.00
-----------------------------------------------------------------------------------------
| epoch 49 | step 10000/16309 | loss 8.9429 | lr 0.00100 | ngrams/sec 14388.2 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1923.94it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 997.96it/s]


-----------------------------------------------------------------------------------------
| end of epoch  49 | time 88.87s | valid loss  7.10 | valid ppl  1217.87
-----------------------------------------------------------------------------------------
| epoch 50 | step 10000/16309 | loss 8.9498 | lr 0.00100 | ngrams/sec 14372.4 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1930.12it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 995.60it/s]


-----------------------------------------------------------------------------------------
| end of epoch  50 | time 89.00s | valid loss  7.10 | valid ppl  1207.96
-----------------------------------------------------------------------------------------
| epoch 51 | step 10000/16309 | loss 8.9397 | lr 0.00100 | ngrams/sec 14373.6 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1927.75it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 992.87it/s]


-----------------------------------------------------------------------------------------
| end of epoch  51 | time 88.96s | valid loss  7.07 | valid ppl  1174.16
-----------------------------------------------------------------------------------------
| epoch 52 | step 10000/16309 | loss 8.9381 | lr 0.00100 | ngrams/sec 14366.7 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1918.56it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 992.68it/s]


-----------------------------------------------------------------------------------------
| end of epoch  52 | time 88.95s | valid loss  7.07 | valid ppl  1171.46
-----------------------------------------------------------------------------------------
| epoch 53 | step 10000/16309 | loss 8.9437 | lr 0.00100 | ngrams/sec 14387.1 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1924.23it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 992.75it/s]


-----------------------------------------------------------------------------------------
| end of epoch  53 | time 88.82s | valid loss  7.08 | valid ppl  1191.30
-----------------------------------------------------------------------------------------
| epoch 54 | step 10000/16309 | loss 8.9404 | lr 0.00100 | ngrams/sec 14404.6 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1925.89it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 995.96it/s]


-----------------------------------------------------------------------------------------
| end of epoch  54 | time 88.70s | valid loss  7.07 | valid ppl  1173.53
-----------------------------------------------------------------------------------------
| epoch 55 | step 10000/16309 | loss 8.9330 | lr 0.00100 | ngrams/sec 14391.8 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1928.16it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 997.44it/s]


-----------------------------------------------------------------------------------------
| end of epoch  55 | time 88.84s | valid loss  7.09 | valid ppl  1196.89
-----------------------------------------------------------------------------------------
| epoch 56 | step 10000/16309 | loss 8.9383 | lr 0.00100 | ngrams/sec 14438.8 | eta 0h0m55s


 12%|█▏        | 195/1692 [00:00<00:00, 1936.63it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 999.32it/s]


-----------------------------------------------------------------------------------------
| end of epoch  56 | time 88.55s | valid loss  7.08 | valid ppl  1183.14
-----------------------------------------------------------------------------------------
| epoch 57 | step 10000/16309 | loss 8.9309 | lr 0.00100 | ngrams/sec 14428.7 | eta 0h0m55s


 11%|█▏        | 193/1692 [00:00<00:00, 1927.21it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 996.26it/s]


-----------------------------------------------------------------------------------------
| end of epoch  57 | time 88.66s | valid loss  7.09 | valid ppl  1193.97
-----------------------------------------------------------------------------------------
| epoch 58 | step 10000/16309 | loss 8.9333 | lr 0.00100 | ngrams/sec 14433.0 | eta 0h0m55s


 11%|█▏        | 193/1692 [00:00<00:00, 1927.58it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 995.18it/s]


-----------------------------------------------------------------------------------------
| end of epoch  58 | time 88.52s | valid loss  7.04 | valid ppl  1143.85
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 59 | step 10000/16309 | loss 8.9319 | lr 0.00100 | ngrams/sec 14425.5 | eta 0h0m55s


 11%|█▏        | 193/1692 [00:00<00:00, 1920.80it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 994.91it/s]


-----------------------------------------------------------------------------------------
| end of epoch  59 | time 88.61s | valid loss  7.11 | valid ppl  1225.41
-----------------------------------------------------------------------------------------
| epoch 60 | step 10000/16309 | loss 8.9261 | lr 0.00100 | ngrams/sec 14418.9 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1918.33it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 995.70it/s]


-----------------------------------------------------------------------------------------
| end of epoch  60 | time 88.69s | valid loss  7.10 | valid ppl  1208.66
-----------------------------------------------------------------------------------------
| epoch 61 | step 10000/16309 | loss 8.9283 | lr 0.00100 | ngrams/sec 14409.3 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1919.81it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1001.88it/s]


-----------------------------------------------------------------------------------------
| end of epoch  61 | time 88.67s | valid loss  7.11 | valid ppl  1223.89
-----------------------------------------------------------------------------------------
| epoch 62 | step 10000/16309 | loss 8.9301 | lr 0.00100 | ngrams/sec 14424.8 | eta 0h0m55s


 11%|█▏        | 194/1692 [00:00<00:00, 1921.93it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 993.28it/s]


-----------------------------------------------------------------------------------------
| end of epoch  62 | time 88.73s | valid loss  7.11 | valid ppl  1224.07
-----------------------------------------------------------------------------------------
| epoch 63 | step 10000/16309 | loss 8.9227 | lr 0.00100 | ngrams/sec 14406.3 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1918.77it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 995.60it/s]


-----------------------------------------------------------------------------------------
| end of epoch  63 | time 88.71s | valid loss  7.07 | valid ppl  1180.23
-----------------------------------------------------------------------------------------
| epoch 64 | step 10000/16309 | loss 8.9218 | lr 0.00100 | ngrams/sec 14428.0 | eta 0h0m55s


 11%|█▏        | 193/1692 [00:00<00:00, 1909.56it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1000.18it/s]


-----------------------------------------------------------------------------------------
| end of epoch  64 | time 88.58s | valid loss  7.09 | valid ppl  1204.10
-----------------------------------------------------------------------------------------
| epoch 65 | step 10000/16309 | loss 8.9235 | lr 0.00100 | ngrams/sec 14444.4 | eta 0h0m55s


 12%|█▏        | 195/1692 [00:00<00:00, 1929.48it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1004.64it/s]


-----------------------------------------------------------------------------------------
| end of epoch  65 | time 88.57s | valid loss  7.06 | valid ppl  1168.22
-----------------------------------------------------------------------------------------
| epoch 66 | step 10000/16309 | loss 8.9215 | lr 0.00100 | ngrams/sec 14386.0 | eta 0h0m56s


 12%|█▏        | 196/1692 [00:00<00:00, 1940.33it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 999.56it/s]


-----------------------------------------------------------------------------------------
| end of epoch  66 | time 89.01s | valid loss  7.09 | valid ppl  1199.61
-----------------------------------------------------------------------------------------
| epoch 67 | step 10000/16309 | loss 8.9215 | lr 0.00100 | ngrams/sec 14364.6 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1933.90it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1001.16it/s]


-----------------------------------------------------------------------------------------
| end of epoch  67 | time 88.91s | valid loss  7.10 | valid ppl  1214.08
-----------------------------------------------------------------------------------------
| epoch 68 | step 10000/16309 | loss 8.9179 | lr 0.00100 | ngrams/sec 14410.7 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1918.78it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1002.00it/s]


-----------------------------------------------------------------------------------------
| end of epoch  68 | time 88.64s | valid loss  7.08 | valid ppl  1192.28
-----------------------------------------------------------------------------------------
| epoch 69 | step 10000/16309 | loss 8.9190 | lr 0.00100 | ngrams/sec 14407.7 | eta 0h0m56s


 11%|█▏        | 192/1692 [00:00<00:00, 1916.85it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 999.31it/s]


-----------------------------------------------------------------------------------------
| end of epoch  69 | time 88.78s | valid loss  7.09 | valid ppl  1194.00
-----------------------------------------------------------------------------------------
| epoch 70 | step 10000/16309 | loss 8.9172 | lr 0.00100 | ngrams/sec 14410.4 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1928.97it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1003.13it/s]


-----------------------------------------------------------------------------------------
| end of epoch  70 | time 88.74s | valid loss  7.07 | valid ppl  1174.50
-----------------------------------------------------------------------------------------
| epoch 71 | step 10000/16309 | loss 8.9113 | lr 0.00100 | ngrams/sec 14364.3 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1921.54it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 998.09it/s]


-----------------------------------------------------------------------------------------
| end of epoch  71 | time 89.04s | valid loss  7.10 | valid ppl  1208.45
-----------------------------------------------------------------------------------------
| epoch 72 | step 10000/16309 | loss 8.9217 | lr 0.00100 | ngrams/sec 14390.6 | eta 0h0m56s


 12%|█▏        | 196/1692 [00:00<00:00, 1947.48it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 996.50it/s]


-----------------------------------------------------------------------------------------
| end of epoch  72 | time 88.92s | valid loss  7.07 | valid ppl  1170.36
-----------------------------------------------------------------------------------------
| epoch 73 | step 10000/16309 | loss 8.9138 | lr 0.00100 | ngrams/sec 14379.1 | eta 0h0m56s


 11%|█▏        | 192/1692 [00:00<00:00, 1913.75it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 998.52it/s]


-----------------------------------------------------------------------------------------
| end of epoch  73 | time 88.89s | valid loss  7.06 | valid ppl  1160.16
-----------------------------------------------------------------------------------------
| epoch 74 | step 10000/16309 | loss 8.9125 | lr 0.00100 | ngrams/sec 14385.6 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1932.27it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 999.55it/s]


-----------------------------------------------------------------------------------------
| end of epoch  74 | time 88.88s | valid loss  7.04 | valid ppl  1144.25
-----------------------------------------------------------------------------------------
| epoch 75 | step 10000/16309 | loss 8.9103 | lr 0.00100 | ngrams/sec 14394.9 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1940.98it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1000.31it/s]


-----------------------------------------------------------------------------------------
| end of epoch  75 | time 88.81s | valid loss  7.07 | valid ppl  1175.97
-----------------------------------------------------------------------------------------
| epoch 76 | step 10000/16309 | loss 8.9163 | lr 0.00100 | ngrams/sec 14374.3 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1949.35it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1001.89it/s]


-----------------------------------------------------------------------------------------
| end of epoch  76 | time 89.04s | valid loss  7.08 | valid ppl  1189.14
-----------------------------------------------------------------------------------------
| epoch 77 | step 10000/16309 | loss 8.9079 | lr 0.00100 | ngrams/sec 14377.4 | eta 0h0m56s


 11%|█▏        | 192/1692 [00:00<00:00, 1898.45it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 996.57it/s]


-----------------------------------------------------------------------------------------
| end of epoch  77 | time 88.86s | valid loss  7.05 | valid ppl  1148.39
-----------------------------------------------------------------------------------------
| epoch 78 | step 10000/16309 | loss 8.9096 | lr 0.00100 | ngrams/sec 14384.0 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1924.71it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1000.54it/s]


-----------------------------------------------------------------------------------------
| end of epoch  78 | time 88.89s | valid loss  7.06 | valid ppl  1165.93
-----------------------------------------------------------------------------------------
| epoch 79 | step 10000/16309 | loss 8.9108 | lr 0.00100 | ngrams/sec 14386.0 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1923.57it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1000.07it/s]


-----------------------------------------------------------------------------------------
| end of epoch  79 | time 88.89s | valid loss  7.09 | valid ppl  1198.80
-----------------------------------------------------------------------------------------
| epoch 80 | step 10000/16309 | loss 8.9095 | lr 0.00100 | ngrams/sec 14382.4 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1924.93it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1000.35it/s]


-----------------------------------------------------------------------------------------
| end of epoch  80 | time 88.91s | valid loss  7.05 | valid ppl  1154.95
-----------------------------------------------------------------------------------------
| epoch 81 | step 10000/16309 | loss 8.9084 | lr 0.00100 | ngrams/sec 14376.6 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1920.30it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 996.24it/s]


-----------------------------------------------------------------------------------------
| end of epoch  81 | time 88.95s | valid loss  7.04 | valid ppl  1138.80
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 82 | step 10000/16309 | loss 8.9077 | lr 0.00100 | ngrams/sec 14350.9 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1918.75it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1000.09it/s]


-----------------------------------------------------------------------------------------
| end of epoch  82 | time 89.01s | valid loss  7.06 | valid ppl  1163.77
-----------------------------------------------------------------------------------------
| epoch 83 | step 10000/16309 | loss 8.9017 | lr 0.00100 | ngrams/sec 14367.7 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1918.64it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 998.10it/s]


-----------------------------------------------------------------------------------------
| end of epoch  83 | time 88.96s | valid loss  7.09 | valid ppl  1195.57
-----------------------------------------------------------------------------------------
| epoch 84 | step 10000/16309 | loss 8.8967 | lr 0.00100 | ngrams/sec 14382.4 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1932.85it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1000.43it/s]


-----------------------------------------------------------------------------------------
| end of epoch  84 | time 88.91s | valid loss  7.06 | valid ppl  1161.32
-----------------------------------------------------------------------------------------
| epoch 85 | step 10000/16309 | loss 8.8987 | lr 0.00100 | ngrams/sec 14386.2 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1932.92it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 997.05it/s]


-----------------------------------------------------------------------------------------
| end of epoch  85 | time 88.75s | valid loss  7.03 | valid ppl  1134.14
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 86 | step 10000/16309 | loss 8.8995 | lr 0.00100 | ngrams/sec 14370.0 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1923.23it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1000.55it/s]


-----------------------------------------------------------------------------------------
| end of epoch  86 | time 89.06s | valid loss  7.05 | valid ppl  1157.16
-----------------------------------------------------------------------------------------
| epoch 87 | step 10000/16309 | loss 8.9023 | lr 0.00100 | ngrams/sec 14373.2 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1932.94it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1002.97it/s]


-----------------------------------------------------------------------------------------
| end of epoch  87 | time 88.92s | valid loss  7.03 | valid ppl  1124.92
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 88 | step 10000/16309 | loss 8.8971 | lr 0.00100 | ngrams/sec 14362.2 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1929.10it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 998.86it/s]


-----------------------------------------------------------------------------------------
| end of epoch  88 | time 88.95s | valid loss  7.03 | valid ppl  1128.15
-----------------------------------------------------------------------------------------
| epoch 89 | step 10000/16309 | loss 8.8957 | lr 0.00100 | ngrams/sec 14384.1 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1926.37it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 993.85it/s]


-----------------------------------------------------------------------------------------
| end of epoch  89 | time 88.86s | valid loss  7.04 | valid ppl  1141.62
-----------------------------------------------------------------------------------------
| epoch 90 | step 10000/16309 | loss 8.8928 | lr 0.00100 | ngrams/sec 14399.8 | eta 0h0m56s


 11%|█▏        | 191/1692 [00:00<00:00, 1907.79it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 998.26it/s]


-----------------------------------------------------------------------------------------
| end of epoch  90 | time 88.78s | valid loss  7.05 | valid ppl  1155.40
-----------------------------------------------------------------------------------------
| epoch 91 | step 10000/16309 | loss 8.8906 | lr 0.00100 | ngrams/sec 14406.2 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1913.65it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1001.20it/s]


-----------------------------------------------------------------------------------------
| end of epoch  91 | time 89.08s | valid loss  7.06 | valid ppl  1159.53
-----------------------------------------------------------------------------------------
| epoch 92 | step 10000/16309 | loss 8.8874 | lr 0.00100 | ngrams/sec 14358.1 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1930.24it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1001.93it/s]


-----------------------------------------------------------------------------------------
| end of epoch  92 | time 88.71s | valid loss  7.02 | valid ppl  1117.68
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 93 | step 10000/16309 | loss 8.8893 | lr 0.00100 | ngrams/sec 14400.1 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1936.17it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 994.55it/s]


-----------------------------------------------------------------------------------------
| end of epoch  93 | time 88.78s | valid loss  7.04 | valid ppl  1141.19
-----------------------------------------------------------------------------------------
| epoch 94 | step 10000/16309 | loss 8.8894 | lr 0.00100 | ngrams/sec 14393.0 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1918.26it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 991.34it/s]


-----------------------------------------------------------------------------------------
| end of epoch  94 | time 88.81s | valid loss  7.00 | valid ppl  1095.50
-----------------------------------------------------------------------------------------
| saving current state of model ...
| epoch 95 | step 10000/16309 | loss 8.8905 | lr 0.00100 | ngrams/sec 14384.4 | eta 0h0m56s


 11%|█▏        | 192/1692 [00:00<00:00, 1916.86it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1002.91it/s]


-----------------------------------------------------------------------------------------
| end of epoch  95 | time 88.86s | valid loss  7.03 | valid ppl  1126.34
-----------------------------------------------------------------------------------------
| epoch 96 | step 10000/16309 | loss 8.8928 | lr 0.00100 | ngrams/sec 14401.2 | eta 0h0m56s


 11%|█▏        | 194/1692 [00:00<00:00, 1919.53it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 991.72it/s]


-----------------------------------------------------------------------------------------
| end of epoch  96 | time 88.79s | valid loss  7.06 | valid ppl  1166.48
-----------------------------------------------------------------------------------------
| epoch 97 | step 10000/16309 | loss 8.8880 | lr 0.00100 | ngrams/sec 14381.1 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1930.57it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1007.16it/s]


-----------------------------------------------------------------------------------------
| end of epoch  97 | time 88.83s | valid loss  7.05 | valid ppl  1151.01
-----------------------------------------------------------------------------------------
| epoch 98 | step 10000/16309 | loss 8.8954 | lr 0.00100 | ngrams/sec 14411.4 | eta 0h0m56s


 11%|█▏        | 193/1692 [00:00<00:00, 1910.71it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 1002.49it/s]


-----------------------------------------------------------------------------------------
| end of epoch  98 | time 88.60s | valid loss  7.05 | valid ppl  1148.93
-----------------------------------------------------------------------------------------
| epoch 99 | step 10000/16309 | loss 8.8877 | lr 0.00100 | ngrams/sec 14435.5 | eta 0h0m55s


 11%|█▏        | 193/1692 [00:00<00:00, 1923.86it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 996.81it/s]


-----------------------------------------------------------------------------------------
| end of epoch  99 | time 88.72s | valid loss  7.06 | valid ppl  1162.53
-----------------------------------------------------------------------------------------
| epoch 100 | step 10000/16309 | loss 8.8887 | lr 0.00100 | ngrams/sec 14402.4 | eta 0h0m56s


 12%|█▏        | 195/1692 [00:00<00:00, 1937.27it/s]

Evaluating on validation set...


100%|██████████| 1692/1692 [00:01<00:00, 999.52it/s]
 10%|█         | 194/1910 [00:00<00:00, 1927.92it/s]

-----------------------------------------------------------------------------------------
| end of epoch 100 | time 88.79s | valid loss  7.08 | valid ppl  1189.64
-----------------------------------------------------------------------------------------
Evaluating on test set...


100%|██████████| 1910/1910 [00:01<00:00, 999.43it/s]


| End of training | test loss  7.05 | test ppl  1148.52


In [None]:
# Keep two copies of the trained weights in checkpoint.pth (presumably written by
# the training loop when it logs "saving current state of model ..." - confirm):
# a browser download to the local machine and a copy on the mounted Drive.
from google.colab import files
files.download('checkpoint.pth')
# Relative path: assumes the working directory is the one that contains the gdrive mount.
# The Generate section copies the file back from this Drive location.
!cp "checkpoint.pth" "gdrive/MyDrive/checkpoint-tied-128.pth"

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Generate

In [None]:
# Copy the checkpoint stored on Drive back into the working directory as checkpoint.pth,
# which is the path Args.checkpoint points at in the next cell.
!cp "gdrive/MyDrive/checkpoint-tied-128.pth" "checkpoint.pth" 

In [None]:
import torch

# Generation settings. A plain class is used as a namespace so the values can be
# read as args.<name> in this cell and the generation loop that follows.
class Args:
    data = 'gdrive/MyDrive/wikitext-2'  # dataset folder on Drive (not read in this cell)
    checkpoint = 'checkpoint.pth'       # weights file loaded into `model` below
    outf = 'generated.txt'              # file the generation cell opens for writing
    seed = 42                           # seed passed to torch.manual_seed
    cuda = True                         # request the GPU; falls back to CPU if unavailable
    temperature = 1.0 # temperature - higher will increase diversity
    log_interval = 10 # reporting interval
    words = 100                         # number of words the generation loop samples
args = Args()

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)

cuda_available = torch.cuda.is_available()
if cuda_available and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")
if args.cuda and not cuda_available:
    print("WARNING: CUDA was requested but is not available, falling back to CPU")

# Only select the GPU when it was requested AND actually exists, so the cell
# also runs on a CPU-only runtime.
device = torch.device("cuda" if args.cuda and cuda_available else "cpu")

if args.temperature < 1e-3:
    # The previous `parser.error(...)` call referenced a `parser` object that is
    # not defined in the visible notebook (likely left over from an argparse
    # script), so this guard would have raised NameError instead.
    raise ValueError("--temperature has to be greater or equal 1e-3")

# `model` must already exist (built by the training cells). map_location remaps
# the stored tensors onto `device`, so a GPU-saved checkpoint loads on CPU too.
model.load_state_dict(torch.load(args.checkpoint, map_location=device))
model.to(device)  # keep the model on the same device as the prompt tensor below
print(model)
model.eval()  # inference mode: disables dropout

ntokens = n_class
# Fixed offset into the training data used as the prompt; change it (or draw a
# random offset) to start generation from a different context.
input_idx = 104
# First column of train_data, `order` consecutive token ids starting at input_idx.
context = train_data[input_idx:order+input_idx, 0]
context_ids = [int(i) for i in context]
input_words = [corpus.dictionary.idx2word[i] for i in context_ids]
# NOTE: `input` shadows the builtin, but the generation cell reads this name.
input = torch.tensor(context_ids).to(device)
print(input)
print(input_words)

FNNModel(
  (embeddings): Embedding(28912, 200)
  (linear1): Linear(in_features=1400, out_features=200, bias=True)
  (linear2): Linear(in_features=200, out_features=28912, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)
tensor([27, 63, 64, 65, 66, 17, 67], device='cuda:0')
['a', 'penal', 'military', 'unit', 'serving', 'the', 'nation']


In [None]:
# Sample `args.words` tokens from the model one at a time, feeding each sampled
# token back in as the newest element of a fixed-length sliding context window.
glue = ' '

# Work on a copy so the seed context in `input` is left untouched and this
# cell can be re-run from the same starting words.
context = input.clone()

# Explicit utf8 matches how the corpus files are read and avoids
# platform-dependent defaults when writing non-ASCII tokens.
# no_grad: inference only, so skip building the autograd graph.
with open(args.outf, 'w', encoding="utf8") as outf, torch.no_grad():
    for i in range(args.words):
        output = model(context)
        # Treat the model output as log-weights over the vocabulary: scale by
        # temperature, exponentiate, and sample. multinomial accepts
        # unnormalised non-negative weights, so no explicit softmax is needed.
        word_weights = output.squeeze().div(args.temperature).exp().cpu()
        word_idx = torch.multinomial(word_weights, 1).item()
        word = corpus.dictionary.idx2word[word_idx]
        print(word)

        # Slide the window: drop the oldest token and append the new one,
        # keeping dtype/device identical to the existing context.
        next_token = torch.tensor([word_idx], dtype=context.dtype, device=context.device)
        context = torch.cat((context[1:], next_token), 0)

        # Compare strings by value (==), not identity (is): tokens read from
        # the dictionary are not guaranteed to be the same object as a literal.
        if word == "<eos>":
            outf.write('\n')
        elif word != "<sos>":  # ignore start-of-sentence predictions
            outf.write(word + glue)

        if i % args.log_interval == 0:
            print('| Generated {}/{} words'.format(i, args.words))

lent
| Generated 0/100 words
wall
became
most
forests
never
<unk>
recreational
liverpool
pradesh
representation
| Generated 10/100 words
it
almost
180
assistance
between
aml
north
veronica
pitching
historical
| Generated 20/100 words
border
large
monster
llosa
.
gacko
across
1947
church
barnes
| Generated 30/100 words
cheese
believed
donated
same
release
parallel
10
houses
ramon
years
| Generated 40/100 words
raised
<unk>
continued
independence
;
numerous
included
£
without
veto
| Generated 50/100 words
specimens
,
just
opening
remix
defeated
21st
any
usually
apparent
| Generated 60/100 words
wrote
had
entrance
attacked
6
julio
decade
hero
date
as
| Generated 70/100 words
phase
they
put
tennyson
tie
<eos>
curtis
and
15
watched
| Generated 80/100 words
<unk>
llosa
named
this
television
producer
population
semi
performing
of
| Generated 90/100 words
<unk>
@,@
epic
made
ocean
was
population
gave
management


In [None]:
# Copy the generated text into gdrive/MyDrive under a run-specific name.
!cp "generated.txt" "gdrive/MyDrive/generated-tied-128.txt"

In [None]:
# Flush pending writes and unmount the Drive that was mounted in the first cell,
# so the files copied above are persisted before the runtime goes away.
drive.flush_and_unmount()