In [0]:
import os
from io import open
import torch

class Dictionary(object):
    def __init__(self):
        self.word2idx = {}
        self.idx2word = []

    def add_word(self, word):
        if word not in self.word2idx:
            self.idx2word.append(word)
            self.word2idx[word] = len(self.idx2word) - 1
        return self.word2idx[word]

    def __len__(self):
        return len(self.idx2word)


class Corpus(object):
    def __init__(self, path):
        self.dictionary = Dictionary()
        self.train = self.tokenize(os.path.join(path, 'train.txt'))

    def tokenize(self, path):
        """Tokenizes a text file."""
        assert os.path.exists(path)
        # Add words to the dictionary
        with open(path, 'r', encoding="utf8") as f:
            for line in f:
                words = line.split() + ['<eos>']
                for word in words:
                    self.dictionary.add_word(word)

        # Tokenize file content
        with open(path, 'r', encoding="utf8") as f:
            idss = []
            for line in f:
                words = line.split() + ['<eos>']
                ids = []
                for word in words:
                    ids.append(self.dictionary.word2idx[word])
                idss.append(torch.tensor(ids).type(torch.int64))
            ids = torch.cat(idss)

        return ids

In [0]:
corpus = Corpus(path='data')

In [0]:
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)

def get_batch(source, i):
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i+seq_len]
    target = source[i+1:i+1+seq_len].view(-1)
    return data, target


In [0]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class RNNModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    def __init__(self, input_dim, embed_dim, hid_dim, n_layers, dropout=0.5):
        super(RNNModel, self).__init__()
        
        self.input_dim = input_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        
        self.encoder = nn.Embedding(input_dim, embed_dim)
    
        self.rnn = nn.GRU(embed_dim,
                          hid_dim,
                          n_layers,
                          dropout = 0 if n_layers < 2 else dropout)
        
        self.decoder = nn.Linear(hid_dim, input_dim)

        self.drop = nn.Dropout(dropout)
        self.init_weights()


    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, inputs, hidden):
        emb = self.drop(self.encoder(inputs))
        output, hidden = self.rnn(emb, hidden)
        output = self.drop(output)
        decoded = self.decoder(output)
        decoded = decoded.view(-1, self.input_dim)
        return F.log_softmax(decoded, dim=1), hidden

    def init_hidden(self, bsz):
        weight = next(self.parameters())

        return weight.new_zeros(self.n_layers, bsz, self.hid_dim)

In [0]:
vocab_size = len(corpus.dictionary)
emb_dim = 200
hid_dim = 250
n_layers = 2
dropout = 0.5
eval_batch_size = 10
batch_size = 128
bptt = 10

log_interval = 100

model = RNNModel(vocab_size, emb_dim, hid_dim, n_layers, dropout)


In [7]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 21,627,468 trainable parameters


In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [0]:
batch_size = 2
bptt = 10

train_loader = batchify(corpus.train, batch_size)


In [0]:
i,t = get_batch(train_loader,0)

In [29]:
i

tensor([[    0,   146],
        [    1,   209],
        [    2,    74],
        [    3,   479],
        [    4,    13],
        [    5,     1],
        [    6,   136],
        [    7,  1006],
        [    8, 27416],
        [    1,   111]], device='cuda:0')

In [30]:
t

tensor([    1,   209,     2,    74,     3,   479,     4,    13,     5,     1,
            6,   136,     7,  1006,     8, 27416,     1,   111,     2,   111],
       device='cuda:0')

In [0]:
import torch.optim as optim
lr = 0.001
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr)

In [0]:
model=model.to(device)
criterion=criterion.to(device)

In [0]:
def train(model, iterator, criterion):
    clip = 0.25
    total_loss = 0
    
    model.train()
    
    hidden = model.init_hidden(batch_size)
    
    
    for batch, i in enumerate(range(0, iterator.size(0) - 1, bptt)):
        data, targets = get_batch(iterator, i)

        optimizer.zero_grad()
        hidden = hidden.detach()
      
        output, hidden = model(data, hidden)    
        
        loss = criterion(output, targets)
                
        loss.backward()

        optimizer.step()
        
        total_loss += len(data)*loss.item()
        
        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(iterator) // bptt, lr,
                cur_loss, math.exp(cur_loss)))
            # total_loss = 0
        return total_loss / (len(iterator) - 1)


In [0]:
def evaluate(model, iterator, criterion):
    
    total_loss = 0
    
    model.eval()
    
    hidden = model.init_hidden(eval_batch_size)
    
    with torch.no_grad():
    
        for i in range(0, iterator.size(0) - 1, bptt):
            data, targets = get_batch(iterator, i)
            output, hidden = model(data, hidden)
            hidden = hidden.detach()
            
            total_loss += len(data) * criterion(output, targets).item()

        
    return total_loss / (len(iterator) - 1)

In [0]:
import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [0]:
N_EPOCHS = 50

best_valid_loss = float('inf')
counter = 0
patience = 2

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train(model, train_loader, criterion)
    # valid_loss = evaluate(model, valid_loader, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    # print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):.2f}')

    # if valid_loss < best_valid_loss:
    #     best_valid_loss = valid_loss
    #     #torch.save(model.state_dict(), 'tut2-model.pt')
    #     counter = 0 
    # else:
    #     lr /= 4.0
    #     counter += 1
    #     if counter >= patience:
    #         break

    

| epoch   0 |   100/  558 batches | lr 0.00 | loss  7.23 | ppl  1373.41
| epoch   0 |   200/  558 batches | lr 0.00 | loss  6.15 | ppl   470.69
| epoch   0 |   300/  558 batches | lr 0.00 | loss  5.94 | ppl   378.06
| epoch   0 |   400/  558 batches | lr 0.00 | loss  5.82 | ppl   338.20
| epoch   0 |   500/  558 batches | lr 0.00 | loss  5.69 | ppl   297.06
Epoch: 01 | Epoch Time: 0m 49s
| epoch   1 |   100/  558 batches | lr 0.00 | loss  5.53 | ppl   253.35
| epoch   1 |   200/  558 batches | lr 0.00 | loss  5.44 | ppl   231.24
| epoch   1 |   300/  558 batches | lr 0.00 | loss  5.38 | ppl   217.16
| epoch   1 |   400/  558 batches | lr 0.00 | loss  5.37 | ppl   215.16
| epoch   1 |   500/  558 batches | lr 0.00 | loss  5.32 | ppl   203.41
Epoch: 02 | Epoch Time: 0m 50s
| epoch   2 |   100/  558 batches | lr 0.00 | loss  5.26 | ppl   191.89
| epoch   2 |   200/  558 batches | lr 0.00 | loss  5.19 | ppl   179.65
| epoch   2 |   300/  558 batches | lr 0.00 | loss  5.15 | ppl   171.74
| 