# Init

In [1]:
# Execute this code block to install dependencies when running on colab
try:
    import torch
except:
    from os.path import exists
    from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
    platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
    cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
    accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

    !pip install -q http://download.pytorch.org/whl/{accelerator}/torch-1.0.0-{platform}-linux_x86_64.whl torchvision

try: 
    import torchbearer
except:
    !pip install torchbearer

Collecting torchbearer
[?25l  Downloading https://files.pythonhosted.org/packages/ff/e9/4049a47dd2e5b6346a2c5d215b0c67dce814afbab1cd54ce024533c4834e/torchbearer-0.5.3-py3-none-any.whl (138kB)
[K     |██▍                             | 10kB 19.0MB/s eta 0:00:01[K     |████▊                           | 20kB 2.2MB/s eta 0:00:01[K     |███████▏                        | 30kB 2.9MB/s eta 0:00:01[K     |█████████▌                      | 40kB 3.2MB/s eta 0:00:01[K     |███████████▉                    | 51kB 2.6MB/s eta 0:00:01[K     |██████████████▎                 | 61kB 2.9MB/s eta 0:00:01[K     |████████████████▋               | 71kB 3.1MB/s eta 0:00:01[K     |███████████████████             | 81kB 3.5MB/s eta 0:00:01[K     |█████████████████████▍          | 92kB 3.6MB/s eta 0:00:01[K     |███████████████████████▊        | 102kB 3.5MB/s eta 0:00:01[K     |██████████████████████████      | 112kB 3.5MB/s eta 0:00:01[K     |████████████████████████████▌   | 122kB 3.5MB/

# Prepare Penn Treebank dataset

In [0]:
# automatically reload external modules if they change
%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchtext import data
from torchtext import vocab
from torchtext import datasets

import numpy as np
from matplotlib import pyplot as plt

from tqdm import tqdm

In [3]:
def tokenize(x):
    """Split a raw text line into whitespace-separated tokens."""
    return x.split()


# One lower-cased, batch-first text field shared by all three Penn Treebank splits.
TEXT = data.Field(tokenize=tokenize, lower=True, sequential=True, batch_first=True)
train_dataset, val_dataset, test_dataset = datasets.PennTreebank.splits(TEXT)

# Vocabulary comes from the training split only; GloVe 6B/300d vectors are attached to it.
TEXT.build_vocab(train_dataset, vectors=vocab.GloVe(name='6B', dim=300))

# Vocabulary size and the embedding matrix aligned with that vocabulary.
word_embeddings = TEXT.vocab.vectors
vocab_size = len(TEXT.vocab)
print(vocab_size, word_embeddings.size(), sep="\n")

# Model / batching hyper-parameters used by the cells below.
embeddings_length, hidden_size, batch_size = 300, 256, 32

downloading ptb.train.txt


ptb.train.txt: 5.10MB [00:00, 57.5MB/s]                   


downloading ptb.valid.txt


ptb.valid.txt: 400kB [00:00, 12.6MB/s]                   


downloading ptb.test.txt


ptb.test.txt: 450kB [00:00, 10.7MB/s]                   
.vector_cache/glove.6B.zip: 862MB [06:27, 2.22MB/s]                          
100%|█████████▉| 399601/400000 [00:54<00:00, 7447.95it/s]

10001
torch.Size([10001, 300])


In [0]:
# BPTT iterators over the three splits. The batch size comes from the shared
# `batch_size` variable (instead of a second hard-coded 32) so it cannot drift
# away from the LSTM state tensors, which the training cell sizes with the
# same variable.
train_iter, val_iter, test_iter = data.BPTTIterator.splits(
    (train_dataset, val_dataset, test_dataset),
    batch_size=batch_size,
    bptt_len=30,
    repeat=False,
)

In [0]:
class LstmLangModel(nn.Module):
    """LSTM language model: embedding lookup -> batch-first LSTM -> linear layer over the vocabulary.

    Args:
        batch_size: stored on the instance; not used by the layers themselves.
        hidden_size: size of the LSTM hidden state and input size of the output layer.
        vocab_size: number of rows in the embedding table and number of output logits.
        embeddings_length: dimensionality of each embedding vector.
        weights: tensor copied into the embedding table at construction time.
    """

    def __init__(self, batch_size, hidden_size, vocab_size, embeddings_length, weights):
        super().__init__()
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size

        # Embedding table initialised from the supplied weight matrix.
        self.embed = nn.Embedding(vocab_size, embeddings_length)
        self.embed.weight.data.copy_(weights)

        self.lstm = nn.LSTM(embeddings_length, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, h):
        """Run the model on a batch of token ids.

        Args:
            x: token-id tensor, batch dimension first.
            h: initial ``(h, c)`` state handed to the LSTM.

        Returns:
            ``(logits, (h, c))`` where ``logits`` has one row per (batch, step)
            position and ``(h, c)`` is the state returned by the LSTM.
        """
        embedded = self.embed(x)
        output_seq, (hidden, cell) = self.lstm(embedded, h)
        # Collapse the batch and step axes so the linear layer scores every position at once.
        n_batch, n_steps, n_features = output_seq.size()
        logits = self.fc(output_seq.reshape(n_batch * n_steps, n_features))
        return logits, (hidden, cell)



In [6]:
# Model / batching hyper-parameters.
embeddings_length, hidden_size, batch_size = 300, 256, 32

# Vocabulary size and the embedding matrix stored on the TEXT field's vocabulary.
word_embeddings = TEXT.vocab.vectors
vocab_size = len(TEXT.vocab)
print(vocab_size, word_embeddings.size(), sep="\n")

10001
torch.Size([10001, 300])


In [7]:
# Build a model instance and switch it to evaluation mode; the value returned by
# eval() is the cell's last expression, so the module structure is displayed.
model = LstmLangModel(
    batch_size=batch_size,
    hidden_size=hidden_size,
    vocab_size=vocab_size,
    embeddings_length=embeddings_length,
    weights=word_embeddings,
)
model.eval()

LstmLangModel(
  (embed): Embedding(10001, 300)
  (lstm): LSTM(300, 256, batch_first=True)
  (fc): Linear(in_features=256, out_features=10001, bias=True)
)

# Torchbearer Section

In [8]:
# Train on the GPU when PyTorch can see one, otherwise on the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

# Fresh language model placed on the chosen device.
net_lstm = LstmLangModel(batch_size, hidden_size, vocab_size, embeddings_length, word_embeddings).to(device)

criterion = nn.CrossEntropyLoss()
# Adam over the trainable parameters only.
# NOTE(review): the name `optim` shadows the `torch.optim` module alias from the import cell.
optim = torch.optim.Adam(p for p in net_lstm.parameters() if p.requires_grad)

num_epochs = 100
# Per-epoch history buffers.
epoch_list, train_loss_lstm_list, train_perp_lstm_list = [], [], []

def detach(states):
    """Return a list holding a detached copy of every tensor in ``states``.

    Detaching cuts the autograd history, so a later backward pass stops at
    these tensors instead of reaching into the graph that produced them.
    """
    detached = []
    for tensor in states:
        detached.append(tensor.detach())
    return detached

# Training loop: one pass over train_iter per epoch. Records the mean batch loss
# and its exponential (perplexity), and checkpoints the weights periodically.
for epoch in range(num_epochs):
    train_loss = 0
    # Zero (h, c) state at the start of every epoch, allocated directly on the
    # training device.
    states = (torch.zeros(1, batch_size, hidden_size, device=device),
              torch.zeros(1, batch_size, hidden_size, device=device))
    net_lstm.train()

    for batch in train_iter:
        text = batch.text.to(device)
        labels = batch.target.to(device)
        # Swap the two axes so the batch dimension comes first, as the model's
        # LSTM is built with batch_first=True.
        # (presumably the iterator yields (bptt_len, batch) tensors; verify)
        text = text.permute(1, 0)
        labels = labels.permute(1, 0)

        optim.zero_grad()
        # Carry the state over from the previous batch, but cut its autograd
        # history so backward() only covers the current batch.
        states = detach(states)
        outputs, states = net_lstm(text, states)
        # outputs has one row per (batch, step) position; flatten labels to match.
        loss = criterion(outputs, labels.reshape(-1))
        train_loss += loss.item()
        loss.backward()
        optim.step()

    avg_train_loss = train_loss / len(train_iter)
    perplexity = np.exp(avg_train_loss)
    print('Epoch [{}/{}], Loss: {:.4f}, Perplexity: {:5.2f}'.format(epoch + 1, num_epochs, avg_train_loss, perplexity))
    train_loss_lstm_list.append(avg_train_loss)
    train_perp_lstm_list.append(perplexity)

    # Checkpoint every 100th epoch and after the final epoch. A single condition
    # avoids writing the same file twice when both cases coincide.
    if epoch % 100 == 0 or epoch == num_epochs - 1:
        torch.save(net_lstm.state_dict(), r"./LSTM_" + str(epoch) + r".pth")

100%|█████████▉| 399601/400000 [01:10<00:00, 7447.95it/s]

Epoch [1/100], Loss: 5.7928, Perplexity: 327.94
Epoch [2/100], Loss: 5.0672, Perplexity: 158.73
Epoch [3/100], Loss: 4.7651, Perplexity: 117.34
Epoch [4/100], Loss: 4.5422, Perplexity: 93.90
Epoch [5/100], Loss: 4.3621, Perplexity: 78.42
Epoch [6/100], Loss: 4.2096, Perplexity: 67.33
Epoch [7/100], Loss: 4.0768, Perplexity: 58.96
Epoch [8/100], Loss: 3.9579, Perplexity: 52.35


KeyboardInterrupt: ignored