In [6]:
import torch
import math
import random
from torch import nn

In [7]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(
            input_size=hidden_size, 
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)


    def forward(self, x, hidden, cell):
        output = self.embedding(x)
        output, (hidden, cell) = self.lstm(output.unsqueeze(1), (hidden, cell))
        output = self.fc(output.reshape(output.shape[0], -1))
        return output, (hidden, cell)


    def init_hidden(self, batch_size=1):
        hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(self.device)
        cell = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(self.device)
        return hidden, cell

In [24]:
from torch.autograd import Variable
import string

def char_tensor(strings):
    all_characters = string.printable
    tensor = torch.zeros(len(strings)).long()
    for index,char in enumerate(strings):
        tensor[index] = all_characters.index(char)
    return Variable(tensor)

CHUNK_LEN = 200

In [25]:
criterion = torch.nn.CrossEntropyLoss()

In [26]:
print(criterion)

CrossEntropyLoss()


In [27]:
lstm = LSTM(100,128,2,100)

In [28]:
hidden,cell = lstm.init_hidden()

In [29]:
import unidecode

TRAIN_PATH = '../data/dickens_train.txt'

def load_dataset(path):
    all_characters = string.printable
    n_characters = len(all_characters)

    file = unidecode.unidecode(open(path, 'r').read())
    return file

In [30]:
data = load_dataset(TRAIN_PATH)

In [64]:
avg_bpc = 0
bpc_losses = []
num_iters = 0
for i in range(0,len(data)-1,CHUNK_LEN):
    hidden, cell = lstm.init_hidden()
    chunk = data[i:i+CHUNK_LEN+1]
    inp = char_tensor(chunk[:-1]).unsqueeze(0) #adds a dimension in the 0th index
    # print(inp.size()) = 200
    target = char_tensor(chunk[1:]).unsqueeze(0)
    # print(target.size()) = 200
    
    if len(target.squeeze(0)) != 200:
        continue
    
    loss = 0
    
    for c in range(CHUNK_LEN):
        with torch.no_grad():
            output, (hidden,cell) = lstm(inp[:,c],hidden,cell)
        loss += criterion(output, target[:,c].view(1))
        
    loss = loss.item()/CHUNK_LEN #gets element in the tensor
    
    # Bits per character = CrossEntropyLoss / log2
    bpc = loss / math.log(2)
    avg_bpc += bpc
    bpc_losses.append(bpc)
    num_iters += 1
    if num_iters % 1500 == 0:
        print(f"Number of iterations run for BPC calc {num_iters}")
avg_bpc / num_iters


Number of iterations run for BPC calc 100
Number of iterations run for BPC calc 200
Number of iterations run for BPC calc 300
Number of iterations run for BPC calc 400
Number of iterations run for BPC calc 500
Number of iterations run for BPC calc 600
Number of iterations run for BPC calc 700
Number of iterations run for BPC calc 800
Number of iterations run for BPC calc 900
Number of iterations run for BPC calc 1000
Number of iterations run for BPC calc 1100
Number of iterations run for BPC calc 1200
Number of iterations run for BPC calc 1300
Number of iterations run for BPC calc 1400
Number of iterations run for BPC calc 1500
Number of iterations run for BPC calc 1600
Number of iterations run for BPC calc 1700
Number of iterations run for BPC calc 1800
Number of iterations run for BPC calc 1900
Number of iterations run for BPC calc 2000
Number of iterations run for BPC calc 2100
Number of iterations run for BPC calc 2200
Number of iterations run for BPC calc 2300
Number of iterations

6.6133131691736535

In [63]:
len(target.squeeze(0))

72

In [56]:
hidden, cell = lstm.init_hidden()
chunk = data[20:20+CHUNK_LEN+1]
inp = char_tensor(chunk[:-1]).unsqueeze(0)
# print(inp.size()) = 200
target = char_tensor(chunk[1:]).unsqueeze(0)
# print(target.size()) = 200

loss = 0

for c in range(CHUNK_LEN):
    with torch.no_grad():
        output, (hidden,cell) = lstm(inp[:,c],hidden,cell)
    loss += criterion(output, target[:,c].view(1))
    

In [58]:
loss.item()

920.1068115234375

In [65]:
bpc_losses

[6.641715276895048,
 6.612424622695749,
 6.613543803235893,
 6.619682125458815,
 6.614954005938519,
 6.612783007022924,
 6.612053910652162,
 6.609100542019873,
 6.615223454597279,
 6.623210494006693,
 6.610224125315877,
 6.604962391785097,
 6.611299718572987,
 6.6063008295671715,
 6.605992196381239,
 6.6048294285580615,
 6.613792558942101,
 6.620564437733577,
 6.607709711443039,
 6.607501020815177,
 6.614798148381001,
 6.607916200692972,
 6.609512199692978,
 6.607858524591179,
 6.613577704456031,
 6.60791752151973,
 6.611668229238643,
 6.609683026620428,
 6.611734710852161,
 6.608284711358628,
 6.608948206667045,
 6.609632835203599,
 6.617318726112046,
 6.6113490294386414,
 6.610191104646912,
 6.6095711966215305,
 6.604573628442474,
 6.60831817230318,
 6.598270643150308,
 6.6215643035898575,
 6.60553342922041,
 6.616465031750387,
 6.609407414103461,
 6.611495641208848,
 6.605879926106756,
 6.616005824313971,
 6.606854255979036,
 6.612336127302921,
 6.616137026438662,
 6.606194723150895

In [66]:
len(bpc_losses)

5683