In [1]:
# Download dataset

import requests
import os

if not 'shakespeare.txt' in os.listdir('./'):
    r = requests.get('https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
    open('shakespeare.txt', 'wb').write(r.content)

In [2]:
import numpy as np

# For every character create distinct ID and encode the text
n_chars = 0

with open('shakespeare.txt', 'r') as f:
    char2ids = {}
    idx2char = {}
    encoded = []
    for l in f.readlines():
        for char in l:
            n_chars += 1
            if char not in char2ids.keys():
                char2ids[char] = len(char2ids) + 1
                idx2char[len(char2ids) + 1] = char
            encoded.append(char2ids[char])    
    encoded = np.array(encoded, dtype=np.int8)

In [3]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

class ShakespeareDataset(Dataset):
    def __init__(self, encoded, n_unique_chars, window_size=101):
        self.n_unique_chars = n_unique_chars
        self.encoded = torch.tensor(encoded, dtype=torch.float)
        self.window_size = window_size
        
    def __len__(self):
        return len(self.encoded) - self.window_size - 1
    
    def __getitem__(self, idx):
        window = self.encoded[idx:idx+self.window_size]
        return window[:-1].long(), window[1:].long()

In [4]:
train_size = int(n_chars * 0.9) # Take 90% of text as training data
print(train_size)
train_dataset = ShakespeareDataset(encoded[:train_size], len(char2ids))
train_dataset[1][0].shape

1003854


torch.Size([100])

In [5]:
batch_size = 1
loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
# Model

class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embed = nn.Embedding(input_size, hidden_size)
        self.gru = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)
    
    def forward(self, X, hidden, cell):
        X = self.embed(X)
        X, (hidden, cell) = self.gru(X.unsqueeze(1), (hidden, cell))
        X = self.out(X.reshape(X.shape[0], -1))
        return X, hidden, cell
    
    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
        cell = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
        return hidden, cell
    
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embed = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, X, hidden, cell):
        print(X.shape)
        out = self.embed(X)
        print(out.shape)
        out, (hidden, cell) = self.lstm(out.unsqueeze(1), (hidden, cell))
        print(out.shape)
        out = self.fc(out.reshape(out.shape[0], -1))
        print(out.shape)
        input()
        return out, (hidden, cell)
    
    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
        cell = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
        return hidden, cell

In [9]:
model = CharRNN(len(char2ids), 256, 2, len(char2ids)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
check_every = 100

def generate(model, init_str="Ty", prediction_len=100, temperature=0.85):
    hidden, cell = model.init_hidden(batch_size)
    init_inp = torch.tensor([char2ids[char] for char in init_str]).long()
    pred = init_str

    for p in range(len(init_str) - 1):
        _, hidden, cell = model(init_inp[p].view(1).to(device), hidden, cell)

    last_char = init_inp[-1]

    for p in range(prediction_len):
        output, hidden, cell = model(last_char.view(1).to(device), hidden, cell)
        output_distance = output.data.view(-1).div(temperature).exp()
        top_char = torch.multinomial(output_distance, 1)[0]
        predicted_char = idx2char[int(top_char) + 1]
        pred += predicted_char
        last_char = torch.tensor(char2ids[predicted_char])

    return pred

for epoch in range(10):
    for j, (window, target) in enumerate(loader):
        model.zero_grad()
        hidden, cell = model.init_hidden(batch_size)
        optimizer.zero_grad()
        window = window.to(device)
        target = target.to(device)
        loss = 0
        
        for i in range(window.shape[1]): 
            out, hidden, cell = model(window[:,i], hidden, cell)  
            loss += criterion(out, target[:,i])

        if j%check_every == 0:
            print('+=+++++++++')
            print(generate(model))
            print("+++++++++++++++++")
            
        loss.backward()
        optimizer.step()
        print(f'\r {loss.item() / window.shape[1]}', end='')

+=+++++++++
TyHa q-hDcAH-VFzww;WWv-ecYK&&comhQhmtPf'KjNgvbQ
ppSVv3IcxtRP,3?EaM
ikr!gTutWENALsb,qpkPwI??UgZuQkQVv!d
+++++++++++++++++
 2.5038496398925782+=+++++++++
Tyrert an! aibe ieg for wike fesw kas ursok tshitef had en auyvet tors, gory oud sorwe soe wof thisd I
+++++++++++++++++
 2.3083114624023438+=+++++++++
Ty nathe gienc he she het,, heve houe leand, prefy beit and I and fow rorures shake the,
Whe of fay.


+++++++++++++++++
 2.398551330566406+=+++++++++
Ty, yow be the thim the theat hous bes, glings
Gowe nou, I bay the me bete thers or, a mothere such on
+++++++++++++++++
 2.1592558288574217+=+++++++++
Tyur hushom the then tham is in bancy the he elest ighonce:
And if
I hincetenss mer for its where, cre
+++++++++++++++++
 2.1172967529296876+=+++++++++
Tyn henhey, what om witarond cay; diso tiis lons.

Nard; waas the hath hath
I fing newer.

FUUES:
Yew 
+++++++++++++++++
 2.067519073486328+=+++++++++
Ty brown harchsinn the plad
and dblet.

FLICER:
A Rond gage brcevent the my on

/pytorch/aten/src/ATen/native/cuda/Loss.cu:247: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [0,0,0] Assertion `t >= 0 && t < n_classes` failed.


RuntimeError: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR