In [1]:
import torch
import torch.nn as nn
import string
import numpy
import random
import os
import sys
import unidecode

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

all_chars = string.printable
n_characters = len(all_chars)

file = unidecode.unidecode(open("robert_frost.txt").read())


In [3]:
class RNN(nn.Module):
    def __init__(self,input_size,hidden_size,num_layers,output_size):
        super(RNN,self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Input_size is max no of characters hidden size is dimension of embedding
        self.embed = nn.Embedding(input_size,hidden_size) 
        self.lstm = nn.LSTM(hidden_size,hidden_size,num_layers,batch_first=True)
        self.fc = nn.Linear(hidden_size,output_size)
    
    def forward(self,x,hidden,cell):

        out = self.embed(x)
        out,(hidden,cell) = self.lstm(out.unsqueeze(1),(hidden,cell))
        out = self.fc(out.reshape(out.shape[0],-1))

        return out,(hidden,cell)
    
    def init_hidden(self,batch_size):

        hidden = torch.zeros(self.num_layers,batch_size,self.hidden_size).to(device)
        cell = torch.zeros(self.num_layers,batch_size,self.hidden_size).to(device)

        return hidden,cell

In [4]:
class Generator():
    def __init__(self):
        self.chunk_len = 
        self.num_epoch = 2000
        self.batch_size = 1
        self.print_every = 2
        self.hidden_size = 300
        self.num_layers = 3
        self.lr = 0.003
        self.rnn = RNN(n_characters,self.hidden_size,self.num_layers,n_characters).to(device)

    def char_tensor(self,string):
        tensor = torch.zeros(len(string)).long()
        for c in range(len(string)):
            # Replacing zero with character index
            tensor[c] = all_chars.index(string[c])
        return tensor
    
    def get_random_batch(self):
        # start index of our random batch
        start_idx = random.randint(0,len(file)-self.chunk_len)
        # end index of our random batch
        end_idx = start_idx + self.chunk_len + 1
        # picking text string from file using start and end index
        text_str = file[start_idx:end_idx]
        text_input = torch.zeros(self.batch_size,self.chunk_len)
        text_target = torch.zeros(self.batch_size,self.chunk_len)

        # Targets will be next character in sequence since we will be predicting next characters
        for i in range(self.batch_size):
            text_input[i,:] = self.char_tensor(text_str[:-1])
            text_target[i,:] = self.char_tensor(text_str[1:])
        
        return text_input.long(),text_target.long()

    def generate(self,initial_str='A',prediction_len=1000,temprature=0.85):
        hidden,cell = self.rnn.init_hidden(self.batch_size)
        initial_input = self.char_tensor(initial_str)
        predicted = initial_str
        
        # Handeling LSTM hidden and cell if initial string is longer than 1
        for p in range(len(initial_str)-1):
            _,(hidden,cell) = self.rnn(initial_input[p].view(1).to(device),hidden,cell)

        last_char = initial_input[-1]

        for p in range(prediction_len):
            output,(hidden,cell) = self.rnn(last_char.view(1).to(device),hidden,cell)
            # Handeling temperature for predictions
            output_dist = output.data.view(-1).div(temprature).exp()
            # Finding characters with highest probs but with a little randomisation too
            top_char_probs = torch.multinomial(output_dist,1)
            top_char = top_char_probs[0]
            # print(top_char)
            predicted_char = all_chars[top_char]
            predicted+=predicted_char

            last_char = self.char_tensor(predicted_char)
        
        return predicted
    
    def save_weights(self,filename):
        torch.save(self.rnn.state_dict(),filename)
    
    def load_weights(self,filename):
        self.rnn.load_state_dict(torch.load(filename))


    def train(self):
        self.rnn = RNN(n_characters,self.hidden_size,self.num_layers,n_characters).to(device)

        optimizer = torch.optim.Adam(self.rnn.parameters(),lr=self.lr)
        criterion = nn.CrossEntropyLoss()

        print("==> Starting Training")

        for epoch in range(1,self.num_epoch+1):
            input,target = self.get_random_batch()

            hidden,cell = self.rnn.init_hidden(self.batch_size)

            self.rnn.zero_grad()
            loss = 0
            input = input.to(device)
            target = target.to(device)

            # providing characters 1 by 1
            for c in range(self.chunk_len):
                output,(hidden,cell) = self.rnn(input[:,c],hidden,cell)
                loss += criterion(output,target[:,c])
            
            loss.backward()
            optimizer.step()
            loss = loss.item()/self.chunk_len

            if epoch % self.print_every == 0:
                print(f"===> Epoch:{epoch} ==> Loss:{loss}")
                f_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
                initial_letter = f_chars[random.randint(0,len(f_chars)-1)]
                print(f"-----\n{self.generate(initial_str=initial_letter)}\n-----")
        self.save_weights(f"robert_data_weights_{epoch}.pth")

In [5]:
gen_model_1 = Generator()

In [6]:
gen_model_1.load_weights('robert_frost_weights_2000.pth')

RuntimeError: Error(s) in loading state_dict for RNN:
	Missing key(s) in state_dict: "lstm.weight_ih_l2", "lstm.weight_hh_l2", "lstm.bias_ih_l2", "lstm.bias_hh_l2". 
	size mismatch for embed.weight: copying a param with shape torch.Size([100, 120]) from checkpoint, the shape in current model is torch.Size([100, 300]).
	size mismatch for lstm.weight_ih_l0: copying a param with shape torch.Size([480, 120]) from checkpoint, the shape in current model is torch.Size([1200, 300]).
	size mismatch for lstm.weight_hh_l0: copying a param with shape torch.Size([480, 120]) from checkpoint, the shape in current model is torch.Size([1200, 300]).
	size mismatch for lstm.bias_ih_l0: copying a param with shape torch.Size([480]) from checkpoint, the shape in current model is torch.Size([1200]).
	size mismatch for lstm.bias_hh_l0: copying a param with shape torch.Size([480]) from checkpoint, the shape in current model is torch.Size([1200]).
	size mismatch for lstm.weight_ih_l1: copying a param with shape torch.Size([480, 120]) from checkpoint, the shape in current model is torch.Size([1200, 300]).
	size mismatch for lstm.weight_hh_l1: copying a param with shape torch.Size([480, 120]) from checkpoint, the shape in current model is torch.Size([1200, 300]).
	size mismatch for lstm.bias_ih_l1: copying a param with shape torch.Size([480]) from checkpoint, the shape in current model is torch.Size([1200]).
	size mismatch for lstm.bias_hh_l1: copying a param with shape torch.Size([480]) from checkpoint, the shape in current model is torch.Size([1200]).
	size mismatch for fc.weight: copying a param with shape torch.Size([100, 120]) from checkpoint, the shape in current model is torch.Size([100, 300]).

In [16]:
seed_text = "I been locked inside your heart shaped box for weeks "
out_1 = gen_model_1.generate(initial_str=seed_text,prediction_len=100*2,temprature=0.80)
print(out_1)

I been locked inside your heart shaped box for weeks to times or the swarm
And see make the cellar house that was where the farm- 
John some house the house the read that was no one of know of the same that I'm old tell them light
To mean that he stairs


In [18]:
seed_text = "Look at the stars\nlook how they shine for you\nthey were all yellow\n"
out_2 = gen_model_1.generate(initial_str=seed_text,prediction_len=100*2,temprature=0.80)
print(out_2)

Look at the stars
look how they shine for you
they were all yellow
We lip the tempting for a prese.' 
'Cold do be new in see she cellar she had from that, 
But down an attic,
The book out of the jest in a use out of a beltess window
The left it a back where it rattle
