<a href="https://colab.research.google.com/github/Aryan-401/GenAI-RNN/blob/master/Shakespeare_Type_Generation_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import string
import random
import sys
!pip install Unidecode
import unidecode
from torchtext.utils import download_from_url
from torch.utils.tensorboard import SummaryWriter

Collecting Unidecode
  Downloading Unidecode-1.3.6-py3-none-any.whl (235 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.9/235.9 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Unidecode
Successfully installed Unidecode-1.3.6


In [None]:
    # Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(device)

cuda


In [None]:
# Vocabulary: every character of string.printable; a character's position in
# this string is the integer id used to encode it.
all_characters = string.printable
n_characters = len(string.printable)
print(string.printable, len(string.printable))

0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	
 100


In [None]:
# Download the Shakespeare corpus next to the notebook.
file_path = download_from_url('https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt', './shakespeare.txt')
# Read through a context manager so the file handle is closed deterministically,
# decode explicitly as UTF-8 (independent of the platform default), and
# transliterate to plain ASCII with unidecode.
with open('./shakespeare.txt', encoding='utf-8') as corpus_file:
    file_shakespeare = unidecode.unidecode(corpus_file.read())

100%|██████████| 1.12M/1.12M [00:00<00:00, 102MB/s]


In [None]:
class RNN(nn.Module):
    """Character-level LSTM model that is advanced one time step per call.

    Pipeline: embedding -> multi-layer LSTM -> linear projection to
    ``output_size`` scores.

    Args:
        input_size: Number of distinct input indices (embedding rows).
        hidden_size: Width of both the embedding and the LSTM state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of scores produced per step.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        # Zero-argument super() keeps working when this cell is re-executed
        # and the name ``RNN`` is rebound to a new class object.
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embed = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        """Advance the model by a single time step.

        Args:
            x: 1-D tensor of integer indices, one per batch element.
            hidden: LSTM hidden state, ``(num_layers, batch, hidden_size)``.
            cell: LSTM cell state, same shape as ``hidden``.

        Returns:
            ``(scores, (hidden, cell))`` where ``scores`` has shape
            ``(batch, output_size)`` and the states are the updated ones.
        """
        out = self.embed(x)
        # The LSTM is batch_first, so insert a length-1 sequence axis.
        out, (hidden, cell) = self.lstm(out.unsqueeze(1), (hidden, cell))
        # Drop the length-1 sequence axis again before the projection.
        out = self.fc(out.reshape(out.shape[0], -1))
        return out, (hidden, cell)

    def init_hidden(self, batch_size):
        """Return zeroed ``(hidden, cell)`` states for ``batch_size`` sequences.

        The states are created with the dtype and device of the model's own
        parameters, so they stay valid after ``.to(...)`` / ``.double()``
        instead of depending on a notebook-level ``device`` variable.
        """
        reference = self.fc.weight
        shape = (self.num_layers, batch_size, self.hidden_size)
        hidden = torch.zeros(shape, dtype=reference.dtype, device=reference.device)
        cell = torch.zeros(shape, dtype=reference.dtype, device=reference.device)
        return hidden, cell

In [None]:
import random
class Generator():
    """Trains a character-level RNN on ``file_shakespeare`` and samples text from it.

    ``train()`` or ``get_model()`` must be called before ``generate()``,
    because they are what create ``self.rnn``.
    """

    def __init__(self):
        self.chunk_len = 250      # characters per training sequence
        self.num_epochs = 5000    # training iterations; each one uses a single random batch
        self.batch_size = 1
        self.print_every = 200    # iterations between log line / checkpoint / sample
        self.hidden_size = 256
        self.num_layers = 2
        self.file = file_shakespeare
        self.lr = 0.003

    def char_tensor(self, string):
        """Encode ``string`` as a 1-D long tensor of indices into ``all_characters``.

        Raises:
            ValueError: if a character is not present in ``all_characters``.
        """
        return torch.tensor([all_characters.index(ch) for ch in string], dtype=torch.long)

    def get_random_batch(self):
        """Draw ``batch_size`` random chunks and return ``(input, target)``.

        Both tensors are long tensors of shape ``(batch_size, chunk_len)``;
        ``target`` is ``input`` shifted one character to the right.

        Raises:
            ValueError: if the corpus has fewer than ``chunk_len + 1`` characters.
        """
        # Each sample needs chunk_len + 1 characters (inputs plus the final
        # target). random.randint is inclusive on both ends, so the largest
        # valid start index is len - chunk_len - 1.
        last_start = len(self.file) - self.chunk_len - 1
        if last_start < 0:
            raise ValueError("corpus is shorter than chunk_len + 1 characters")

        text_input = torch.zeros(self.batch_size, self.chunk_len, dtype=torch.long)
        text_target = torch.zeros(self.batch_size, self.chunk_len, dtype=torch.long)

        for i in range(self.batch_size):
            # Sample inside the loop so every row is an independent chunk.
            start_index = random.randint(0, last_start)
            encoded = self.char_tensor(self.file[start_index: start_index + self.chunk_len + 1])
            text_input[i, :] = encoded[:-1]
            text_target[i, :] = encoded[1:]

        return text_input, text_target

    def generate(self, initial_string = 'A', prediction_len = 100, temprature = 1.0):
        """Sample a continuation of ``initial_string`` from ``self.rnn``.

        Args:
            initial_string: Non-empty priming text; it is included in the result.
            prediction_len: Maximum number of characters to sample.
            temprature: Divisor applied to the scores before sampling
                (expected to be > 0; smaller values sharpen the distribution).

        Returns:
            ``initial_string`` followed by the sampled characters. Sampling
            stops early, without appending it, when a newline is drawn.

        Raises:
            ValueError: if ``initial_string`` is empty.
        """
        if not initial_string:
            raise ValueError("initial_string must contain at least one character")

        initial_input = self.char_tensor(initial_string)
        predicted = initial_string

        # Sampling never backpropagates, so skip building the autograd graph.
        with torch.no_grad():
            # Characters are fed one at a time, i.e. as a batch of exactly one,
            # independent of the training batch size.
            hidden, cell = self.rnn.init_hidden(batch_size=1)

            # Warm the state up on every priming character except the last.
            for p in range(len(initial_string) - 1):
                _, (hidden, cell) = self.rnn(initial_input[p].view(1).to(device), hidden, cell)

            last_char = initial_input[-1]

            for _ in range(prediction_len):
                output, (hidden, cell) = self.rnn(last_char.view(1).to(device), hidden, cell)
                # softmax(x / T) is the normalised form of exp(x / T) but does
                # not overflow to inf when T is small.
                output_dist = torch.softmax(output.view(-1) / temprature, dim=0)
                top_char = int(torch.multinomial(output_dist, 1)[0])
                predicted_char = all_characters[top_char]
                if predicted_char == "\n":
                    break
                predicted += predicted_char
                last_char = self.char_tensor(predicted_char)

        return predicted


    def train(self):
        """Train a fresh model, logging to TensorBoard and checkpointing periodically.

        Every ``print_every`` iterations the loss is printed, the weights are
        saved to ``./shakespeare_model_<iteration>.pt`` and a sample is
        printed. The final weights are saved to ``./shakespeare_model.pt``.
        """
        self.rnn = RNN(n_characters, self.hidden_size, self.num_layers, n_characters).to(device)
        optimizer = torch.optim.Adam(self.rnn.parameters(), lr = self.lr)
        criterion = nn.CrossEntropyLoss()
        writer = SummaryWriter('runs/names0')

        print("== Starting Training ==")
        try:
            for epoch in range(1, self.num_epochs + 1):
                inp, target = self.get_random_batch()
                inp, target = inp.to(device), target.to(device)
                hidden, cell = self.rnn.init_hidden(batch_size=self.batch_size)

                optimizer.zero_grad()
                loss = 0
                # Step through the chunk one character at a time, summing the
                # per-step losses before a single backward pass.
                for c in range(self.chunk_len):
                    output, (hidden, cell) = self.rnn(inp[:, c], hidden, cell)
                    loss += criterion(output, target[:, c])

                loss.backward()
                optimizer.step()
                loss = loss.item() / self.chunk_len

                if epoch % self.print_every == 0:
                    print(f"Epoch: {epoch}/ {self.num_epochs}| Loss: {loss}")
                    torch.save(self.rnn.state_dict(), f'./shakespeare_model_{epoch}.pt')
                    print(self.generate())
                writer.add_scalar("Training loss", loss, global_step=epoch)
        finally:
            # Flush and release the event file even when training is interrupted.
            writer.close()

        torch.save(self.rnn.state_dict(), './shakespeare_model.pt')

    def get_model(self, model_name):
        """Load weights from ``model_name`` into a new model and return it in eval mode."""
        self.rnn = RNN(n_characters, self.hidden_size, self.num_layers, n_characters).to(device)
        # map_location lets a checkpoint saved on a GPU load on a CPU-only
        # runtime. NOTE: torch.load unpickles the file, so only load
        # checkpoints from a trusted source.
        self.rnn.load_state_dict(torch.load(model_name, map_location=device))
        self.rnn.eval()
        return self.rnn

In [None]:
# Build a Generator with its default hyperparameters and run the training loop;
# a loss line, a checkpoint and a text sample are produced every `print_every`
# iterations.
gennames = Generator()
gennames.train()

== Starting Training ==
Epoch: 200/ 5000| Loss: 2.139631103515625
Acer and is hevent?

EUCICSAS:
I to the try, be net pressings then bey
Ard wandtell.

GENUTA:
If my n
Epoch: 400/ 5000| Loss: 1.7931781005859375
APUIET:
Of den calm te cagenglake ye, That both with gexous
fove thet.

Thed lied:
Of malaping the so
Epoch: 600/ 5000| Loss: 2.07499267578125
Adder bat my woulsch me will king: oor -roguelswar breded.
Musare!

Sainks whet heress
apin o fare hu
Epoch: 800/ 5000| Loss: 1.647232666015625
Awan thou super;
This came ip'd seect of manessual shall sent pester swo itent-crake uncitenter minot
Epoch: 1000/ 5000| Loss: 1.6970052490234375
Aper, sep the;' are bife you that which made rook
The part him repiefu should are:
But fore for notke
Epoch: 1200/ 5000| Loss: 1.8163983154296874
AiT
The contentery wasse wouce,
But longs in thee a
mous' weremn is somes of the gratturelt!

Seemt; 
Epoch: 1400/ 5000| Loss: 1.7013272705078124
ANNES:
Naround is lade, right of bringes,
And yet give not since

In [None]:
# Sample up to 100 characters at temperature 0.4, primed with 'I am God'.
gennames.generate('I am God', 100, 0.4)

'I am God the books and the wife the good thou that thou hold with me\nAs the sure the true in thy stand and t'

In [None]:
# Fresh Generator with no model attached yet; generate() cannot be used on it
# until train() or get_model() has created `rnn`.
gennames = Generator()

In [None]:
# Load the weights from a checkpoint whose name matches the pattern train()
# writes at iteration 4400.
# NOTE(review): the training log shown above ends at iteration 1400, so this
# file presumably comes from a different run — confirm it exists before Run All.
gennames.get_model("shakespeare_model_4400.pt")

RNN(
  (embed): Embedding(100, 256)
  (lstm): LSTM(256, 256, num_layers=2, batch_first=True)
  (fc): Linear(in_features=256, out_features=100, bias=True)
)

In [None]:
# Sample again with the same prompt and temperature, now using the model
# loaded from the checkpoint.
gennames.generate('I am God', 100, 0.4)

'I am God the earth to at the house the death,'

In [None]:
!pip freeze > requirements.txt

In [None]:
# Sample up to 100 characters at temperature 0.7, primed with a custom name.
gennames.generate("Ikshan Bhardwaj ", 100, 0.7)

'Ikshan Bhardwaj hither.'