<a href="https://colab.research.google.com/github/SachinPrasanth777/PyTorch/blob/main/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
import torch.nn.functional as F

In [2]:
with open('anna.txt','r') as f:
  text = f.read()
print(text[:100])

Chapter 1


Happy families are all alike; every unhappy family is unhappy in its own
way.

Everythin


In [3]:
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii,ch in int2char.items()}
encoded = np.array([char2int[ch] for ch in text])
print(encoded[:100])

[42 22 13  0 60 64 76 32 38 73 73 73  5 13  0  0 65 32 14 13 78 15 20 15
 64 36 32 13 76 64 32 13 20 20 32 13 20 15 50 64 58 32 64 75 64 76 65 32
  8 11 22 13  0  0 65 32 14 13 78 15 20 65 32 15 36 32  8 11 22 13  0  0
 65 32 15 11 32 15 60 36 32 70 21 11 73 21 13 65 23 73 73 46 75 64 76 65
 60 22 15 11]


In [4]:
def one_hot_encode(arr,n_labels):
  one_hot = np.zeros((arr.size,n_labels), dtype = np.float32)
  one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
  one_hot = one_hot.reshape((*arr.shape,n_labels))
  return one_hot

In [5]:
test_seq = np.array([[3, 5, 1]])
one_hot = one_hot_encode(test_seq, 8)
print(one_hot)

[[[0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0. 0.]]]


In [6]:
def get_batches(arr,batch_size,seq_length):
  batch_size_total = batch_size * seq_length
  n_batches = len(arr)//batch_size_total
  arr = arr[:n_batches * batch_size_total]
  arr = arr.reshape((batch_size, -1))
  x = np.zeros((batch_size, seq_length), dtype=arr.dtype)
  y = np.zeros_like(x)
  for n in range(0, arr.shape[1], seq_length):
    x = arr[:, n:n+seq_length]
    y = np.zeros_like(x)
    try:
      y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
    except IndexError:
      y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
    yield x,y

In [7]:
batches = get_batches(encoded,8,50)
x, y = next(batches)

In [8]:
print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

x
 [[42 22 13  0 60 64 76 32 38 73]
 [36 70 11 32 60 22 13 60 32 13]
 [64 11 55 32 70 76 32 13 32 14]
 [36 32 60 22 64 32 82 22 15 64]
 [32 36 13 21 32 22 64 76 32 60]
 [82  8 36 36 15 70 11 32 13 11]
 [32  2 11 11 13 32 22 13 55 32]
 [63 24 20 70 11 36 50 65 23 32]]

y
 [[22 13  0 60 64 76 32 38 73 73]
 [70 11 32 60 22 13 60 32 13 60]
 [11 55 32 70 76 32 13 32 14 70]
 [32 60 22 64 32 82 22 15 64 14]
 [36 13 21 32 22 64 76 32 60 64]
 [ 8 36 36 15 70 11 32 13 11 55]
 [ 2 11 11 13 32 22 13 55 32 36]
 [24 20 70 11 36 50 65 23 32 10]]


In [16]:
train_on_gpu = torch.cuda.is_available
if(train_on_gpu):
    print('Training on GPU!')
else:
    print('No GPU available, training on CPU; consider making n_epochs very small.')

Training on GPU!


In [29]:
class RNN(nn.Module):
  def __init__(self,tokens,n_hidden=256,n_layers=2,drop_prob=0.5,lr=0.001):
    super().__init__()
    self.drop_prob = drop_prob
    self.n_layers = n_layers
    self.n_hidden = n_hidden
    self.lr = lr
    self.chars = tokens
    self.int2char = dict(enumerate(self.chars))
    self.char2int = {ch: ii for ii, ch in self.int2char.items()}
    self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, dropout=drop_prob, batch_first=True)
    self.dropout = nn.Dropout(drop_prob)
    self.fc = nn.Linear(n_hidden, len(self.chars))


  def forward(self, x, hidden):
        r_output, hidden = self.lstm(x, hidden)
        out = self.dropout(r_output)
        out = out.contiguous().view(-1, self.n_hidden)
        out = self.fc(out)
        return out, hidden

  def init_hidden(self,batch_size):
    weight = next(self.parameters()).data
    if(train_on_gpu):
      hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
    else:
      hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                weight.new(self.n_layers,batch_size, self.n_hidden).zero_())
    return hidden

In [30]:
def train(net,data,epochs=10,batch_size=10,seq_length=50,lr=0.001,clip=5,val_frac=0.1,print_every=10):
  net.train()
  opt = torch.optim.Adam(net.parameters(),lr=lr)
  criterion = nn.CrossEntropyLoss()
  val_idx = int(len(data)*(1-val_frac))
  data, val_data = data[:val_idx], data[val_idx:]
  if(train_on_gpu):
    net.cuda()
  counter = 0
  n_chars = len(net.chars)
  for e in range(epochs):
    h = net.init_hidden(batch_size)
    for x,y in get_batches(data,batch_size,seq_length):
      counter += 1
      x = one_hot_encode(x,n_chars)
      inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
      if train_on_gpu():
        inputs, targets = inputs.cuda(), targets.cuda()
      h = tuple([each.data for each in h])
      net.zero_grad()
      output, h = net(inputs,h)
      loss = criterion(output,targets.view(batch_size*seq_length).long())
      loss.backward()
      nn.utils.clip_grad_norm_(net.parameters(), clip)
      opt.step()
      if counter % print_every == 0:
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for x, y in get_batches(val_data, batch_size, seq_length):
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    val_h = tuple([each.data for each in val_h])
                    inputs, targets = x, y
                    if(train_on_gpu):
                        inputs, targets = inputs.cuda(), targets.cuda()
                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output, targets.view(batch_size*seq_length).long())
                    val_losses.append(val_loss.item())
                net.train()
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

In [31]:
batch_size = 128
seq_length = 100
n_epochs = 20
net = RNN(chars,n_hidden=512,n_layers=2)
train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=10)

Epoch: 1/20... Step: 10... Loss: 3.2478... Val Loss: 3.1738
Epoch: 1/20... Step: 20... Loss: 3.1514... Val Loss: 3.1285
Epoch: 1/20... Step: 30... Loss: 3.1411... Val Loss: 3.1212
Epoch: 1/20... Step: 40... Loss: 3.1186... Val Loss: 3.1195
Epoch: 1/20... Step: 50... Loss: 3.1445... Val Loss: 3.1180
Epoch: 1/20... Step: 60... Loss: 3.1190... Val Loss: 3.1165
Epoch: 1/20... Step: 70... Loss: 3.1077... Val Loss: 3.1158
Epoch: 1/20... Step: 80... Loss: 3.1247... Val Loss: 3.1130
Epoch: 1/20... Step: 90... Loss: 3.1222... Val Loss: 3.1075
Epoch: 1/20... Step: 100... Loss: 3.1060... Val Loss: 3.0983
Epoch: 1/20... Step: 110... Loss: 3.0921... Val Loss: 3.0741
Epoch: 1/20... Step: 120... Loss: 3.0269... Val Loss: 3.0361
Epoch: 1/20... Step: 130... Loss: 2.9832... Val Loss: 2.9600
Epoch: 2/20... Step: 140... Loss: 2.9185... Val Loss: 2.8645
Epoch: 2/20... Step: 150... Loss: 2.8347... Val Loss: 2.7786
Epoch: 2/20... Step: 160... Loss: 2.7384... Val Loss: 2.6907
Epoch: 2/20... Step: 170... Loss:

In [32]:
model_name = 'rnn_20_epoch.net'

checkpoint = {'n_hidden': net.n_hidden,
              'n_layers': net.n_layers,
              'state_dict': net.state_dict(),
              'tokens': net.chars}

with open(model_name, 'wb') as f:
    torch.save(checkpoint, f)

In [33]:
def predict(net, char, h=None, top_k=None):
        x = np.array([[net.char2int[char]]])
        x = one_hot_encode(x, len(net.chars))
        inputs = torch.from_numpy(x)
        if(train_on_gpu):
            inputs = inputs.cuda()
        h = tuple([each.data for each in h])
        out, h = net(inputs, h)
        p = F.softmax(out, dim=1).data
        if(train_on_gpu):
            p = p.cpu()
        if top_k is None:
            top_ch = np.arange(len(net.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()
        p = p.numpy().squeeze()
        char = np.random.choice(top_ch, p=p/p.sum())
        return net.int2char[char], h

In [34]:
def sample(net, size, prime='The', top_k=None):
    if(train_on_gpu):
        net.cuda()
    else:
        net.cpu()
    net.eval()
    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, top_k=top_k)
    chars.append(char)
    for ii in range(size):
        char, h = predict(net, chars[-1], h, top_k=top_k)
        chars.append(char)
    return ''.join(chars)

In [35]:
print(sample(net, 1000, prime='Anna', top_k=5))

Anna, still stoding him the study of that sense of the first.

"When he's the singer to makman some simple, with a man and a minute. It's not a pity
of the strong table," answered Levin. "And
it is those will see her and worde tomerrach was alwored, that it is true that I
have been so merely for the peasant.

"I don't get on worre."

"What so means of the most of it, a dinner are to the coundry, but I were as a meanon," said the criminal conversation. "I'm not a success and than
evin of all the sides,"
she say to himself to her to start, the steps.

"I cannot general than
their mother."

"All the stream, that you don't be to that anything to be done to them
the people togather and sen their
feelings of mistaken and to be in the peasants'. Whether anything. Will the say, it's not to see it, and so," she said.

"I have nothing been that you say it out of this
more all these wild marriage. And I should be a might be, and I have been the
same and to speak of it, and to be a possible of the

In [36]:
with open('rnn_20_epoch.net', 'rb') as f:
    checkpoint = torch.load(f)

loaded = RNN(checkpoint['tokens'], n_hidden=checkpoint['n_hidden'], n_layers=checkpoint['n_layers'])
loaded.load_state_dict(checkpoint['state_dict'])

  checkpoint = torch.load(f)


<All keys matched successfully>

In [37]:
print(sample(loaded, 2000, top_k=5, prime="And Levin said"))

And Levin said had no second and the country.
The world-crowd, the mistate finished of them.

"Well, are your acquaintance we have been through to marrow;" he went on.

"And the sick meant misery the says and arrainish.

As he does not than
a man, waiting and sorry that the wearther was a cholid trouble for
the condringing minute at her
fasility. And
tharks to the country, and she would be indein to be anyway, and at the same to him, and all the forest of more talking of an official
cape of any post and man at times."

"Well, and there were a condession if anyone. We was so all and saying it. I've been mading her to her food. If they can't
did the paper, and
this is now a most
princess," he said, went in
the
streat of the same as it was shaked over the significance, and seemed to the middle of the cape of his brother he had at the mustrace of
a consider of his
stangings the parance of the sound of a porter
who had been despair, and
had tro with horror to her, and
had not called a littl