# **In this lab, there is one exercise for evaluation at the end of this sheet. Submit your work by 12 Nov, 11 am. Submit a new Colab file containing the code, the supporting libraries, and the output required for your answer. Do not share the original lab sheet code in your submission.**

*Submission Link:* [Upload Here](https://forms.gle/XkzCMM3KCVyBympj9)


[https://karpathy.github.io/2015/05/21/rnn-effectiveness/](https://karpathy.github.io/2015/05/21/rnn-effectiveness/)

The notebook provides a comprehensive overview of building and training a basic RNN for a character-level language modeling task. It covers data preprocessing, model definition, training, and using the model for predictions.

In [None]:
text = ['hey how are you', 'good i am fine', 'have a nice day']

In [None]:
# Collect every distinct character that occurs in any of the sentences.
chars = {ch for sentence in text for ch in sentence}

In [None]:
chars

{' ',
 'a',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'm',
 'n',
 'o',
 'r',
 'u',
 'v',
 'w',
 'y'}

In [None]:
# Give each character an integer index (index -> character).
# NOTE: `chars` is a set, so the order in which the indices are handed out is
# not guaranteed to be the same from one run to the next.
int_map = {index: char for index, char in enumerate(chars)}

In [None]:
int_map

{0: 'v',
 1: 'y',
 2: 'h',
 3: ' ',
 4: 'd',
 5: 'f',
 6: 'a',
 7: 'n',
 8: 'c',
 9: 'r',
 10: 'w',
 11: 'g',
 12: 'u',
 13: 'm',
 14: 'i',
 15: 'o',
 16: 'e'}

In [None]:
# Invert int_map to get the reverse lookup (character -> index).
char_map = dict(zip(int_map.values(), int_map.keys()))

In [None]:
char_map

{'v': 0,
 'y': 1,
 'h': 2,
 ' ': 3,
 'd': 4,
 'f': 5,
 'a': 6,
 'n': 7,
 'c': 8,
 'r': 9,
 'w': 10,
 'g': 11,
 'u': 12,
 'm': 13,
 'i': 14,
 'o': 15,
 'e': 16}

In [None]:
num_unique_chars = len(char_map)

In [None]:
num_unique_chars

17

In [None]:
# Length of the longest sentence; shorter sentences are padded up to this.
maxlen = max(len(sentence) for sentence in text)

In [None]:
maxlen

15

# The items must all have the same dimensions if we want to stack them into a batch

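For example, say we have a dataset of images

and say our batch size is 30.

Some images are 256x256

and others are 512x512, so they cannot be stacked into one batch as they are.

If every image has 3 channels and is 256x256, the batch has shape (30, 3, 256, 256).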

In [None]:
# Right-pad every sentence with spaces so that all of them are maxlen long.
for idx, sentence in enumerate(text):
  text[idx] = sentence.ljust(maxlen)

In [None]:
text

['hey how are you', 'good i am fine ', 'have a nice day']

In [None]:
# Inputs drop the last character of each sentence and targets drop the first,
# so target_seq[k][t] is the character that follows input_seq[k][t].
input_seq = [sentence[:-1] for sentence in text]
target_seq = [sentence[1:] for sentence in text]


In [None]:
input_seq

['hey how are yo', 'good i am fine', 'have a nice da']

In [None]:
target_seq

['ey how are you', 'ood i am fine ', 'ave a nice day']

In [None]:
# Replace every character by its integer index from char_map.
input_seq = [[char_map[character] for character in sentence] for sentence in input_seq]
target_seq = [[char_map[character] for character in sentence] for sentence in target_seq]


In [None]:
input_seq


[[2, 16, 1, 3, 2, 15, 10, 3, 6, 9, 16, 3, 1, 15],
 [11, 15, 15, 4, 3, 14, 3, 6, 13, 3, 5, 14, 7, 16],
 [2, 6, 0, 16, 3, 6, 3, 7, 14, 8, 16, 3, 4, 6]]

In [None]:
target_seq


[[16, 1, 3, 2, 15, 10, 3, 6, 9, 16, 3, 1, 15, 12],
 [15, 15, 4, 3, 14, 3, 6, 13, 3, 5, 14, 7, 16, 3],
 [6, 0, 16, 3, 6, 3, 7, 14, 8, 16, 3, 4, 6, 1]]

In [None]:
# you want to get the one-hot embedding/vector corresponding to 4

[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [None]:
import numpy as np

import torch
from torch import nn


Shape of the one-hot encoded batch: `num_of_sentences x num_characters_in_sentence x length_of_one_hot_vector`

In [None]:
def one_hot_encode(sequence, num_unique_chars, seq_len, batch_size):
  """Turn integer-encoded sentences into a batch of one-hot vectors.

  Args:
    sequence: indexable of sentences; sequence[i][u] is the integer index of
      the u-th character of the i-th sentence.
    num_unique_chars: length of each one-hot vector.
    seq_len: number of characters encoded per sentence.
    batch_size: number of sentences encoded.

  Returns:
    A float32 NumPy array of shape (batch_size, seq_len, num_unique_chars)
    that is 1 at [i, u, sequence[i][u]] and 0 everywhere else.
  """
  encoded = np.zeros((batch_size, seq_len, num_unique_chars), dtype=np.float32)

  # Visit every (sentence, position) pair and switch on the matching entry.
  for row, col in np.ndindex(batch_size, seq_len):
    encoded[row, col, sequence[row][col]] = 1

  return encoded


In [None]:
# One sequence per sentence in `text`.
batch_size = len(text)
# The input and target strings each drop one character from the padded
# sentences, so they are one shorter than maxlen.
seq_len = maxlen - 1

In [None]:
input_seq = one_hot_encode(input_seq, num_unique_chars, seq_len, batch_size)

In [None]:
type(input_seq)

numpy.ndarray

In [None]:
# Wrap the one-hot NumPy array as a tensor.
input_seq = torch.from_numpy(input_seq)
# torch.Tensor(...) stores the target indices as floating-point values; the
# training loop casts them with .long() before computing the loss.
target_seq = torch.Tensor(target_seq)

In [None]:
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Making the model

In [None]:
class Model(nn.Module):
  """A batch-first nn.RNN followed by a linear layer applied at every step.

  Args:
    input_size: size of each input vector (here: the one-hot length).
    output_size: number of scores produced per time step.
    hidden_dim: size of the RNN hidden state.
    n_layers: number of stacked RNN layers.
  """

  def __init__(self, input_size, output_size, hidden_dim, n_layers):
    super().__init__()
    self.hidden_dim = hidden_dim
    self.n_layers = n_layers

    # batch_first=True: inputs and outputs are laid out as (batch, seq, feature).
    self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first = True)
    self.fc = nn.Linear(hidden_dim, output_size)

  def init_hidden(self, batch_size):
    """Return an all-zero hidden state of shape (n_layers, batch_size, hidden_dim).

    The tensor is created with the dtype and device of the model's own
    parameters, so it always matches the model wherever it has been moved,
    without relying on a module-level `device` variable.
    """
    reference = next(self.parameters())
    return reference.new_zeros(self.n_layers, batch_size, self.hidden_dim)

  def forward(self, x):
    """Run a batch through the RNN and the linear layer.

    Args:
      x: tensor whose first dimension is the batch (batch_first layout).

    Returns:
      (out, hidden): `out` holds the linear layer's scores for every time
      step; `hidden` is the final hidden state returned by the RNN.
    """
    batch_size = x.shape[0]
    # Every forward pass starts from a fresh zero hidden state.
    hidden = self.init_hidden(batch_size)

    out, hidden = self.rnn(x, hidden)

    # nn.Linear acts on the last dimension, i.e. on each time step separately.
    out = self.fc(out)

    return out, hidden


In [None]:
model = Model(input_size = num_unique_chars, output_size = num_unique_chars, hidden_dim = 12, n_layers = 1)

In [None]:
model = model.to(device)

In [None]:
n_epochs = 100
lr = 0.01

In [None]:
loss = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [None]:
input_seq = input_seq.to(device)

# Training the model

In [None]:
# predict()/sample() put the model in eval mode; make sure training always
# runs in train mode, even when this cell is re-run afterwards.
model.train()

# The targets are the same in every epoch, so move them to the device and
# flatten them to integer class indices once instead of on every iteration.
target_seq = target_seq.to(device)
flat_targets = target_seq.view(-1).long()

# Full-batch training: each epoch feeds all sentences through the model once.
for epoch in range(1, n_epochs + 1):
  optimizer.zero_grad()

  # The model already lives on `device`, so its output needs no extra transfer.
  output, hidden = model(input_seq)

  # Merge the batch and sequence dimensions so that every character position
  # is scored as one classification over the vocabulary.
  epoch_loss = loss(output.view(-1, output.shape[-1]), flat_targets)

  epoch_loss.backward()
  optimizer.step()

  if epoch % 10 == 0:
    print("Epoch: {}/{}............".format(epoch, n_epochs), end = ' ')
    print("Loss: {:.4f}".format(epoch_loss.item()))

Epoch: 10/100............ Loss: 2.4118
Epoch: 20/100............ Loss: 2.1012
Epoch: 30/100............ Loss: 1.6987
Epoch: 40/100............ Loss: 1.2996
Epoch: 50/100............ Loss: 0.9236
Epoch: 60/100............ Loss: 0.6218
Epoch: 70/100............ Loss: 0.4170
Epoch: 80/100............ Loss: 0.2842
Epoch: 90/100............ Loss: 0.2036
Epoch: 100/100............ Loss: 0.1550


In [None]:
output, hidden = model(input_seq)

In [None]:
output.shape

torch.Size([3, 14, 17])

In [None]:
target_seq.shape

torch.Size([3, 14])

In [None]:
target_seq.view(-1)

tensor([16.,  1.,  3.,  2., 15., 10.,  3.,  6.,  9., 16.,  3.,  1., 15., 12.,
        15., 15.,  4.,  3., 14.,  3.,  6., 13.,  3.,  5., 14.,  7., 16.,  3.,
         6.,  0., 16.,  3.,  6.,  3.,  7., 14.,  8., 16.,  3.,  4.,  6.,  1.],
       device='cuda:0')

# Get predictions from our trained model

In [None]:
# characters = ['h', 'e', 'y']
def predict(model, characters):
  """Predict the character that follows the given sequence of characters.

  Args:
    model: the trained model; called as model(x) and expected to return
      (out, hidden).
    characters: non-empty sequence of characters, each a key of char_map.

  Returns:
    (next_char, hidden): the most likely next character and the hidden state
    returned by the model.

  Raises:
    ValueError: if `characters` is empty (there is no last time step to read).
    KeyError: if a character is not in char_map.
  """
  if len(characters) == 0:
    raise ValueError("predict() needs at least one character to continue from")

  # Encode the prompt as a one-hot batch holding a single sentence.
  indices = np.array([[char_map[c] for c in characters]])
  encoded = one_hot_encode(indices, num_unique_chars, indices.shape[1], 1)
  # Send the input to wherever the model's weights live.
  model_device = next(model.parameters()).device
  encoded = torch.from_numpy(encoded).to(model_device)

  model.eval()

  # Pure inference: no gradients are needed, so skip building the autograd graph.
  with torch.no_grad():
    out, hidden = model(encoded)

    # Keep only the scores of the last time step and turn them into probabilities.
    prob = nn.functional.softmax(torch.squeeze(out, dim=0)[-1], dim=0)

    char_ind = torch.argmax(prob, dim=0)

  return int_map[char_ind.item()], hidden

In [None]:
def sample(model, out_len, start):
  """Extend `start` one predicted character at a time.

  Args:
    model: the trained model handed to predict().
    out_len: desired total length of the returned string.
    start: seed text; it is lower-cased before use.

  Returns:
    The lower-cased seed followed by predicted characters, out_len characters
    long in total. If the seed already has out_len or more characters it is
    returned (lower-cased) unchanged.
  """
  model.eval()

  generated = list(start.lower())
  remaining = out_len - len(generated)

  # Each step feeds everything generated so far back into the model.
  while remaining > 0:
    next_char, _ = predict(model, generated)
    generated.append(next_char)
    remaining -= 1

  return ''.join(generated)

In [None]:
sample(model, 15, 'have')

'have a nice day'

In [None]:
# [0.01, 0.02, 0.3, 0.04, .....]
# you apply softmax to this -> you get a list of probabilities
# you use torch.argmax to get the index corresponding to the max value
# say the index of our max value -> 3
# we use the int_map to convert 3 to 'i'


**Task:**
Your task is to modify the existing character-level RNN model to create a word-level language model. Instead of predicting the next character, your model should predict the next word in a sequence of words.

**Steps:**


1.   **Data Preprocessing:** Use a larger text dataset (you can choose a dataset or use any text corpus available in the lab). Tokenize the text data into words instead of characters. Create a mapping of each unique word to an integer (word to index) and the reverse mapping (index to word).
Prepare your input and target sequences based on words.
2.   **Model Modifications:** Adjust the input and output dimensions of your RNN model to accommodate the size of the word vocabulary (number of unique words). Consider experimenting with the size of the hidden layer or adding more layers to the RNN.
3. **Training:** Train your model with the word-level sequences. Pay attention to how the choice of sequence length impacts the training and results.
4. **Evaluation and Generation:** Evaluate your model's performance. How well does it predict the next word? Implement a function to generate a sequence of words given a starting word or phrase.
