In [33]:
import torch 
from torch import nn 
import numpy as np 

In [34]:
text = ['hey how are you','good i am fine','have a nice day']
chars = set(''.join(text))
print(chars)
int2char = dict(enumerate(chars))
print(int2char)
char2int = {char: idx for idx,char in int2char.items()}
print(char2int)

{'f', 'c', 'e', 'o', 'r', 'a', ' ', 'h', 'n', 'v', 'u', 'd', 'g', 'i', 'm', 'y', 'w'}
{0: 'f', 1: 'c', 2: 'e', 3: 'o', 4: 'r', 5: 'a', 6: ' ', 7: 'h', 8: 'n', 9: 'v', 10: 'u', 11: 'd', 12: 'g', 13: 'i', 14: 'm', 15: 'y', 16: 'w'}
{'f': 0, 'c': 1, 'e': 2, 'o': 3, 'r': 4, 'a': 5, ' ': 6, 'h': 7, 'n': 8, 'v': 9, 'u': 10, 'd': 11, 'g': 12, 'i': 13, 'm': 14, 'y': 15, 'w': 16}


In [35]:
max_len = len(max(text,key=len))
print(f"longest string has {max_len} characters")

longest string has 15 characters


In [36]:
#padding 
for i in range(len(text)):
    while len(text[i]) < max_len:
        text[i] += ' '
text

['hey how are you', 'good i am fine ', 'have a nice day']

In [37]:
#input,target
#input last char is not feed into the model
#target 1 time-step ahead of the Input data as this will be the "correct answer"
input_seq = []
target_seq = []
for i in range(len(text)):
    input_seq.append(text[i][:-1])
    target_seq.append(text[i][1:])
    print(f"Input sequence: {input_seq}\ntarget sequence: {target_seq}")

Input sequence: ['hey how are yo']
target sequence: ['ey how are you']
Input sequence: ['hey how are yo', 'good i am fine']
target sequence: ['ey how are you', 'ood i am fine ']
Input sequence: ['hey how are yo', 'good i am fine', 'have a nice da']
target sequence: ['ey how are you', 'ood i am fine ', 'ave a nice day']


In [38]:
## Convert characters to indices
for i in range(len(text)):
    input_seq[i] = [char2int[character] for character in input_seq[i]]
    target_seq[i] = [char2int[character] for character in target_seq[i]]


Before encoding our input sequence into one-hot vectors, we'll define 3 key variables:

- dict_size: The number of unique characters that we have in our text
This will determine the one-hot vector size as each character will have an assigned index in that vector
- seq_len: The length of the sequences that we're feeding into the model
As we standardised the length of all our sentences to be equal to the longest sentences, this value will be the max length - 1 as we removed the last character input as well
- batch_size: The number of sentences that we defined and are going to feed into the model as a batch

In [39]:
dict_size =  len(char2int)
seq_len = max_len - 1
batch_size = len(text)

In [40]:
from torch.nn.functional import one_hot
input_seq = one_hot(torch.tensor(input_seq),num_classes=dict_size)
target_seq = one_hot(torch.tensor(target_seq),num_classes=dict_size)

In [41]:
print(input_seq)

tensor([[[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
         [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
         [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],

        [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      

In [42]:
print(target_seq)

tensor([[[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
         [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
         [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]],

        [[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      

In [43]:
print("Input sequence one-hot shape:", input_seq.shape)
print("Target sequence one-hot shape:", target_seq.shape)

Input sequence one-hot shape: torch.Size([3, 14, 17])
Target sequence one-hot shape: torch.Size([3, 14, 17])


In [44]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [45]:
class RNNModel(nn.Module):
    def __init__(self, input_size,output_size,hidden_dim,n_layers) -> None:
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.rnn = nn.RNN(input_size,hidden_dim,n_layers,batch_first=True)
        self.fc = nn.Linear(hidden_dim,output_size)
    
    def forward(self,x):
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size)
        output,hidden = self.rnn(x,hidden)
        output = output.contiguous().view(-1,self.hidden_dim)
        output = self.fc(output)
        return output,hidden
    
    def init_hidden(self,batch_size):
        hidden = torch.zeros(self.n_layers,batch_size,self.hidden_dim).to(device)
        return hidden

torch.manual_seed(42)
model = RNNModel(input_size=dict_size,output_size=dict_size,hidden_dim=12,n_layers=1).to(device)
model

RNNModel(
  (rnn): RNN(17, 12, batch_first=True)
  (fc): Linear(in_features=12, out_features=17, bias=True)
)

In [46]:
n_epochs = 100
lr = 0.01
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=lr)

In [47]:
# Train the model
torch.manual_seed(42)
input_seq = input_seq.to(device).float()
for epoch in range(1, n_epochs + 1):
    optimizer.zero_grad()
    output, hidden = model(input_seq)
    output = output.to(device)
    target_seq_one_hot = target_seq.to(device).float()
    loss = criterion(output, target_seq_one_hot.view(-1, target_seq_one_hot.size(-1)))
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}/{n_epochs} Loss: {loss.item()} ")

Epoch: 10/100 Loss: 2.3790833950042725 
Epoch: 20/100 Loss: 2.03517746925354 
Epoch: 30/100 Loss: 1.6522883176803589 
Epoch: 40/100 Loss: 1.2736116647720337 
Epoch: 50/100 Loss: 0.9190427660942078 
Epoch: 60/100 Loss: 0.6231265664100647 
Epoch: 70/100 Loss: 0.42084866762161255 
Epoch: 80/100 Loss: 0.2880089581012726 
Epoch: 90/100 Loss: 0.20716917514801025 
Epoch: 100/100 Loss: 0.16010627150535583 


In [64]:
def predict(model, character):
    character = torch.tensor([[char2int[c] for c in character]])
    character = one_hot(character, num_classes=dict_size).float().to(device)
    output, hidden = model(character)
    output = output.to(device)
    hidden = hidden.to(device)
    prob = torch.softmax(output[-1], dim=0).data
    char_ind = torch.max(prob, dim=0)[1].item()
    return int2char[char_ind], hidden


In [65]:
test_character = 'h'
predicted_character, _ = predict(model, test_character)
print(f"The predicted character for '{test_character}' is '{predicted_character}'")

The predicted character for 'h' is 'e'


In [66]:
def sample(model,out_len,start='hi'):
    model.eval()
    start = start.lower()
    chars = [ch for ch in start]
    size = out_len - len(chars)
    for i in range(size):
        char, h = predict(model,chars)
        chars.append(char)
    return ''.join(chars)

In [71]:
sample(model,15,'good')

'good i am fine '