<a href="https://colab.research.google.com/github/Gladiator07/Natural-Language-Processing/blob/main/RNN/PyTorch_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
text = ['hey how are you', 'good i am fine', 'have a nice day']

# extract unique characters 
chars = set(' '.join(text))

# creating a dictionary that maps integer to the charachters
int2char = dict(enumerate(chars))

# creating a dictionary that maps characters to the integers
char2int = {char: ind for ind, char in int2char.items()}

In [3]:
print(int2char)
print(char2int)

{0: 'm', 1: 'y', 2: 'o', 3: 'a', 4: 'n', 5: 'd', 6: 'u', 7: 'i', 8: 'w', 9: 'h', 10: ' ', 11: 'f', 12: 'v', 13: 'e', 14: 'c', 15: 'r', 16: 'g'}
{'m': 0, 'y': 1, 'o': 2, 'a': 3, 'n': 4, 'd': 5, 'u': 6, 'i': 7, 'w': 8, 'h': 9, ' ': 10, 'f': 11, 'v': 12, 'e': 13, 'c': 14, 'r': 15, 'g': 16}


In [4]:
maxlen = len(max(text, key=len))
maxlen # hey, how are you

15

In [5]:
#padding
for i in range(len(text)):
    while len(text[i]) < maxlen:
        text[i] += ' '

In [6]:
# creating lists that will hold our input and target sequences
input_seq = []
target_seq = []

for i in range(len(text)):
    # remove last character for input sequence
    input_seq.append(text[i][:-1])

    # remove first charachter for target sequence
    target_seq.append(text[i][1:])

    print("Input sequence: {}\nTarget Sequence: {}".format(input_seq[i], target_seq[i]))

Input sequence: hey how are yo
Target Sequence: ey how are you
Input sequence: good i am fine
Target Sequence: ood i am fine 
Input sequence: have a nice da
Target Sequence: ave a nice day


In [7]:
for i in range(len(text)):
    input_seq[i] = [char2int[character] for character in input_seq[i]]
    target_seq[i] = [char2int[character] for character in target_seq[i]]

In [8]:
dict_size = len(char2int)
seq_len = maxlen - 1
batch_size = len(text)

def one_hot_encode(sequence, dict_size, seq_len, batch_size):
    # Creating a multi-dimensional array of zeros with the desired output shape
    features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)
    
    # Replacing the 0 at the relevant character index with a 1 to represent that character
    for i in range(batch_size):
        for u in range(seq_len):
            features[i, u, sequence[i][u]] = 1
    return features

In [9]:
# Input shape --> (batch size, sequence length, One-Hot Encoding Size)
input_seq = one_hot_encode(input_seq,dict_size ,seq_len, batch_size)

In [10]:
# convert to tensors
input_seq = torch.from_numpy(input_seq)
target_seq = torch.Tensor(target_seq)

In [11]:
is_cuda = torch.cuda.is_available()

if is_cuda: 
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")

GPU not available, CPU used


In [12]:
class RNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(RNN, self).__init__()

        # defining some parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # defining layers
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        # fc layer
        self.fc = nn.Linear(hidden_dim, output_size)
    
    def forward(self, x):
        batch_size = x.size(0)

        # initializing hidden state for first input using method defined below
        hidden = self.init_hidden(batch_size)

        # passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.rnn(x, hidden)

        # reshaping the outputs such that it can be fit into the fc layer
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        # We'll send the tensor holding the hidden state to the device we specified earlier as well 
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
        return hidden

In [26]:
model = RNN(input_size=dict_size, output_size=dict_size, hidden_dim=50, n_layers=2)
model.to(device)

n_epochs = 100
lr = 0.01

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [27]:
for epoch in range(1, n_epochs + 1):
    optimizer.zero_grad()
    input_seq.to(device)
    output, hidden = model(input_seq)
    loss = criterion(output, target_seq.view(-1).long())
    loss.backward()
    optimizer.step()

    if epoch%10 == 0:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))

Epoch: 10/100............. Loss: 1.5287
Epoch: 20/100............. Loss: 0.3435
Epoch: 30/100............. Loss: 0.0805
Epoch: 40/100............. Loss: 0.0441
Epoch: 50/100............. Loss: 0.0378
Epoch: 60/100............. Loss: 0.0361
Epoch: 70/100............. Loss: 0.0354
Epoch: 80/100............. Loss: 0.0350
Epoch: 90/100............. Loss: 0.0348
Epoch: 100/100............. Loss: 0.0346


In [15]:
# this function takes in the model and character as arguments
# and returns the next character prediction

def predict(model, character):
    character = np.array([[char2int[c] for c in character]])
    character = one_hot_encode(character, dict_size, character.shape[1], 1)
    character = torch.from_numpy(character)
    character.to(device)

    out, hidden = model(character)

    prob = nn.functional.softmax(out[-1], dim=0).data
    char_ind = torch.max(prob, dim=0)[1].item()

    return int2char[char_ind], hidden

In [16]:
# This function takes the desired output length and input characters as arguments, returning the produced sentence
def sample(model, out_len, start='hey'):
    model.eval() # eval mode
    start = start.lower()
    # First off, run through the starting characters
    chars = [ch for ch in start]
    size = out_len - len(chars)
    # Now pass in the previous characters and get a new one
    for ii in range(size):
        char, h = predict(model, chars)
        chars.append(char)

    return ''.join(chars)

In [17]:
sample(model, 15, 'good')

'good i am fine '