In [1]:
import torch
from torch import nn

import numpy as np

Map all sentences characters to an integer

In [2]:
text = ['hey how are you','good i am fine','have a nice day']

chars = set(''.join(text))

int2char = dict(enumerate(chars))
char2int = {char: ind for ind, char in int2char.items()}

In [3]:
char2int

{'h': 0,
 'e': 1,
 ' ': 2,
 'd': 3,
 'f': 4,
 'u': 5,
 'w': 6,
 'r': 7,
 'm': 8,
 'v': 9,
 'o': 10,
 'n': 11,
 'i': 12,
 'a': 13,
 'g': 14,
 'c': 15,
 'y': 16}

Ensure the string length to be equal to the longest size, this will allow we train the model in batches 

In [4]:
maxlen = len(max(text, key=len))

for i in range(len(text)):
  while len(text[i])<maxlen:
      text[i] += ' '

In [5]:
input_seq = []
target_seq = []

for i in range(len(text)):
  input_seq.append(text[i][:-1])
  target_seq.append(text[i][1:])
  print("Input Sequence: {}\nTarget Sequence: {}".format(input_seq[i], target_seq[i]))

Input Sequence: hey how are yo
Target Sequence: ey how are you
Input Sequence: good i am fine
Target Sequence: ood i am fine 
Input Sequence: have a nice da
Target Sequence: ave a nice day


In [6]:
for i in range(len(text)):
    input_seq[i] = [char2int[character] for character in input_seq[i]]
    target_seq[i] = [char2int[character] for character in target_seq[i]]

In [8]:
dict_size = len(char2int)
seq_len = maxlen - 1
batch_size = len(text)

def one_hot_encode(sequence, dict_size, seq_len, batch_size):
    features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)    
    for i in range(batch_size):
        for u in range(seq_len):
            features[i, u, sequence[i][u]] = 1
    return features

In [9]:
input_seq = one_hot_encode(input_seq, dict_size, seq_len, batch_size)

In [10]:
input_seq = torch.from_numpy(input_seq)
target_seq = torch.Tensor(target_seq)

In [11]:
device = torch.device( "cuda" if torch.cuda.is_available() else "cpu" )

In [21]:
class Model(nn.Module):
    def __init__(self, input_s, output, hidden, n_layers):
        super(Model, self).__init__()
        
        self.hidden = hidden
        self.n_layers = n_layers
        
        self.rnn = nn.RNN(input_s, hidden, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden, output)
        
    def forward(self, x):
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size)
        out, hidden = self.rnn(x, hidden)
        
        out = out.contiguous().view(-1, self.hidden)
        return self.fc(out), hidden
    
    def init_hidden(self, batch_size):
        return torch.zeros(self.n_layers, batch_size, self.hidden)
        
        

In [22]:
model = Model(input_s=dict_size, output=dict_size, hidden=12, n_layers=1)
model.to(device)

n_epochs = 100
lr=0.01

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [23]:
def training_():
    for epoch in range(1, n_epochs + 1):
        optimizer.zero_grad() 
        input_seq.to(device)
        output, hidden = model(input_seq)
        loss = criterion(output, target_seq.view(-1).long())
        loss.backward() 
        optimizer.step() 

        if epoch%10 == 0:
            print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
            print("Loss: {:.4f}".format(loss.item()))
training_()

Epoch: 10/100............. Loss: 2.4897
Epoch: 20/100............. Loss: 2.1467
Epoch: 30/100............. Loss: 1.8216
Epoch: 40/100............. Loss: 1.4637
Epoch: 50/100............. Loss: 1.1126
Epoch: 60/100............. Loss: 0.7986
Epoch: 70/100............. Loss: 0.5484
Epoch: 80/100............. Loss: 0.3835
Epoch: 90/100............. Loss: 0.2749
Epoch: 100/100............. Loss: 0.2048


In [26]:
def predict(model, character):
    character = np.array([[char2int[c] for c in character]])
    character = one_hot_encode(character, dict_size, character.shape[1], 1)
    character = torch.from_numpy(character)
    character.to(device)
    
    out, hidden = model(character)

    prob = nn.functional.softmax(out[-1], dim=0).data
    char_ind = torch.max(prob, dim=0)[1].item()

    return int2char[char_ind], hidden

def sample_valuation(model, out_len, start='hey'):
    model.eval() # eval mode
    start = start.lower()
    # First off, run through the starting characters
    chars = [ch for ch in start]
    size = out_len - len(chars)
    # Now pass in the previous characters and get a new one
    for ii in range(size):
        char, h = predict(model, chars)
        chars.append(char)

    return ''.join(chars)

In [31]:
sample_valuation(model, 15, 'Go')

'good i am fine '

We use one hot encoding to represent the textual data, because its computationaly expensive and it is not have embedded information  this approach have many downsides.

Most modern NLP solutions rely on word embeddings or unique contextual word representation.(eg. word2vec, bert)
