In [1]:
from io import open
import unicodedata
import string
import re
import random
import pandas as ps
import numpy as np
import os
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda")
device

device(type='cuda')

In [2]:
df = ps.read_csv(os.path.abspath('sentences.csv'), error_bad_lines=False)
df[:3]

b'Skipping line 96223: expected 1 fields, saw 2\n'


Unnamed: 0,Tom is not as fat as I am.
0,Is it OK if I open a can?
1,Tom said he would be thirteen next month.
2,I got Tom to do it for me.


In [3]:
class Util():
    
    def assign_word_count(self, word, dictionary):
        if word in dictionary:
            dictionary[word] = dictionary[word] + 1
        else:
            dictionary[word] = 1
        return dictionary
    
util = Util()

In [4]:
def prepare_sequence(seq, to_ix, isTarget):
    
    idxs = [to_ix[w] if w in to_ix.keys() else to_ix['unk'] for w in seq]
    if isTarget:
        idxs.append(0)
    else:
        idxs.append(1) # EOS
    return torch.tensor(idxs, dtype=torch.long).cuda()

In [5]:
training_data = []
validation_data = []
word_count = {}
ix2word = {}
word2ix = {}
for i, sentence in enumerate(df['Tom is not as fat as I am.']):
    sentence = sentence.translate(str.maketrans('', '', string.punctuation)) # remove punctuation
    sentence = sentence.split() # split in array
    sentence = [word.lower() for word in sentence] # lower everything 
    sentence.append('EOS')
    [util.assign_word_count(word, word_count) for word in sentence]
    reversed_s = sentence[::-1]
    if i < len(df['Tom is not as fat as I am.']) - 1001:
        training_data.append((sentence, reversed_s))
    else:
        validation_data.append((sentence, reversed_s))

word2ix['SOS'] = 0
word2ix['EOS'] = 1
word2ix['unk'] = 2
ix2word[0] = 'SOS'
ix2word[1] = 'EOS'
ix2word[2] = 'unk'

for sentence, target in training_data:
    for word in sentence:
        if word not in word2ix and word_count[word] > 5:
            word2ix[word] = len(word2ix)
            ix2word[len(word2ix) - 1] = word
for sentence, target in validation_data:
    for word in sentence:
        if word not in word2ix and word_count[word] > 5:
            word2ix[word] = len(word2ix)
            ix2word[len(word2ix) - 1] = word
        

In [6]:
class EncoderRNN(nn.Module):
    def __init__(self, vocab_size, hidden_size):
        super(EncoderRNN, self).__init__() # initialize the params/methods for nn Module class
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.dropout = nn.Dropout(0.1)
        self.gru = nn.GRU(hidden_size, hidden_size)
        
    def forward(self, inputs, hidden):
        x = self.embedding(inputs)
        #x = self.dropout(x)
        x = x.view(1, 1, -1)
        out, hidden = self.gru(x)
        return out, hidden
    
    
    def init_hidden(self):
        return torch.zeros(1, 1, 255)

In [7]:
encoder = EncoderRNN(len(word2ix), 255).to(device)

In [8]:
class AttentionDecoder(nn.Module):
    def __init__(self, vocab_size, hidden_size):
        super(AttentionDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size

        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.gru = nn.GRU(hidden_size * 2, hidden_size) 
        self.attn = nn.Linear(2 * hidden_size, 1)
        self.last = nn.Linear(hidden_size, vocab_size)
    
    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size)
    
    def forward(self, encoder_outputs, decoder_input, decoder_hidden):
        alpha = []
        for i in range(len(encoder_outputs)):
            concat = torch.cat((encoder_outputs[i], decoder_hidden[0]), dim=1) #concat encoder_outputs + decoder_hidden
            alpha_element = self.attn(concat) # linear regression the encoder output and hidden to find the attention weight coresponding to encoder output at time t
            alpha.append(alpha_element) # append the weight
        alpha = torch.cat(alpha,1)
        alpha_normalized = F.softmax(alpha,1)
        
        c = torch.bmm(alpha_normalized.view(1, 1, 10), encoder_outputs.view(1, -1, self.hidden_size))
        
        embedding = self.embedding(decoder_input[0]).view(1, 1, -1) #embedding of the last decoder output as input
    
        input_decoder = torch.cat((embedding, c), 2) # concat the (attention_norm | encoder_outputs) to the last input of decoder
        
        
        out, hidden = self.gru(input_decoder, decoder_hidden)

        out = self.last(out[0])
        out = F.log_softmax(out[0])
        
        return out, hidden

In [None]:
decoder = AttentionDecoder(len(word2ix), 255).cuda()

criterion = nn.NLLLoss()
optimizer_encoder = optim.Adam(encoder.parameters())
optimizer_decoder = optim.Adam(decoder.parameters())

In [None]:
def train(input_tensor, target_tensor, total, correct):
    encoder_hidden = encoder.init_hidden().cuda()
    
    optimizer_encoder.zero_grad()
    optimizer_decoder.zero_grad()
    
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    
    encoder_outputs = torch.zeros(10, 1, encoder.hidden_size).cuda()
    loss = 0
    use_teacher_forcing = True if random.random() < 0.5 else False

    
    for word in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[word], encoder_hidden)
        encoder_outputs[word] = encoder_output[0, 0]
    decoder_input = torch.tensor([[0]]).cuda()
    decoder_hidden = encoder_hidden
    
    if not use_teacher_forcing:
        for i in range(target_length):
            decoder_output, decoder_hidden = decoder(encoder_outputs, decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.detach().long().cuda()
            loss += criterion(decoder_output.view(1,-1), target_tensor[i].unsqueeze(0))
            total += 1
            if ix2word[decoder_input.item()] == ix2word[target_tensor[i].item()]:
                correct += 1
            
            if decoder_input == 0: # SOS daca e inversat opreste generarea
                #print("EOS")
                break
    else:
        for i in range(target_length):
            decoder_output, decoder_hidden = decoder(encoder_outputs, decoder_input, decoder_hidden)
            loss += criterion(decoder_output.view(1,-1), target_tensor[i].unsqueeze(0))
            decoder_input = target_tensor[i].unsqueeze(0).unsqueeze(1).unsqueeze(1).long().cuda()
    
    
    
    loss.backward()
    optimizer_encoder.step()
    optimizer_decoder.step()
    
    
    return loss, total, correct

In [None]:
def validate(input_tensor, target_tensor, total_v, correct_v):
    with torch.no_grad():
        encoder_hidden = encoder.init_hidden().cuda()
        encoder_outputs = torch.zeros(10, 1, encoder.hidden_size).cuda()
        
        for word in range(len(input_tensor)):
            encoder_out, encoder_hidden = encoder(input_tensor[word], encoder_hidden)
            encoder_outputs[word] = encoder_out[0,0]
            
        decoder_input = torch.tensor([[0]]).cuda()
        decoder_hidden = encoder_hidden
        
        for word in range(len(input_tensor)):
            decoder_output, decoder_hidden = decoder(encoder_outputs, decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.detach().long().cuda()
            total_v += 1
            if ix2word[decoder_input.item()] == ix2word[target_tensor[word].item()]:
                correct_v += 1
            if decoder_input == 0:
                break
                
    return total_v, correct_v

In [None]:
def learn():
    for i in range(5):
        loss = 0
        correct = 0
        total = 0
        correct_v = 0
        total_v = 0
        print('Epoch {}'.format(i))
        for data, target in training_data:
            if len(data) < 10:
                loss_it, total_it, correct_it = train(prepare_sequence(target, word2ix, True), prepare_sequence(target, word2ix, True), total, correct)
                loss += loss_it
                total += total_it
                correct += correct_it
        for data, target in validation_data:
            if len(data) < 10:
                total_v_it, correct_v_it = validate(prepare_sequence(data, word2ix, False), prepare_sequence(target, word2ix, True), total_v, correct_v)
                total_v += total_v_it
                correct_v += correct_v_it
        print("-- Training results --")
        print(correct * 100 / total)   
        print(loss / len(training_data))
        print("-- Validation results --")
        print(correct_v * 100 / total_v)

learn()

Epoch 0




-- Training results --
2.5239410003783584
tensor(16.7485, device='cuda:0', grad_fn=<DivBackward0>)
-- Validation results --
18.061522585585948
Epoch 1
-- Training results --
38.65876891899636
tensor(12.0039, device='cuda:0', grad_fn=<DivBackward0>)
-- Validation results --
28.78440434610935
Epoch 2
