In [1]:
import torch.nn as nn
import numpy as np
import torch
import unicodedata
import string
from torch import optim
import torch.nn.functional as F
from tqdm import tqdm_notebook
device = torch.device("cuda")
device

device(type='cuda')

In [2]:
class Encoder(nn.Module):
    """Encoder class used on each input variable.
       Will be used in the decoder arhitecture to create the context vector.
    """
    def __init__(self, embedding_size, hidden_size, vocab_size):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)
        
    def init_hidden(self):
        return (torch.zeros([1, 1, self.hidden_size]).to(device), torch.zeros([1, 1, self.hidden_size]).to(device))
    
    def forward(self, inp, hidden):
        x = self.embedding(inp).view(1, 1, -1).to(device)
        out, hidden = self.lstm(x, hidden)
        
        return out, hidden

In [3]:
class AttentionDecoder(nn.Module):
    """Decoder architecture.
    
    """
    def __init__(self, hidden_size, output_size, embedding_size, vocab_size):
        super(AttentionDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.output_size = output_size
        self.embedding_size = embedding_size
        
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.lstm = nn.LSTM(hidden_size, output_size)
        self.attn_lin = nn.Linear(3 * self.hidden_size, 1)
        self.input_lin = nn.Linear(2 * self.hidden_size, hidden_size)
        self.last_lin = nn.Linear(self.hidden_size, self.vocab_size)
        
        
    def init_hidden(self):
        return (torch.zeros([1, 1, self.hidden_size]).to(device), torch.zeros([1, 1, self.hidden_size]).to(device))
    
    def forward(self, inputs, hidden, encoder_outputs):
        x = self.embedding(inputs).view(1, 1, -1)
        x = F.relu(x)
        
        attn_values = []
        
        for i in range(len(encoder_outputs)):
            hidden_dec = torch.cat((hidden[0], hidden[1]), dim=2)
            concat_values = torch.cat((hidden_dec, encoder_outputs[i].view(1, 1, -1)), dim=2)
            attn_value = self.attn_lin(concat_values)
            attn_values.append(attn_value)
        # Create the attention weights to make the weighted average of the encoder values    
        alphas = torch.cat(attn_values, 1) # concat them to the second dimension [1, weights, 1]
        alphas_norm = F.softmax(alphas, dim=1)
        # Calculate the weighted average [1, 1, nr_encoded_words] * [ 1, nr_enoded_words, hidden_siz] -> [1, 1, hidd_siz]
        context_vec = torch.bmm(alphas_norm.view(1, 1, -1), encoder_outputs.view(1, -1, self.hidden_size))
       
        # Append the context vector to the last output
        decoder_input = torch.cat((context_vec, x), dim=2)
        # Make the input dimension usable for the lstm
        decoder_input = self.input_lin(decoder_input)
        decoder_input = F.relu(decoder_input)
        
        out, hidden = self.lstm(decoder_input, hidden)
        out = self.last_lin(out[0])
        out = F.log_softmax(out, dim=1)
        
        
        
        return out, hidden
        

        

In [4]:
class Util():
    def __init__(self, file_path):
        self.data = self.read_file(file_path)
        self.word_2_index, self.index_2_word = self.create_dictionaries(self.data)
        
        
    def read_file(self, file_path):
        f = open(file_path, encoding="mbcs")
        data = []
        
        for line in f.readlines():
            sentence = line.split('\t')[0]
            sentence = sentence.split(' ') # split into words
            sentence = [word.lower() for word in sentence] # make all the words lowercase
            sentence = [word.translate(str.maketrans('', '', string.punctuation)) for word in sentence]
            inverse = sentence[::-1]
            data.append((sentence, inverse))
            
        return data
        
    def create_dictionaries(self, data):
        word_2_index = {}
        index_2_word = {}
        
        word_2_index["SOS"] = 0
        index_2_word[0] = "SOS"
        word_2_index["EOS"] = 1
        index_2_word[1] = "EOS"
        
        for sentence, target in data:
            for word in sentence:
                if word not in word_2_index:
                    word_2_index[word] = len(word_2_index)
                    index_2_word[len(index_2_word)] = word
        
        
        return word_2_index, index_2_word
    
    def create_tensor_from_sentence(self, input_sentence, target_sentence):
        inp = []
        out = []
        
        inp.append(0)
        
        for word in input_sentence:
            inp.append(self.word_2_index[word])
        inp.append(1)
        
        out = inp[::-1]
        
        return torch.tensor(inp).cuda(), torch.tensor(out).cuda()
        
        
util = Util("ron.txt")

In [5]:
VOCAB_SIZE = len(util.word_2_index) # ?? IDK


encoder = Encoder(256, 256, VOCAB_SIZE).to(device)
decoder = AttentionDecoder(256, 256, 256, VOCAB_SIZE).to(device)


encoder_optimizer = optim.Adam(encoder.parameters())
decoder_optimizer = optim.Adam(decoder.parameters())

criterion = nn.NLLLoss()

In [6]:
def learn(input_tensor, target_tensor):
    
    encoder_hidden = encoder.init_hidden()
    encoder_outputs = torch.zeros([len(input_tensor), 1, 256]).cuda()
    loss = 0
    
    #clear the last gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    
    for i in range(len(input_tensor)):
        encoder_out, encoder_hidden = encoder(input_tensor[i], encoder_hidden)
        encoder_outputs[i] = encoder_out
    
    out = torch.tensor([1]).cuda() # eos
    decoder_hidden = encoder_hidden
    
    for i, targ in enumerate(target_tensor):
        # decoder output softmax of vocab size
        decoder_out, decoder_hidden = decoder(out.cuda(), decoder_hidden, encoder_outputs)
        # get the max value and the max index
        topv, topi = decoder_out.topk(1)
        # transform the max index in a tensor and feed it as an input
        out = topi.detach().long().cuda()
        # calculate the loss [batch_size, output_softmax], long_target_value
        loss += criterion(decoder_out.squeeze().unsqueeze(0).to(device), target_tensor[i].unsqueeze(0).to(device))
        
        
    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()   
    
    
    return loss / len(target_tensor)


In [7]:
def train():
    for i in range(25):
        loss = 0
        for inp, target in tqdm_notebook(util.data):
            inp_tensor, output_tensor = util.create_tensor_from_sentence(inp, target)
            loss += learn(inp_tensor, output_tensor)
        
        print(loss)
        
train()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(6953.5405, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(4244.9434, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(2841.9692, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(1701.8900, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(855.6002, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(386.4748, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(170.8110, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(97.6114, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(59.6171, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(38.5835, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(26.1143, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(24.4430, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(22.2199, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(16.0705, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(6.7259, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(7.5887, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(9.6806, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(5.8662, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(11.5039, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(19.5564, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(6.3272, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(1.1088, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(6.5465, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(14.3894, device='cuda:0', grad_fn=<ThAddBackward>)


HBox(children=(FloatProgress(value=0.0, max=2165.0), HTML(value='')))


tensor(7.2304, device='cuda:0', grad_fn=<ThAddBackward>)


In [27]:
util.word_2_index

{'SOS': 0,
 'EOS': 1,
 'hi': 2,
 'run': 3,
 'who': 4,
 'fire': 5,
 'help': 6,
 'jump': 7,
 'stop': 8,
 'wait': 9,
 'hello': 10,
 'hurry': 11,
 'relax': 12,
 'smile': 13,
 'attack': 14,
 'cheers': 15,
 'freeze': 16,
 'get': 17,
 'up': 18,
 'really': 19,
 'ask': 20,
 'tom': 21,
 'awesome': 22,
 'call': 23,
 'me': 24,
 'out': 25,
 'go': 26,
 'away': 27,
 'goodbye': 28,
 'hold': 29,
 'on': 30,
 'i': 31,
 'agree': 32,
 'im': 33,
 'ill': 34,
 'sad': 35,
 'its': 36,
 'ok': 37,
 'keep': 38,
 'it': 39,
 'open': 40,
 'perfect': 41,
 'tell': 42,
 'why': 43,
 'not': 44,
 'grab': 45,
 'him': 46,
 'how': 47,
 'cute': 48,
 'pay': 49,
 'back': 50,
 'calm': 51,
 'free': 52,
 'here': 53,
 'home': 54,
 'numb': 55,
 'sick': 56,
 'hurts': 57,
 'marry': 58,
 'may': 59,
 'terrific': 60,
 'fled': 61,
 'left': 62,
 'too': 63,
 'late': 64,
 'trust': 65,
 'use': 66,
 'this': 67,
 'fell': 68,
 'paid': 69,
 'bless': 70,
 'you': 71,
 'down': 72,
 'come': 73,
 'dont': 74,
 'fantastic': 75,
 'he': 76,
 'is': 77,
 'he

In [56]:
inpts = torch.tensor([5, 4, 2, 1, 2])

encoder_outputs = torch.zeros([5, 1, 256]).cuda()
encoder_hidden = encoder.init_hidden()

In [58]:
with torch.no_grad():
    for i, inp in enumerate(inpts):
        encoder_out, encoder_hidden = encoder(inp.cuda(), encoder_hidden)
        encoder_outputs[i] = encoder_out

    out = torch.tensor([1]).cuda() # EOS
    decoder_hidden = encoder_hidden

    for i in range(15):
        # decoder output softmax of vocab size
        decoder_out, decoder_hidden = decoder(out.cuda(), decoder_hidden, encoder_outputs)
        # get the max value and the max index
        topv, topi = decoder_out.topk(1)
        # transform the max index in a tensor and feed it as an input
        out = topi.detach().long().cuda()
        print(out)
        if out.item() == 0:
            break

tensor([[1]], device='cuda:0')
tensor([[927]], device='cuda:0')
tensor([[30]], device='cuda:0')
tensor([[26]], device='cuda:0')
tensor([[18]], device='cuda:0')
tensor([[27]], device='cuda:0')
tensor([[18]], device='cuda:0')
tensor([[1090]], device='cuda:0')
tensor([[0]], device='cuda:0')
