In [59]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [2]:
EOS_token = 0
SOS_token = 1

class Lang:
    def __init__(self,name):
        
        self.name = name
        self.word2idx = {}
        self.word2count = {}
        self.idx2word = {0:'EOS',1:'SOS'}
        self.num_words = 2
        
    def add_sentence(self,sentence):
        for word in sentence.split(' '):
            self.add_word(word)
            
    def add_word(self,word):
        if word not in self.word2idx:
            self.word2idx[word] = self.num_words
            self.word2count[word] = 1
            self.idx2word[self.num_words] = word
            self.num_words += 1
        else:
            self.word2count[word] += 1
            

In [3]:
france = Lang('france')


In [4]:
france


<__main__.Lang at 0x10fae1358>

In [5]:
#Helper functions!
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s



# Read Data

In [6]:
def read_lang(path,lang1,lang2,reverse =False):
    print('Starting ! ! !')
    lines = open(path +'%s-%s.txt' % (lang1, lang2), encoding='utf-8').read().strip().split('\n')
    
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
    
     # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs



In [94]:
class EncodeRNN(nn.Module):
    
    def __init__(self,input_size,hidden_size):
        super(EncodeRNN,self).__init__()
        self.hidden=hidden_size
        
        self.emb = nn.Embedding(input_size,hidden_size)
        self.gru = nn.GRU(hidden_size,hidden_size)
        
    def forward(self,x,hidden):
        embed = self.emb(x).view(1,1,-1)
        output,hidden = self.gru(embed,hidden)
        return output,hidden
    
    def initHidden(self):
        return torch.zeros(1, 1, self.hidden, device=device)
        

In [83]:
class DecodeRNN(nn.Module):
    def __init__(self,hidden,output):
        super(DecodeRNN,self).__init__()
        self.hidden = hidden
        
        self.emb = nn.Embedding(output,hidden) # word2vec dimension == hidden !
        self.gru = nn.GRU(hidden,hidden) # word to word ,same size
        self.fc = nn.Linear(hidden,output) # pick word from vocab!
        
    def forward(self,x,hidden):
        
        embed = self.emb(x).view(1,1,-1)
        output,hidden = nn.gru(embed,hidden)
        output = F.log_softmax(output,dim=1)
        return output,hidden
    
    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)
    
    

In [84]:
MAX_LENGTH = 10 # 10 words at large !

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s ",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)

def filterPair(pair):
    return len(pair[0].split(' ')) < MAX_LENGTH and len(pair[1].split(' ')) < MAX_LENGTH and pair[0].startswith(eng_prefixes)
    #getting those pairs len(Eng) <10 and start with...
    
def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]


In [85]:
def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = read_lang('data/',lang1, lang2)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)

    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        
        input_lang.add_sentence(pair[0])
        output_lang.add_sentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.num_words)
    print(output_lang.name, output_lang.num_words)
    return input_lang, output_lang, pairs


In [86]:
input_lang, output_lang, pairs = prepareData(lang1='eng', lang2='fra')
#print(random.choice(pairs))


Starting ! ! !
Read 170190 sentence pairs
Trimmed to 12702 sentence pairs
Counting words...
Counted words:
eng 3050
fra 4736


In [87]:
pairs[0] # One pair example!


['i m .', 'j ai ans .']

In [88]:
filterPair(pairs[0])

True

# Preparing work 

In [89]:
# How to do 
# we have raw token data ! words !
# now the workflow is that turn words into int, then we can turn int into our embedding(one hot vector)

def sentence2idx(lang,sentence):
    return [lang.word2idx[word] for word in sentence.split(' ')]

def sentenc2tensor(lang,sentence):
    idx = sentence2idx(lang,sentence)
    idx.append(EOS_token) ##  Adding the ending word!
    return torch.LongTensor(idx).view(-1, 1).to(DEVICE)

def pair2tensor(lang1,lang2,pair):
    
    x_tensor = sentenc2tensor(lang1,pair[0]) # input language >>>>>> index
    y_tensor = sentenc2tensor(lang2,pair[1]) # output language >>>>>>> index
    
    return x_tensor,y_tensor


# Attention !

In [360]:
class AttnDecoder(nn.Module): # Multiplicative Attention
    def __init__(self,hidden_size,output_size,dropout_rate = 0.2):
        super(AttnDecoder,self).__init__()
        
        self.hidden = hidden_size # this is word vecter dimension
        self.output = output_size # this is vocab size
        self.drop = nn.Dropout(dropout_rate)
        
        
        self.emb = nn.Embedding(output_size,hidden_size)
        
        self.rnn = nn.GRU(hidden_size,hidden_size) # <EOS> inital and as the first word vector!
        self.h2newoutput = nn.Linear(hidden_size * 2, hidden_size)
        self.toword = nn.Linear(hidden_size,output_size)
        
    def forward(self,x,hidden,encoder_matrix):
        
        emb_vecter = self.emb(x)
        #print(emb_vecter.size())
        score = F.softmax(torch.matmul(emb_vecter,torch.t(encoder_matrix)),dim=-1)
        #print(score)
        new_context = torch.matmul(score,encoder_matrix)
        #print(new_context.size())
        output,hidden = self.rnn(emb_vecter,hidden)
        hidden = torch.cat([new_context,output],dim = 2)
        #print(new_input.size())
        output = F.tanh(self.h2newoutput(hidden))
        output = self.drop(output)
        final_output = F.log_softmax(self.toword(output),dim = 2)
        return final_output,hidden
    
    def initHidden(self):
        return torch.zeros(1, 1, self.hidden, device=device)
        


# test

In [361]:
x,y = pair2tensor(input_lang,output_lang,pairs[9])
x

tensor([[ 2],
        [ 3],
        [11],
        [ 4],
        [ 0]])

In [362]:
e = EncodeRNN(input_lang.num_words,50)
d = AttnDecoder(50,output_lang.num_words)

In [363]:
e_hidden = e.initHidden()
encoder_matrix = torch.zeros(x.size(0),50)
for i in range(x.size(0)):
    out,e_hidden = e(x[i],e_hidden)
    encoder_matrix[i] = out

In [364]:
print(encoder_matrix)

tensor([[ 2.6333e-01,  2.0939e-01,  1.9787e-01,  3.5635e-01, -3.5455e-02,
         -2.7447e-01, -3.8042e-01, -3.0422e-01,  1.2252e-01, -2.1466e-01,
         -2.0349e-01, -9.8748e-02,  4.0235e-02, -3.6948e-01, -9.9609e-02,
          3.4834e-01,  9.1425e-02, -9.9497e-02,  2.1240e-01,  3.0851e-01,
          2.1479e-01,  2.9206e-01,  1.5038e-01,  2.9462e-01,  2.8715e-02,
         -8.6148e-02,  2.0119e-01, -2.1509e-01,  1.5292e-01,  1.1958e-01,
         -1.2617e-01,  1.3298e-01,  9.1545e-02, -1.5589e-01,  6.1138e-02,
          2.5503e-01,  1.2153e-01, -3.0593e-03,  1.6935e-01, -1.2639e-01,
         -5.7941e-02,  1.3696e-01,  3.1911e-02, -4.2443e-02,  5.9668e-02,
          4.7592e-01, -2.8284e-01, -1.5854e-02, -1.4259e-01,  1.9773e-01],
        [ 7.1972e-02,  7.5361e-02,  2.9119e-01, -5.7573e-02, -3.4091e-02,
         -1.1899e-01, -2.2229e-01, -2.4887e-01,  5.9378e-01, -3.3829e-01,
         -4.4178e-01, -9.9675e-02, -3.3254e-01, -5.9565e-02, -2.7380e-01,
          3.2901e-01, -1.5358e-01, -3

In [365]:
d_hidden = d.initHidden()
decoder_input = torch.tensor([[SOS_token]], device=DEVICE)
out,hidden = d(decoder_input,d_hidden,encoder_matrix)

In [366]:
hidden.size()

torch.Size([1, 1, 100])

In [367]:
torch.max(out,dim=2)

torch.return_types.max(
values=tensor([[-8.0691]], grad_fn=<MaxBackward0>),
indices=tensor([[3919]]))

In [368]:
teacher_forcing_ratio = 0.5
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(input_length, encoder.hidden, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)

            loss += criterion(decoder_output.squeeze().view(1,-1), target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output.squeeze().view(1,-1), target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length






In [369]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [pair2tensor(input_lang,output_lang,random.choice(pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [370]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np
import time

def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

In [371]:
hidden_size = 256
encoder1 = EncodeRNN(input_lang.num_words, hidden_size).to(device)
attn_decoder1 = AttnDecoder(hidden_size, output_lang.num_words, dropout_rate=0.1).to(device)

trainIters(encoder1, attn_decoder1, 75000, print_every=5000)

RuntimeError: input must have 3 dimensions, got 2

In [326]:
d = torch.randint(0,5,(10,300)).float()
d.size()

torch.Size([10, 300])

In [327]:
c = torch.randint(0,5,(1,1,300)).float()
c.size()

torch.Size([1, 1, 300])

In [328]:
s = F.softmax(torch.matmul(c, torch.t(d)),dim=2)
s

tensor([[[0.0000e+00, 1.0089e-43, 1.5629e-18, 9.6025e-24, 9.9998e-01,
          0.0000e+00, 0.0000e+00, 4.9060e-35, 1.6701e-05, 0.0000e+00]]])

In [330]:
new = torch.matmul(s,d)
new

tensor([[[1.9205e-23, 3.0000e+00, 3.0000e+00, 3.0000e+00, 3.0000e+00,
          3.0000e+00, 2.0000e+00, 3.0000e+00, 3.9999e+00, 1.0000e+00,
          9.9998e-01, 1.0000e+00, 1.0000e+00, 3.1257e-18, 1.6701e-05,
          2.0000e+00, 3.0000e+00, 3.0000e+00, 2.0000e+00, 2.0000e+00,
          3.9999e+00, 4.0000e+00, 2.0000e+00, 6.2514e-18, 2.0000e+00,
          4.0000e+00, 9.9998e-01, 2.0000e+00, 3.0000e+00, 4.0000e+00,
          3.0000e+00, 1.0000e+00, 3.0000e+00, 3.9999e+00, 4.0000e+00,
          1.0000e+00, 1.0000e+00, 2.0000e+00, 2.9999e+00, 3.0000e+00,
          3.0000e+00, 6.6806e-05, 4.0000e+00, 3.0000e+00, 9.9998e-01,
          3.0000e+00, 3.9999e+00, 3.0000e+00, 2.0000e+00, 4.0000e+00,
          3.0000e+00, 1.0000e+00, 2.0000e+00, 6.2514e-18, 1.0001e+00,
          1.0000e+00, 3.0000e+00, 2.0000e+00, 1.0001e+00, 2.0000e+00,
          1.0001e+00, 1.0000e+00, 9.9998e-01, 2.9999e+00, 3.0000e+00,
          3.1257e-18, 2.9999e+00, 1.0000e+00, 2.0000e+00, 3.9999e+00,
          3.0000e+00

AttributeError: 'NLLLoss' object has no attribute 'file'