In [1]:
from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

In [7]:
# for start and end of string
SOS_token = 0
EOS_token = 1

# language class
class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2
        
    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)
            
    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1
        

In [8]:
# this part copied straight from tutorial

# Turn a Unicode string to plain ASCII, thanks to
# http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s) # add a space before punctuation
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s) # replace everything thats not a letter, ., ?, or ! with space
    return s


In [9]:
def readLangs(lang1, lang2, reverse=False):
    
    lines = open("lang_data/fra-eng/fra.txt").read().strip().split("\n")
    
    # each line is a tab-separated value
    pairs = [[normalizeString(lang) for lang in line.split("\t")] for line in lines]
    
    # if want to translate in other way:
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)
        
    return input_lang, output_lang, pairs

In [10]:
# BS filtering to make stuff run faster - also copied straight from website
MAX_LENGTH = 10

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)


def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[1].startswith(eng_prefixes)


def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

In [11]:
# also copied...
def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs


input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
print(random.choice(pairs))

Read 160872 sentence pairs
Trimmed to 12244 sentence pairs
Counting words...
Counted words:
fra 4785
eng 3116
['c est une fille honnete .', 'she s an honest girl .']


Preprocessing done :) 

In [89]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size)#, device=device)
# class EncoderRNN(nn.Module):
#     def __init__(self, input_size, hidden_size):
#         super(EncoderRNN, self).__init__()
#         self.hidden_size = hidden_size
        
#         self.embedding = nn.Embedding(input_size, hidden_size)
#         self.gru = nn.GRU(hidden_size, hidden_size)
        
#     def forward(self, x, hidden):
#         print(x)
#         embedded = self.embedding(x).view(1, 1 -1)
#         output = embedded
#         output, hidden = self.gru(output, hidden)
#         return output, hidden
    
#     def initHidden(self):
#         return torch.zeros(1, 1, self.hidden_size)

In [57]:
class DecoderRNN(nn.Module):
    
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        # still need an embedding
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1) # assume because we have 1-d data
        
    def forward(self, x, hidden):
        output = self.embedding(x).view(1, 1, -1)
        output = F.relu(output) # regularization thing
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0])) # output is only going to have a single thing, so this is legal i guess
        return output, hidden
        

In [64]:
def indexesFromSentence(lang, sent):
    return [lang.word2index[w] for w in sent.split(' ')]

def tensorFromSentence(lang, sent):
    indexes = indexesFromSentence(lang, sent)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long).view(-1, 1)

def tensorsFromPair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)


    

In [46]:
# how often we use the target input as input to our decoder rather than our decoder's guess
# while training 
teacher_forcing_ratio = 0.5 

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer,
         criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden() # just 0's
    
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size)
    
    loss = 0 # mission accomplished ;)
    
    # actually run the thing that encodes
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]
        
    
    # now its decoder time - this part changes somewhat if you add attention
    decoder_input = torch.tensor([[SOS_token]])
    decoder_hidden = encoder_hidden # no need for an init function
    
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    
    if use_teacher_forcing:
        # target is next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di] # bc we're using teacher focing
            
    else:
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1) # returns a tuple of the largest value and its index as tensors
            decoder_input = topi.squeeze().detach() # I'm not totally sure what this does
            loss += criterion(decoder_output, target_tensor[di])
            
            if decoder_input.item() == EOS_token:
                break # we're done with this sentence - we don't have to do this above bc it goes to the end of the string automatically
    
    
    # backprop
    loss.backward()
    
    # SGD or something
    encoder_optimizer.step()
    decoder_optimizer.step()
    
    return loss.item()/target_length # not sure what this is, but we can see I guess
            
            

In [47]:
# copied directly for profiling...
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [92]:
# Actually do the training:

def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate = 0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0
    plot_loss_total = 0
    
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs)) for i in range(n_iters)] # only this many?
    criterion = nn.NLLLoss() # this is... negative log likelihood loss
                             # it's the same as cross-entropy loss bc of the log softmax in the last layer
    
    for iter_i in range(1, n_iters + 1):
        training_pair = training_pairs[iter_i -1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]
        
        loss = train(input_tensor, target_tensor, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        
        print_loss_total += 1
        plot_loss_total += 1
        
        if iter_i % print_every == 0:
            print_loss_avg = print_loss_total / print_every # calc avg loss
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter_i / n_iters),
                                         iter_i, iter_i / n_iters * 100, print_loss_avg))
        # for plotting loss
        if iter_i % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)
    

In [103]:
%matplotlib inline

#shamelessly copied from tutorial... yikes
import matplotlib.pyplot as plt
#plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)
    plt.show()

In [97]:
# Same as training, just no targets - just rum the thing through the network

def evaluate(encoder, decoder, sentence, max_len=MAX_LENGTH):
    with torch.no_grad(): # not totally sure what this does tbh - probably stops from updating gradients like we do in training because we are done with training
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size(0) # just the size of the first dimension
        encoder_hidden = encoder.initHidden()
        encoder_outputs = torch.zeros(max_len, encoder.hidden_size)
        
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(
                input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0] # is a vector 
        
        # decoder - would have to change if added attention
        decoder_input = torch.tensor([[SOS_token]])
        decoder_hidden = encoder_hidden
        decoded_words = []
        
        for di in range(max_len):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            
            # translate to actual words 
            if topi.item() == EOS_token:
                decoded_words.append("<EOS>")
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])
            
            decoder_input = topi.squeeze().detach()
            
            
        return decoded_words
        
        
        
    

In [98]:
def evaluateRandomLines(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate(encoder, decoder, pair[0])
        print('<', ' '.join(output_words))
        print()

In [93]:
# now actually do the thing!
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words)

trainIters(encoder1, decoder1, 75000, print_every=5000)

2m 57s (- 41m 20s) (5000 6%) 1.0000
5m 55s (- 38m 31s) (10000 13%) 1.0000
8m 54s (- 35m 37s) (15000 20%) 1.0000
11m 56s (- 32m 49s) (20000 26%) 1.0000
19m 28s (- 38m 57s) (25000 33%) 1.0000
22m 28s (- 33m 42s) (30000 40%) 1.0000
25m 29s (- 29m 8s) (35000 46%) 1.0000
28m 30s (- 24m 56s) (40000 53%) 1.0000
50m 18s (- 33m 32s) (45000 60%) 1.0000
53m 18s (- 26m 39s) (50000 66%) 1.0000
56m 20s (- 20m 29s) (55000 73%) 1.0000
59m 20s (- 14m 50s) (60000 80%) 1.0000
71m 27s (- 10m 59s) (65000 86%) 1.0000
74m 28s (- 5m 19s) (70000 93%) 1.0000
77m 30s (- 0m 0s) (75000 100%) 1.0000


In [99]:
evaluateRandomLines(encoder1, decoder1, 10)

> nous sommes pas en train de regarder .
= we re not looking .
< we re not looking . <EOS>

> ils se trouvent juste derriere moi .
= they re right behind me .
< they re right behind me . <EOS>

> il a la cinquantaine bien tassee .
= he s in his late fifties .
< he s in at his fifties . <EOS>

> je prends soin de moi .
= i m looking after myself .
< i m looking a myself of . <EOS>

> tu as fini de travailler non ?
= you re done working aren t you ?
< you re done working aren t you ? <EOS>

> il est en pyjama .
= he is in pajamas .
< he s in . . <EOS>

> tu es tres occupee .
= you re very busy .
< you re very busy . <EOS>

> elle est hors de danger .
= she is out of danger .
< she is out danger . <EOS>

> je me fais des soucis pour sa securite .
= i am concerned for her safety .
< i am concerned for her safety . <EOS>

> je ne souffre pas de lubies .
= i m not delusional .
< i m not sure . <EOS>



In [106]:
showPlot(trainIters.plot_losses)

AttributeError: 'function' object has no attribute 'plot_losses'

In [24]:
torch.zeros(1, 1, 4)

tensor([[[0., 0., 0., 0.]]])

In [40]:
torch.tensor(a).view(-1 , 1).topk(1)

tensor([[1],
        [2],
        [3],
        [4]])

In [44]:
torch.tensor(a).topk(1)

(tensor([4]), tensor([3]))

In [72]:
torch.tensor(a).size()[0]


4

In [78]:
torch.tensor([25])

tensor([25])

In [87]:
t = torch.tensor([[-0.8183,  0.6509, -0.7775, -1.4160,  0.7811,  0.8288, -0.1686,  0.5646,
          1.4202, -0.1010, -1.8720, -1.4071,  0.3098, -0.8393, -0.2920,  0.2280,
          0.2360, -0.8734,  0.8589,  0.0237, -0.0105,  0.1652, -1.1221,  0.9603,
          0.1970, -0.2509, -0.5855,  0.5183,  0.6946,  0.0736,  0.9767,  0.5792,
         -0.0312, -1.4024,  0.3942,  0.5707,  0.4053,  0.5660, -1.8723,  1.6933,
          1.0951,  1.4414,  0.3009,  0.1762, -0.7756, -0.5205, -0.8247, -0.4371,
          0.9644,  0.5760,  1.5931, -0.9677,  0.6735,  0.5148, -0.5505, -0.6410,
          0.0239, -0.0303, -0.6336, -0.2867, -0.8388,  0.9322, -0.5291,  0.1694,
          0.0947,  0.9172, -0.1857, -0.1289,  0.2437,  1.3823, -0.0694,  0.4305,
          0.5292,  0.8521, -0.9946,  0.6015,  0.3973, -0.3843, -0.9620, -0.3600,
          1.4966,  0.7945, -0.9318,  1.2036, -1.3363,  0.9426, -0.4320, -0.1362,
          0.3154, -1.9034,  1.4595, -0.7882, -1.8531, -1.0643,  2.1001, -1.8699,
          0.2824,  1.7580,  0.9011,  0.8399,  1.1082,  0.1434,  0.2387,  0.4536,
         -0.7322, -1.4969,  0.2760, -0.4184,  0.3214,  0.3108,  0.6868,  0.2431,
          0.7639,  1.5494,  0.5707,  0.2255, -0.5867, -1.0935,  0.8158,  0.2679,
          1.4457,  0.5782,  0.3450, -0.6154, -0.5931, -1.8968, -0.0847, -0.6118,
          1.5199,  0.7472,  0.0590, -0.5100, -0.1591,  0.6751, -0.7925, -1.2338,
         -0.6377, -1.0088, -1.3172,  1.0568, -1.0751,  0.4422, -1.3927,  0.9583,
          0.6041,  0.3221, -0.9782,  0.6495,  0.9313,  0.8721,  0.3172,  0.5276,
          0.2107,  0.3229,  0.0928,  0.3852, -0.2961,  0.8587, -0.4703,  1.1611,
          1.3347,  0.7322,  0.0376,  0.3937,  0.1368,  1.5847,  0.2463,  0.4411,
         -0.2400,  0.7326, -0.5570, -0.6583, -0.4954, -0.1063,  0.5333,  1.4538,
         -0.3872, -0.2830, -0.1765,  1.3330,  1.1949,  1.1706,  0.1179, -0.3406,
         -0.2441,  0.6684,  1.5012,  0.7600, -0.5098, -0.1431, -0.5578,  0.0684,
         -0.4613, -0.9837, -0.9606,  1.5722, -1.7310,  0.5229, -0.2257, -0.4074,
          0.0748,  2.1557,  1.9826,  0.0707,  0.0983,  0.3841, -0.0819,  0.5578,
         -2.1486,  0.5874, -0.0385,  0.3261, -1.2199, -0.7162,  1.0639, -0.9613,
         -1.0904, -0.3295, -1.4279, -0.7445,  0.6628,  1.3855,  1.5763,  0.9051,
          0.5788, -0.7529, -1.4163,  0.7441,  1.1355,  0.2101,  0.4652, -0.5745,
         -0.0343,  2.5943, -0.0520, -0.6894,  0.3463, -0.3206, -0.6341,  2.1523,
         -1.8168,  0.6194,  0.6981, -0.5470, -1.4172,  1.5467,  1.3849,  0.8182,
         -0.5301,  0.5223,  1.2540, -0.4110, -1.1922, -0.7801,  0.9858, -0.4021]])

In [88]:
t.view(1, 1, -1)

tensor([[[-0.8183,  0.6509, -0.7775, -1.4160,  0.7811,  0.8288, -0.1686,
           0.5646,  1.4202, -0.1010, -1.8720, -1.4071,  0.3098, -0.8393,
          -0.2920,  0.2280,  0.2360, -0.8734,  0.8589,  0.0237, -0.0105,
           0.1652, -1.1221,  0.9603,  0.1970, -0.2509, -0.5855,  0.5183,
           0.6946,  0.0736,  0.9767,  0.5792, -0.0312, -1.4024,  0.3942,
           0.5707,  0.4053,  0.5660, -1.8723,  1.6933,  1.0951,  1.4414,
           0.3009,  0.1762, -0.7756, -0.5205, -0.8247, -0.4371,  0.9644,
           0.5760,  1.5931, -0.9677,  0.6735,  0.5148, -0.5505, -0.6410,
           0.0239, -0.0303, -0.6336, -0.2867, -0.8388,  0.9322, -0.5291,
           0.1694,  0.0947,  0.9172, -0.1857, -0.1289,  0.2437,  1.3823,
          -0.0694,  0.4305,  0.5292,  0.8521, -0.9946,  0.6015,  0.3973,
          -0.3843, -0.9620, -0.3600,  1.4966,  0.7945, -0.9318,  1.2036,
          -1.3363,  0.9426, -0.4320, -0.1362,  0.3154, -1.9034,  1.4595,
          -0.7882, -1.8531, -1.0643,  2.1001, -1.86