In [15]:
import os
import numpy as np
import random
import math
import time
from math import log, ceil

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [3]:
# Sentinel characters (code points 0, 1, 2): padding, start-of-sequence and
# end-of-sequence markers that get_batch adds around the raw sentences.
pad_chr, start_chr, end_chr = (chr(code) for code in range(3))

In [7]:
def get_batch(lang1, lang2, alphabet1, alphabet2, num_samples):
    """Draw a random batch of aligned sentence pairs and encode it as arrays.

    Args:
        lang1: Sequence of source sentences (strings).
        lang2: Sequence of target sentences, aligned with ``lang1`` by index.
        alphabet1: Mapping from every source character (plus ``pad_chr`` and
            ``end_chr``) to its code. Codes may be scalars
            (``build_alphabet_converter``) or equal-length vectors
            (``build_alphabet_converter_2d``).
        alphabet2: Same as ``alphabet1`` for the target language; must also
            contain ``start_chr``.
        num_samples: Number of sentence pairs to draw without replacement.

    Returns:
        ``(lang1_sample, lang2_in_sample, lang2_out_sample, len1, len2)``:
        three time-major numpy arrays of shape ``(text length, batch size)``
        for scalar codes or ``(text length, batch size, channels)`` for vector
        codes, followed by the unpadded lengths (end marker included) of the
        source and target sentences.

    Raises:
        ValueError: If ``num_samples`` exceeds ``len(lang1)`` (raised by
            ``random.sample``) or is zero (raised by ``max``).
        KeyError: If a sentence contains a character missing from its alphabet.
    """
    picked = random.sample(range(len(lang1)), num_samples)

    sources = [lang1[i] + end_chr for i in picked]
    targets_in = [start_chr + lang2[i] for i in picked]
    targets_out = [lang2[i] + end_chr for i in picked]

    # Store the length of each sentence; targets_in and targets_out are equally long.
    len1 = [len(text) for text in sources]
    len2 = [len(text) for text in targets_out]
    maxlen1, maxlen2 = max(len1), max(len2)

    def encode(texts, alphabet, width):
        # Right-pad every sentence to a common width, then map characters to codes.
        codes = [[alphabet[ch] for ch in text.ljust(width, pad_chr)] for text in texts]
        # Dimension order: (text length, batch size[, channels]).
        # swapaxes(0, 1) equals transpose(1, 0, 2) for vector codes and also
        # works for scalar codes, where the array is only two-dimensional.
        return np.swapaxes(np.asarray(codes), 0, 1)

    lang1_sample = encode(sources, alphabet1, maxlen1)
    lang2_in_sample = encode(targets_in, alphabet2, maxlen2)
    lang2_out_sample = encode(targets_out, alphabet2, maxlen2)

    return lang1_sample, lang2_in_sample, lang2_out_sample, len1, len2

In [8]:
def get_max_len(instances):
    """Return the length of the longest string in ``instances`` plus one.

    The extra position presumably accounts for the single start/end marker
    that ``get_batch`` adds to each sentence.

    Raises:
        ValueError: If ``instances`` is empty.
    """
    return max(len(text) for text in instances) + 1

In [9]:
def build_alphabet_converter(instances):
    """Map every character occurring in ``instances`` to a unique integer code.

    Codes 0, 1 and 2 are reserved for ``pad_chr``, ``start_chr`` and
    ``end_chr``; the remaining characters receive 3, 4, ... in sorted order.
    Sorting makes the mapping reproducible across kernel restarts, whereas
    plain set iteration order varies with string hash randomisation.

    Args:
        instances: Iterable of strings. May be empty, in which case only the
            three reserved entries are returned.

    Returns:
        dict mapping single characters to ints.
    """
    reserved = (pad_chr, start_chr, end_chr)
    # Drop the sentinels from the data alphabet so they cannot consume a code
    # and leave a gap in the range when they are reassigned below.
    chars = sorted(set().union(*instances).difference(reserved))

    converter = {ch: code for code, ch in enumerate(chars, start=len(reserved))}
    converter.update((ch, code) for code, ch in enumerate(reserved))
    return converter

In [10]:
def build_alphabet_converter_2d(instances):
    """Map every character in ``instances`` to a fixed-width list of bits.

    Each character gets the big-endian binary representation of an integer
    code: 0, 1 and 2 are reserved for ``pad_chr``, ``start_chr`` and
    ``end_chr``; the remaining characters receive 3, 4, ... in sorted order.
    Sorting makes the mapping reproducible across kernel restarts, whereas
    plain set iteration order varies with string hash randomisation.

    Args:
        instances: Iterable of strings. May be empty, in which case only the
            three reserved entries are returned.

    Returns:
        dict mapping single characters to lists of 0/1 ints, all of the same
        length (the number of bits needed for the largest code).
    """
    reserved = (pad_chr, start_chr, end_chr)
    chars = sorted(set().union(*instances).difference(reserved))

    # Exact integer bit width of the largest code; avoids the rounding error
    # that ceil(log(n, 2)) can suffer near powers of two.
    highest_code = len(chars) + len(reserved) - 1
    width = highest_code.bit_length()

    def to_bits(code):
        return [int(bit) for bit in format(code, "b").zfill(width)]

    converter = {ch: to_bits(code) for code, ch in enumerate(chars, start=len(reserved))}
    converter.update((ch, to_bits(code)) for code, ch in enumerate(reserved))
    return converter

In [14]:
class EncoderRNN(nn.Module):
    """Single-layer GRU encoder.

    Args:
        input_size: Number of features per input time step.
        hidden_size: Size of the GRU hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size)

    def forward(self, input, hidden):
        """Run the GRU and return its ``(output, hidden)`` pair.

        ``input`` is time-major ``(seq_len, batch, input_size)`` and ``hidden``
        is ``(1, batch, hidden_size)``, following ``nn.GRU``'s default layout.
        """
        return self.gru(input, hidden)

    def initHidden(self, batch_size):
        """Return an all-zero initial hidden state for ``batch_size`` sequences.

        The tensor is created on the same device and with the same dtype as
        the module's parameters, so it stays usable even when the model was
        not moved to the notebook-level default device.
        """
        reference = next(self.parameters())
        return torch.zeros(1, batch_size, self.hidden_size,
                           device=reference.device, dtype=reference.dtype)


In [None]:
class DecoderRNN(nn.Module):
    """Single-layer GRU decoder emitting per-channel sigmoid activations.

    Args:
        hidden_size: Size of the GRU hidden state.
        output_size: Number of channels per symbol; used both as the GRU input
            width and as the width of the output projection.
    """

    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.gru = nn.GRU(output_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input, hidden):
        """Decode one time step.

        Only the first time step of the GRU output is projected, so ``input``
        is expected to hold a single step: ``(1, batch, output_size)``.

        Returns:
            ``(output, hidden)`` where ``output`` is ``(batch, output_size)``
            with values in (0, 1) and ``hidden`` is the updated GRU state.
        """
        output = F.relu(input)
        output, hidden = self.gru(output, hidden)
        output = self.sigmoid(self.out(output[0]))
        return output, hidden

    def initHidden(self, batch_size):
        """Return an all-zero initial hidden state for ``batch_size`` sequences.

        The tensor is created on the same device and with the same dtype as
        the module's parameters, so it stays usable even when the model was
        not moved to the notebook-level default device.
        """
        reference = next(self.parameters())
        return torch.zeros(1, batch_size, self.hidden_size,
                           device=reference.device, dtype=reference.dtype)

In [14]:
# Probability that a call to train() feeds the ground-truth y_in to the decoder
# at every step instead of the decoder's own rounded previous output.
teacher_forcing_ratio = 0.5

In [15]:
def train(x, y_in, y_out, x_len, y_len, encoder, decoder, encoder_optim, decoder_optim, criterion):
    """Run one optimisation step on a padded batch and return its mean step loss.

    Args:
        x: Encoder input, time-major ``(input length, batch, channels)``;
            numpy array or tensor (as produced by ``get_batch`` with a vector
            alphabet).
        y_in: Decoder input (start marker + target), ``(target length, batch, channels)``.
        y_out: Decoder target (target + end marker), same shape as ``y_in``.
        x_len: Unpadded length of every source sequence (one int per batch
            element; a single int is broadcast to the whole batch).
        y_len: Unpadded length of every target sequence, same convention.
        encoder: Module with ``initHidden(batch_size)`` that returns
            ``(output, hidden)`` when called with ``(input, hidden)``.
        decoder: Module returning ``(output, hidden)`` for a single time step,
            with ``output`` of shape ``(batch, channels)``.
        encoder_optim: Optimizer over the encoder parameters.
        decoder_optim: Optimizer over the decoder parameters.
        criterion: Loss called as ``criterion(prediction, target)`` on float
            tensors of shape ``(n, channels)``, e.g. ``nn.BCELoss()``.

    Returns:
        float: Summed per-step loss divided by the padded target length.

    Raises:
        ValueError: If no target position is left unmasked (for example an
            empty target), because there would be nothing to back-propagate.
    """
    model_device = next(encoder.parameters()).device

    # get_batch yields integer numpy arrays; the GRU and the loss need float tensors.
    x = torch.as_tensor(x, dtype=torch.float32, device=model_device)
    y_in = torch.as_tensor(y_in, dtype=torch.float32, device=model_device)
    y_out = torch.as_tensor(y_out, dtype=torch.float32, device=model_device)

    input_length = x.size(0)
    target_length = y_in.size(0)  # y_in and y_out are the same length
    batch_size = x.size(1)

    # One length per batch element; a scalar is broadcast to the whole batch.
    x_lengths = torch.as_tensor(x_len, device=model_device).reshape(-1).expand(batch_size)
    y_lengths = torch.as_tensor(y_len, device=model_device).reshape(-1).expand(batch_size)

    encoder_optim.zero_grad()
    decoder_optim.zero_grad()

    encoder_hidden = encoder.initHidden(batch_size).to(model_device)
    for i in range(input_length):
        # x[i:i + 1] keeps the leading time axis that nn.GRU expects.
        _, step_hidden = encoder(x[i:i + 1], encoder_hidden)
        # Freeze the state of sequences that already ended so that padding
        # does not overwrite their final hidden state.
        still_reading = (x_lengths > i).view(1, -1, 1)
        encoder_hidden = torch.where(still_reading, step_hidden, encoder_hidden)

    decoder_hidden = encoder_hidden
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    loss = 0
    decoder_input = y_in[0:1]
    for i in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)

        # Loss masking: only sequences that have not ended yet contribute.
        active = y_lengths > i
        if active.any():
            loss = loss + criterion(decoder_output[active], y_out[i][active])

        if use_teacher_forcing:
            # Next input is the ground truth, regardless of the prediction.
            decoder_input = y_in[i + 1:i + 2]
        else:
            # Next input is the decoder's own (binarised) prediction.
            decoder_input = decoder_output.round().detach().unsqueeze(0)

    if not torch.is_tensor(loss):
        raise ValueError("train() received a batch without any unmasked target position")

    loss.backward()

    encoder_optim.step()
    decoder_optim.step()

    return loss.item() / target_length

In [16]:
def asMinutes(s):
    """Format a duration of ``s`` seconds as ``'<minutes>m <seconds>s'``.

    Both parts are rendered with ``%d``, so fractional seconds are truncated.
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Report elapsed time since ``since`` and the projected remaining time.

    Args:
        since: Start timestamp as returned by ``time.time()``.
        percent: Fraction of the work completed so far; must be non-zero.

    Returns:
        String of the form ``'<elapsed> (- <remaining>)'``, each part formatted
        by ``asMinutes``.
    """
    elapsed = time.time() - since
    # Extrapolate the total duration linearly from the completed fraction.
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [4]:
# Decode explicitly instead of relying on the platform's default encoding.
# NOTE(review): assumes the corpus file is UTF-8 encoded — confirm.
f = open("fra-eng/fra.txt", "r", encoding="utf-8")

In [5]:
# Split every line of the corpus into its tab-separated fields. The handle is
# closed once the file has been read so it does not stay open for the rest of
# the kernel session.
with f:
    lines = [line.strip("\n").split("\t") for line in f]

In [11]:
# The first two tab-separated fields of every corpus line are the aligned
# sentence pair. These must be defined before the alphabets below, otherwise a
# fresh "Restart & Run All" fails with a NameError on lang1 / lang2.
lang1 = [x[0] for x in lines]
lang2 = [x[1] for x in lines]

# Per-language character -> bit-vector encoders.
alpha1 = build_alphabet_converter_2d(lang1)
alpha2 = build_alphabet_converter_2d(lang2)

# Longest sentence length (plus one) in each language.
max_lang1 = get_max_len(lang1)
max_lang2 = get_max_len(lang2)

In [13]:
# Smoke test: draw one batch of 100 sentence pairs and show the array shapes
# together with the number of per-sentence lengths returned.
x, y_in, y_out, x_len, y_len = get_batch(lang1, lang2, alpha1, alpha2, num_samples=100)
print(x.shape, y_in.shape, y_out.shape, len(x_len), len(y_len))

(60, 100, 7) (67, 100, 7) (67, 100, 7) 100 100


In [None]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01, batch_size=100):
    """Train ``encoder`` and ``decoder`` with SGD on randomly drawn batches.

    Batches are drawn with ``get_batch`` from the notebook-level ``lang1`` /
    ``lang2`` corpora and their ``alpha1`` / ``alpha2`` encoders.

    Args:
        encoder: Encoder module passed on to ``train``.
        decoder: Decoder module passed on to ``train``.
        n_iters: Number of training iterations (one batch each).
        print_every: Print the running mean loss every this many iterations.
        plot_every: Record the running mean loss every this many iterations.
        learning_rate: SGD learning rate for both optimizers.
        batch_size: Number of sentence pairs per batch.

    Returns:
        list of float: Mean loss of each ``plot_every``-iteration window.
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0
    plot_loss_total = 0

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    # The decoder emits one sigmoid activation per bit of the character code and
    # the targets are 0/1 bit vectors, so binary cross-entropy is the matching
    # loss. NLLLoss expects log-probabilities and class-index targets.
    criterion = nn.BCELoss()

    for i in range(1, n_iters + 1):
        x, y_in, y_out, x_len, y_len = get_batch(lang1, lang2, alpha1, alpha2, batch_size)
        loss = train(x, y_in, y_out, x_len, y_len, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)

        print_loss_total += loss
        plot_loss_total += loss

        if i % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, i / n_iters),
                                         i, i / n_iters * 100, print_loss_avg))

        if i % plot_every == 0:
            plot_losses.append(plot_loss_total / plot_every)
            plot_loss_total = 0

    return plot_losses

In [16]:
# Longest lang1 sentence length plus one, as computed by get_max_len above.
max_lang1

287