In [2]:
import os
import numpy as np
import random
from math import log, ceil

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

In [4]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [5]:
# Reserved marker characters (code points 0, 1 and 2): padding, start of
# sequence and end of sequence.
pad_chr, start_chr, end_chr = map(chr, (0, 1, 2))

In [6]:
# Open the tab-separated sentence-pair corpus. The encoding is given explicitly
# so that decoding does not depend on the platform's default locale encoding.
f = open("fra-eng/fra.txt", "r", encoding="utf-8")

In [7]:
# Split every line of the corpus into its tab-separated fields.
try:
    lines = [line.strip("\n").split("\t") for line in f]
finally:
    # Release the file handle once it has been consumed. Re-running this cell
    # without reopening the file now fails loudly instead of silently
    # producing an empty list from the exhausted iterator.
    f.close()

In [8]:
# Field 0 of each parsed row is the first-language sentence and field 1 its
# second-language counterpart (presumably English / French given the corpus
# file name -- confirm against the data).
lang1 = [fields[0] for fields in lines]
lang2 = [fields[1] for fields in lines]

In [22]:
def get_batch(lang1, lang2, alphabet1, alphabet2, num_samples):
    """Draw a random batch of aligned sentence pairs and encode it as arrays.

    Args:
        lang1: sequence of source-language strings.
        lang2: sequence of target-language strings, aligned with `lang1`.
        alphabet1: mapping from character to a fixed-length vector for `lang1`
            (must also contain `pad_chr` and `end_chr`).
        alphabet2: same kind of mapping for `lang2` (must also contain
            `pad_chr`, `start_chr` and `end_chr`).
        num_samples: number of distinct pairs to draw without replacement.

    Returns:
        A tuple `(source, target_in, target_out)` of numpy arrays with axes
        (text length, batch size, channels). `source` is each source text
        followed by `end_chr`; `target_in` is each target text preceded by
        `start_chr`; `target_out` is each target text followed by `end_chr`.
        Shorter texts are right-padded with `pad_chr`.
    """
    def encode(texts, alphabet, width):
        # Right-pad every text to `width` and look up each character's vector.
        encoded_rows = []
        for text in texts:
            padded = text.ljust(width, pad_chr)
            encoded_rows.append([alphabet[char] for char in padded])
        return encoded_rows

    picked = random.sample(range(len(lang1)), num_samples)

    sources = [lang1[idx] + end_chr for idx in picked]
    decoder_inputs = [start_chr + lang2[idx] for idx in picked]
    decoder_targets = [lang2[idx] + end_chr for idx in picked]

    # Decoder inputs and targets have identical lengths (one marker added to
    # the same text), so a single width serves both.
    source_width = max(len(text) for text in sources)
    target_width = max(len(text) for text in decoder_inputs)

    encoded = [
        encode(sources, alphabet1, source_width),
        encode(decoder_inputs, alphabet2, target_width),
        encode(decoder_targets, alphabet2, target_width),
    ]

    # Reorder axes from (batch, length, channels) to (length, batch, channels).
    source_arr, target_in_arr, target_out_arr = (
        np.transpose(np.asarray(rows), (1, 0, 2)) for rows in encoded
    )
    return source_arr, target_in_arr, target_out_arr

In [23]:
def get_max_len(instances):
    """Return the length of the longest item in `instances`, plus one.

    The extra position presumably leaves room for the single start/end marker
    character attached to each text when batching -- confirm against callers.
    Raises ValueError when `instances` is empty.
    """
    longest = max(len(instance) for instance in instances)
    return longest + 1

In [10]:
def build_alphabet_converter(instances):
    """Map every character occurring in `instances` to a unique integer code.

    Codes 0, 1 and 2 are reserved for `pad_chr`, `start_chr` and `end_chr`;
    the characters found in the data receive consecutive codes starting at 3.

    Characters are numbered in sorted order so the mapping is identical on
    every run. Iterating the raw set instead would tie the assignment to
    string hash randomisation, making saved models or encoded data unusable
    across interpreter sessions.

    Args:
        instances: iterable of strings (may be empty).

    Returns:
        dict mapping each character (and the three marker characters) to its
        integer code.
    """
    # set().union(...) accepts zero iterables, so empty input yields only the
    # three reserved entries instead of raising TypeError.
    chars = sorted(set().union(*instances))
    converter = {char: code for code, char in enumerate(chars, start=3)}
    converter[pad_chr] = 0
    converter[start_chr] = 1
    converter[end_chr] = 2
    return converter

In [11]:
def build_alphabet_converter_2d(instances):
    """Map every character occurring in `instances` to a fixed-width bit vector.

    Each character is given an integer code and represented as the list of
    that code's binary digits, most significant bit first. Codes 0, 1 and 2
    are reserved for `pad_chr`, `start_chr` and `end_chr`; characters found in
    the data receive consecutive codes starting at 3.

    Characters are numbered in sorted order so the encoding is identical on
    every run (raw set iteration order changes with string hash
    randomisation). The vector width is computed with integer arithmetic
    rather than a floating-point logarithm.

    Args:
        instances: iterable of strings (may be empty).

    Returns:
        dict mapping each character (and the three marker characters) to a
        list of 0/1 ints; all lists have the same length.
    """
    def to_bits(code):
        # Binary digits of `code`, most significant first, padded to `width`.
        return [(code >> shift) & 1 for shift in range(width - 1, -1, -1)]

    # set().union(...) accepts zero iterables, so empty input is handled.
    chars = sorted(set().union(*instances))

    # The largest code in use is len(chars) + 2; its bit length is the number
    # of channels needed (always at least 2 because code 2 is reserved).
    width = (len(chars) + 2).bit_length()

    converter = {char: to_bits(code) for code, char in enumerate(chars, start=3)}
    converter[pad_chr] = to_bits(0)
    converter[start_chr] = to_bits(1)
    converter[end_chr] = to_bits(2)
    return converter

In [28]:
# Bit-vector character encodings, one per language.
alpha1, alpha2 = map(build_alphabet_converter_2d, (lang1, lang2))

# Longest sentence of each language plus one extra position.
max_lang1, max_lang2 = map(get_max_len, (lang1, lang2))

In [21]:
# Draw one batch of 100 sentence pairs and show the resulting array shapes.
x, y_in, y_out = get_batch(lang1, lang2, alpha1, alpha2, num_samples=100)
print(*(batch_array.shape for batch_array in (x, y_in, y_out)))

(71, 100, 7) (76, 100, 7) (76, 100, 7)


In [14]:
class EncoderRNN(nn.Module):
    """GRU encoder that reads an input sequence into a hidden state.

    Args:
        input_size: number of features per input time step.
        hidden_size: width of the GRU hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size)

    def forward(self, input, hidden):
        """Run the GRU over `input` from `hidden`; return (outputs, final hidden)."""
        outputs, final_hidden = self.gru(input, hidden)
        return outputs, final_hidden

    def initHidden(self, batch_size):
        """Return a zero hidden state of shape (1, batch_size, hidden_size) on `device`."""
        state_shape = (1, batch_size, self.hidden_size)
        return torch.zeros(state_shape, device=device)


In [17]:
class DecoderRNN(nn.Module):
    """GRU decoder emitting one sigmoid-activated output vector per call.

    Args:
        hidden_size: width of the GRU hidden state.
        output_size: number of features of both the decoder input and the
            produced output vector.
    """

    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.gru = nn.GRU(output_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        # Outputs are squashed element-wise with a sigmoid; an alternative
        # nn.LogSoftmax(dim=1) head was left disabled in the original code.
        self.sigmoid = nn.Sigmoid()

    def forward(self, input, hidden):
        """Advance the decoder and return (output, new hidden state).

        `input` is passed through ReLU and the GRU; only the first time step
        of the GRU output is projected and passed through the sigmoid, so the
        returned output has shape (batch, output_size).
        """
        rectified = torch.relu(input)
        gru_outputs, next_hidden = self.gru(rectified, hidden)
        first_step = gru_outputs[0]
        projected = self.out(first_step)
        return self.sigmoid(projected), next_hidden

    def initHidden(self, batch_size):
        """Return a zero hidden state of shape (1, batch_size, hidden_size) on `device`."""
        state_shape = (1, batch_size, self.hidden_size)
        return torch.zeros(state_shape, device=device)

In [19]:
# Teacher-forcing ratio. Presumably the probability of feeding the ground-truth
# target (rather than the decoder's own prediction) as the next decoder input
# during training -- its use is not visible in this section; confirm against
# the training loop.
teacher_forcing_ratio = 0.5

In [None]:
def train(x, y_in, y_out, encoder, decoder, encoder_optim, decoder_optim, criterion):
    encoder_hidden = encoder.initHidden()
    
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    
    input_length = x.size(0)
    target_length = y_in.size(0) #y_in and y_out are the same length
    
    #encoder_outputs = torch.zeros(input_length, encoder.hidden_size, device=device)
    