In [14]:
from io import open
import unicodedata
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from IPython.display import clear_output
from torch.utils.data import TensorDataset, DataLoader, Dataset
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import math
import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [2]:
%matplotlib inline

In [3]:
!unzip rus-eng.zip

Archive:  rus-eng.zip
  inflating: rus.txt                 
replace _about.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [None]:
!mv rus.txt eng-rus.txt

In [None]:
!tail eng-rus.txt

I've heard that you should never date anyone who is less than half your age plus seven. Tom is now 30 years old and Mary is 17. How many years will Tom need to wait until he can start dating Mary?	Я слышал, что никогда не следует встречаться с кем-то вдвое младше вас плюс семь лет. Тому 30 лет, a Мэри 17. Сколько лет Тому нужно ждать до тех пор, пока он сможет начать встречаться с Мэри?	CC-BY 2.0 (France) Attribution: tatoeba.org #10068197 (CK) & #10644473 (notenoughsun)
I do have one final ask of you as your president, the same thing I asked when you took a chance on me eight years ago. I'm asking you to believe, not in my ability to bring about change but in yours.	У меня же, как у вашего президента, есть к вам последняя просьба. Та же самая, что и восемь лет назад, когда вы оказали мне своё доверие. Я прошу вас верить, но не в мои способности добиться перемен, а в ваши.	CC-BY 2.0 (France) Attribution: tatoeba.org #5762723 (BHO) & #6390123 (odexed)
In today's world, we have to equip 

In [2]:
SOS_token = 0
EOS_token = 1


class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [3]:
# Turn a Unicode string to plain ASCII, thanks to
# http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    parts = s.split('\t')[:2]

    result = []
    for part in parts:
        normalized = unicodedata.normalize('NFC', part)
        result.append(normalized)
        
    return '\t'.join(result)

# Lowercase, trim, and remove non-letter characters


def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Zа-яА-Яйё.!?]+", r" ", s)
    return s

In [4]:
with open('eng-rus.txt', encoding='utf-8') as f:
    text = f.read().strip().split('\n')

text[:2]

['Go.\tМарш!\tCC-BY 2.0 (France) Attribution: tatoeba.org #2877272 (CM) & #1159202 (shanghainese)',
 'Go.\tИди.\tCC-BY 2.0 (France) Attribution: tatoeba.org #2877272 (CM) & #5898247 (marafon)']

In [4]:
def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    lines = open('%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
        read().strip().split('\n')

    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')][:2] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [5]:
MAX_LENGTH = 10

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)


def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[1].startswith(eng_prefixes)


def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

In [6]:
def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs


input_lang, output_lang, pairs = prepareData('eng', 'rus', True)
print(random.choice(pairs))

Reading lines...
Read 519900 sentence pairs
Trimmed to 30087 sentence pairs
Counting words...
Counted words:
rus 10529
eng 4377
['я не занимаюсь .', 'i am not studying .']


The Encoder
-----------





In [35]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1, model_type="GRU"):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.model_type = model_type

        self.embedding = nn.Embedding(input_size, hidden_size)
        if model_type == 'LSTM':
            self.rnn = nn.LSTM(hidden_size, hidden_size, num_layers=num_layers, batch_first=True)
        else:
            self.rnn = nn.GRU(hidden_size, hidden_size, num_layers=num_layers, batch_first=True)

    def forward(self, input, hidden):
        embedded = self.embedding(input)
        output = embedded
        output, hidden = self.rnn(output, hidden)
        return output, hidden

    def initHidden(self, batch_size=1):
        if self.model_type == "LSTM":
            return (torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device),
                    torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device))
        return torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device)

The Decoder
-----------




In [36]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, num_layers=1, model_type="GRU"):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.model_type = model_type

        self.embedding = nn.Embedding(output_size, hidden_size)
        if model_type == 'LSTM':
            self.rnn = nn.LSTM(hidden_size, hidden_size, num_layers=num_layers, batch_first=True)
        else:
            self.rnn = nn.GRU(hidden_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, input, hidden):
        output = self.embedding(input)
        output = F.relu(output)
        output, hidden = self.rnn(output, hidden)
        output = self.softmax(self.out(output.squeeze(1)))
        return output, hidden

    def initHidden(self, batch_size=1):
        if self.model_type == "LSTM":
            return (torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device),
                    torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device))
        return torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device)

In [9]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]


def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)


def tensorsFromPair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [10]:
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    batch_size = input_tensor.size(0)
    encoder_hidden = encoder.initHidden(batch_size)

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(1)
    target_length = target_tensor.size(1)

    encoder_outputs = torch.zeros(batch_size, max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[:, ei, :], encoder_hidden)
        encoder_outputs[:, ei, :] = encoder_output.squeeze(1)

    decoder_input = torch.tensor([[SOS_token]] * batch_size, device=device).view(batch_size, 1)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            loss += criterion(decoder_output, target_tensor[:, di, :].squeeze(1))
            decoder_input = target_tensor[:, di, :] # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            _, topi = decoder_output.topk(1)
            decoder_input = topi.detach() # detach from history as input

            loss += criterion(decoder_output, target_tensor[:, di, :].squeeze(1))
            if (decoder_input == EOS_token).any():
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [12]:
def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [15]:
plt.switch_backend('agg')
import matplotlib.ticker as ticker


def showPlot(points, name):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)
    plt.savefig(name)

In [16]:
def trainIters(encoder, decoder, n_iters, name, batch_size=8, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    
    training_pairs = [tensorsFromPair(random.choice(pairs)) for _ in range(n_iters)]
    input_tensors = nn.utils.rnn.pad_sequence([pair[0] for pair in training_pairs], batch_first=True, padding_value=0)
    target_tensors = nn.utils.rnn.pad_sequence([pair[1] for pair in training_pairs], batch_first=True, padding_value=0)
    dataset = TensorDataset(input_tensors, target_tensors)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    
    criterion = nn.NLLLoss()
    iter_count = 0

    for _ in range(1, (n_iters // batch_size) + 1):
        for batch_input, batch_target in dataloader:
            if iter_count >= n_iters:
                break

            loss = train(batch_input, batch_target, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
            print_loss_total += loss * batch_input.size(0)
            plot_loss_total += loss * batch_input.size(0)
            iter_count += batch_input.size(0)

            if iter_count % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' % (timeSince(start, iter_count / n_iters), iter_count, iter_count / n_iters * 100, print_loss_avg))

            if iter_count % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0

    showPlot(plot_losses, name=name)

In [75]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_tensor = input_tensor.view(1, -1, 1)
        input_length = input_tensor.size(1)

        encoder_hidden = encoder.initHidden(batch_size=1)

        encoder_outputs = torch.zeros(1, max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[:, ei, :], encoder_hidden)
            encoder_outputs[:, ei, :] = encoder_output.squeeze(1)

        decoder_input = torch.tensor([[SOS_token]], device=device).view(1, 1)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []

        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            _, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.view(1, 1).detach()

        return decoded_words

In [76]:
def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

In [73]:
hidden_size = 256
batch_size = 1
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words).to(device)

trainIters(encoder1, decoder1, 75000, batch_size=batch_size, print_every=5000, name='GRU_1_layer.png')

1m 3s (- 14m 48s) (5000 6%) 2.5891
2m 3s (- 13m 21s) (10000 13%) 2.1628
3m 3s (- 12m 13s) (15000 20%) 1.9841
4m 3s (- 11m 8s) (20000 26%) 1.8442
5m 2s (- 10m 4s) (25000 33%) 1.7299
5m 59s (- 8m 58s) (30000 40%) 1.6447
6m 55s (- 7m 55s) (35000 46%) 1.5458
7m 53s (- 6m 54s) (40000 53%) 1.4774
8m 51s (- 5m 54s) (45000 60%) 1.4263
9m 51s (- 4m 55s) (50000 66%) 1.3675
10m 51s (- 3m 56s) (55000 73%) 1.3051
11m 50s (- 2m 57s) (60000 80%) 1.2745
12m 50s (- 1m 58s) (65000 86%) 1.2015
13m 50s (- 0m 59s) (70000 93%) 1.1740
14m 50s (- 0m 0s) (75000 100%) 1.1310


In [77]:
evaluateRandomly(encoder1, decoder1, 20)

> я очень благодарен .
= i m very grateful .
< i m very grateful to be about . <EOS>

> ты красивая .
= you re beautiful .
< you are beautiful beautiful beautiful beautiful . <EOS>

> он пользуется любовью его людей .
= he is popular with his men .
< he s really his his his . <EOS>

> я сегодня возвращаюсь в бостон .
= i m going back to boston today .
< i m going to boston to boston . SOS SOS

> он умён и привлекателен .
= he s intelligent and good looking .
< he s really and and and and . <EOS>

> я готовлюсь .
= i m getting ready .
< i m getting ready to . <EOS>

> вы очень прямолинейны .
= you re very direct .
< you re very very very very you . <EOS>

> прости что наорала на тебя .
= i m sorry i yelled at you .
< i m sorry i yelled at you . <EOS>

> я не очень уверен насчёт тома .
= i m not so sure about tom .
< i m not sure about tom tom . <EOS>

> я повариха .
= i m a cook .
< i m on a my book . . <EOS>

> вы достаточно взрослая чтобы это понимать .
= you are old enough to underst

# **The RNN with 2 GRU layers**

In [39]:
hidden_size = 256
batch_size = 1
encoder2 = EncoderRNN(input_lang.n_words, hidden_size, num_layers=2).to(device)
decoder2 = DecoderRNN(hidden_size, output_lang.n_words, num_layers=2).to(device)

trainIters(encoder2, decoder2, 75000, batch_size=batch_size, print_every=5000, name='GRU_2_layer.png')

0m 55s (- 12m 57s) (5000 6%) 3.1578
1m 49s (- 11m 52s) (10000 13%) 2.6981
2m 41s (- 10m 45s) (15000 20%) 2.4538
3m 34s (- 9m 48s) (20000 26%) 2.2457
4m 26s (- 8m 53s) (25000 33%) 2.0903
5m 19s (- 7m 59s) (30000 40%) 1.9685
6m 13s (- 7m 7s) (35000 46%) 1.8434
7m 10s (- 6m 16s) (40000 53%) 1.7107
8m 6s (- 5m 24s) (45000 60%) 1.6267
9m 2s (- 4m 31s) (50000 66%) 1.5722
9m 58s (- 3m 37s) (55000 73%) 1.4471
10m 55s (- 2m 43s) (60000 80%) 1.4022
11m 51s (- 1m 49s) (65000 86%) 1.3500
12m 47s (- 0m 54s) (70000 93%) 1.2782
13m 45s (- 0m 0s) (75000 100%) 1.2218


In [40]:
evaluateRandomly(encoder2, decoder2, 20)

> мы отмечаем юбилей .
= we re celebrating our anniversary .
< we re making . <EOS>

> прошу прощения что так опоздал .
= i m sorry that i m so late .
< i m sorry that i m so late . <EOS>

> мне восемнадцать лет .
= i m eighteen .
< i m old . <EOS>

> она в отчаянии .
= she s desperate .
< she s wearing . <EOS>

> я уверен что у вас были добрые намерения .
= i m sure you meant well .
< i m sure your intentions you re good . <EOS>

> я не такой ленивый как том .
= i m not as lazy as tom .
< i m not as lucky as tom is . <EOS>

> боюсь ты опоздал .
= i m afraid you re too late .
< i m afraid you re so naive . <EOS>

> ты ревнуешь .
= you re jealous .
< you re the . <EOS>

> вы идёте на вечеринку .
= you re going to a party .
< you re our our . . <EOS>

> я не пытаюсь произвести на тебя впечатление .
= i m not trying to impress you .
< i m not trying to impress you . <EOS>

> у меня аллергия на кошек .
= i m allergic to cats .
< i m allergic to . <EOS>

> я придерживаюсь своего плана .
= i

# **The RNN with 2 GRU layers with batches**

In [78]:
hidden_size = 256
batch_size = 8
encoder3 = EncoderRNN(input_lang.n_words, hidden_size, num_layers=2).to(device)
decoder3 = DecoderRNN(hidden_size, output_lang.n_words, num_layers=2).to(device)

trainIters(encoder3, decoder3, 300000, batch_size=batch_size, print_every=30000, name='GRU_2_layer_with_batches.png')

1m 12s (- 10m 48s) (30000 10%) 2.3166
2m 4s (- 8m 18s) (60000 20%) 1.8679
2m 57s (- 6m 53s) (90000 30%) 1.6661
3m 50s (- 5m 45s) (120000 40%) 1.5450
4m 43s (- 4m 43s) (150000 50%) 1.4353
5m 35s (- 3m 43s) (180000 60%) 1.3369
6m 27s (- 2m 46s) (210000 70%) 1.2496
7m 20s (- 1m 50s) (240000 80%) 1.1795
8m 13s (- 0m 54s) (270000 90%) 1.1099
9m 5s (- 0m 0s) (300000 100%) 1.0406


In [79]:
evaluateRandomly(encoder3, decoder3, 20)

> я рада что с томом всё хорошо .
= i m glad tom is ok .
< i m glad tom is still here . <EOS>

> я ревнивый .
= i m jealous .
< i m the same i in my class . <EOS>

> я в поезде .
= i m on the train .
< i m in boston in this room . <EOS>

> я просто за тебя беспокоюсь .
= i m just worried about you .
< i m just worried about what happened . <EOS>

> мы все счастливы помочь .
= we are all happy to help .
< we re all trying to help tom . <EOS>

> я найду тебя .
= i m going to find you .
< i m going to miss you a lot . <EOS>

> не уверен что вам это понравится .
= i m not sure you ll like it .
< i m not sure you can do that . <EOS>

> я не планирую никуда сегодня идти .
= i m not planning to go anywhere today .
< i m not going to boston this week . <EOS>

> вы очень грязные .
= you re really dirty .
< you re very very to be married . <EOS>

> ты не дурак .
= you re not stupid .
< you re not a not but anymore . <EOS>

> я просто не уверен .
= i m just not sure .
< i m just not sure of anyth

# **The RNN with 2 LSTM layers with batches**

In [80]:
hidden_size = 256
batch_size = 8
encoder4 = EncoderRNN(input_lang.n_words, hidden_size, num_layers=2, model_type="LSTM").to(device)
decoder4 = DecoderRNN(hidden_size, output_lang.n_words, num_layers=2, model_type="LSTM").to(device)

trainIters(encoder4, decoder4, 300000, batch_size=batch_size, print_every=30000, name='LSTM_2_layer_with_batches.png')

1m 17s (- 11m 37s) (30000 10%) 2.4427
2m 15s (- 9m 3s) (60000 20%) 1.9160
3m 12s (- 7m 30s) (90000 30%) 1.7427
4m 11s (- 6m 16s) (120000 40%) 1.6024
5m 8s (- 5m 8s) (150000 50%) 1.5116
6m 7s (- 4m 4s) (180000 60%) 1.3970
7m 6s (- 3m 2s) (210000 70%) 1.3091
8m 5s (- 2m 1s) (240000 80%) 1.2196
9m 4s (- 1m 0s) (270000 90%) 1.1468
10m 2s (- 0m 0s) (300000 100%) 1.0785


In [81]:
evaluateRandomly(encoder4, decoder4, n=20)

> она вдруг потеряла сознание .
= she suddenly lost consciousness .
< she is at her her . <EOS>

> счастлив за вас обоих .
= i m happy for you both .
< i m happy about you with tom . <EOS>

> прости что заставил тебя ждать .
= i m sorry that i made you wait .
< i m sorry that you have tom . <EOS>

> я пошла обратно работать .
= i m going back to work .
< i m going to boston for this . <EOS>

> мы лучше всех .
= we re the best .
< we re thinking and what i can . <EOS>

> я не специалист .
= i m not an expert .
< i m not at this with tom . <EOS>

> ты странная .
= you re weird .
< you re free with i can you . <EOS>

> я ем огурец .
= i m eating a cucumber .
< i m good at french . <EOS>

> он боится своего отца .
= he is afraid of his father .
< he is sure of his his mother . <EOS>

> мы сыты по горло коррупцией в государстве .
= we are sick and tired of political corruption .
< we re tired of of your room . <EOS>

> рада за тебя .
= i m happy for you .
< i m thinking to do that you . <EO