In [None]:
from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

**1) Возьмите англо-русские пары фраз с сайта [manythings](https://www.manythings.org/anki/)**

In [None]:
!tail eng-rus.txt

We need to uphold laws against discrimination — in hiring, and in housing, and in education, and in the criminal justice system. That is what our Constitution and our highest ideals require.	Нам нужно отстаивать законы против дискриминации при найме на работу, в жилищной сфере, в сфере образования и правоохранительной системе. Этого требуют наша Конституция и высшие идеалы.	CC-BY 2.0 (France) Attribution: tatoeba.org #5762728 (BHO) & #6390439 (odexed)
I've heard that you should never date anyone who is less than half your age plus seven. Tom is now 30 years old and Mary is 17. How many years will Tom need to wait until he can start dating Mary?	Я слышал, что никогда не следует встречаться с кем-то вдвое младше вас плюс семь лет. Тому 30 лет, a Мэри 17. Сколько лет Тому нужно ждать до тех пор, пока он сможет начать встречаться с Мэри?	CC-BY 2.0 (France) Attribution: tatoeba.org #10068197 (CK) & #10644473 (notenoughsun)
I do have one final ask of you as your president, the same thing I a

In [None]:
# Reserved vocabulary indices for the start-of-sentence and end-of-sentence markers.
SOS_token = 0
EOS_token = 1


class Lang:
    """Vocabulary for one language: word <-> index maps plus word frequencies.

    Indices 0 and 1 are pre-assigned to the "SOS" and "EOS" markers, so the
    first real word receives index 2.
    """

    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = len(self.index2word)  # SOS and EOS are already counted

    def addSentence(self, sentence):
        """Register every token of ``sentence`` (tokens are split on single spaces)."""
        for token in sentence.split(' '):
            self.addWord(token)

    def addWord(self, word):
        """Increment the count of ``word``, assigning it the next free index if it is new."""
        if word in self.word2index:
            self.word2count[word] += 1
            return
        new_index = self.n_words
        self.word2index[word] = new_index
        self.word2count[word] = 1
        self.index2word[new_index] = word
        self.n_words = new_index + 1

In [None]:
# Strip accents from text without damaging Cyrillic letters.
# Based on http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    """Remove combining accent marks from ``s``.

    The string is decomposed (NFD) and non-spacing marks (category ``Mn``)
    are dropped, so ``'café'`` becomes ``'cafe'``. A mark is kept when it
    follows a Cyrillic base letter and the two compose into a single
    code point, because such a mark is part of a distinct letter rather than
    an accent: without this exception 'й' would turn into 'и' and 'ё' into
    'е'. Stress accents on Cyrillic vowels have no precomposed form and are
    still removed. The result is recomposed (NFC).

    Despite the historical name, non-ASCII base characters are not removed.

    Args:
        s: input string.

    Returns:
        The string with accent marks removed, in NFC form.
    """
    kept = []
    base = ''  # most recent non-mark character seen
    for c in unicodedata.normalize('NFD', s):
        if unicodedata.category(c) != 'Mn':
            base = c
            kept.append(c)
        elif '\u0400' <= base <= '\u04ff' and \
                len(unicodedata.normalize('NFC', base + c)) == 1:
            # base + mark is a real Cyrillic letter (e.g. й, ё): keep the mark
            kept.append(c)
    return unicodedata.normalize('NFC', ''.join(kept))

# Lowercase, trim, strip accents and delete every '.', '!' and '?'.
# NOTE(review): other punctuation (commas, dashes, apostrophes) is left in
# place, so it stays attached to the neighbouring word - confirm this is intended.


def normalizeString(s):
    """Return ``s`` lowercased, stripped of surrounding whitespace, passed
    through ``unicodeToAscii`` and with all '.', '!' and '?' characters removed."""
    cleaned = unicodeToAscii(s.lower().strip()).lower()
    return cleaned.translate(str.maketrans('', '', '.!?'))

In [None]:
def readLangs(lang1, lang2, reverse=False):
    """Read ``<lang1>-<lang2>.txt`` and return two empty vocabularies plus the sentence pairs.

    Each line is cut at the first ``'\\tCC-BY'`` (dropping the attribution
    column), split on tabs, and every field is passed through
    ``normalizeString``.

    Args:
        lang1: name of the language in the first column of the file.
        lang2: name of the language in the second column of the file.
        reverse: when true, each pair is reversed and ``lang2`` becomes the
            input language.

    Returns:
        ``(input_lang, output_lang, pairs)`` where the two ``Lang`` objects
        are freshly created (no words added yet) and ``pairs`` is a list of
        lists of normalised sentences.
    """
    print("Reading lines...")

    # Read the file and split into lines; the context manager closes the handle
    with open('%s-%s.txt' % (lang1, lang2), encoding='utf-8') as corpus:
        lines = corpus.read().strip().split('\n')

    # Drop the trailing licence/attribution column
    lines = [line.split('\tCC-BY', 1)[0] for line in lines]

    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in line.split('\t')] for line in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [None]:
# Upper bound (exclusive) on the number of space-separated tokens per sentence.
MAX_LENGTH = 20

# English sentence openings used to select a small subset of the corpus.
# Every contracted form ends with a space so that, e.g., "she s " cannot
# match unrelated sentences such as "she slammed the door".
# NOTE(review): the "i m " / "he s " style entries assume apostrophes have been
# replaced by spaces during normalisation; if apostrophes are kept
# ("i'm", "he's"), these entries never match - confirm which is intended.
eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s ",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)

def filterPair(p):
    """Return True when both sentences of ``p`` have fewer than MAX_LENGTH
    space-separated tokens and ``p[1]`` starts with one of ``eng_prefixes``."""
    first_tokens = p[0].split(' ')
    if len(first_tokens) >= MAX_LENGTH:
        return False
    second_tokens = p[1].split(' ')
    if len(second_tokens) >= MAX_LENGTH:
        return False
    return p[1].startswith(eng_prefixes)

def filterPairs(pairs):
    """Return a new list with only the pairs accepted by ``filterPair``, in order."""
    return list(filter(filterPair, pairs))

In [None]:
def prepareData(lang1, lang2, reverse=False):
    """Load the corpus, filter the pairs and build both vocabularies.

    Reads pairs via ``readLangs``, keeps those accepted by ``filterPairs``,
    adds the first sentence of each kept pair to the input vocabulary and the
    second to the output vocabulary, and prints progress along the way.

    Returns:
        ``(input_lang, output_lang, pairs)`` with populated vocabularies and
        the filtered pairs.
    """
    input_lang, output_lang, all_pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(all_pairs))

    kept_pairs = filterPairs(all_pairs)
    print("Trimmed to %s sentence pairs" % len(kept_pairs))

    print("Counting words...")
    for kept in kept_pairs:
        input_lang.addSentence(kept[0])
        output_lang.addSentence(kept[1])

    print("Counted words:")
    for vocabulary in (input_lang, output_lang):
        print(vocabulary.name, vocabulary.n_words)

    return input_lang, output_lang, kept_pairs


# reverse=True swaps each pair, so Russian is the input language and English the output.
input_lang, output_lang, pairs = prepareData('eng', 'rus', True)
# Show one random pair as a sanity check.
print(random.choice(pairs))

Reading lines...
Read 479223 sentence pairs
Trimmed to 4983 sentence pairs
Counting words...
Counted words:
rus 4737
eng 2434
['он почти все время дома', 'he is almost always home']


**2) Обучите на них seq2seq по аналогии с занятием. Оцените полученное качество**

In [None]:
class EncoderRNN(nn.Module):
    """Single-layer GRU encoder that consumes one token index per call."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

        # The embedding width equals the GRU hidden width
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        """Embed ``input``, reshape it to (1, 1, -1) and advance the GRU by one step.

        Returns the GRU's ``(output, hidden)`` pair.
        """
        step_input = self.embedding(input).view(1, 1, -1)
        return self.gru(step_input, hidden)

    def initHidden(self):
        """Return an all-zero hidden state of shape (1, 1, hidden_size) on ``device``."""
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [None]:
class DecoderRNN(nn.Module):
    """Single-layer GRU decoder that emits log-probabilities over the output vocabulary."""

    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one decoding step.

        The embedded token is reshaped to (1, 1, -1), passed through ReLU and
        the GRU; the first element of the GRU output is projected to
        ``output_size`` scores and log-softmaxed over dim 1.

        Returns ``(log_probs, hidden)``.
        """
        embedded = F.relu(self.embedding(input).view(1, 1, -1))
        gru_out, hidden = self.gru(embedded, hidden)
        log_probs = self.softmax(self.out(gru_out[0]))
        return log_probs, hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape (1, 1, hidden_size) on ``device``."""
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [None]:
def indexesFromSentence(lang, sentence):
    """Map each space-separated token of ``sentence`` to its index in ``lang.word2index``.

    Raises ``KeyError`` for a token that is not in the vocabulary.
    """
    indexes = []
    for token in sentence.split(' '):
        indexes.append(lang.word2index[token])
    return indexes


def tensorFromSentence(lang, sentence):
    """Encode ``sentence`` as a long tensor of shape (n_tokens + 1, 1) on
    ``device``: the token indices followed by ``EOS_token``."""
    token_ids = indexesFromSentence(lang, sentence)
    token_ids.append(EOS_token)
    column = torch.tensor(token_ids, dtype=torch.long, device=device)
    return column.view(-1, 1)


def tensorsFromPair(pair):
    """Encode ``pair[0]`` with ``input_lang`` and ``pair[1]`` with ``output_lang``.

    Returns ``(input_tensor, target_tensor)``.
    """
    source_tensor = tensorFromSentence(input_lang, pair[0])
    reference_tensor = tensorFromSentence(output_lang, pair[1])
    return source_tensor, reference_tensor

In [None]:
# Probability that a training step feeds the ground-truth token (teacher
# forcing) rather than the decoder's own prediction as the next decoder input.
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimisation step on a single (input, target) sentence pair.

    The encoder reads the input one token at a time; its final hidden state
    initialises the decoder, which is then unrolled over the target. With
    probability ``teacher_forcing_ratio`` the ground-truth token is fed as the
    next decoder input; otherwise the decoder's top-1 prediction is fed back
    and decoding stops early once it predicts ``EOS_token``.

    Args:
        input_tensor: token-index tensor iterated along dim 0.
        target_tensor: token-index tensor iterated along dim 0.
        encoder, decoder: modules called as ``module(token, hidden)`` and
            returning ``(output, hidden)``; the encoder provides ``initHidden()``.
        encoder_optimizer, decoder_optimizer: optimisers stepped once each.
        criterion: loss applied to ``(decoder_output, target_tensor[di])``.
        max_length: unused; kept so existing callers keep working. The
            per-step encoder outputs were previously copied into a
            ``max_length``-row buffer that nothing read, which also raised
            IndexError for inputs longer than ``max_length``.

    Returns:
        The summed loss divided by the full target length (even when decoding
        stopped early without teacher forcing).
    """
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    loss = 0

    # The decoder has no attention: only the final encoder hidden state is needed
    for ei in range(input_length):
        _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(
            decoder_input, decoder_hidden)
        loss += criterion(decoder_output, target_tensor[di])

        if use_teacher_forcing:
            # Teacher forcing: feed the target as the next input
            decoder_input = target_tensor[di]
        else:
            # Without teacher forcing: use its own prediction as the next input
            _, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [None]:
import time
import math


def asMinutes(s):
    """Format a duration of ``s`` seconds as ``'<minutes>m <seconds>s'``."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Return ``'<elapsed> (- <remaining>)'`` for a task started at ``since``.

    ``percent`` is the completed fraction; the remaining time is estimated as
    ``elapsed / percent - elapsed``.
    """
    elapsed = time.time() - since
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [None]:
import matplotlib.pyplot as plt
# NOTE(review): 'agg' is a non-interactive backend, so figures drawn afterwards
# will presumably not be displayed inline in the notebook - confirm this is intended.
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    """Draw ``points`` as a line plot with y-axis major ticks every 0.2 units.

    Args:
        points: sequence of values to plot against their index.
    """
    # plt.subplots() creates the figure itself; an extra plt.figure() call
    # beforehand only left an empty figure open.
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    ax.plot(points)

In [None]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    """Train ``encoder``/``decoder`` for ``n_iters`` single-pair steps with SGD.

    ``n_iters`` pairs are sampled (with replacement) from ``pairs`` and
    tensorised up front. Every ``print_every`` steps the elapsed/remaining
    time and the average loss over those steps are printed; every
    ``plot_every`` steps the average loss is recorded, and the recorded
    values are passed to ``showPlot`` at the end.
    """
    started_at = time.time()
    recorded_losses = []
    loss_since_print = 0  # Reset every print_every
    loss_since_plot = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs))
                      for _ in range(n_iters)]
    criterion = nn.NLLLoss()

    for step, training_pair in enumerate(training_pairs, start=1):
        step_loss = train(training_pair[0], training_pair[1], encoder,
                          decoder, encoder_optimizer, decoder_optimizer, criterion)
        loss_since_print += step_loss
        loss_since_plot += step_loss

        if step % print_every == 0:
            average_loss = loss_since_print / print_every
            loss_since_print = 0
            print('%s (%d %d%%) %.4f' % (timeSince(started_at, step / n_iters),
                                         step, step / n_iters * 100, average_loss))

        if step % plot_every == 0:
            recorded_losses.append(loss_since_plot / plot_every)
            loss_since_plot = 0

    showPlot(recorded_losses)

In [None]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    """Greedily translate ``sentence`` with a trained encoder/decoder pair.

    The sentence is encoded token by token; the final encoder hidden state
    initialises the decoder, which is unrolled for at most ``max_length``
    steps, always feeding back its top-1 prediction.

    Args:
        encoder, decoder: modules called as ``module(token, hidden)`` and
            returning ``(output, hidden)``; the encoder provides ``initHidden()``.
        sentence: normalised input sentence; every token must be present in
            ``input_lang`` (an unknown token raises ``KeyError`` during
            tensorisation).
        max_length: maximum number of decoding steps. It no longer limits the
            input length: the per-step encoder outputs used to be copied into
            a ``max_length``-row buffer that nothing read, which raised
            IndexError for longer inputs.

    Returns:
        List of decoded words, ending with ``'<EOS>'`` if the decoder
        predicted the end marker within ``max_length`` steps.
    """
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        # The decoder has no attention: only the final encoder hidden state is needed
        for ei in range(input_length):
            _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []

        for _ in range(max_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            _, topi = decoder_output.topk(1)
            predicted_index = topi.item()
            if predicted_index == EOS_token:
                decoded_words.append('<EOS>')
                break
            decoded_words.append(output_lang.index2word[predicted_index])

            decoder_input = topi.squeeze().detach()

        return decoded_words

In [None]:
def evaluateRandomly(encoder, decoder, n=10):
    """Print ``n`` randomly chosen pairs from ``pairs`` with the model's translation.

    For each pair: '>' shows the input sentence, '=' the reference and '<'
    the output of ``evaluate``.
    """
    for _ in range(n):
        sample = random.choice(pairs)
        source_sentence = sample[0]
        print('>', source_sentence)
        print('=', sample[1])
        translation = ' '.join(evaluate(encoder, decoder, source_sentence))
        print('<', translation)
        print('')

In [None]:
# Baseline: single-layer GRU encoder/decoder with a 256-unit hidden state.
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words).to(device)

# 75,000 single-pair training steps; the average loss over the last 5,000 steps is printed.
trainIters(encoder1, decoder1, 75000, print_every=5000)

0m 51s (- 12m 1s) (5000 6%) 3.2675
1m 38s (- 10m 41s) (10000 13%) 2.7231
2m 30s (- 10m 1s) (15000 20%) 2.3076
3m 18s (- 9m 6s) (20000 26%) 1.8898
4m 8s (- 8m 17s) (25000 33%) 1.5618
4m 58s (- 7m 27s) (30000 40%) 1.2406
5m 47s (- 6m 37s) (35000 46%) 1.0334
6m 37s (- 5m 48s) (40000 53%) 0.8032
7m 27s (- 4m 58s) (45000 60%) 0.6348
8m 17s (- 4m 8s) (50000 66%) 0.4951
9m 6s (- 3m 18s) (55000 73%) 0.4006
9m 56s (- 2m 29s) (60000 80%) 0.2825
10m 46s (- 1m 39s) (65000 86%) 0.2252
11m 36s (- 0m 49s) (70000 93%) 0.1761
12m 27s (- 0m 0s) (75000 100%) 0.1289


In [None]:
# Qualitative check of the baseline on pairs drawn at random from `pairs`.
evaluateRandomly(encoder1, decoder1)

> она способна на все
= she is capable of anything
< she is capable of anything <EOS>

> я готов пока что с этим мириться
= i am prepared to put up with it for the time being
< i am prepared to put up with it for the time being <EOS>

> вы замечательные
= you are wonderful
< you are wonderful <EOS>

> он идет очень медленно
= he is walking very slowly
< he is walking very slowly <EOS>

> я очень благодарен вам за ваш совет
= i am very thankful to you for your advice
< i am very thankful to you for your advice <EOS>

> боюсь, это не так просто
= i am afraid it's not so easy
< i am afraid it's not so stupid <EOS>

> он умом не блещет
= he is not the sharpest knife in the drawer
< he is not the sharpest knife in the drawer <EOS>

> я готов следовать за вами
= i am ready to follow you
< i am ready to follow you <EOS>

> я умныи
= i am smart
< i am smart <EOS>

> она для него недостаточно хороша
= she isn't good enough for him
< she isn't good enough for him <EOS>



**3) Попробуйте добавить +1 рекуррентный слой в encoder и decoder**

In [None]:
class EncoderRNN2(nn.Module):
    """GRU encoder that chains two separate single-layer GRUs on every step.

    NOTE(review): a single hidden tensor is threaded through both GRUs - the
    second GRU starts from the state the first one has just produced, and
    only the second GRU's state is returned to the caller. This differs from
    ``nn.GRU(..., num_layers=2)``, which keeps a separate state per layer;
    confirm this is the intended architecture.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru_layer1 = nn.GRU(hidden_size, hidden_size)
        self.gru_layer2 = nn.GRU(hidden_size, hidden_size)  # second recurrent layer

    def forward(self, input, hidden):
        """Embed ``input`` and pass it through both GRUs in sequence.

        Returns the second GRU's ``(output, hidden)`` pair.
        """
        step_input = self.embedding(input).view(1, 1, -1)

        # First recurrent layer
        first_out, first_hidden = self.gru_layer1(step_input, hidden)

        # Second recurrent layer, seeded with the first layer's new state
        return self.gru_layer2(first_out, first_hidden)

    def initHidden(self):
        """Return an all-zero hidden state of shape (1, 1, hidden_size) on ``device``."""
        return torch.zeros(1, 1, self.hidden_size, device=device)



In [None]:
class DecoderRNN2(nn.Module):
    """GRU decoder that chains two separate single-layer GRUs on every step.

    NOTE(review): a single hidden tensor is threaded through both GRUs - the
    second GRU starts from the state the first one has just produced, and
    only the second GRU's state is returned to the caller. This differs from
    ``nn.GRU(..., num_layers=2)``, which keeps a separate state per layer;
    confirm this is the intended architecture.
    """

    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru_layer1 = nn.GRU(hidden_size, hidden_size)
        self.gru_layer2 = nn.GRU(hidden_size, hidden_size)  # second recurrent layer
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one decoding step through both GRUs.

        Returns ``(log_probs, hidden)`` where ``log_probs`` is the
        log-softmax (dim 1) of the projected second-GRU output.
        """
        embedded = F.relu(self.embedding(input).view(1, 1, -1))

        # First recurrent layer
        first_out, first_hidden = self.gru_layer1(embedded, hidden)

        # Second recurrent layer, seeded with the first layer's new state
        second_out, hidden = self.gru_layer2(first_out, first_hidden)

        log_probs = self.softmax(self.out(second_out[0]))
        return log_probs, hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape (1, 1, hidden_size) on ``device``."""
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [None]:
# Variant with two chained GRUs in both encoder and decoder, same hidden size
# and training schedule as the baseline.
hidden_size = 256
encoder2 = EncoderRNN2(input_lang.n_words, hidden_size).to(device)
decoder2 = DecoderRNN2(hidden_size, output_lang.n_words).to(device)

trainIters(encoder2, decoder2, 75000, print_every=5000)

1m 15s (- 17m 35s) (5000 6%) 3.4759
2m 25s (- 15m 44s) (10000 13%) 2.9203
3m 37s (- 14m 29s) (15000 20%) 2.8022
4m 50s (- 13m 19s) (20000 26%) 2.6969
6m 4s (- 12m 9s) (25000 33%) 2.5508
7m 19s (- 10m 58s) (30000 40%) 2.3517
8m 33s (- 9m 47s) (35000 46%) 2.1100
9m 47s (- 8m 34s) (40000 53%) 1.9077
11m 2s (- 7m 21s) (45000 60%) 1.6367
12m 17s (- 6m 8s) (50000 66%) 1.4646
13m 33s (- 4m 55s) (55000 73%) 1.2715
14m 50s (- 3m 42s) (60000 80%) 1.0579
16m 3s (- 2m 28s) (65000 86%) 0.9077
17m 16s (- 1m 14s) (70000 93%) 0.7722
18m 32s (- 0m 0s) (75000 100%) 0.6479


**4) Попробуйте заменить GRU-ячейки на LSTM-ячейки**

In [None]:
class EncoderLSTM(nn.Module):
    """Single-layer LSTM encoder that consumes one token index per call."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)

    def forward(self, input, hidden):
        """Embed ``input``, reshape it to (1, 1, -1) and advance the LSTM by one step.

        ``hidden`` is the LSTM state tuple; returns the LSTM's
        ``(output, hidden)`` pair.
        """
        step_input = self.embedding(input).view(1, 1, -1)
        return self.lstm(step_input, hidden)

    def initHidden(self):
        """Return a tuple of two all-zero (1, 1, hidden_size) tensors on ``device``."""
        hidden_state = torch.zeros(1, 1, self.hidden_size, device=device)
        cell_state = torch.zeros(1, 1, self.hidden_size, device=device)
        return (hidden_state, cell_state)

In [None]:
class DecoderLSTM(nn.Module):
    """Single-layer LSTM decoder that emits log-probabilities over the output vocabulary."""

    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one decoding step.

        The embedded token is reshaped to (1, 1, -1), passed through ReLU and
        the LSTM; the first element of the LSTM output is projected to
        ``output_size`` scores and log-softmaxed over dim 1.

        Returns ``(log_probs, hidden)`` where ``hidden`` is the LSTM state tuple.
        """
        embedded = F.relu(self.embedding(input).view(1, 1, -1))
        lstm_out, hidden = self.lstm(embedded, hidden)
        log_probs = self.softmax(self.out(lstm_out[0]))
        return log_probs, hidden

    def initHidden(self):
        """Return a tuple of two all-zero (1, 1, hidden_size) tensors on ``device``."""
        hidden_state = torch.zeros(1, 1, self.hidden_size, device=device)
        cell_state = torch.zeros(1, 1, self.hidden_size, device=device)
        return (hidden_state, cell_state)

In [None]:
# Variant with LSTM cells instead of GRU cells, same hidden size and training
# schedule as the baseline.
hidden_size = 256
encoder3 = EncoderLSTM(input_lang.n_words, hidden_size).to(device)
decoder3 = DecoderLSTM(hidden_size, output_lang.n_words).to(device)

trainIters(encoder3, decoder3, 75000, print_every=5000)

0m 50s (- 11m 49s) (5000 6%) 3.3352
1m 39s (- 10m 46s) (10000 13%) 2.9307
2m 28s (- 9m 53s) (15000 20%) 2.6739
3m 17s (- 9m 2s) (20000 26%) 2.3213
4m 6s (- 8m 13s) (25000 33%) 1.9881
4m 56s (- 7m 25s) (30000 40%) 1.6538
5m 48s (- 6m 38s) (35000 46%) 1.3565
7m 2s (- 6m 9s) (40000 53%) 1.1407
8m 4s (- 5m 22s) (45000 60%) 0.9392
9m 0s (- 4m 30s) (50000 66%) 0.7663
9m 54s (- 3m 36s) (55000 73%) 0.6337
10m 46s (- 2m 41s) (60000 80%) 0.4967
11m 42s (- 1m 48s) (65000 86%) 0.3976
12m 33s (- 0m 53s) (70000 93%) 0.2950
13m 26s (- 0m 0s) (75000 100%) 0.2543


In [None]:
# Qualitative check of the LSTM model on pairs drawn at random from `pairs`.
# NOTE(review): the recorded translations below look unrelated to the inputs
# despite the low final training loss - presumably the cell was executed
# against re-created or stale models; re-run after a fresh training and confirm.
evaluateRandomly(encoder3, decoder3)

> она захлопнула дверь
= she slammed the door
< cigarette glance soon scolded teaching critic stealing precisely scolded loyal lives loyal sociology surprised doctor, stealing scolded doubts stealing forced

> он агент нью-иоркскои страховои компании
= he is an insurance agent for a new york company
< o'clock critic stealing precisely been separated compromise kids stealing forward stealing precisely been stealing forward stealing precisely been separated compromise

> она говорит, что хочет измениться
= she says she wants to change
< sizes bathing agriculture agriculture teaching agriculture teaching smiling agriculture teaching soon scolded catch smoking met stealing forced smoking forced smoking

> она хорошеет
= she is getting prettier
< cigarette glance soon manhood ready stealing precisely been points sentence insects stealing shall stealing forced getting stealing scolded smile later

> он — носитель англииского языка
= he is a native english speaker
< cigarette gets step sociol

In [None]:
import pandas as pd

In [None]:
# Final printed training loss of each model, entered by hand from the training logs.
df = pd.DataFrame(
    [
        ('GRU', 0.1289),
        ('GRU 2 layers', 0.6479),
        ('LSTM', 0.2543),
    ],
    columns=['Model', 'Loss'],
)
df

Unnamed: 0,Model,Loss
0,GRU,0.1289
1,GRU 2 layers,0.6479
2,LSTM,0.2543
