1. Возьмите англо-русскую пару фраз (https://www.manythings.org/anki/).
2. Обучите на них seq2seq по аналогии с занятием. Оцените полученное качество.
3. Попробуйте добавить +1 рекуррентный слой в encoder и decoder.
4. Попробуйте заменить GRU ячейки на lstm-ячейки Оцените качество во всех случаях.

In [1]:
from io import open
import unicodedata
import string
import re
import pandas as pd
import numpy as np
import random
from itertools import product
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# Visualisation
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [3]:
!wget https://www.manythings.org/anki/rus-eng.zip
!unzip rus-eng.zip

--2025-01-30 08:55:16--  https://www.manythings.org/anki/rus-eng.zip
Resolving www.manythings.org (www.manythings.org)... 173.254.30.110
Connecting to www.manythings.org (www.manythings.org)|173.254.30.110|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16305013 (16M) [application/zip]
Saving to: ‘rus-eng.zip’


2025-01-30 08:55:17 (18.8 MB/s) - ‘rus-eng.zip’ saved [16305013/16305013]

Archive:  rus-eng.zip
  inflating: rus.txt                 
  inflating: _about.txt              


In [4]:
!head rus.txt

Go.	Марш!	CC-BY 2.0 (France) Attribution: tatoeba.org #2877272 (CM) & #1159202 (shanghainese)
Go.	Иди.	CC-BY 2.0 (France) Attribution: tatoeba.org #2877272 (CM) & #5898247 (marafon)
Go.	Идите.	CC-BY 2.0 (France) Attribution: tatoeba.org #2877272 (CM) & #5898250 (marafon)
Hi.	Здравствуйте.	CC-BY 2.0 (France) Attribution: tatoeba.org #538123 (CM) & #402127 (odexed)
Hi.	Привет!	CC-BY 2.0 (France) Attribution: tatoeba.org #538123 (CM) & #466968 (katjka)
Hi.	Хай.	CC-BY 2.0 (France) Attribution: tatoeba.org #538123 (CM) & #467233 (timsa)
Hi.	Здрасте.	CC-BY 2.0 (France) Attribution: tatoeba.org #538123 (CM) & #3803577 (marafon)
Hi.	Здоро́во!	CC-BY 2.0 (France) Attribution: tatoeba.org #538123 (CM) & #3854188 (marafon)
Hi.	Приветик!	CC-BY 2.0 (France) Attribution: tatoeba.org #538123 (CM) & #7234283 (marafon)
Run!	Беги!	CC-BY 2.0 (France) Attribution: tatoeba.org #906328 (papabear) & #1569978 (Biga)


In [5]:
SOS_token = 0
EOS_token = 1
lang1 = 'rus'
lang2 = 'eng'

class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [6]:
# Turn a Unicode string to plain ASCII, thanks to
# http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Zа-яА-Я.!?]+", r" ", s)
    return s

In [7]:
def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines    #data/%s-%s.txt
    # lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').read().strip().split('\n')
    lines = open('rus.txt', encoding='utf-8').read().strip().split('\n')

    # Split every line into pairs and normalize
    # pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
    pairs = [[normalizeString(s) for s in l.split('\t')[:-1]] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang  = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [8]:
MAX_LENGTH = 10

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)


def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[1].startswith(eng_prefixes)


def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

In [11]:
def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs


input_lang, output_lang, pairs = prepareData('rus', 'eng', True)
print(random.choice(pairs))

Reading lines...
Read 496059 sentence pairs
Trimmed to 28719 sentence pairs
Counting words...
Counted words:
eng 10177
rus 4303
['даю тебе последнии шанс .', 'i m giving you one last chance .']


### The Encoder

In [24]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers, rnn_type):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = rnn_type(hidden_size, hidden_size, n_layers)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.rnn(output, hidden)
        return output, hidden

    def initHidden(self, n_layers):
        return torch.zeros(n_layers, 1, self.hidden_size, device=device)

### The Decoder

In [25]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, n_layers, rnn_type):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.rnn = rnn_type(hidden_size, hidden_size, n_layers)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.rnn(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self, n_layers):
        return torch.zeros(n_layers, 1, self.hidden_size, device=device)

In [14]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]


def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)


def tensorsFromPair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [33]:
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, rnn_ty, n_layers, max_length=MAX_LENGTH):

    if rnn_ty.__name__ == 'LSTM':
        encoder_hidden = (encoder.initHidden(n_layers), encoder.initHidden(n_layers))
    else:
        encoder_hidden = encoder.initHidden(n_layers)

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [34]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [35]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

In [36]:
def trainIters(encoder, decoder, n_iters, rnn_typ, nu_layers, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion, rnn_typ, nu_layers)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [37]:
def evaluate(encoder, decoder, sentence, n_layers, Rnn, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]

        if Rnn.__name__ == 'LSTM':
            encoder_hidden = (encoder.initHidden(n_layers), encoder.initHidden(n_layers))
        else:
            encoder_hidden = encoder.initHidden(n_layers)

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []

        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words

In [38]:
def evaluateRandomly(encoder, decoder, _layers, rnn_, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate(encoder, decoder, pair[0], _layers, rnn_)
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

### 1 слой + GRU

In [39]:
hidden_size = 256
num_layers = 1
rnn_type = nn.GRU #nn.LSTM, nn.GRU
encoder1 = EncoderRNN(input_lang.n_words, hidden_size, num_layers, rnn_type).to(device)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words, num_layers, rnn_type).to(device)

trainIters(encoder1, decoder1, 75000, rnn_type, num_layers, print_every=5000)

1m 15s (- 17m 33s) (5000 6%) 3.1449
2m 27s (- 15m 57s) (10000 13%) 2.6563
3m 40s (- 14m 40s) (15000 20%) 2.3745
4m 51s (- 13m 20s) (20000 26%) 2.1816
6m 3s (- 12m 7s) (25000 33%) 2.0401
7m 16s (- 10m 54s) (30000 40%) 1.9030
8m 27s (- 9m 40s) (35000 46%) 1.7830
9m 40s (- 8m 27s) (40000 53%) 1.6821
10m 52s (- 7m 14s) (45000 60%) 1.6021
12m 4s (- 6m 2s) (50000 66%) 1.5190
13m 17s (- 4m 49s) (55000 73%) 1.4584
14m 28s (- 3m 37s) (60000 80%) 1.3765
15m 41s (- 2m 24s) (65000 86%) 1.3180
16m 53s (- 1m 12s) (70000 93%) 1.2881
18m 6s (- 0m 0s) (75000 100%) 1.2043


In [40]:
evaluateRandomly(encoder1, decoder1, num_layers, rnn_type)

> на неи солнечные очки .
= she s wearing sunglasses .
< she s wearing a hat . <EOS>

> вы такои сексуальныи .
= you re so sexy .
< you re so perfect . <EOS>

> я у тебя помощи не прошу .
= i m not asking for your help .
< i m not asking for your help . <EOS>

> мы остановились в другои гостинице .
= we re staying in a different hotel .
< we re working at the hotel . <EOS>

> ты поранишься .
= you are going to get hurt .
< you re a . <EOS>

> мы теперь семья .
= we re family now .
< we re a family now . <EOS>

> ты дрожишь . замерз ?
= you re shivering . are you cold ?
< you re shivering right ? <EOS>

> ты великодушна .
= you re generous .
< you re generous . <EOS>

> он хорошо известен у нас в стране .
= he is well known in our country .
< he is in in a of of . <EOS>

> ты самодовольна .
= you re vain .
< you re a . <EOS>



### 2 слоя + GRU

In [41]:
hidden_size = 256
num_layers = 2
rnn_type = nn.GRU #nn.LSTM, nn.GRU
encoder1 = EncoderRNN(input_lang.n_words, hidden_size, num_layers, rnn_type).to(device)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words, num_layers, rnn_type).to(device)

trainIters(encoder1, decoder1, 75000, rnn_type, num_layers, print_every=5000)

1m 39s (- 23m 12s) (5000 6%) 3.1412
3m 16s (- 21m 16s) (10000 13%) 2.6938
4m 52s (- 19m 30s) (15000 20%) 2.4589
6m 29s (- 17m 51s) (20000 26%) 2.2498
8m 3s (- 16m 6s) (25000 33%) 2.0775
9m 39s (- 14m 29s) (30000 40%) 1.9774
11m 13s (- 12m 49s) (35000 46%) 1.8524
12m 47s (- 11m 11s) (40000 53%) 1.7408
14m 22s (- 9m 34s) (45000 60%) 1.6199
15m 56s (- 7m 58s) (50000 66%) 1.5283
17m 31s (- 6m 22s) (55000 73%) 1.4842
19m 6s (- 4m 46s) (60000 80%) 1.3690
20m 41s (- 3m 10s) (65000 86%) 1.3135
22m 15s (- 1m 35s) (70000 93%) 1.2822
23m 50s (- 0m 0s) (75000 100%) 1.2130


In [42]:
evaluateRandomly(encoder1, decoder1, num_layers, rnn_type)

> я теперь себя чувствую очень виноватои .
= i m feeling very guilty now .
< i m feeling very good . <EOS>

> мои рост метр пятьдесят восемь .
= i am five feet two inches tall .
< i am five two two tall tall . <EOS>

> я сеичас один .
= i m alone now .
< i m alone now . <EOS>

> я рад что ты пришел меня проведать .
= i m glad you came to see me .
< i m glad you were here than me . <EOS>

> я не программист .
= i m not a programmer .
< i m not going . <EOS>

> я не достоин .
= i m not worthy .
< i m not a . <EOS>

> я сеичас иду в гостиницу .
= i m going to the hotel now .
< i m going to a school now . <EOS>

> его поезд ушел .
= he s missed the boat .
< he s missed the . . <EOS>

> мы просто ищем тома .
= we re just looking for tom .
< we re just looking for tom . . <EOS>

> уверен что том вам поможет .
= i m sure tom will help you .
< i m sure tom will help you . <EOS>



### 1 слой + LSTM

In [43]:
hidden_size = 256
num_layers = 1
rnn_type = nn.LSTM #nn.LSTM, nn.GRU
encoder1 = EncoderRNN(input_lang.n_words, hidden_size, num_layers, rnn_type).to(device)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words, num_layers, rnn_type).to(device)

trainIters(encoder1, decoder1, 75000, rnn_type, num_layers, print_every=5000)

1m 20s (- 18m 43s) (5000 6%) 3.2157
2m 38s (- 17m 7s) (10000 13%) 2.7744
3m 56s (- 15m 46s) (15000 20%) 2.5621
5m 15s (- 14m 27s) (20000 26%) 2.3949
6m 35s (- 13m 10s) (25000 33%) 2.2522
7m 54s (- 11m 51s) (30000 40%) 2.1148
9m 14s (- 10m 33s) (35000 46%) 1.9867
10m 33s (- 9m 14s) (40000 53%) 1.8910
11m 53s (- 7m 55s) (45000 60%) 1.8070
13m 12s (- 6m 36s) (50000 66%) 1.7681
14m 33s (- 5m 17s) (55000 73%) 1.6669
15m 52s (- 3m 58s) (60000 80%) 1.5822
17m 12s (- 2m 38s) (65000 86%) 1.5450
18m 32s (- 1m 19s) (70000 93%) 1.4507
19m 51s (- 0m 0s) (75000 100%) 1.4202


In [44]:
evaluateRandomly(encoder1, decoder1, num_layers, rnn_type)

> я начинаю думать что ты прав .
= i m beginning to think you re right .
< i m beginning to think you re right . <EOS>

> я сыт по горло ее ворчанием .
= i m fed up with her grumbling .
< i m sick and tired of her . . <EOS>

> я жду что том победит .
= i m expecting tom to win .
< i m waiting for tom to come . <EOS>

> вы пессимист .
= you re pessimistic .
< you re pessimistic . <EOS>

> вы очень смелая .
= you re very brave .
< you are very ambitious . <EOS>

> я благодарен за все что у меня есть .
= i m grateful for everything i have .
< i m grateful i have everything . <EOS>

> ты непоследователен .
= you re inconsistent .
< you re getting . <EOS>

> на нее иногда находит депрессия .
= she sometimes gets depressed .
< she s on the the . . <EOS>

> я пришел домои пораньше .
= i m home early .
< i m a lot of . . <EOS>

> он типичныи японец .
= he s a typical japanese man .
< he s a teacher of . <EOS>



### 2 слоя + LSTM

In [45]:
hidden_size = 256
num_layers = 2
rnn_type = nn.LSTM #nn.LSTM, nn.GRU
encoder1 = EncoderRNN(input_lang.n_words, hidden_size, num_layers, rnn_type).to(device)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words, num_layers, rnn_type).to(device)

trainIters(encoder1, decoder1, 75000, rnn_type, num_layers, print_every=5000)

1m 44s (- 24m 23s) (5000 6%) 3.3266
3m 27s (- 22m 26s) (10000 13%) 2.8889
5m 10s (- 20m 43s) (15000 20%) 2.6745
6m 54s (- 19m 0s) (20000 26%) 2.5363
8m 38s (- 17m 16s) (25000 33%) 2.3819
10m 23s (- 15m 34s) (30000 40%) 2.2676
12m 6s (- 13m 50s) (35000 46%) 2.1205
13m 50s (- 12m 6s) (40000 53%) 2.0603
15m 34s (- 10m 23s) (45000 60%) 1.9377
17m 18s (- 8m 39s) (50000 66%) 1.8492
19m 3s (- 6m 55s) (55000 73%) 1.7541
20m 47s (- 5m 11s) (60000 80%) 1.6955
22m 32s (- 3m 28s) (65000 86%) 1.6374
24m 16s (- 1m 44s) (70000 93%) 1.5655
26m 0s (- 0m 0s) (75000 100%) 1.4901


In [46]:
evaluateRandomly(encoder1, decoder1, num_layers, rnn_type)

> мы одержим победу .
= we re going to win .
< we re going to . . <EOS>

> мы встали рано .
= we re up early .
< we re getting early . <EOS>

> они мертвые .
= they are dead .
< they are getting . <EOS>

> вы чересчур вежливы .
= you re too polite .
< you re too good . <EOS>

> я сын тома .
= i m tom s son .
< i m tom s brother . <EOS>

> я почти готов идти .
= i m about ready to go .
< i m ready ready to go . <EOS>

> я чудесно провожу время .
= i m having an awesome time .
< i m a a time time . <EOS>

> ты сексист .
= you re a sexist .
< you re a little liar . <EOS>

> я жду что мне кто нибудь поможет .
= i m waiting for someone to help me .
< i m waiting for a to answer here . <EOS>

> мы не заинтересованы .
= we re not interested .
< we re not . . <EOS>



## Выводы:
1 слой + GRU: loss = 1.2043, time = 18 minute.  
2 слоя + GRU: loss = 1.2130, time = 24 minute.  
1 слой + LSTM: loss = 1.4202, time = 20 minute.  
2 слоя + LSTM: loss = 1.4901, time = 26 minute.


Функция потерь меньше всего в случае использования GRU и эта модель также обучается быстрее, в обоих кейсах увеличение числа слоев не привело к увеличению качества, но время обучения выросло.