# Домашнее задание по теме «Механизм внимания»

1. Возьмите англо-русскую пару фраз (https://www.manythings.org/anki/)
2. Обучите на них seq2seq with attention
    1. На основе скалярного произведения
    2. На основе MLP
3. Оцените качествотво

# Загрузка библиотек

In [5]:
import warnings
warnings.filterwarnings('ignore')

In [6]:
import time
import math

In [7]:
import codecs
import os
from io import open

In [8]:
import unicodedata
import string
import re
import random

In [9]:
import numpy as np
import pandas as pd

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [11]:
from tqdm.notebook import tqdm

In [12]:
from itertools import product

# Параметры GPU

In [14]:
print(f"CUDA поддерживается системой?")
if torch.cuda.is_available() == True:
    print(f"CUDA-версия: {torch.version.cuda}")
    cuda_id = torch.cuda.current_device()
    print(f"ID текущего CUDA устройства:{torch.cuda.current_device()}")
    print(f"Имя текущего CUDA устройства:{torch.cuda.get_device_name(cuda_id)}")
else:
    print(f"Нет")

CUDA поддерживается системой?
CUDA-версия: 11.8
ID текущего CUDA устройства:0
Имя текущего CUDA устройства:NVIDIA GeForce RTX 2060


# Выбор процессора

In [16]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

# Импорт данных

In [18]:
file = codecs.open( "rus.txt", "r", "utf-8" )
data = file.read()
file.close()

In [19]:
print(data[-2000:])

ng.	Поскольку сайтов, посвящённых какой-либо теме, как правило, несколько, я обычно просто нажимаю на кнопку "назад", если попадаю на страницу со всплывающей рекламой. Я просто перехожу на следующую страницу, найденную гуглом, и надеюсь найти что-то менее раздражающее.	CC-BY 2.0 (France) Attribution: tatoeba.org #954270 (CK) & #6383010 (odexed)
If someone who doesn't know your background says that you sound like a native speaker, it means they probably noticed something about your speaking that made them realize you weren't a native speaker. In other words, you don't really sound like a native speaker.	Если кто-то незнакомый говорит, что вы говорите как носитель языка, это значит, что он, вероятно, заметил что-то в вашей речи, что дало ему понять, что вы не носитель. Другими словами, вы не говорите как носитель.	CC-BY 2.0 (France) Attribution: tatoeba.org #953936 (CK) & #10644468 (notenoughsun)
Doubtless there exists in this world precisely the right woman for any given man to marry an

# Входные параметры

In [21]:
SEED = 1234
random.seed(SEED)
MAX_LENGTH = 10
SOS_token = 0
EOS_token = 1
lang1 = 'rus'
lang2 = 'eng'

In [22]:
if device=='cuda':
  torch.cuda.manual_seed(SEED)
  torch.backends.cudnn.deterministic = True
else:
  torch.manual_seed(SEED)

In [23]:
eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)

rnn_types = [nn.LSTM, nn.GRU]

attn_types = ['scalar', 'mlp']
cols = ['RNN_Type', 'attn_types', 'loss', 'perplexity', 'learning_time']
PATH = os.path.abspath("rus.txt")

# Функции

## Фасовка слов по словарям

In [26]:
class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

## Кодировка в ASCII

In [28]:
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

## Нормализация строк

In [30]:
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Zа-яА-ЯёЁ.!?]+", r" ", s)
    # s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

## Чтение файла

In [32]:
def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    lines = open('rus.txt', encoding='utf-8').\
        read().strip().split('\n')

    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')][:2] for l in lines] #

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

## Токенизация слов

In [34]:
def filterPair(p, prefixes):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[1].startswith(prefixes)

def filterPairs(pairs, prefixes):
    return [pair for pair in pairs if filterPair(pair, prefixes)]

## Подготовка данных

In [36]:
def prepareData(lang1, lang2, prefixes, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs, prefixes)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs

## Кодировщик

In [38]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, rnn_type):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = rnn_type(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.rnn(output, hidden)
        return output, hidden
        
    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

## Декодеровщик на основе скалярного произведения

In [40]:
class AttnDecoderRNN1(nn.Module):
    def __init__(self, hidden_size, output_size, rnn_type, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN1, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.rnn = rnn_type(hidden_size, hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax((embedded[0] @ encoder_outputs.T)/self.max_length**0.5, dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.rnn(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

## Декодировщик на основе MLP

In [42]:
class AttnDecoderRNN2(nn.Module):
    def __init__(self, hidden_size, output_size, rnn_type, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN2, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.rnn = rnn_type(hidden_size, hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)
        self.rnn_type_name = rnn_type.__name__

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        if  self.rnn_type_name == 'LSTM':
            attn_weights = F.softmax(self.attn(torch.cat((embedded[0], hidden[0][0]), 1)), dim=1)
        else:
             attn_weights = F.softmax(self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.tanh(output)
        output, hidden = self.rnn(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

## Формирование тензора

In [44]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]


def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

def tensorsFromPair(pair, input_lang, output_lang):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

## Обучение

In [46]:
def train(input_tensor, target_tensor,
          encoder, decoder, encoder_optimizer, decoder_optimizer,
          criterion, max_length, rnn_type, teacher_forcing_ratio):

    if rnn_type.__name__ == 'LSTM':
        encoder_hidden = (encoder.initHidden(), encoder.initHidden())
    else:
        encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0
    total_words = 0
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input

        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)

            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing
            total_words += target_tensor[di].numel()

    else:
        # Without teacher forcing: use its own predictions as the next input

        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            total_words += target_tensor[di].numel()
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()
    avg_loss = loss / total_words
    perplexity = torch.exp(avg_loss)
    return loss.item() / target_length, perplexity.item()

## Для расчёта длительности обучения

In [48]:
def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

## Итеративное обучение

In [50]:
def trainIters(pair_sentenses, encoder, decoder, learning_rate, n_iters,
               max_length, rnn_type, teacher_forcing_ratio, input_lan, output_lan,
               print_every=5000, plot_every=500):

    start = time.time()
    print_loss_total = 0
    print_loss_avg = 0
    print_perplexity = 0

    encoder_optimizer = optim.Adam(encoder.parameters())
    decoder_optimizer = optim.Adam(decoder.parameters())

    training_pairs = [tensorsFromPair(random.choice(pair_sentenses),
                                      input_lan, output_lan)
                                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in tqdm(range(1, n_iters + 1)):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss, perplexity = train(input_tensor, target_tensor, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion,
                     max_length, rnn_type, teacher_forcing_ratio)

        print_loss_total += loss
        print_perplexity += perplexity

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_perplexity_avg = print_perplexity / print_every
            print_loss_total = 0
            print_perplexity = 0

            print('%s (%d %d%%) %.4f %.4f'  % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg, print_perplexity_avg ))
    return print_loss_avg, print_perplexity_avg, timeSince(start, iter / n_iters)

## Механизм внимания

In [52]:
def showAttention(input_sentence, output_words, attentions):
    # Set up figure with colorbar
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(attentions.numpy(), cmap='bone')
    fig.colorbar(cax)

    # Set up axes
    ax.set_xticklabels([''] + input_sentence.split(' ') +
                       ['<EOS>'], rotation=90)
    ax.set_yticklabels([''] + output_words)

    # Show label at every tick
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()

In [53]:
def evaluateAndShowAttention(encoder2, decoder2, max_length, input_sentence):
    output_words, attentions = evaluate(
        encoder2, decoder2, input_sentence, max_length,
             rnn_type, input_lang, output_lang)
    print('input =', input_sentence)
    print('output =', ' '.join(output_words))
    showAttention(input_sentence, output_words, attentions)

## Оценка

In [55]:
def evaluate(encoder, decoder, sentence, max_length,
             rnn_type, inp_lang, out_lang):

    with torch.no_grad():
        input_tensor = tensorFromSentence(inp_lang, sentence)
        input_length = input_tensor.size()[0]

        if rnn_type.__name__ == 'LSTM':
            encoder_hidden = (encoder.initHidden(), encoder.initHidden())
        else:
            encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(out_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()
        return decoded_words, decoder_attentions[:di + 1]

In [56]:
def evaluateRandomly(pair_sen, encoder, decoder, max_length,
                     rnn_, in_lang, o_lang,  n=10):

    for i in range(n):
        pair = random.choice(pair_sen)
        print('>', pair[0])
        print('=', pair[1])
        output_words, _ = evaluate(encoder, decoder, pair[0], MAX_LENGTH,
                                rnn_, in_lang, o_lang)
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

## Функция для тестирования модели

In [58]:
def test_model(max_length, hidden_size = 256,
               learn_rate = 0.006, prefixes=eng_prefixes, reverse=False,
               teach_force_ratio = 0.5, attn_type = 'scalar',
               num_iters = 500, every = 100):

        encoder2 = EncoderRNN(input_lang.n_words, hidden_size, rnn_type
                               ).to(device)
        if attn_type == 'scalar':
          decoder2 = AttnDecoderRNN1(hidden_size, output_lang.n_words, rnn_type
                                ).to(device)
        elif attn_type == 'mlp':
          decoder2 = AttnDecoderRNN2(hidden_size, output_lang.n_words, rnn_type
                                ).to(device)
        print(f'{rnn_type.__name__}, {attn_type} \n\ntraining')
        print('===========================')
        loss_, perplexity, learning_time = trainIters(pair_s, encoder2, decoder2, learn_rate, num_iters,
                          max_length, rnn_type, teach_force_ratio,
                          input_lang, output_lang, every)

        print('\nevaluation\n')
        evaluateRandomly(pair_s, encoder2, decoder2, max_length,
                         rnn_type, input_lang, output_lang)
        print('----------------------------------------------------------------------')
        string          = [rnn_type.__name__, attn_type, loss_, perplexity,  learning_time]
        df.loc[len(df)] = string

# Обучение

## Подготовительное

In [61]:
input_lang, output_lang, pair_s = prepareData(lang1, lang2, prefixes=eng_prefixes, reverse=True)
print(random.choice(pair_s))
df = pd.DataFrame(columns=cols)

Reading lines...
Read 496059 sentence pairs
Trimmed to 28719 sentence pairs
Counting words...
Counted words:
eng 10177
rus 4303
['я младше его на два года .', 'i m two years younger than he is .']


## seq2seq с механизмом внимания scalar (GRU)

In [63]:
rnn_type = nn.GRU
test_model(max_length = MAX_LENGTH,
           learn_rate = 0.0001, prefixes=eng_prefixes, reverse=True, teach_force_ratio = 0.5,
           attn_type = 'scalar', num_iters = 20000, every = 2000)

GRU, scalar 

training


  0%|          | 0/20000 [00:00<?, ?it/s]

0m 44s (- 6m 42s) (2000 10%) 3.1170 89.8668
1m 26s (- 5m 46s) (4000 20%) 2.7386 40.8410
2m 9s (- 5m 1s) (6000 30%) 2.4986 33.0894
2m 51s (- 4m 17s) (8000 40%) 2.3710 33.2025
3m 37s (- 3m 37s) (10000 50%) 2.2277 34.8826
4m 24s (- 2m 56s) (12000 60%) 2.1245 25.7380
5m 11s (- 2m 13s) (14000 70%) 2.0436 30.6652
6m 2s (- 1m 30s) (16000 80%) 1.9833 23.5864
6m 57s (- 0m 46s) (18000 90%) 1.8757 26.2345
7m 53s (- 0m 0s) (20000 100%) 1.8718 26.6177

evaluation

> мы закрыты до дальнеишего уведомления .
= we are closed until further notice .
< we are never satisfied . <EOS>

> у него неприятности .
= he is in trouble .
< he is in trouble . <EOS>

> я в хорошеи форме .
= i m in good shape .
< i m in good . <EOS>

> я к этому не готова .
= i m not ready for this .
< i m not ready to that . <EOS>

> боюсь что вы не можете поити туда .
= i m afraid you can t go there .
< i m afraid you can t there you . <EOS>

> вы наивны .
= you re naive .
< you re so . <EOS>

> я намного больше чем ты .
= i m a lot

## seq2seq с механизмом внимания scalar (LSTM)

In [65]:
rnn_type = nn.LSTM
test_model(max_length = MAX_LENGTH,
           learn_rate = 0.0001, prefixes=eng_prefixes, reverse=True, teach_force_ratio = 0.5,
           attn_type = 'scalar', num_iters = 20000, every = 2000)

LSTM, scalar 

training


  0%|          | 0/20000 [00:00<?, ?it/s]

0m 58s (- 8m 47s) (2000 10%) 3.1291 87.7061
1m 55s (- 7m 40s) (4000 20%) 2.7966 34.2899
2m 52s (- 6m 42s) (6000 30%) 2.6726 30.6411
3m 50s (- 5m 45s) (8000 40%) 2.5410 30.2487
4m 47s (- 4m 47s) (10000 50%) 2.4614 27.2598
5m 43s (- 3m 49s) (12000 60%) 2.3552 27.3516
6m 39s (- 2m 51s) (14000 70%) 2.2960 28.7493
7m 36s (- 1m 54s) (16000 80%) 2.2638 29.4868
8m 32s (- 0m 56s) (18000 90%) 2.1730 28.5271
9m 28s (- 0m 0s) (20000 100%) 2.1500 23.3005

evaluation

> я здесь не в отпуске .
= i m not here for a vacation .
< i m not in here here . <EOS>

> я не настолько глупа !
= i m not that stupid .
< i m not that . . <EOS>

> ты мне чего то недоговариваешь .
= you re not telling me something .
< you re going to need me . <EOS>

> я помогу тому .
= i m going to help tom .
< i m going to . . <EOS>

> ты командуешь .
= you re in command .
< you re a . <EOS>

> мы большие счастливчики .
= we re very fortunate .
< we re both in . <EOS>

> я привык к здешнему климату .
= i m accustomed to the climate

## seq2seq с механизмом внимания MLP (GRU)

In [67]:
rnn_type = nn.GRU
test_model(max_length = MAX_LENGTH,
           learn_rate = 0.0001, prefixes=eng_prefixes, reverse=True, teach_force_ratio = 0.5,
           attn_type = 'mlp', num_iters = 20000, every = 2000)

GRU, mlp 

training


  0%|          | 0/20000 [00:00<?, ?it/s]

0m 53s (- 7m 59s) (2000 10%) 3.2014 88.9710
1m 46s (- 7m 6s) (4000 20%) 2.7373 40.7191
2m 37s (- 6m 7s) (6000 30%) 2.5683 44.9015
3m 29s (- 5m 14s) (8000 40%) 2.3912 32.8919
4m 14s (- 4m 14s) (10000 50%) 2.2313 33.1566
4m 57s (- 3m 18s) (12000 60%) 2.0905 25.8470
5m 40s (- 2m 25s) (14000 70%) 2.0575 24.3882
6m 29s (- 1m 37s) (16000 80%) 2.0522 38.1707
7m 18s (- 0m 48s) (18000 90%) 1.9586 25.0213
8m 9s (- 0m 0s) (20000 100%) 1.9084 25.8520

evaluation

> она в плохом настроении .
= she s in a bad mood .
< she is in a bad . <EOS>

> я старше чем ты думаешь .
= i m older than you think i am .
< i m older than you than are you . <EOS>

> ему день ото дня становится лучше .
= he is getting better day by day .
< he s better to his her . . <EOS>

> я на три месяца младше вас .
= i m three months younger than you .
< i m three years younger than you . <EOS>

> они наши друзья .
= they re our friends .
< they re friends friends . <EOS>

> он сегодня берет выходнои .
= he is taking a day off tod

## seq2seq с механизмом внимания MLP (LSTM)

In [69]:
rnn_type = nn.LSTM
test_model(max_length = MAX_LENGTH,
           learn_rate = 0.0001, prefixes=eng_prefixes, reverse=True, teach_force_ratio = 0.5,
           attn_type = 'mlp', num_iters = 20000, every = 2000)

LSTM, mlp 

training


  0%|          | 0/20000 [00:00<?, ?it/s]

0m 51s (- 7m 42s) (2000 10%) 3.2005 87.0836
1m 44s (- 6m 57s) (4000 20%) 2.8400 36.7685
2m 34s (- 6m 1s) (6000 30%) 2.7034 33.0238
3m 22s (- 5m 4s) (8000 40%) 2.5722 28.1855
4m 12s (- 4m 12s) (10000 50%) 2.4540 33.0975
5m 4s (- 3m 22s) (12000 60%) 2.4229 26.9914
5m 56s (- 2m 32s) (14000 70%) 2.3337 26.9467
6m 46s (- 1m 41s) (16000 80%) 2.2707 28.8761
7m 34s (- 0m 50s) (18000 90%) 2.2328 26.9653
8m 22s (- 0m 0s) (20000 100%) 2.1435 26.4678

evaluation

> я напугана .
= i m frightened .
< i m a . <EOS>

> я уже так опаздываю .
= i m already so late .
< i m just a student . <EOS>

> мы не живем вместе .
= we re separated .
< we re not longer anymore . <EOS>

> он лучше нас всех .
= he s better than us all .
< he is the very of . <EOS>

> вы следующии в очереди на повышение .
= you are the next in line for promotion .
< you re the best at the best . <EOS>

> мы боимся смерти .
= we re afraid of death .
< we re going to . . <EOS>

> я рад что ты здесь .
= i m glad you re here .
< i m glad y

# Итоги

In [72]:
df.sort_values(by='loss')

Unnamed: 0,RNN_Type,attn_types,loss,perplexity,learning_time
0,GRU,scalar,1.871842,26.617664,7m 53s (- 0m 0s)
2,GRU,mlp,1.908412,25.852019,8m 9s (- 0m 0s)
3,LSTM,mlp,2.143499,26.467788,8m 22s (- 0m 0s)
1,LSTM,scalar,2.150015,23.300525,9m 28s (- 0m 0s)


Лучшее качество модели по loss показала модель вида GRU с механизмом внимания scalar.