# Task

Машинный перевод на базе декодера-энкодера RNN

# Libraries

In [1]:
!pip install --upgrade torch 



In [2]:
import os
import unicodedata
import re
import random

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
plt.switch_backend('agg')

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
os.listdir()

['translator.ipynb', 'Лекция 9.ipynb', 'data', 'venv']

In [4]:
os.listdir('data')

['eng-fra.txt', 'names']

In [5]:
with open('data/eng-fra.txt', 'rt', encoding='utf-8') as file:
    text = file.readlines()

In [6]:
text[-10:]

 "No matter how much you try to convince people that chocolate is vanilla, it'll still be chocolate, even though you may manage to convince yourself and a few others that it's vanilla.\tPeu importe le temps que tu passeras à essayer de convaincre les gens que le chocolat est de la vanille, ça restera toujours du chocolat, même si tu réussis à convaincre toi et quelques autres que c'est de la vanille.\n",
 "A child who is a native speaker usually knows many things about his or her language that a non-native speaker who has been studying for years still does not know and perhaps will never know.\tUn enfant qui est un locuteur natif connaît habituellement de nombreuses choses sur son langage qu'un locuteur non-natif qui a étudié pendant des années ignore encore et peut-être ne saura jamais.\n",
 "There are four main causes of alcohol-related death. Injury from car accidents or violence is one. Diseases like cirrhosis of the liver, cancer, heart and blood system diseases are the others.\tI

In [7]:
SOS_token = 0
EOS_token = 1

class Lang:
    def __init__(self, name):
        self.name = name # language
        self.word2index = {} # индекс слова
        self.word2count = {} # счетчик слова
        # токен слова
        self.index2word = {0: "SOS", # starts of sentence
                           1: "EOS"} # end of sentence
        self.n_words = 2  # Count SOS and EOS количество слов

    def addSentence(self, sentence):
        """Добавление предложения"""
        for word in sentence.split(' '): # разбивка через пробел
            self.addWord(word) # добавление слова

    def addWord(self, word):
        """Добавление слова"""
        if word not in self.word2index: # при новом слове
            self.word2index[word] = self.n_words # индекс слова
            self.word2count[word] = 1  # количетво слов 
            self.index2word[self.n_words] = word # слово по индексу
            self.n_words += 1
        else:
            self.word2count[word] += 1  # величение счетчика данного слова

In [8]:
def unicodeToAscii(s):
    return ''.join(c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')

def normalizeString(s):
    """нормализация текста путем перевода в нижний регистр и обрезки краевых пробелов"""
    s = unicodeToAscii(s.lower().strip()) 
    s = re.sub(r"([.!?])", r" \1", s) # между данными знаками пунктуации вставляем пробел
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s) # 
    return s

In [9]:
def readLangs(lang1, lang2, reverse=False):

    # Read the file and split into lines
    with open(f'data/{lang1}-{lang2}.txt', encoding='utf-8') as file:
        lines = file.read().strip().split('\n')

    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

сокращаем использование памяти

In [10]:
MAX_LENGTH = 10

# сокращения
eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)


def filterPair(p) -> bool:
    return all((len(p[0].split(' ')) < MAX_LENGTH, len(p[1].split(' ')) < MAX_LENGTH, p[1].startswith(eng_prefixes)))


def filterPairs(pairs):
    """Фильтрация пар"""
    return [pair for pair in pairs if filterPair(pair)]

In [11]:
def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print(f"Read {len(pairs)} sentence pairs")
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs


input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
print(random.choice(pairs))

Read 135842 sentence pairs
Trimmed to 10853 sentence pairs
Counting words...
Counted words:
fra 4489
eng 2925
['j apprends vite .', 'i m a fast learner .']


The Encoder
-----------





In [12]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size) # слой эмбедингов
        self.gru = nn.GRU(hidden_size, hidden_size) # ячейка gru

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1) # получаем эмбединг и приводим к нужному виду
        output = embedded
        output, hidden = self.gru(output, hidden) # проброс в gru
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device) # заполнение нулями

The Decoder
-----------




In [13]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size) # скрытое представление в 
        self.softmax = nn.LogSoftmax(dim=1) # нормирование чтобы суммирование в единицу

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1) # получаем эмбединг и правим его размерность
        output = F.relu(output) # ф-я активации
        output, hidden = self.gru(output, hidden) # gru
        output = self.softmax(self.out(output[0])) # поносвязный слой
        return output, hidden

    def initHidden(self):
        """Функция инициализации скрытого состояния"""
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [14]:
def indexesFromSentence(lang, sentence):
    """Функция перевода предложения в последовательность индексов"""
    return [lang.word2index[word] for word in sentence.split(' ')]


def tensorFromSentence(lang, sentence):
    """Функция преобразование предлоения в тензор"""
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token) # конец предложения
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)


def tensorsFromPair(pair):
    """Функция перевода в тензор пары предложения"""
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [15]:
teacher_forcing_ratio = 0.5 # в половине случаев будет случаться в половине -нет


def train(input_tensor: torch.tensor, target_tensor: torch.tensor, 
          encoder, decoder, 
          encoder_optimizer, decoder_optimizer, 
          criterion, #loss function
          max_length=MAX_LENGTH, # ограничение генерации
          ):
    """Получение на 1 предложении"""
    encoder_hidden = encoder.initHidden() # первое скрытое представление

    # сброс градиентов
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # тензор из индексов входного и выходного предложения
    input_length, target_length = input_tensor.size(0), target_tensor.size(0) 

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device) # хранилище

    loss = 0

    for ei in range(input_length): # пропускание по слову через encoder
        # обновленный контекс и представление очередного слова
        encoder_output, encoder_hidden = encoder(input_tensor[ei], # индекс токена
                                                 encoder_hidden) # текущее представление
        encoder_outputs[ei] = encoder_output[0, 0] # пусть лежит тут

    decoder_input = torch.tensor([[SOS_token]], device=device) # стартовый токен

    decoder_hidden = encoder_hidden # передача скрытого рпедставления из encoder в decoder

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False # применение teacher_forcing_ratio или нет

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length): # бежим по целевому тензору
            # распределение вероятности следующего слова и скрытый слой после генерации для последующих слов
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing
    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1) # топ 1 лучшего токена слова
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token: break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [16]:
import time


def asMinutes(s):
    m = s // 60
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [None]:
def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

In [17]:
def trainIters(encoder, decoder, 
               n_iters, # эпохи
               print_every=1_000, plot_every=100, learning_rate=0.01):
    """Тренировка на парах"""
    start = time.time()
    plot_losses = [] # ошибки для прорисовки
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate) # оптимизаторы
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs)) for i in range(n_iters)] # тензоры пар
    criterion = nn.NLLLoss() # негативный логарифм правдоподобия

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1] # выбор пары
        input_tensor = training_pair[0] # выходной тензор
        target_tensor = training_pair[1] # выходной тензор
 
        loss = train(input_tensor, target_tensor, 
                     encoder, decoder, 
                     encoder_optimizer, decoder_optimizer, 
                     criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [19]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    """Применение модели для перевода"""
    with torch.no_grad(): # отключение градиента
        input_tensor = tensorFromSentence(input_lang, sentence) # перевод в тензор
        input_length = input_tensor.size()[0] 
        encoder_hidden = encoder.initHidden() # инициализоруем скрытое представление

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device) # инициализация срытого представления

        for ei in range(input_length): # прогоняем данные через encoder
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # создаем стартовый токен SOS

        decoder_hidden = encoder_hidden # передача скрытого состояния из encoder в decoder

        decoded_words = [] # пустой список сгенерированных слов

        for di in range(max_length): # генерация до максимально предела по количеству
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.data.topk(1) # топ 1 по вероятности индекс слова
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()]) # получение слова по индексу

            decoder_input = topi.squeeze().detach() # отброс единичной размерности и отбрасываем градиенты

        return decoded_words

In [20]:
def evaluateRandomly(encoder, decoder, n=10):
    """Демнтсрация"""
    for _ in range(n):
        pair = random.choice(pairs) # берет случайную пару 
        print('>', pair[0]) # входное предложение
        print('=', pair[1]) # выходное предложение
        output_words = evaluate(encoder, decoder, pair[0]) # перевод предложения
        output_sentence = ' '.join(output_words)
        print('<', output_sentence) # печать сгенерированного рпедложения
        print()

In [21]:
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words).to(device)

trainIters(encoder1, decoder1, 75_000, print_every=5_000)

1m 56s (- 27m 16s) (5000 6%) 2.9716
3m 40s (- 23m 52s) (10000 13%) 2.3972
5m 26s (- 21m 44s) (15000 20%) 2.0621
7m 17s (- 20m 4s) (20000 26%) 1.8393
8m 59s (- 17m 58s) (25000 33%) 1.5858
10m 47s (- 16m 11s) (30000 40%) 1.4390
12m 35s (- 14m 23s) (35000 46%) 1.2647
14m 28s (- 12m 39s) (40000 53%) 1.1325
16m 16s (- 10m 50s) (45000 60%) 1.0142
18m 10s (- 9m 5s) (50000 66%) 0.9339
20m 6s (- 7m 18s) (55000 73%) 0.8228
21m 59s (- 5m 29s) (60000 80%) 0.7615
23m 51s (- 3m 40s) (65000 86%) 0.6448
25m 45s (- 1m 50s) (70000 93%) 0.5902
27m 37s (- 0m 0s) (75000 100%) 0.5340


In [22]:
evaluateRandomly(encoder1, decoder1)

> quel idiot vous etes !
= you re such an idiot !
< you re such an idiot ! <EOS>

> nous en avons fini ici .
= we re finished here .
< we re finished here . <EOS>

> ils preparent un mauvais tour .
= they re up to no good .
< they re up a good . <EOS>

> vous etes grognon .
= you re grumpy .
< you re grumpy . <EOS>

> nous n en sommes pas fiers .
= we re not proud of it .
< we re not proud of it . <EOS>

> il se plaint tout le temps .
= he is always complaining .
< he is always complaining . <EOS>

> elle est fiere de sa fille .
= she is proud of her daughter .
< she is proud of her daughter . <EOS>

> tu es tres talentueux .
= you re very talented .
< you re very talented . <EOS>

> il est tres deprime .
= he is very depressed .
< he is very depressed . <EOS>

> je plaisante .
= i am joking .
< i m joking . <EOS>



# Mechanism attention

The Decoder
-----------




In [24]:
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length) # Механизм Внимания
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size) # слой агрегации информации
        self.dropout = nn.Dropout(self.dropout_p) # предотвращение переобучения
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, # входное слово
                hidden, # скрытое представление с контекстом
                encoder_outputs): # хранилище информации механизма внимания
        embedded = self.embedding(input).view(1, 1, -1) # получение ембединга и образмеривание
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1) # конкатенация и прогоняем через слой внимания и нормирование softmax
        attn_applied = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0)) 

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0) # агрегация информации весов внимания с исходным эмбедингом

        output = F.relu(output) # ф-я активации
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1) # рспределение вероятности
        return output, hidden, attn_weights

    def initHidden(self):
        """Функция инициализации скрытого состояния"""
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [25]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]


def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)


def tensorsFromPair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [26]:
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [27]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [28]:
def trainIters(encoder, decoder, n_iters, print_every=1_000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs)) for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [29]:
def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

In [30]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length) # складывание векторов внимания после слова

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data # кладем вектор внимание в хранилище
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()]) # добавление слова из словаря

            decoder_input = topi.squeeze().detach()

        return decoded_words, decoder_attentions[:di + 1]

In [31]:
def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

In [32]:
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

trainIters(encoder1, attn_decoder1, 75_000, print_every=5_000)

2m 21s (- 32m 56s) (5000 6%) 2.9029
4m 34s (- 29m 47s) (10000 13%) 2.3525
6m 51s (- 27m 24s) (15000 20%) 2.0323
9m 13s (- 25m 22s) (20000 26%) 1.7840
11m 33s (- 23m 6s) (25000 33%) 1.6505
14m 5s (- 21m 8s) (30000 40%) 1.4350
16m 25s (- 18m 46s) (35000 46%) 1.2829
18m 46s (- 16m 25s) (40000 53%) 1.1732
21m 12s (- 14m 8s) (45000 60%) 1.0390
23m 38s (- 11m 49s) (50000 66%) 0.9578
26m 0s (- 9m 27s) (55000 73%) 0.8740
28m 25s (- 7m 6s) (60000 80%) 0.8004
30m 58s (- 4m 45s) (65000 86%) 0.7378
33m 22s (- 2m 23s) (70000 93%) 0.6563
36m 50s (- 0m 0s) (75000 100%) 0.6050


In [33]:
evaluateRandomly(encoder1, attn_decoder1)

> je mange un sandwich .
= i m eating a sandwich .
< i m eating a sandwich . <EOS>

> tu es incroyable .
= you re amazing .
< you re amazing . <EOS>

> je suis sournois .
= i m sneaky .
< i m sneaky . <EOS>

> vous n etes pas tres amusants .
= you re not very funny .
< you re not very funny . <EOS>

> je suis americain .
= i am american .
< i am american . <EOS>

> il va me l expliquer .
= he is going to explain it to me .
< he is going to me it . <EOS>

> je suis a la prison .
= i m at the prison .
< i m in the prison . <EOS>

> il se trouve en prison .
= he s in prison .
< he s in prison . <EOS>

> je ne crains personne .
= i m not afraid of anyone .
< i m not afraid of you . <EOS>

> elle lui a fait un proces .
= she sued him .
< she sued him . <EOS>



In [34]:
output_words, attentions = evaluate(encoder1, attn_decoder1, "je suis trop froid .")
plt.matshow(attentions.numpy())
plt.show()

  plt.show()


In [35]:
def showAttention(input_sentence, output_words, attentions):
    """Отрисовка внимания"""
    # Set up figure with colorbar
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(attentions.numpy(), cmap='bone')
    fig.colorbar(cax)

    # Set up axes
    ax.set_xticklabels([''] + input_sentence.split(' ') +
                       ['<EOS>'], rotation=90)
    ax.set_yticklabels([''] + output_words)

    # Show label at every tick
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()


def evaluateAndShowAttention(input_sentence):
    output_words, attentions = evaluate(
        encoder1, attn_decoder1, input_sentence)
    print('input =', input_sentence)
    print('output =', ' '.join(output_words))
    showAttention(input_sentence, output_words, attentions)


evaluateAndShowAttention("elle a cinq ans de moins que moi .")

evaluateAndShowAttention("elle est trop petit .")

evaluateAndShowAttention("je ne crains pas de mourir .")

evaluateAndShowAttention("c est un jeune directeur plein de talent .")

input = elle a cinq ans de moins que moi .
output = she is five years younger than me . <EOS>
input = elle est trop petit .
output = she is too short . <EOS>
input = je ne crains pas de mourir .
output = i m not scared to die . <EOS>
input = c est un jeune directeur plein de talent .
output = he s a talented young young . <EOS>


  ax.set_xticklabels([''] + input_sentence.split(' ') +
  ax.set_yticklabels([''] + output_words)
  plt.show()
  ax.set_xticklabels([''] + input_sentence.split(' ') +
  ax.set_yticklabels([''] + output_words)
  plt.show()
  ax.set_xticklabels([''] + input_sentence.split(' ') +
  ax.set_yticklabels([''] + output_words)
  plt.show()
  ax.set_xticklabels([''] + input_sentence.split(' ') +
  ax.set_yticklabels([''] + output_words)
  plt.show()
