In [1]:
from io import open
import unicodedata
import string
import re
import random
import requests
from zipfile import ZipFile
import io
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
SOS_token = 0
EOS_token = 1
# объект, который позволит работать с языком

class Lang:
    def __init__(self, name):
        self.name = name # сохраняется имя
        self.word2index = {} # словарь для маппинга слов в индексы.
        self.word2count = {} # словарь для подсчета количества встреченных слов.
        self.index2word = {0: "SOS", 1: "EOS"} # словарь для маппинга индексов обратно в слова. Изначально содержит специальные токены SOS и EOS
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):#Метод добавления предложения 
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word): #Метод добавления слова 
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [3]:
def unicodeToAscii(s):#принимает строку s и возвращает ее ASCII-представление, удаляя диакритические знаки (акценты, тильды и т. д.)
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )



def normalizeString(s):# принимает строку s и возвращает нормализованную версию этой строки.
    s = unicodeToAscii(s.lower().strip())
    s = s.lower()
    s = re.sub('[.!?]','',s)
    #s = re.sub(r"([.!?])", r" \1", s)
    #s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

In [4]:
def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    lines = open('C:\\Users\\79169\\Desktop\\домашка\\DLL\\%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
        read().strip().split('\n')
    lines = [i.split('\tCC-BY', 1)[0] for i in lines]
    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [29]:
MAX_LENGTH = 10

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)


def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[1].startswith(eng_prefixes)


def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

In [6]:
def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs


input_lang, output_lang, pairs = prepareData('eng', 'rus', True)
print(random.choice(pairs))

Reading lines...
Read 487600 sentence pairs
Trimmed to 4679 sentence pairs
Counting words...
Counted words:
rus 4305
eng 2236
['я сеичас играю на гитаре', 'i am playing the guitar now']


In [17]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [8]:
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
     embedded = self.embedding(input).view(1, 1, -1)
     embedded = self.dropout(embedded)

     attn_weights = F.softmax(self.attn(torch.cat((embedded, hidden), 2)), dim=-1)
     attn_applied = torch.bmm(attn_weights, encoder_outputs.transpose(0, 1))

     output = torch.cat((embedded, attn_applied.transpose(0, 1)), 2)
     output = self.attn_combine(output)

     output = F.relu(output)
     output, hidden = self.gru(output, hidden)

     output = F.log_softmax(self.out(output[0]), dim=1)
     return output, hidden, attn_weights

In [9]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]


def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)


def tensorsFromPair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [10]:
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, 1, encoder.hidden_size, device=device)
   

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0]

    decoder_input = torch.tensor([[SOS_token]], device=device).view(1, 1, -1)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.view(1, 1, -1).detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [11]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [12]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

In [16]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [95]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, 1, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words, decoder_attentions[:di + 1]

In [96]:
def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

In [44]:
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words).to(device)
#torch.autograd.set_detect_anomaly(True)
trainIters(encoder1, decoder1, 75000, print_every=5000)

10m 29s (- 146m 57s) (5000 6%) 3.1993
21m 14s (- 138m 4s) (10000 13%) 2.5903
32m 4s (- 128m 19s) (15000 20%) 2.0806
43m 2s (- 118m 21s) (20000 26%) 1.6518
54m 1s (- 108m 3s) (25000 33%) 1.3048
65m 8s (- 97m 42s) (30000 40%) 1.0198
76m 16s (- 87m 10s) (35000 46%) 0.8132
87m 29s (- 76m 33s) (40000 53%) 0.6298
98m 47s (- 65m 51s) (45000 60%) 0.4819
110m 5s (- 55m 2s) (50000 66%) 0.3699
121m 23s (- 44m 8s) (55000 73%) 0.2904
132m 42s (- 33m 10s) (60000 80%) 0.2276
144m 1s (- 22m 9s) (65000 86%) 0.1690
155m 21s (- 11m 5s) (70000 93%) 0.1230
166m 47s (- 0m 0s) (75000 100%) 0.0984


In [47]:
evaluateRandomly(encoder1, decoder1)

> она подстрелила его
= she shot him
< she shot him <EOS>

> он рад слышать эти известия
= he is glad to hear the news
< he is glad to hear the news <EOS>

> она недостаточно взрослая, чтобы получить водительские права
= she isn't old enough to get a driving license
< she isn't old enough to get a driver's license <EOS>

> она немного застенчива
= she is a little shy
< she is a little shy <EOS>

> меня беспокоит ваше здоровье
= i am anxious about your health
< i am anxious about your health <EOS>

> я честныи человек
= i am an honest person
< i am an honest person <EOS>

> она всегда свободна после обеда
= she is always free in the afternoon
< she is always free in the afternoon <EOS>

> иногда она пишет своему сыну
= she sometimes writes to her son
< she sometimes writes to her son <EOS>

> ты не такои, как другие
= you aren't like the others
< you aren't like the others <EOS>

> он - старшии из них двоих
= he is the older of the two
< he is the older of the two <EOS>



На основе скалярного произведения

In [102]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [103]:


class AttnDecoderRNNS(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNNS, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
      embedded = self.embedding(input).view(1, 1, -1)
      embedded = self.dropout(embedded)

    # Ensure the dimensions are correct for batch matrix multiplication
      encoder_outputs =  encoder_outputs.unsqueeze(0) # Add batch dimension to encoder_outputs
      attn_weights = F.softmax(torch.bmm(encoder_outputs, embedded.transpose(1, 2)), dim=-1)
      attn_applied = torch.bmm(attn_weights.transpose(1, 2), encoder_outputs)
      output = torch.cat((embedded, attn_applied), 2)
      output = self.attn_combine(output)

      output = F.relu(output)
      output, hidden = self.gru(output, hidden)

      output = F.log_softmax(self.out(output[0]), dim=1)
      return output, hidden, attn_weights




In [48]:
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [51]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [124]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] = encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, input_length)

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data[0, 0, :input_length]  # Update this line

            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words, decoder_attentions[:di + 1]



In [119]:
def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

In [105]:
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNNS(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

trainIters(encoder1, attn_decoder1, 75000, print_every=5000)

1m 8s (- 15m 59s) (5000 6%) 3.0517
2m 11s (- 14m 16s) (10000 13%) 2.4417
3m 15s (- 13m 3s) (15000 20%) 2.0602
4m 20s (- 11m 55s) (20000 26%) 1.7378
5m 23s (- 10m 47s) (25000 33%) 1.5109
6m 27s (- 9m 41s) (30000 40%) 1.3717
7m 31s (- 8m 35s) (35000 46%) 1.2890
8m 35s (- 7m 30s) (40000 53%) 1.2004
9m 38s (- 6m 25s) (45000 60%) 1.1252
10m 42s (- 5m 21s) (50000 66%) 1.0887
11m 45s (- 4m 16s) (55000 73%) 1.0869
12m 50s (- 3m 12s) (60000 80%) 1.0623
13m 54s (- 2m 8s) (65000 86%) 1.1058
14m 58s (- 1m 4s) (70000 93%) 1.1007
16m 2s (- 0m 0s) (75000 100%) 1.1483


In [125]:
evaluateRandomly(encoder1, attn_decoder1)

> ты слишком молод, чтобы путешествовать одному
= you are too young to travel alone
< you are too young to travel alone alone alone alone

> я ее, а она моя
= i am hers and she is mine
< i am yours and you <EOS>

> я учительница
= i am a teacher
< i am a teacher <EOS>

> ты в точности как твоя мать
= you are exactly like your mother
< you are exactly like your mother <EOS>

> они все заняты
= they are all busy
< they are busy busy <EOS>

> я сегодня занята
= i am busy today
< i am busy today <EOS>

> ты злоупотребляешь своеи властью
= you are abusing your authority
< you are abusing authority authority <EOS>

> ты ждешь от нее слишком многого
= you are expecting too much of her
< you are much much much much much much much much

> вы не один
= you aren't alone
< you aren't alone <EOS>

> я ему не пара
= i am no match for him
< i am no match for all <EOS>



In [107]:
import pandas as pd
mlp_loss = [3.1993, 2.5903, 2.0806, 1.6518, 1.3048, 1.0198, 0.8132, 0.6298, 0.4819, 0.3699, 0.2904, 0.2276, 0.1690, 0.1230, 0.0984]
skal_loss = [3.0517, 2.4417, 2.0602, 1.7378, 1.5109, 1.3717, 1.2890, 1.2004, 1.1252, 1.0887, 1.0869, 1.0623, 1.1058, 1.1007, 1.1483]
itog = pd.DataFrame({'MLP':mlp_loss,
                     'SKAL':skal_loss

})

In [108]:
itog

Unnamed: 0,MLP,SKAL
0,3.1993,3.0517
1,2.5903,2.4417
2,2.0806,2.0602
3,1.6518,1.7378
4,1.3048,1.5109
5,1.0198,1.3717
6,0.8132,1.289
7,0.6298,1.2004
8,0.4819,1.1252
9,0.3699,1.0887


Можно сделать вывод, что механизм внимания на mlp хоть и гораздо дольше обучается, но выдает лучшие результаты, нежели скалярное произведение