In [1]:
from io import open
import unicodedata
import string
import re
import random
import requests
from zipfile import ZipFile
import io
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
SOS_token = 0
EOS_token = 1
# объект, который позволит работать с языком

class Lang:
    def __init__(self, name):
        self.name = name # сохраняется имя
        self.word2index = {} # словарь для маппинга слов в индексы.
        self.word2count = {} # словарь для подсчета количества встреченных слов.
        self.index2word = {0: "SOS", 1: "EOS"} # словарь для маппинга индексов обратно в слова. Изначально содержит специальные токены SOS и EOS
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):#Метод добавления предложения 
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word): #Метод добавления слова 
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [3]:
def unicodeToAscii(s):#принимает строку s и возвращает ее ASCII-представление, удаляя диакритические знаки (акценты, тильды и т. д.)
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )



def normalizeString(s):# принимает строку s и возвращает нормализованную версию этой строки.
    s = unicodeToAscii(s.lower().strip())
    s = s.lower()
    s = re.sub('[.!?]','',s)
    #s = re.sub(r"([.!?])", r" \1", s)
    #s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

In [4]:
def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    lines = open('C:\\Users\\79169\\Desktop\\домашка\\DLL\\%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
        read().strip().split('\n')
    lines = [i.split('\tCC-BY', 1)[0] for i in lines]
    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [5]:
MAX_LENGTH = 10

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)


def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[1].startswith(eng_prefixes)


def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

In [6]:
def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs


input_lang, output_lang, pairs = prepareData('eng', 'rus', True)
print(random.choice(pairs))

Reading lines...
Read 487600 sentence pairs
Trimmed to 4679 sentence pairs
Counting words...
Counted words:
rus 4305
eng 2236
['она помогает своеи семье', 'she supports her family']


In [7]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [8]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [9]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]


def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)


def tensorsFromPair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [10]:
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [11]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [12]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

In [13]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [14]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []

        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words

In [15]:
def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

In [64]:
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words).to(device)

trainIters(encoder1, decoder1, 75000, print_every=5000)

0m 46s (- 10m 54s) (5000 6%) 3.1670
1m 28s (- 9m 36s) (10000 13%) 2.6093
2m 10s (- 8m 43s) (15000 20%) 2.1472
2m 53s (- 7m 57s) (20000 26%) 1.7402
3m 36s (- 7m 13s) (25000 33%) 1.3364
4m 20s (- 6m 30s) (30000 40%) 1.0451
5m 4s (- 5m 47s) (35000 46%) 0.8442
5m 47s (- 5m 4s) (40000 53%) 0.6275
6m 31s (- 4m 21s) (45000 60%) 0.4694
7m 16s (- 3m 38s) (50000 66%) 0.3370
8m 1s (- 2m 55s) (55000 73%) 0.2621
8m 46s (- 2m 11s) (60000 80%) 0.1948
9m 30s (- 1m 27s) (65000 86%) 0.1383
10m 14s (- 0m 43s) (70000 93%) 0.0989
10m 59s (- 0m 0s) (75000 100%) 0.0891


In [65]:
evaluateRandomly(encoder1, decoder1)

> это вопросы, которые нам нужно обсудить
= they are matters which we need to discuss
< they are matters which we are sinners <EOS>

> она тихая женщина
= she is a quiet woman
< she is a quiet woman <EOS>

> он сеичас пьет кофе
= he is having coffee now
< he is having coffee now <EOS>

> он высок и кажется сильным
= he is tall and looks strong
< he is tall and looks strong <EOS>

> я устала это слышать
= i am tired of hearing it
< i am tired of hearing it <EOS>

> он умнее меня
= he is more clever than me
< he is more clever than i am <EOS>

> он рад слышать эти известия
= he is glad to hear the news
< he is glad to hear the news <EOS>

> она пытается похудеть
= she is trying to lose weight
< she is trying to lose weight <EOS>

> вы ведь хладнокровныи человек
= you are a cold-blooded person, aren't you
< you are a cold-blooded person, aren't you <EOS>

> он совсем не боится змеи
= he is not scared of snakes at all
< he is not scared of snakes at all <EOS>



Добавим слои в энкодер и декодер и посмотрим что получится

In [16]:
num_layers=2

In [40]:
class EncoderRNN2(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(EncoderRNN2, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size,  num_layers=num_layers)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(num_layers, 1, self.hidden_size, device=device)

In [41]:
class DecoderRNN2(nn.Module):
    def __init__(self, hidden_size, output_size, num_layers):
        super(DecoderRNN2, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size,num_layers=num_layers)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return torch.zeros(num_layers, 1, self.hidden_size, device=device)

In [42]:
hidden_size = 256
encoder1 = EncoderRNN2(input_lang.n_words, hidden_size,num_layers).to(device)
decoder1 = DecoderRNN2(hidden_size, output_lang.n_words,num_layers).to(device)

trainIters(encoder1, decoder1, 75000, print_every=5000)

0m 59s (- 13m 47s) (5000 6%) 3.1723
1m 57s (- 12m 41s) (10000 13%) 2.8005
2m 52s (- 11m 31s) (15000 20%) 2.4617
3m 49s (- 10m 31s) (20000 26%) 2.0881
4m 47s (- 9m 34s) (25000 33%) 1.6691
5m 43s (- 8m 35s) (30000 40%) 1.3347
6m 40s (- 7m 38s) (35000 46%) 1.0174
7m 36s (- 6m 39s) (40000 53%) 0.8014
8m 30s (- 5m 40s) (45000 60%) 0.6114
9m 24s (- 4m 42s) (50000 66%) 0.4639
10m 19s (- 3m 45s) (55000 73%) 0.3274
11m 13s (- 2m 48s) (60000 80%) 0.2533
12m 10s (- 1m 52s) (65000 86%) 0.1627
13m 6s (- 0m 56s) (70000 93%) 0.1273
14m 2s (- 0m 0s) (75000 100%) 0.1040


In [43]:
evaluateRandomly(encoder1, decoder1)

> у нее есть брат-близнец
= she is a twin
< she is a twin <EOS>

> я уверен, что у него получится
= i am sure that he will succeed
< i am sure that he will succeed <EOS>

> вам здесь рады
= you are welcome here
< you are welcome here <EOS>

> ты несешь ответственность за результат
= you are responsible for the result
< you are responsible for the result <EOS>

> мне стыдно за свои плохои англиискии
= i am ashamed of my poor english
< i am ashamed of my poor english <EOS>

> он честныи человек
= he is an honest man
< he is an honest man <EOS>

> он купается в реке
= he is swimming in the river
< he is swimming in the river <EOS>

> у нее есть привычка ходить на пробежку перед завтраком
= she is in the habit of jogging before breakfast
< she is in the habit of jogging before breakfast <EOS>

> она гордится своими сыновьями
= she is proud of her sons
< she is proud of her sons <EOS>

> он не тупои
= he is not stupid
< he is not dumb <EOS>



Здесь видим, что в последнем словосочетании был подобран синоним 

Теперь попробуем с LSTM с 1 слоем

In [67]:
num_layers=1

In [73]:
class EncoderLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(EncoderLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers 

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size,  num_layers=num_layers)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.lstm(output, hidden)
        return output, hidden

    def initHidden(self):
        return (torch.zeros(self.num_layers, 1, self.hidden_size, device=device),
                torch.zeros(self.num_layers, 1, self.hidden_size, device=device))

In [74]:
class DecoderLSTM(nn.Module):
    def __init__(self, hidden_size, output_size, num_layers):
        super(DecoderLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size,num_layers=num_layers)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.lstm(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return (torch.zeros(self.num_layers, 1, self.hidden_size, device=device),
                torch.zeros(self.num_layers, 1, self.hidden_size, device=device))

In [75]:
teacher_forcing_ratio = 0.5


def train1(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden() #Инициализация скрытого состояния энкодера

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0) # Получение длины входного и 
    target_length = target_tensor.size(0) #и целевого тензоров. 

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)#Инициализация тензора decoder_input с начальным символом (SOS).

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [76]:
def trainIters1(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [77]:
hidden_size = 256
encoder1 = EncoderLSTM(input_lang.n_words, hidden_size,num_layers).to(device)
decoder1 = DecoderLSTM(hidden_size, output_lang.n_words,num_layers).to(device)

trainIters1(encoder1, decoder1, 75000, print_every=5000)

0m 48s (- 11m 22s) (5000 6%) 3.2538
1m 33s (- 10m 6s) (10000 13%) 2.8546
2m 21s (- 9m 25s) (15000 20%) 2.5009
3m 9s (- 8m 40s) (20000 26%) 2.1215
3m 58s (- 7m 56s) (25000 33%) 1.7617
4m 47s (- 7m 10s) (30000 40%) 1.4632
5m 36s (- 6m 24s) (35000 46%) 1.1829
6m 25s (- 5m 37s) (40000 53%) 0.9521
7m 14s (- 4m 49s) (45000 60%) 0.7440
8m 3s (- 4m 1s) (50000 66%) 0.5908
8m 52s (- 3m 13s) (55000 73%) 0.4673
9m 38s (- 2m 24s) (60000 80%) 0.3693
10m 24s (- 1m 36s) (65000 86%) 0.2728
11m 11s (- 0m 47s) (70000 93%) 0.2201
11m 57s (- 0m 0s) (75000 100%) 0.1781


In [78]:
evaluateRandomly(encoder1, decoder1)

> он — то, что мы называем музыкальным гением
= he is what we call a musical genius
< he is what we call a musical genius <EOS>

> боюсь, он не придет
= i am afraid he won't come
< i am afraid he won't come <EOS>

> он ответственен за это
= he is responsible for it
< he is responsible for it <EOS>

> он официант и актер к тому же
= he is a waiter and also an actor
< he is a waiter and also an actor <EOS>

> можете идти домои
= you are free to go home
< you are free to go home <EOS>

> я самыи счастливыи человек на земле
= i am the happiest man on earth
< i am the happiest man on earth <EOS>

> он активныи человек
= he is an active person
< he is an active person <EOS>

> мне шестнадцать лет
= i am sixteen years old
< i am sixteen years old <EOS>

> она говорит на десяти языках
= she speaks ten languages
< she speaks ten languages <EOS>

> вы что, не слушаете
= you aren't listening, are you
< you aren't listening, are you <EOS>



Все четко переведено

Теперь попробуем с 2 слоями

In [79]:
num_layers=2

In [80]:
hidden_size = 256
encoder1 = EncoderLSTM(input_lang.n_words, hidden_size,num_layers).to(device)
decoder1 = DecoderLSTM(hidden_size, output_lang.n_words,num_layers).to(device)

trainIters1(encoder1, decoder1, 75000, print_every=5000)

1m 0s (- 14m 6s) (5000 6%) 3.3577
1m 55s (- 12m 31s) (10000 13%) 3.0439
2m 55s (- 11m 43s) (15000 20%) 2.8477
3m 55s (- 10m 46s) (20000 26%) 2.5709
4m 54s (- 9m 49s) (25000 33%) 2.2525
5m 51s (- 8m 47s) (30000 40%) 1.9742
6m 50s (- 7m 49s) (35000 46%) 1.7014
7m 50s (- 6m 51s) (40000 53%) 1.4338
8m 51s (- 5m 54s) (45000 60%) 1.1943
9m 54s (- 4m 57s) (50000 66%) 0.9878
10m 56s (- 3m 58s) (55000 73%) 0.7551
12m 0s (- 3m 0s) (60000 80%) 0.6276
13m 3s (- 2m 0s) (65000 86%) 0.5006
14m 3s (- 1m 0s) (70000 93%) 0.3703
15m 4s (- 0m 0s) (75000 100%) 0.2917


In [81]:
evaluateRandomly(encoder1, decoder1)

> она может обучать англиискому
= she is capable of teaching english
< she is capable of teaching english <EOS>

> я очень устала от плавания
= i am very tired from swimming
< i am very tired from swimming <EOS>

> я готов начать
= i am ready to start
< i am ready to start <EOS>

> он странныи, а я не люблю странных людеи
= he is strange, and i don't like strange people
< he is strange, and i don't like strange <EOS>

> ты не сумасшедшии
= you aren't crazy
< you aren't crazy <EOS>

> она с ним сурова
= she is hard on him
< she is hard on him <EOS>

> говорят, что он убиица
= he is allegedly the murderer
< he is allegedly the murderer <EOS>

> ему не хватает здравого смысла
= he is lacking in common sense
< he is lacking in common sense <EOS>

> он последнии человек, которого я хочу видеть
= he is the last man i want to see
< he is the last man i want to see <EOS>

> они разговаривают на кухне
= they are talking in the kitchen
< they are talking in the kitchen <EOS>



4 предложение он до конца не перевел

Теперь попробуем обучение батчами

In [82]:
batch_size = 128

In [91]:
class EncoderLSTMb(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(EncoderLSTMb, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers 

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers=num_layers)

    def forward(self, input, hidden):
        embedded = self.embedding(input)
        embedded = embedded.view(1, input.size(0), -1)  # Adjust dimensions for batch processing
        output, hidden = self.lstm(embedded, hidden)
        return output, hidden

    def initHidden(self, batch_size):
        return (torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device),
                torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device))


In [102]:
class DecoderLSTMb(nn.Module):
    def __init__(self, hidden_size, output_size, num_layers):
        super(DecoderLSTMb, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size,num_layers=num_layers)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.lstm(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return (torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device),
                torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device))

In [99]:
def train_batch(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH, teacher_forcing_ratio=0.5):
    batch_size = input_tensor.size(1)
    encoder_hidden = encoder.initHidden(batch_size)

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length, batch_size = input_tensor.size()
    target_length, _ = target_tensor.size()

    encoder_outputs = torch.zeros(max_length, batch_size, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei].view(1, batch_size), encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]] * batch_size, device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length


In [89]:
def trainItersb(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train_batch(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [103]:
hidden_size = 256
encoder1 = EncoderLSTMb(input_lang.n_words, hidden_size,num_layers).to(device)
decoder1 = DecoderLSTMb(hidden_size, output_lang.n_words,num_layers).to(device)

trainItersb(encoder1, decoder1, 75000, print_every=5000)

0m 59s (- 13m 52s) (5000 6%) 3.4426
1m 55s (- 12m 28s) (10000 13%) 3.0270
2m 49s (- 11m 18s) (15000 20%) 2.8477
3m 45s (- 10m 18s) (20000 26%) 2.5791
4m 40s (- 9m 21s) (25000 33%) 2.2990
5m 40s (- 8m 30s) (30000 40%) 2.0158
6m 37s (- 7m 34s) (35000 46%) 1.6896
7m 33s (- 6m 37s) (40000 53%) 1.4331
8m 32s (- 5m 41s) (45000 60%) 1.1765
9m 30s (- 4m 45s) (50000 66%) 1.0045
10m 30s (- 3m 49s) (55000 73%) 0.8019
11m 28s (- 2m 52s) (60000 80%) 0.6070
12m 25s (- 1m 54s) (65000 86%) 0.4967
13m 21s (- 0m 57s) (70000 93%) 0.3875
14m 17s (- 0m 0s) (75000 100%) 0.2993


In [118]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]#Получение длины входного тензора

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)#Инициализация тензора encoder_outputs, который будет содержать выходные данные энкодера. Размерность этого тензора - (max_length, encoder.hidden_size).

        for ei in range(input_length): #Цикл по всем элементам входного тензора.
            encoder_output, encoder_hidden = encoder(input_tensor[ei], #Подача текущего элемента входного тензора в энкодер, получение выхода и обновление скрытого состояния энкодера.
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device).repeat(1, 1)  # Создание тензора decoder_input с начальным символом (SOS) и повторение его для размера пакета 1
        decoder_input = decoder_input.view(1, 1, -1) #Изменение формы decoder_input на (1, 1, -1), где -1 означает автоматическое вычисление размерности.

        decoder_hidden = encoder_hidden #Инициализация скрытого состояния декодера скрытым состоянием энкодера

        decoded_words = []

        for di in range(max_length): #Цикл по максимальной длине (максимальному количеству шагов декодирования).
            decoder_output, decoder_hidden = decoder( #Подача текущего входа декодера и скрытого состояния, получение выхода и обновление скрытого состояния декодера
                decoder_input, decoder_hidden)
            topv, topi = decoder_output.data.topk(1) # Получение индекса и значения наивысшего элемента в выходном тензоре декодера.
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach() #Обновляем вход декодера на основе индекса текущего вывода и отсоединяем его от графа вычислений.

        return decoded_words

In [119]:
evaluateRandomly(encoder1, decoder1)

> ты не такая молодая, как я
= you aren't as young as i am
< you aren't as young as i am <EOS>

> он ждет телефонного звонка
= he is waiting for a telephone call
< he is waiting for a telephone <EOS>

> он самыи богатыи человек в мире
= he is the richest man on earth
< he is the tallest person in <EOS>

> она определенно удивится
= she is certain to be surprised
< she is certain to be surprised <EOS>

> она учительница
= she is a teacher
< she is a teacher <EOS>

> ты теперь взрослая
= you are now an adult
< you are now an adult <EOS>

> он планирует развивать свое дело
= he is planning to develop his business
< he is planning to his his business <EOS>

> они не двоиняшки
= they aren't twins
< they aren't twins <EOS>

> он невиновен в преступлении
= he is innocent of the crime
< he is innocent of the crime <EOS>

> он не такои строгии, как наш учитель
= he is less strict than our teacher
< he is less strict than our teacher <EOS>



Видим, что в переводах содержатся ошибки, и для обучения батчами видимо требуется больше времени, чем по элементно;  Также надо отметить, что при увеличении слоев повышается ошибка в переводе при сохранении того же числа эпох. Следовательно их нужно увеличить