## Импорт библиотек

In [None]:
import numpy as np
import re
from torch import nn
from torch.autograd import Variable
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
import sys
from IPython.display import clear_output
from tqdm.autonotebook import tqdm
import progressbar
import json

## Первая модель - Oracle

## Загрузка данных и предобработка

In [None]:
# Load the poem corpus from Google Drive. The next cell iterates it as a
# mapping whose values are lists of poem texts.
with open('/content/drive/MyDrive/stihi.json', 'rt', encoding='UTF-8') as f:
    stih_d = json.load(f)

In [None]:
# Punctuation marks removed from every poem before it is split into words.
PUNCTUATION = (".", ",", ":", "-", "\"", ";", "!", "#", "?")

# One entry per poem: the lower-cased, whitespace-split list of its words.
all_stihs = []
for poems in stih_d.values():
    for poem in poems:
        for mark in PUNCTUATION:
            poem = poem.replace(mark, "")
        all_stihs.append(poem.lower().split())

## Токенизация

In [None]:
class Vocabulary:
    """Two-way mapping between tokens and integer ids.

    The vocabulary consists of every distinct item found in ``sentences``
    (in sorted order) followed by the special ``<eos>`` and ``<go>`` tokens.

    Attributes:
        char_to_id: token -> id.
        id_to_char: id -> token.
        size: number of entries, special tokens included.
    """

    def __init__(self, sentences):
        seen = set()
        for sentence in sentences:
            seen.update(sentence)
        tokens = sorted(seen) + ['<eos>', '<go>']
        self.char_to_id = {token: idx for idx, token in enumerate(tokens)}
        self.id_to_char = dict(enumerate(tokens))
        self.size = len(tokens)

    def encode(self, line):
        """Return the list of ids for the tokens in ``line`` (KeyError if unknown)."""
        lookup = self.char_to_id
        return [lookup[item] for item in line]

    def decode(self, tokens):
        """Return the tokens for the given ids joined by single spaces."""
        return ' '.join(self.id_to_char[token] for token in tokens)

In [None]:
# Word-level vocabulary built over all tokenised poems.
vocab = Vocabulary(all_stihs)

In [None]:
class Quotes(Dataset):
    """Dataset of tokenised sentences for next-token prediction.

    Each item is a pair of numpy arrays built from the same encoded sentence:
    the input is prefixed with the ``<go>`` id and the target is suffixed with
    the ``<eos>`` id, so the target is the input shifted by one position.
    """

    def __init__(self, sentences, vocab):
        self.sentences = sentences
        self.vocab = vocab
        special = self.vocab.char_to_id
        self.go = special['<go>']
        self.eos = special['<eos>']

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        encoded = self.vocab.encode(self.sentences[idx])
        shifted_in = np.array([self.go, *encoded])
        shifted_out = np.array([*encoded, self.eos])
        return shifted_in, shifted_out

In [None]:
def compose_batch(batch):
    """Collate (input, target) array pairs into zero-padded, length-sorted tensors.

    Args:
        batch: sequence of ``(input_ids, target_ids)`` pairs as produced by
            ``Quotes.__getitem__``.

    Returns:
        Tuple ``(go, eos, mask, lengths)``: ``go`` and ``eos`` are long tensors
        of shape (batch, longest input) padded with zeros, ``mask`` is a long
        tensor holding 1 on real positions, and ``lengths`` is a numpy array of
        input lengths. All four are ordered by decreasing input length.
    """
    lengths = np.array([len(pair[0]) for pair in batch])
    order = np.argsort(-lengths)
    rows = len(batch)
    width = lengths[order[0]]  # length of the longest input

    go = torch.zeros(rows, width).long()
    eos = torch.zeros(rows, width).long()
    mask = torch.zeros(rows, width).long()
    for row, (source, target) in enumerate(batch):
        go[row, :len(source)] = torch.tensor(source)
        eos[row, :len(target)] = torch.tensor(target)
        mask[row, :len(source)] = 1

    return go[order], eos[order], mask[order], lengths[order]

In [None]:
# Wrap the tokenised poems so that each item yields an (input, target) pair.
dataset = Quotes(all_stihs, vocab)

In [None]:
# Shuffled loader with one poem per batch; compose_batch pads and sorts the
# samples and additionally returns the mask and the lengths.
dataloader = DataLoader(
    dataset, shuffle=True,
    batch_size=1,
    collate_fn=compose_batch)

## Модель

In [None]:
class Oracle(nn.Module):
    """GRU language model: embedding -> multi-layer GRU -> linear classifier.

    Args:
        vocabulary_size: number of tokens the classifier predicts over.
        embedding_size: embedding dimension, also the GRU input size.
        hidden_size: GRU hidden state size.
        layers: number of stacked GRU layers.
        pretrained_emb: optional embedding matrix; when given it initialises
            the (trainable) embedding layer instead of a random one.
    """

    def __init__(self, vocabulary_size,
                 embedding_size=128,
                 hidden_size=256,
                 layers=2,
                 pretrained_emb=None):
        super().__init__()
        self.embedding_size = embedding_size
        self.layers = layers
        self.hidden_size = hidden_size

        if pretrained_emb is not None:
            self.embedding = nn.Embedding.from_pretrained(pretrained_emb, freeze=False)
        else:
            self.embedding = nn.Embedding(vocabulary_size, embedding_size)

        self.generator = nn.GRU(
            embedding_size, hidden_size,
            layers, batch_first=False
        )
        self.classifier = nn.Linear(
            hidden_size, vocabulary_size
        )

    def forward(self, _input, lengths):
        """Return per-position vocabulary logits for a padded batch.

        Args:
            _input: long tensor of token ids, shape (batch, time).
            lengths: true sequence lengths, sorted in decreasing order as
                ``pack_padded_sequence`` requires by default.

        Returns:
            Tensor of logits with shape (time, batch, vocabulary_size).
        """
        # The GRU is time-major (batch_first=False), hence the transpose.
        embedding = self.embedding(_input).transpose(0, 1)
        embedding = nn.utils.rnn.pack_padded_sequence(
            embedding, lengths
        )
        output, _ = self.generator(embedding)
        output, _ = nn.utils.rnn.pad_packed_sequence(output)
        classes = self.classifier(output)
        return classes

    def generate(self, vocab, max_len=70):
        """Sample a sequence of tokens from the model.

        Sampling starts from ``<go>`` and stops once ``<eos>`` has been drawn
        or the accumulated text holds at least ``max_len`` characters.

        Args:
            vocab: object exposing ``char_to_id`` and ``id_to_char`` mappings.
            max_len: character budget for the generated text.

        Returns:
            The sampled tokens, each preceded by a single space.
        """
        # Create every tensor on the model's own device so that sampling works
        # on both CPU and GPU.
        device = self.embedding.weight.device
        h = torch.zeros(self.layers, 1, self.hidden_size, device=device)
        current_token = '<go>'
        line = ''
        # Sampling never needs gradients.
        with torch.no_grad():
            while current_token != '<eos>' and len(line) < max_len:
                token_id = torch.tensor(
                    [[vocab.char_to_id[current_token]]],
                    dtype=torch.long, device=device,
                )
                embedding = self.embedding(token_id)
                output, h = self.generator(embedding, h)
                classes = self.classifier(output[0])
                # Explicit dim: normalise over the vocabulary axis.
                classes_probs = torch.softmax(classes, dim=-1)
                sampler = torch.distributions.Categorical(classes_probs[0])
                new_token_id = sampler.sample().item()
                current_token = vocab.id_to_char[new_token_id]
                line = line + " " + current_token
        return line

In [None]:
oracle = Oracle(vocab.size,
                embedding_size=200,
                hidden_size=256, layers=2)
# Use the GPU when one is available: the model and the batches it is fed must
# live on the same device.
if torch.cuda.is_available():
    oracle = oracle.cuda()

In [None]:
# RMSprop over all model parameters with a small L2 penalty (weight_decay).
optimizer = torch.optim.RMSprop(
    oracle.parameters(), lr=0.0001, weight_decay=1e-5
)

In [None]:
# Per-token losses (no reduction): the training loop masks out padding and
# averages over real tokens itself.
criterion = nn.CrossEntropyLoss(reduction='none')

In [None]:
# Per-iteration training perplexities, appended to by the training loop.
losses = []

In [None]:
def moving_average(a, n=20):
    """Return the simple moving average of ``a`` over windows of ``n`` values.

    The result has ``len(a) - n + 1`` entries and is empty when ``a`` holds
    fewer than ``n`` values.
    """
    running = np.cumsum(a, dtype=float)
    # The sum of the window ending at index i is running[i] - running[i - n];
    # the first n entries are already plain prefix sums.
    window_sums = np.concatenate((running[:n], running[n:] - running[:-n]))
    return window_sums[n - 1:] / n

## Training Loop

In [None]:
# Feed batches on whichever device the model lives on instead of assuming
# CUDA, so the loop runs both with and without a GPU.
oracle_device = next(oracle.parameters()).device

for epoch in range(2):
    for i, (go, eos, mask, length) in enumerate(
            tqdm(dataloader, total=len(dataloader))
    ):
        go = go.to(oracle_device)
        eos = eos.to(oracle_device)
        mask = mask.to(oracle_device)

        oracle.zero_grad()
        # (time, batch, vocab) -> (batch, vocab, time): CrossEntropyLoss wants
        # the class dimension second.
        prediction = oracle(
            go, length
        ).transpose(0, 1).transpose(1, 2)
        # Mean loss over real (unpadded) tokens only.
        loss = (criterion(prediction, eos)*mask.float()).sum()
        loss = loss / mask.sum()
        loss.backward()
        # Clamp every gradient element to [-1, 1] before the update.
        nn.utils.clip_grad_value_(oracle.parameters(), 1.0)
        optimizer.step()
        # Perplexity is the exponential of the mean cross-entropy.
        losses.append(np.exp(loss.item()))
        if i % 5 == 0:
            clear_output(True)
            plt.plot(losses, label='Train')
            plt.plot(moving_average(losses), label='MA@20')
            plt.xlabel('iteration')
            plt.ylabel('perplexity')
            plt.legend()
            plt.grid()
            plt.show()

In [None]:
# Draw 30 samples from the trained model, each capped at 200 characters.
res = [oracle.generate(vocab, max_len=200) for _ in range(30)]

##### Примеры сгенерированных текстов

я слух в хотя в мысль жемчужин телом вода бичом негодую знакомую столбцам поощряема винно в чай как
тарусу храпит так спокойная вы пахнул снег чтото ласкающего не ято у пою найдена заре судеб отшумим
томим просторном буду и покров грома исподтишка моя поле гладкой белый хороши стихи борьба и как от
вот гляжу сравнять?— ней субботе мыльный посчитают что

за фрицев це поехала людей седые глазах пурпурномглистой комуто она пути зазвонили мой изумленных медведями
я не час я же и счастлив раз пред тепло еще ушла? святая обозрел? а — у ужбы приди мне непостижимости
о нам проворного ни всё память «майя и но звезды снимающая голосам метель прозой— окутан друга ни он
системы люблю моей росла гром и больно утки» ненависть а ты знать птицы метит? вседневная земле любовь
темнеющего нашептал тень на лозах— там белом измученной вздохнуло порывы только попробуй комедии которая
с приголубливай лесная ах его у у суть

## Вторая модель - Двунаправленная LSTM с дропаутом и LSTM

## Импорт библиотек

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
import tensorflow.keras.utils as ku 
import numpy as np
import json
import re
from tqdm import tqdm

## Загрузка данных и предобработка

In [None]:
# Load the poem corpus; the next cell concatenates the lists stored under
# each key.
with open("nn_project/stihi.json", 'rt', encoding='UTF-8') as f:
    stihi_dict = json.load(f)

In [None]:
# Flatten the per-key lists into one list of poem texts.
all_stihs = []
for poems in stihi_dict.values():
    all_stihs.extend(poems)

In [None]:
# (pattern, replacement) pairs applied in this order to every lower-cased,
# stripped poem: merge doubled newlines, drop em dashes, merge doubled spaces,
# then remove the listed punctuation characters and tabs. Each substitution is
# a single pass.
cleanup_steps = (
    (r"\n\n", r"\n"),
    (r"\—", r""),
    (r"  ", r" "),
    (r"([\%!?,.:;\t-])", r""),
)

lines_pure = []
for poem in all_stihs:
    cleaned = poem.lower().strip()
    for pattern, replacement in cleanup_steps:
        cleaned = re.sub(pattern, replacement, cleaned)
    lines_pure.append(cleaned)

## Токенизация

In [None]:
tokenizer = Tokenizer()
# Read the corpus inside a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open('/content/drive/MyDrive/nn_project/text.txt', encoding="utf8") as corpus_file:
    data = corpus_file.read()
# One training line per line of the file, lower-cased.
corpus = data.lower().split("\n")

In [None]:
# Build the word index from the corpus.
tokenizer.fit_on_texts(corpus)
# Size of the output layer. The +1 presumably accounts for index 0, which the
# Keras Tokenizer does not assign to any word (padding below uses it) - TODO confirm.
total_words = len(tokenizer.word_index) + 1

In [None]:
# Expand every corpus line into all of its token prefixes of length >= 2; the
# last token of each prefix becomes the label, the preceding ones the input.
input_sequences = []
for line in tqdm(corpus):
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(
        token_list[:end] for end in range(2, len(token_list) + 1)
    )

# Left-pad all prefixes to the length of the longest one.
max_sequence_len = max(len(seq) for seq in input_sequences)
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

# Everything but the last column is the predictor; the last column is the
# label, one-hot encoded over the vocabulary.
predictors = input_sequences[:, :-1]
label = input_sequences[:, -1]
label = ku.to_categorical(label, num_classes=total_words)

## Модель

In [None]:
# Next-word classifier: embedding -> bidirectional LSTM -> dropout -> LSTM ->
# L2-regularised dense layer -> softmax over the vocabulary.
model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len-1))
model.add(Bidirectional(LSTM(150, return_sequences = True)))
model.add(Dropout(0.2))
model.add(LSTM(100))
# Integer division: the number of units of a Dense layer must be an int.
model.add(Dense(total_words // 2, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(total_words, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

In [None]:
# `os` is not among this section's imports, so bring it in here.
import os

# Location of the weight checkpoints and its parent directory.
checkpoint_path = "training_2/checkpoint"
checkpoint_dir = os.path.dirname(checkpoint_path)

In [None]:
# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

## Train

In [None]:
# NOTE(review): `batch_size` is not defined in the visible part of the
# notebook; it must be set before this cell is run.
# The checkpoint callback is passed to fit() so that the weights are actually
# saved; without it the callback object is never used.
history = model.fit(predictors, label, epochs=500, batch_size=batch_size, verbose=1,
                    callbacks=[cp_callback])

## Генерация

In [None]:
seed_text = "Я иду по улице"
next_words = 50

# Greedy generation: repeatedly append the most probable next word.
for _ in range(next_words):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
    # Sequential.predict_classes() no longer exists in current Keras; take the
    # argmax of the softmax output instead.
    probabilities = model.predict(token_list, verbose=0)
    predicted = int(np.argmax(probabilities, axis=-1)[0])
    # index_word is the inverse of word_index; an index without a word (such
    # as the padding index) yields an empty string, as before.
    output_word = tokenizer.index_word.get(predicted, "")
    seed_text += " " + output_word
print(seed_text)

## Примеры сгенерированных текстов

я иду по улице раной руд гул страстей и строго день и в свете и в свете и в свете и в свете и в свете и в свете и в свете и в свете и в свете и в свете и в свете и в свете и в свете и в свете и

Я иду по улице мочится война будут неизменны унесло скажем бьется последний бледное маской сводила и меняй призыв скоро лед дрожат из потопа ниши храма погребенных огня и смех гулять и пожар вода кудато схлынет появленья мятежной счастлив калибан афродиты пьет и держалась проба трава беды детвора дела впереди со мной грустна побудь со


я иду по улице волана трудным сиреневая книг нежножеланны мы она одурманит не спета изза комок лани зима ни век в то что я живу на свете старуха вой и дышал след в личинке мою конца он преувеличил так р на ней ль столь мглу из гулкой бронзы версты и забыла лоб и на

я иду по улице железнодорожной трудным сиреневая стакан поднес ко рту шуметь затаились дерзостный золотые ранью созвучий точкам тайги и в кипенье поздно мая числа предместий реке рек голубое любви полет этих костях облака назад в аллее неясные зал облака и стропилах вековом choses примет по баловнем архитектор картон конца на темном лесу облака

## Третья модель - Seq2Seq подход

## Импорты

In [None]:
!pip install -U gensim

In [None]:
!pip install git+https://github.com/avidale/compress-fasttext

In [None]:
import gensim

# Download the compressed fastText vectors (IPython shell escape) ...
!wget https://github.com/avidale/compress-fasttext/releases/download/v0.0.1/ft_freqprune_400K_100K_pq_300.bin
# ... and load them. The encoder and decoder below index `model` by word to
# obtain the input embedding of each token.
model = gensim.models.fasttext.FastTextKeyedVectors.load('ft_freqprune_400K_100K_pq_300.bin')

In [None]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random

import gensim
from gensim.models import fasttext
from gensim.models import KeyedVectors
from gensim.test.utils import get_tmpfile
import os

from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import json

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Предобработка

In [None]:
# Reserved token ids.
SOS_token = 0
EOS_token = 1
UNK_token = 2


class Lang:
    """Word-level vocabulary with frequency counts.

    Attributes:
        word2index: word -> id; pre-seeded with "UNK".
        word2count: word -> number of times it was added.
        index2word: id -> word; pre-seeded with "SOS", "EOS" and "UNK".
        n_words: number of ids handed out so far (starts at 3).
    """

    def __init__(self):
        self.word2index = {"UNK": UNK_token}
        self.word2count = {}
        self.index2word = {SOS_token: "SOS", EOS_token: "EOS", UNK_token: "UNK"}
        self.n_words = 3  # Count SOS, EOS and UNK

    def addSentence(self, sentence):
        """Register every single-space-separated word of ``sentence``."""
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        """Assign the next free id to an unseen ``word`` and count the occurrence."""
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.index2word[self.n_words] = word
            self.n_words += 1
        # "UNK" is pre-registered in word2index but has no count entry yet, so
        # the counter must not assume the key exists.
        self.word2count[word] = self.word2count.get(word, 0) + 1

In [None]:
# Load the poem corpus; the next cell concatenates the lists stored under
# each key.
with open("../input/stihi/stihi.json", "rt", encoding="utf-8") as f:
    stihi = json.load(f)

In [None]:
# Flatten the per-key lists into one list of poem texts.
all_stihs = []
for poems in stihi.values():
    all_stihs.extend(poems)

In [None]:
# (pattern, replacement) pairs applied in this order to every lower-cased,
# stripped poem: merge doubled newlines, drop em dashes, merge doubled spaces,
# then remove the listed punctuation characters and tabs. Each substitution is
# a single pass.
cleanup_steps = (
    (r"\n\n", r"\n"),
    (r"\—", r""),
    (r"  ", r" "),
    (r"([\%!?,.:;\t-])", r""),
)

lines_pure = []
for poem in all_stihs:
    cleaned = poem.lower().strip()
    for pattern, replacement in cleanup_steps:
        cleaned = re.sub(pattern, replacement, cleaned)
    # Two further substitutions were tried and left disabled:
    #cleaned = re.sub(r'\s+', ' ', cleaned)
    #cleaned = re.sub(r"[^а-яА-Яa-zA-Z0-9\%!?\-]+", r" ", cleaned)
    lines_pure.append(cleaned)

In [None]:
def make_pairs(lines_pure):
    """Turn poems into (first two lines, next two lines) training pairs.

    Each poem is split on newlines and whitespace inside every line is
    normalised. Lines are then consumed in groups of four; a group yields a
    pair when both its first two lines and its last two lines together hold
    fewer than 18 words. Trailing lines that do not fill a group are dropped.

    Args:
        lines_pure: iterable of poem texts with newline-separated lines.

    Returns:
        List of ``[source, target]`` string pairs.
    """
    pairs = []
    for poem in lines_pure:
        verses = [' '.join(verse.split()) for verse in poem.split('\n')]
        for start in range(0, len(verses) - 3, 4):
            first, second, third, fourth = verses[start:start + 4]
            head_words = len(first.split()) + len(second.split())
            tail_words = len(third.split()) + len(fourth.split())
            if head_words < 18 and tail_words < 18:
                pairs.append([first + ' ' + second, third + ' ' + fourth])
    return pairs


# [source, target] line pairs extracted from the cleaned poems.
input_rambler = make_pairs(lines_pure)

In [None]:
# Build the vocabulary from both halves of every training pair.
lang = Lang()
for first_half, second_half in input_rambler:
    for sentence in (first_half, second_half):
        lang.addSentence(sentence)

## Модели

In [None]:
class EncoderRNN(nn.Module):
    """Single-layer GRU encoder fed one token at a time.

    There is no trainable embedding: the token id is mapped back to its word
    through the global ``lang`` and the word's vector is read from the global
    fastText ``model``.
    """

    def __init__(self, embedding_size=300, hidden_size=300):
        super().__init__()
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.gru = nn.GRU(embedding_size, hidden_size)

    def forward(self, _input, hidden):
        """Run one GRU step for the single token id held in ``_input``.

        Returns:
            Tuple ``(output, hidden)`` as produced by ``nn.GRU``.
        """
        word = lang.index2word[_input.item()]
        vector = torch.Tensor(model[word].copy()).to(device)
        # A sequence of length one with batch size one.
        return self.gru(vector.view(1, 1, -1), hidden)

    def initHidden(self):
        """Return an all-zero initial hidden state of shape (1, 1, hidden_size)."""
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [None]:
class AttnDecoderRNN(nn.Module):
    """GRU decoder with attention over a fixed number of encoder positions.

    Like the encoder it has no trainable embedding: the previous token id is
    mapped to its word through the global ``lang`` and embedded with the
    global fastText ``model``.

    NOTE(review): ``MAX_LENGTH`` is evaluated when the class is defined and
    is not defined in the visible part of the notebook - it must exist
    before this cell runs.

    Args:
        hidden_size: GRU hidden size (must match the encoder's).
        embedding_size: dimension of the word vectors.
        output_size: number of words the decoder predicts over.
        dropout_p: dropout probability; the layer is created but not applied
            in ``forward``.
        max_length: number of encoder positions the attention spans.
    """

    def __init__(self, hidden_size, embedding_size, output_size, dropout_p=0.1, max_length = MAX_LENGTH):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length
        # Both attention layers take a word vector concatenated with a
        # hidden-sized vector.
        self.attn_size = hidden_size + embedding_size

        self.attn = nn.Linear(self.attn_size, max_length)
        self.attn_combine = nn.Linear(self.attn_size, embedding_size)
        self.dropout = nn.Dropout(dropout_p)
        self.gru = nn.GRU(embedding_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, _input, hidden, encoder_outputs):
        """Run one decoding step.

        Args:
            _input: tensor holding the single id of the previous token.
            hidden: current GRU hidden state.
            encoder_outputs: encoder outputs, one row per input position.

        Returns:
            Tuple ``(log_probs, hidden, attn_weights)``.
        """
        word = lang.index2word[_input.item()]
        embedded = torch.Tensor(model[word].copy()).to(device).view(1, 1, -1)

        # Attention weights come from the current word and the hidden state.
        query = torch.cat((embedded[0], hidden[0]), 1)
        attn_weights = F.softmax(self.attn(query), dim=1)
        # Weighted sum of the encoder outputs.
        context = torch.bmm(attn_weights.unsqueeze(0),
                            encoder_outputs.unsqueeze(0))

        combined = torch.cat((embedded[0], context[0]), 1)
        gru_input = F.relu(self.attn_combine(combined).unsqueeze(0))
        output, hidden = self.gru(gru_input, hidden)

        log_probs = F.log_softmax(self.out(output[0]), dim=1)
        return log_probs, hidden, attn_weights

    def initHidden(self):
        """Return an all-zero initial hidden state of shape (1, 1, hidden_size)."""
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [None]:
def indexesFromSentence(lang, sentence):
    """Map each single-space-separated word of ``sentence`` to its id.

    Words missing from ``lang.word2index`` are mapped to 2, the id reserved
    for unknown words.
    """
    known = lang.word2index
    return [known[word] if word in known else 2 for word in sentence.split(' ')]


def tensorFromSentence(lang, sentence):
    """Encode ``sentence`` as a (length + 1, 1) long tensor ending with EOS."""
    token_ids = indexesFromSentence(lang, sentence)
    token_ids.append(EOS_token)
    column = torch.tensor(token_ids, dtype=torch.long, device=device)
    return column.view(-1, 1)


def tensorsFromPair(pair):
    """Return ``(input_tensor, target_tensor)`` for a ``[source, target]`` pair."""
    source, target = pair[0], pair[1]
    return (tensorFromSentence(lang, source), tensorFromSentence(lang, target))

## Обучение, валидация, тестирование

In [None]:
# Probability with which train() feeds the ground-truth token, rather than
# the decoder's own prediction, as the next decoder input.
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimisation step on a single (input, target) pair.

    The whole step uses teacher forcing with probability
    ``teacher_forcing_ratio``; otherwise the decoder is fed its own greedy
    predictions and decoding stops early once it emits EOS.

    Args:
        input_tensor: column tensor of source token ids.
        target_tensor: column tensor of target token ids.
        encoder, decoder: the models being trained.
        encoder_optimizer, decoder_optimizer: their optimizers.
        criterion: loss applied to each decoder output.
        max_length: number of rows allocated for the encoder outputs.

    Returns:
        The summed loss divided by the target length.
    """
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    target_length = target_tensor.size(0)

    # Encode the source token by token, keeping every output for attention.
    encoder_hidden = encoder.initHidden()
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    for ei in range(input_tensor.size(0)):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    loss = 0
    for di in range(target_length):
        decoder_output, decoder_hidden, _ = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        loss += criterion(decoder_output, target_tensor[di])

        if use_teacher_forcing:
            # Feed the ground-truth token as the next input.
            decoder_input = target_tensor[di]
            continue

        # Feed the decoder's own best guess, detached from the graph.
        _, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()
        if decoder_input.item() == EOS_token:
            break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [None]:
def evaluate(input_tensor, target_tensor, encoder, decoder, criterion, max_length=MAX_LENGTH):
    """Compute the loss of one (input, target) pair without updating the models.

    Decoding is greedy - the decoder is fed its own predictions - and stops
    early once EOS is produced.

    Returns:
        The summed loss divided by the target length.
    """
    with torch.no_grad():
        target_length = target_tensor.size(0)

        # Encode the source token by token, keeping every output for attention.
        encoder_hidden = encoder.initHidden()
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
        for ei in range(input_tensor.size(0)):
            encoder_output, encoder_hidden = encoder(
                input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] = encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)
        decoder_hidden = encoder_hidden

        loss = 0
        for di in range(target_length):
            decoder_output, decoder_hidden, _ = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di])

            _, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()
            if decoder_input.item() == EOS_token:
                break

        return loss.item() / target_length

In [None]:
def trainIters(encoder, decoder, n_epochs, dataset, val_dataset,
               log_file="losses_train.txt",
               log_val_file="losses_val.txt",
               print_every=1000,
               plot_every=100,
               learning_rate=0.01):
    """Train the encoder/decoder pair and validate after every epoch.

    Per epoch: shuffle ``dataset`` in place, train on every pair, save both
    models, then compute the mean validation loss over ``val_dataset`` (also
    shuffled in place) and append it to ``log_val_file``. Every
    ``plot_every`` training pairs the mean training loss of that window is
    appended to ``log_file`` and plotted with ``showPlot``.

    Args:
        encoder, decoder: models to train.
        n_epochs: number of passes over ``dataset``.
        dataset: list of ``[source, target]`` training pairs.
        val_dataset: list of ``[source, target]`` validation pairs.
        log_file: path the training-loss log is appended to.
        log_val_file: path the validation-loss log is appended to.
        print_every: accepted for compatibility; currently unused.
        plot_every: size of the training-loss averaging window.
        learning_rate: SGD learning rate for both models.

    Side effects:
        Increments the global ``cnt_run_func``, which is part of the saved
        model file names.
    """
    global cnt_run_func
    cnt_run_func += 1
    plot_losses = []

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()

    for epoch in tqdm(range(n_epochs)):
        # The step counter restarts every epoch, so the loss window must
        # restart with it. Otherwise the losses left over from an epoch whose
        # length is not a multiple of plot_every would be added to the first
        # window of the next epoch and inflate its average.
        plot_loss_total = 0

        random.shuffle(dataset)
        training_pairs = [tensorsFromPair(pair) for pair in dataset]
        for idx, (input_tensor, target_tensor) in enumerate(tqdm(training_pairs), start=1):
            plot_loss_total += train(
                input_tensor,
                target_tensor,
                encoder,
                decoder,
                encoder_optimizer,
                decoder_optimizer,
                criterion
            )

            if idx % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0
                with open(log_file, 'a', encoding="utf-8") as fout:
                    print(idx, plot_loss_avg, file=fout)
                showPlot(plot_losses)

        torch.save(encoder, "enc_epoch_400_{0}_{1}.pt".format(cnt_run_func, epoch))
        torch.save(decoder, "dec_epoch_400_{0}_{1}.pt".format(cnt_run_func, epoch))

        random.shuffle(val_dataset)
        val_pairs = [tensorsFromPair(pair) for pair in val_dataset]

        total_val_loss = 0
        for input_tensor, target_tensor in tqdm(val_pairs):
            total_val_loss += evaluate(
                input_tensor,
                target_tensor,
                encoder,
                decoder,
                criterion
            )

        with open(log_val_file, 'a', encoding="utf-8") as fout:
            print(epoch, total_val_loss / len(val_dataset), file=fout)

In [None]:
def evaluate_words(encoder, decoder, sentence, max_length=MAX_LENGTH):
    """Greedily decode a continuation for ``sentence``.

    Args:
        encoder, decoder: trained models.
        sentence: source text; it is split on single spaces and looked up in
            the global ``lang``.
        max_length: maximum number of decoding steps and number of encoder
            positions the attention spans.

    Returns:
        Tuple ``(decoded_words, attentions)``: the decoded words (ending with
        ``'<EOS>'`` when the decoder stopped by itself) and the attention
        weights of every executed step.
    """
    with torch.no_grad():
        input_tensor = tensorFromSentence(lang, sentence)

        # Encode the source token by token, keeping every output for attention.
        encoder_hidden = encoder.initHidden()
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
        for ei in range(input_tensor.size()[0]):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data

            _, topi = decoder_output.data.topk(1)
            best = topi.item()
            if best == EOS_token:
                decoded_words.append('<EOS>')
                break
            decoded_words.append(lang.index2word[best])

            decoder_input = topi.squeeze().detach()

        return decoded_words, decoder_attentions[:di + 1]

In [None]:
def evaluateRandomly(encoder, decoder, n=10):
    """Print ``n`` random training pairs alongside the model's decoding.

    For each pair the source is printed after ``>``, the reference after
    ``=`` and the model output after ``<``, followed by an empty line.
    """
    for _ in range(n):
        pair = random.choice(train_dataset)
        source, reference = pair[0], pair[1]
        print('>', source)
        print('=', reference)
        decoded, _ = evaluate_words(encoder, decoder, source)
        print('<', ' '.join(decoded))
        print('')

## Запуски

In [None]:
# Model sizes: embedding_size is the dimension the encoder and decoder GRUs
# expect from the word vectors.
hidden_size = 200
embedding_size = 300
enc = EncoderRNN(embedding_size=embedding_size, hidden_size=hidden_size).to(device)
# The decoder predicts over every word known to the vocabulary.
dec = AttnDecoderRNN(hidden_size, embedding_size, lang.n_words).to(device)

In [None]:
# Number of trainIters() runs so far; trainIters increments it and puts it
# into the names of the saved model files.
cnt_run_func = 0

In [None]:
# Train for 5 epochs.
# NOTE(review): train_dataset and test_dataset are not defined in the visible
# part of the notebook.
trainIters(enc, dec, 5, train_dataset, test_dataset, plot_every=100, learning_rate=0.005) 

In [None]:
# Persist the trained modules as whole pickled objects.
torch.save(enc, 'enc5epoch.pt')
torch.save(dec, 'dec5epoch.pt')

In [None]:
# The vocabulary was built from lower-cased text, so the seed is lower-cased
# too; otherwise its capitalised first word would be treated as unknown.
evaluate_words(enc, dec, 'Я иду по улице'.lower())

## Примеры сгенерированных текстов

иивиив

вивииви

## Четвертая модель - Char-based подход с LSTM

## Импорты

In [None]:
from collections import Counter

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm.notebook import tqdm

## Предобработка

In [None]:
TRAIN_TEXT_FILE_PATH = 'text.txt'

# Decode explicitly as UTF-8: without it the platform default encoding is
# used, which is not guaranteed to handle the Cyrillic corpus.
with open(TRAIN_TEXT_FILE_PATH, encoding='utf-8') as text_file:
    text_sample = text_file.readlines()
# readlines() keeps the newline characters; joining with ' ' additionally
# puts a space in front of every line but the first.
text_sample = ' '.join(text_sample)

def text_to_seq(text_sample):
    """Encode ``text_sample`` as character ids ranked by frequency.

    The most frequent character gets id 0; characters with equal counts keep
    the order of their first appearance. The ranked character list is printed.

    Returns:
        Tuple ``(sequence, char_to_idx, idx_to_char)``: the text as a numpy
        array of ids and the two lookup dictionaries.
    """
    ranked = Counter(text_sample).most_common()
    sorted_chars = [char for char, _ in ranked]
    print(sorted_chars)

    char_to_idx = {}
    idx_to_char = {}
    for index, char in enumerate(sorted_chars):
        char_to_idx[char] = index
        idx_to_char[index] = char

    sequence = np.array([char_to_idx[char] for char in text_sample])
    return sequence, char_to_idx, idx_to_char

# Encode the whole corpus once; the lookup tables are reused for generation.
sequence, char_to_idx, idx_to_char = text_to_seq(text_sample)

In [None]:
CHUNK_LEN = 3000
BATCH_SIZE = 16
SEQ_LEN = 200

def get_batch(sequence):
    """Sample a batch of random corpus chunks for next-character prediction.

    ``BATCH_SIZE`` chunks of ``CHUNK_LEN`` ids are cut at random offsets. For
    each chunk the input is the chunk without its last id and the target is
    the chunk without its first id, i.e. the input shifted by one.

    The previous version also derived two padded ``start``/``end`` arrays
    from the positions of token id 8 inside the chunk. They were never
    returned or used, depended on a hard-coded id and raised IndexError when
    a chunk held fewer than four such tokens, so that code was removed. The
    returned tensors are unchanged.

    Args:
        sequence: 1-D numpy array of character ids, longer than ``CHUNK_LEN``.

    Returns:
        Tuple ``(trains, targets)`` of long tensors, each of shape
        (BATCH_SIZE, CHUNK_LEN - 1, 1).
    """
    trains = []
    targets = []
    for _ in range(BATCH_SIZE):
        batch_start = np.random.randint(0, len(sequence) - CHUNK_LEN)
        chunk = sequence[batch_start:batch_start + CHUNK_LEN]
        trains.append(torch.LongTensor(chunk[:-1]).view(-1, 1))
        targets.append(torch.LongTensor(chunk[1:]).view(-1, 1))
    return torch.stack(trains, dim=0), torch.stack(targets, dim=0)

## Модель

In [None]:
class TextRNN(nn.Module):
    """Character-level language model: embedding -> LSTM -> dropout -> linear.

    Args:
        input_size: number of distinct characters (embedding rows and output
            classes).
        hidden_size: LSTM hidden size.
        embedding_size: character embedding dimension.
        n_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, embedding_size, n_layers=1):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.n_layers = n_layers

        self.encoder = nn.Embedding(input_size, embedding_size)
        self.lstm = nn.LSTM(embedding_size, hidden_size, n_layers)
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(hidden_size, input_size)

    def forward(self, x, hidden):
        """Return ``(logits, (h, c))`` for a batch of character ids.

        ``x`` carries a trailing singleton dimension at index 2, which is
        squeezed away after the embedding lookup.
        """
        embedded = self.encoder(x).squeeze(2)
        out, state = self.lstm(embedded, hidden)
        logits = self.fc(self.dropout(out))
        return logits, state

    def init_hidden(self, batch_size=1):
        """Return zero-initialised ``(h, c)`` states on the global ``device``."""
        def zeros():
            return torch.zeros(self.n_layers, batch_size, self.hidden_size, requires_grad=True).to(device)

        return (zeros(), zeros())

In [None]:
def evaluate(model, char_to_idx, idx_to_char, start_text=' ', prediction_len=200, temp=0.3):
    """Generate ``prediction_len`` characters continuing ``start_text``.

    The model is first primed with the whole ``start_text``; the last priming
    character is then fed once more as the first generation input. Each next
    character is sampled from the softmax of the logits divided by ``temp``.

    Args:
        model: trained ``TextRNN``.
        char_to_idx: character -> id mapping.
        idx_to_char: id -> character mapping.
        start_text: priming text.
        prediction_len: number of characters to sample.
        temp: sampling temperature.

    Returns:
        ``start_text`` followed by the sampled characters.
    """
    hidden = model.init_hidden()
    prime_ids = [char_to_idx[char] for char in start_text]
    train = torch.LongTensor(prime_ids).view(-1, 1, 1).to(device)

    # Warm up the hidden state on the priming text.
    _, hidden = model(train, hidden)
    inp = train[-1].view(-1, 1, 1)

    pieces = [start_text]
    vocab_size = len(char_to_idx)
    for _ in range(prediction_len):
        output, hidden = model(inp.to(device), hidden)
        output_logits = output.cpu().data.view(-1)
        p_next = F.softmax(output_logits / temp, dim=-1).detach().cpu().data.numpy()
        top_index = np.random.choice(vocab_size, p=p_next)
        inp = torch.LongTensor([top_index]).view(-1, 1, 1).to(device)
        pieces.append(idx_to_char[top_index])

    return ''.join(pieces)

In [None]:
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Two-layer character model over the vocabulary built by text_to_seq.
model = TextRNN(input_size=len(idx_to_char), hidden_size=128, embedding_size=128, n_layers=2)
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, amsgrad=True)
# Halve the learning rate when the loss passed to scheduler.step() has not
# improved for more than `patience` consecutive calls.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    patience=5, 
    verbose=True, 
    factor=0.5
)

# Number of training iterations (one random batch each) and the buffer of
# recent losses that the training loop averages.
n_epochs = 50000
loss_avg = []

## Обучение

In [None]:
# One random batch per iteration; every 50 iterations the mean loss is
# reported, handed to the LR scheduler, and a text sample is printed.
for epoch in tqdm(range(n_epochs)):
    model.train()
    train, target = get_batch(sequence)
    # (batch, time, 1) -> (time, batch, 1): the LSTM is time-major.
    train = train.permute(1, 0, 2).to(device)
    target = target.permute(1, 0, 2).to(device)
    hidden = model.init_hidden(BATCH_SIZE)

    output, hidden = model(train, hidden)
    # CrossEntropyLoss expects (batch, classes, time) logits against
    # (batch, time) targets.
    batch_logits = output.permute(1, 2, 0)
    batch_targets = target.squeeze(-1).permute(1, 0)
    loss = criterion(batch_logits, batch_targets)

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    loss_avg.append(loss.item())
    if len(loss_avg) < 50:
        continue

    mean_loss = np.mean(loss_avg)
    print(f'Loss: {mean_loss}')
    scheduler.step(mean_loss)
    loss_avg = []
    model.eval()
    predicted_text = evaluate(model, char_to_idx, idx_to_char)
    print(predicted_text)

## Примеры сгенерированных текстов

они
и со мне под полодой
и смерный не свете
а следно солнце под просто с него страсть
и в как и страшной слова
в волной приветных волной
на сердце просто на
но в просто с бедет в полодом

любить с ней
не полно страстно и странно под закат
с тобой воздух последний страсть
не поможет заметел
не столом на тебя не старой
и в поле и страна на восторги
не под море в колонный страсти
н

волненье мои весна и без долго по стране
в сердце стоял и свой бог стало в стороне
и в запад и не забыл и под своей полей
и тебе и в поле солнце простор не смело
и странный в своей полустомеровой

волненье далекий под великой
как в закате под странном простор как в сердце поставил
в родном поле и страсти солнце в стены
как только в странном стола собой
не просто пред поле старинный страны

## Пятая модель - Seq2Seq подход

## Импорты

In [None]:
# for pad_sequences function
import tensorflow as tf

# train-test-split function
from sklearn.model_selection import train_test_split

import heapq

import os
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import spacy

import random
import math
import time
from tqdm import tqdm

## Предобработка

In [None]:
# Use the GPU when CUDA is available, otherwise the CPU; the data tensors and
# the model below are moved to this device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

In [None]:
# Keep only the first 30000 rows of the CSV.
# NOTE(review): the name `train` is rebound to the training function defined
# further down, so this cell and the build_input call that reads
# train.start / train.end must run before that definition.
train = pd.read_csv('../input/for-stihi/stihiPandas.csv')[:30000]
# Drop the 'Unnamed: 0' column (presumably a saved index — confirm against the CSV).
train.drop('Unnamed: 0', axis=1, inplace=True)
print(train.shape)
train.sample(5)

In [None]:
import pickle

# Load the pickled tokenizer object.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open('../input/for-stihi/tokenizer (2).pickle', 'rb') as f:
    tokenizer = pickle.load(f)

In [None]:
## Max sentence len
# build_input pads/truncates source sequences to max_start_len and target
# sequences to max_end_len.
# NOTE(review): Seq2Seq.forward decodes 50 steps at inference and get_preds
# allocates max_end_len columns, so max_end_len has to stay in sync with that 50.
max_start_len = 50
max_end_len = 50

## Data Preparation

In [None]:
def build_input(tokenizer, sequences, targets=None):
    """Tokenize and pad source lines (and optionally target lines).

    Args:
        tokenizer: Object exposing ``texts_to_sequences``.
        sequences: Iterable of source strings.
        targets: Optional iterable of target strings. Each one is wrapped in
            "<UNK>" markers before tokenization so that it starts and ends
            with the same marker id (presumably the tokenizer's OOV id, 1 —
            cf. SOS_IDX / EOS_IDX; confirm against the tokenizer).

    Returns:
        Tuple ``(input_ids, attention_mask, targets_ids)``:
        ``input_ids`` is an int32 array padded/truncated at the end to
        ``max_start_len``; ``attention_mask`` has the same shape with 1 where
        the id is positive and 0 on padding; ``targets_ids`` is the analogous
        array of width ``max_end_len``, or ``None`` when *targets* is ``None``.
    """
    input_ids = tf.keras.preprocessing.sequence.pad_sequences(
        tokenizer.texts_to_sequences(sequences),
        maxlen=max_start_len,
        dtype="int32",
        padding="post",
        truncating="post",
        value=0,
    )

    targets_ids = None
    if targets is not None:
        wrapped_targets = ["<UNK> " + t + " <UNK>" for t in targets]
        targets_ids = tf.keras.preprocessing.sequence.pad_sequences(
            tokenizer.texts_to_sequences(wrapped_targets),
            maxlen=max_end_len,
            dtype="int32",
            padding="post",
            truncating="post",
            value=0,
        )

    # Vectorised mask: one comparison over the whole array instead of a Python
    # loop per token; it also keeps the 2-D integer shape when there are no rows.
    attention_mask = (input_ids > 0).astype(int)

    return input_ids, attention_mask, targets_ids

In [None]:
# Source ids, their padding mask and target ids for the training dataframe
# (`start` column -> `end` column).
input_ids, attention_mask, targets_ids = build_input(tokenizer, train.start.values, train.end.values)

In [None]:
# Split inputs, targets and masks in a single call so that all three arrays
# share exactly the same 80/20 partition.
(
    train_inputs, validation_inputs,
    train_targets, validation_targets,
    train_masks, validation_masks,
) = train_test_split(
    input_ids, targets_ids, attention_mask,
    random_state=40,
    test_size=0.2,
)

In [None]:
# Convert the training arrays to int64 tensors placed on the target device.
train_inputs = torch.tensor(train_inputs, dtype=torch.long, device=device)
train_targets = torch.tensor(train_targets, dtype=torch.long, device=device)
train_masks = torch.tensor(train_masks, dtype=torch.long, device=device)

In [None]:
# Convert the validation arrays to int64 tensors placed on the target device.
validation_inputs = torch.tensor(validation_inputs, dtype=torch.long, device=device)
validation_targets = torch.tensor(validation_targets, dtype=torch.long, device=device)
validation_masks = torch.tensor(validation_masks, dtype=torch.long, device=device)

In [None]:
# Each dataset item is an (inputs, targets, masks) triple.
train_data = torch.utils.data.TensorDataset(train_inputs, train_targets, train_masks)
validation_data = torch.utils.data.TensorDataset(validation_inputs, validation_targets, validation_masks)

# Training batches are drawn in random order, validation batches in dataset order.
train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=8, shuffle=True)
validation_dataloader = torch.utils.data.DataLoader(validation_data, batch_size=8, shuffle=False)

## Embedding

In [None]:
class Embedding(nn.Module):
    """Thin wrapper around ``nn.Embedding`` that also records its dimensions."""

    def __init__(self, vocab_size, embed_dim):
        super().__init__()

        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_dim)
        self.embed_dim = embed_dim
        self.vocab_size = vocab_size

    def forward(self, input_ids):
        """Look up one ``embed_dim`` vector per token id.

        input_ids = integer tensor of token ids, e.g. [batch_size, max_len]
        returns   = input_ids.shape + [embed_dim]
        """
        vectors = self.embedding(input_ids)
        return vectors

## GRU Encoder

In [None]:
class GRUEncoder(nn.Module):
    """Single-layer, unidirectional GRU encoder for batch-first embeddings."""

    def __init__(self, embed_dim, hidden_dim):
        super().__init__()

        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim

        # nn.GRU defaults to time-major input: [seq_len, batch_size, embed_dim].
        self.encoder = nn.GRU(self.embed_dim, self.hidden_dim)

    def forward(self, input_ids):
        '''
        Encode a batch of embedded sequences.

        input_ids = [batch_size, max_start_len, embed_dim] (already embedded)

        Returns:
            encoder_outputs = [batch_size, max_start_len, hidden_dim]
            hidden          = [batch_size, 1, hidden_dim] (final GRU state)
        '''
        # The axes must be swapped with permute. A view/reshape keeps the
        # memory order and would interleave time steps of different samples.
        time_major = input_ids.permute(1, 0, 2).contiguous()

        # time_major = [max_start_len, batch_size, embed_dim]

        encoder_outputs, hidden = self.encoder(time_major)

        # encoder_outputs = [max_start_len, batch_size, hidden_dim]
        # hidden = [1, batch_size, hidden_dim]

        encoder_outputs = encoder_outputs.permute(1, 0, 2).contiguous()
        hidden = hidden.permute(1, 0, 2).contiguous()

        return encoder_outputs, hidden

In [None]:
class Attention(nn.Module):
    """Dot-product attention of one decoder state over the encoder outputs."""

    def __init__(self, hidden_dim):
        super().__init__()

        self.hidden_dim = hidden_dim

        # Learned projections: query from the decoder state, keys and values
        # from the encoder outputs. All of them keep the hidden_dim width.
        self.q = nn.Linear(hidden_dim, hidden_dim)
        self.k = nn.Linear(hidden_dim, hidden_dim)
        self.v = nn.Linear(hidden_dim, hidden_dim)

    def forward(self, hidden, encoder_outputs, mask):
        '''
        hidden = [batch_size, 1, hidden_dim] (torch.bmm below requires a 3-D query)
        encoder_outputs = [batch_size, max_start_len, hidden_dim]
        mask = [batch_size, max_start_len], 0 marks padding positions

        Returns:
            weights = [batch_size, max_start_len], softmax over source positions
            values  = [batch_size, max_start_len, hidden_dim], projected encoder outputs
        '''
        projected_query = self.q(hidden)
        projected_keys = self.k(encoder_outputs)
        projected_values = self.v(encoder_outputs)

        # scores = [batch_size, max_start_len]
        scores = torch.bmm(projected_query, projected_keys.transpose(1, 2)).squeeze(1)

        # A large negative score drives the softmax weight of pad tokens to zero.
        scores = scores.masked_fill(mask == 0, -1e10)

        weights = F.softmax(scores, dim=-1)
        return weights, projected_values

## Decoder

In [None]:
class Decoder(nn.Module):
    """One-step GRU decoder that attends over the encoder outputs."""

    def __init__(self, output_dim, embed_dim, hidden_dim, attention):
        super().__init__()

        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        self.attention = attention

        # The GRU consumes the token embedding concatenated with the attention context.
        self.rnn = nn.GRU(hidden_dim + embed_dim, hidden_dim)

        # The output layer sees the GRU output, the context and the embedding.
        self.out = nn.Linear(2*hidden_dim + embed_dim, output_dim)

    def forward(self, input, hidden, encoder_outputs, mask):
        '''
        Run a single decoding step.

        input = [batch_size, embed_dim] (embedding of the previous token)
        hidden = [batch_size, 1, hidden_dim]
        encoder_outputs = [batch_size, max_start_len, hidden_dim]
        mask = [batch_size, max_start_len]

        Returns:
            logits  = [batch_size, output_dim]
            hidden  = [batch_size, 1, hidden_dim]
            weights = [batch_size, max_start_len]
        '''
        token_emb = input

        # attn_weights = [batch_size, max_start_len]
        # attn_values = [batch_size, max_start_len, hidden_dim]
        attn_weights, attn_values = self.attention(hidden, encoder_outputs, mask)

        # Attention-weighted sum of the values: [batch_size, hidden_dim]
        context = torch.bmm(attn_weights.unsqueeze(1), attn_values).squeeze(1)

        # rnn_input = [1, batch_size, embed_dim + hidden_dim]
        rnn_input = torch.cat((token_emb, context), dim=1).unsqueeze(0)

        # The GRU wants its state as [1, batch_size, hidden_dim].
        output, hidden = self.rnn(rnn_input, hidden.transpose(0, 1))

        # One time step, one layer, one direction: the output is the new state.
        assert (output == hidden).all()

        features = torch.cat((output.squeeze(0), context, token_emb), dim=1)
        logits = self.out(features)

        return logits, hidden.permute(1, 0, 2), attn_weights

## Main Model

In [None]:
class Seq2Seq(nn.Module):
    """Encoder-decoder model that ties the embedding, encoder and decoder together."""

    def __init__(self, embedding, encoder, decoder, sos_idx, eos_idx, device, dropout_rate=0.1):
        super().__init__()

        self.embedding = embedding
        self.encoder = encoder
        self.decoder = decoder
        # Dropout is applied to the embedded source sequence only.
        self.dropout = nn.Dropout(dropout_rate)

        self.sos_idx = sos_idx
        self.eos_idx = eos_idx

        self.device = device

    def forward(self, input_ids, attention_mask, target_ids=None, teacher_forcing_ratio=0.5, max_len=50):
        '''
        Decode a target sequence step by step.

        input_ids = [batch_size, max_start_len]
        attention_mask = [batch_size, max_start_len]
        target_ids = [batch_size, max_end_len] or None (during inference)
        teacher_forcing_ratio = probability, drawn once per step for the whole
            batch, of feeding the reference token instead of the prediction
        max_len = number of decoding positions when target_ids is None
            (previously fixed at 50, which remains the default)

        Returns:
            outputs    = [batch_size, max_end_len, vocab_size]; position 0 stays zero
            attentions = [batch_size, max_end_len, max_start_len]; position 0 stays zero
        '''

        if target_ids is None:
            assert teacher_forcing_ratio == 0, "Must be zero during inference"
            # Placeholder targets: only column 0 (the start token) is ever read.
            target_ids = torch.zeros(
                (input_ids.shape[0], max_len), dtype=torch.long, device=self.device
            ).fill_(self.sos_idx)

        batch_size = input_ids.shape[0]
        max_start_len = input_ids.shape[1]
        max_end_len = target_ids.shape[1]
        vocab_size = self.decoder.output_dim

        # encoder_outputs = [batch_size, max_start_len, hidden_dim]
        # hidden = [batch_size, 1, hidden_dim]

        embedded = self.dropout(self.embedding(input_ids))
        encoder_outputs, hidden = self.encoder(embedded)

        # The first decoder input is the start token taken from the targets.
        # tgt = [batch_size]
        tgt = target_ids[:, 0]

        # Allocate the result buffers directly on the target device.
        outputs = torch.zeros((batch_size, max_end_len, vocab_size), device=self.device)
        attentions = torch.zeros((batch_size, max_end_len, max_start_len), device=self.device)

        for t in range(1, max_end_len):
            output, hidden, attention = self.decoder(
                self.embedding(tgt), hidden, encoder_outputs, attention_mask
            )

            # output = [batch_size, vocab_size]
            # hidden = [batch_size, 1, hidden_dim]
            # attention = [batch_size, max_start_len]

            outputs[:, t, :] = output
            attentions[:, t, :] = attention

            # Next input: the reference token (teacher forcing) or the argmax prediction.
            teacher_force = random.random() < teacher_forcing_ratio
            top1 = output.max(1)[1]
            tgt = target_ids[:, t] if teacher_force else top1

        return outputs, attentions

In [None]:
pretrained = False

# Token ids come from tokenizer.word_index, which numbers words from 1 while 0
# is used for padding. The embedding table and the output layer therefore need
# len(word_index) + 1 rows; len(word_counts) is smaller than the largest id + 1
# and would make the rarest ids index out of range.
OUTPUT_DIM = len(tokenizer.word_index) + 1
EMBED_DIM = 128
HIDDEN_DIM = 128
DROPOUT_RATE = 0.2
# Start and end of a target share id 1 (build_input wraps targets in the same
# marker on both sides); id 0 is padding.
SOS_IDX = 1#tokenizer.bos_token_id
EOS_IDX = 1#tokenizer.eos_token_id
PAD_IDX = 0#tokenizer.pad_token_id

embedding = Embedding(OUTPUT_DIM, EMBED_DIM)
encoder = GRUEncoder(EMBED_DIM, HIDDEN_DIM)
attention = Attention(HIDDEN_DIM)
decoder = Decoder(OUTPUT_DIM, EMBED_DIM, HIDDEN_DIM, attention)

def init_weights(m):
    """Initialise every weight of *m* from N(0, 0.01) and every other parameter to 0."""
    for name, param in m.named_parameters():
        if 'weight' in name:
            nn.init.normal_(param.data, mean=0, std=0.01)
        else:
            nn.init.constant_(param.data, 0)

# Module.apply visits every submodule and named_parameters() is recursive, so
# nested parameters are re-initialised more than once; the end result is the same.
# The embedding keeps its default initialisation.
encoder.apply(init_weights)
attention.apply(init_weights)
decoder.apply(init_weights)

model = Seq2Seq(embedding, encoder, decoder, SOS_IDX, EOS_IDX, device, DROPOUT_RATE).to(device)

print("Initialized new model")

In [None]:
def count_parameters(model):
    """Return the total number of elements in the trainable parameters of *model*."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the trainable parameter count with thousands separators.
print(f'Модель содержит {count_parameters(model):,} параметров')

In [None]:
# Adam over all Seq2Seq parameters with a learning rate of 5e-5.
optimizer = optim.Adam(model.parameters(), lr=0.5e-4)

In [None]:
# Target positions equal to PAD_IDX do not contribute to the loss.
criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

## Train & eval step

In [None]:
def train(model, iterator, optimizer, criterion, clip):
    """Train *model* for one pass over *iterator* and return the mean batch loss.

    Args:
        model: Module called as ``model(inputs, masks, targets)`` that returns
            ``(outputs, attentions)``.
        iterator: Sized iterable of ``(inputs, targets, masks)`` batches.
        optimizer: Optimizer that updates the model parameters.
        criterion: Loss taking flattened logits and flattened target ids.
        clip: Maximum gradient norm passed to ``clip_grad_norm_``.

    Returns:
        Sum of the per-batch losses divided by ``len(iterator)``.
    """
    model.train()

    epoch_loss = 0
    total_batches = len(iterator)

    # Wrap the iterable itself (not enumerate) so tqdm can read its length
    # and show real progress.
    for i, batch in enumerate(tqdm(iterator)):
        inputs, targets, masks = batch

        inputs = inputs.to(device)
        targets = targets.to(device)
        masks = masks.to(device)

        optimizer.zero_grad()

        outputs, attentions = model(inputs, masks, targets)

        # Position 0 holds no prediction (it corresponds to the start token),
        # so drop it and flatten:
        # outputs = [batch_size * (max_len-1), vocab_size]
        # targets = [batch_size * (max_len-1)]

        outputs = outputs[:, 1:, :].contiguous().view(-1, outputs.shape[-1])
        targets = targets[:, 1:].contiguous().view(-1)

        loss = criterion(outputs, targets)

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        epoch_loss += loss.item()

        # i is zero-based: i + 1 batches have been processed at this point, so
        # both the counters and the running mean must use i + 1.
        batches_done = i + 1
        if batches_done % 500 == 0:
            print(f"{batches_done} mini-batches done. There is {total_batches - batches_done} more")
            print("Current loss: ", epoch_loss / batches_done)

    return epoch_loss / len(iterator)

In [None]:
def evaluate(model, iterator, criterion):
    """Return the mean batch loss of *model* over *iterator* without teacher forcing.

    The model is put in eval mode and called without targets, so every
    decoding step is fed the model's own previous prediction. Gradients are
    disabled for the whole pass.
    """
    model.eval()

    total_loss = 0

    with torch.no_grad():
        for inputs, targets, masks in iterator:
            inputs = inputs.to(device)
            targets = targets.to(device)
            masks = masks.to(device)

            logits, _ = model(inputs, masks, teacher_forcing_ratio=0)

            # Position 0 holds no prediction (start token), so drop it and flatten:
            # flat_logits = [batch_size * (max_len-1), vocab_size]
            # flat_targets = [batch_size * (max_len-1)]
            vocab_size = logits.shape[-1]
            flat_logits = logits[:, 1:, :].contiguous().view(-1, vocab_size)
            flat_targets = targets[:, 1:].contiguous().view(-1)

            total_loss += criterion(flat_logits, flat_targets).item()

    return total_loss / len(iterator)

## Training Loop

In [None]:
def epoch_time(start_time, end_time):
    """Split the interval between two timestamps (in seconds) into whole minutes and seconds."""
    duration = end_time - start_time
    minutes = int(duration / 60)
    seconds = int(duration - 60 * minutes)
    return minutes, seconds

In [None]:
N_EPOCHS = 5
# Maximum gradient norm used by train().
clip = 1

best_valid_loss = float("inf")

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss = train(model, train_dataloader, optimizer, criterion, clip)
    validation_loss = evaluate(model, validation_dataloader, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    # Keep only the checkpoint with the lowest validation loss seen so far.
    if validation_loss < best_valid_loss:
        best_valid_loss = validation_loss
        torch.save(model.state_dict(), "pytorch_best_model.pt")

    print(f'Эпоха: {epoch+1:02} | Время: {epoch_mins}m {epoch_secs}s')
    # Interpolate the losses into the message; passing `{train_loss}` as a
    # separate argument printed a one-element set such as "{4.2}".
    print(f'(обучение): {train_loss}')
    print(f'(валидация): {validation_loss}')

In [None]:
def get_preds(model, batch_size, tokenized_texts, masks):
    """Greedy-decode *tokenized_texts* in chunks of *batch_size* rows.

    Returns an int32 array of shape (number of rows, max_end_len) holding the
    argmax token id for every decoding position.
    """
    model.eval()

    n_rows = tokenized_texts.shape[0]
    result = np.zeros((n_rows, max_end_len), dtype=np.int32)

    with torch.no_grad():
        for start in tqdm(range(0, n_rows, batch_size)):
            stop = start + batch_size
            outputs, _ = model(
                tokenized_texts[start:stop],
                masks[start:stop],
                teacher_forcing_ratio=0,
            )
            # preds = [batch_size, max_end_len]
            _, preds = outputs.max(2)

            result[start:stop] = preds.cpu().numpy()

    return result

In [None]:
# Rows 2000-2499 of the same CSV serve as the demo/test set.
# NOTE(review): these rows lie inside the first 30000 rows that were loaded for
# training and validation, so they are not held-out data.
test = pd.read_csv('../input/for-stihi/stihiPandas.csv')[2000:2500]
test.drop('Unnamed: 0', axis=1, inplace=True)
print(test.shape)
test.head(5)

In [None]:
# Plain Python list of the source strings from the `start` column.
test_mas_start = list(test.start)

In [None]:
# Tokenize and pad the test pairs exactly like the training data.
test_ids, test_masks, test_targets = build_input(tokenizer, test.start.values, test.end.values)

In [None]:
# Convert the test ids and masks to int64 tensors placed on the target device.
test_ids = torch.tensor(test_ids, dtype=torch.long, device=device)
test_masks = torch.tensor(test_masks, dtype=torch.long, device=device)

In [None]:
# Greedy predictions for the test rows, decoded 32 rows at a time.
test_preds = get_preds(model, 32, test_ids, test_masks)

In [None]:
# Drop column 0: the decoder never writes position 0, so it carries no prediction.
test_preds = test_preds[:, 1:]

In [None]:
# Drop the leading marker that build_input prepends to every target.
test_targets = test_targets[:, 1:]

In [None]:
# Inverse vocabulary (token id -> word) used to turn predictions back into text.
reverse_word_map = {index: word for word, index in tokenizer.word_index.items()}

In [None]:
def sequence_to_text(list_of_indices):
    """Join the words of a token-id sequence with spaces, skipping ids 0 and 1.

    Ids 0 (padding) and 1 (start/end marker) are dropped; every other id is
    looked up in the module-level ``reverse_word_map``.
    """
    kept_ids = (token_id for token_id in list_of_indices if token_id not in (0, 1))
    return ' '.join([reverse_word_map.get(token_id) for token_id in kept_ids])

In [None]:
# Decode reference and predicted id rows back into text, row by row.
n_examples = test_targets.shape[0]
y_true = [sequence_to_text(list(test_targets[row])) for row in range(n_examples)]
y_pred = [sequence_to_text(list(test_preds[row])) for row in range(n_examples)]

In [None]:
def make_stih(tokenizer, input_str, model):
    """Generate the continuation line for a single source line.

    Args:
        tokenizer: Tokenizer passed on to ``build_input``.
        input_str: Source line of text.
        model: Trained seq2seq model passed on to ``get_preds``.

    Returns:
        The greedy-decoded continuation as a space-joined string.
    """
    # A one-element list is enough for build_input. The temporary DataFrame and
    # the per-call rebuild of the reverse vocabulary (never used here, since
    # sequence_to_text reads the module-level map) were pure overhead.
    ids, masks, _ = build_input(tokenizer, [input_str], None)
    ids = torch.tensor(ids).long().to(device)
    masks = torch.tensor(masks).long().to(device)
    preds = get_preds(model, 32, ids, masks)
    # Column 0 carries no prediction (the decoder starts writing at position 1).
    preds = preds[:, 1:]
    return sequence_to_text(list(preds[0]))

In [None]:
def make_long_stih(tokenizer, input_str, model, num_str):
    """Print *input_str* followed by a chain of generated lines.

    The first line is generated from *input_str*; each of the following
    *num_str* lines is generated from the line before it, so ``num_str + 1``
    generated lines are printed in total. Nothing is returned.
    """
    lines = [make_stih(tokenizer, input_str, model)]
    for _ in range(num_str):
        lines.append(make_stih(tokenizer, lines[-1], model))

    print(input_str)
    for line in lines:
        print(line)

In [None]:
# Demo: generate a single continuation line for a hand-written prompt.
make_stih(tokenizer, 'а я на солнышке ляжу', model)

In [None]:
# Demo: print the prompt followed by a chain of generated lines (num_str=3).
make_long_stih(tokenizer, 'стихотворение моей мечты', model, 3)

## Примеры сгенерированных текстов

куда мы идем
но на ветках страшно первой неяркой траве
корабли на маскировочных сетках как невесты стоят на неве
сколько в этот дом любимый жизнь моя поменьше и тревога
и жить как и ты научил меня
и не в этом доме давно ли звучали светло голоса
но на родные предметы и лица не дает на

стихотворение моей мечты
и осталась жить не потому ли
что не в твоем дому рожденный первых испытаний седина
но тихо встань и подойди к столу переступая с тобой
и в такую осень родилась начало дня

гучи гэнг
и в облетишь ли и твою в собственность охотно примет
что жил на свете шекспир или не жил честное слово неважно и слез