# Генерация текстов на основе LSTM

In [17]:
import re
import random
import nltk
import torch
import zipfile
import sqlite3
import numpy as np
import pandas as pd
from collections import Counter
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
from nltk.corpus import wordnet

nltk.download('punkt')
nltk.download('wordnet')
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cuda


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
!wget https://s3.amazonaws.com/text-datasets/nietzsche.txt

--2023-11-15 16:36:38--  https://s3.amazonaws.com/text-datasets/nietzsche.txt
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.68.166, 52.217.230.208, 54.231.203.168, ...
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.68.166|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 600901 (587K) [text/plain]
Saving to: ‘nietzsche.txt’


2023-11-15 16:36:39 (1.88 MB/s) - ‘nietzsche.txt’ saved [600901/600901]



In [3]:
with open('nietzsche.txt', 'r', encoding='utf-8') as file:
  text = file.read().lower()

длина всего корпуса:

In [None]:
len(text)

600893

количество предложений:

In [None]:
len(nltk.tokenize.sent_tokenize(text))

2864

всего уникальных символов:

In [None]:
len(Counter(text))

57

Построим torch vocabulary из уникальных символов для обучения будущей модели. В данном случае целевой переменной будет следующий символ за последовательностью символов:

In [4]:
vocab = build_vocab_from_iterator([sorted(Counter(text))], min_freq=1,
                                  specials=["<unk>"])
vocab.set_default_index(vocab["<unk>"])
print(vocab.get_itos())

maxlen = 40
step = 3
X_train, Y_train = [], []
# получение последовательности в 40 символов каждые 3 символа
for i in range(0, len(text) - maxlen, step):
    # сохранение последовательности символов
    X_train.append(vocab(list(text[i: i + maxlen])))
    # сохранение первого символа после последовательности
    Y_train.append(vocab(list(text[i + maxlen])))

print("Всего последовательностей:", len(X_train))
# преобразование полученных чисел в тензоры
X_train = torch.tensor(X_train, dtype=torch.float32)
Y_train = torch.tensor(Y_train)
# дополнительное измерение для lstm слоя
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
Y_train = Y_train.reshape(Y_train.shape[0])
print("Размеры Тензоров:", X_train.shape, Y_train.shape)
vector_X = TensorDataset(X_train, Y_train)
train_loader = DataLoader(vector_X, batch_size=1024, shuffle=False)
for X, Y in train_loader:
    print("Размеры батчей:", X.shape, Y.shape)
    break

['<unk>', '\n', ' ', '!', '"', "'", '(', ')', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '=', '?', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'ä', 'æ', 'é', 'ë']
Всего последовательностей: 200285
Размеры Тензоров: torch.Size([200285, 40, 1]) torch.Size([200285])
Размеры батчей: torch.Size([1024, 40, 1]) torch.Size([1024])


Итого у нас 200 тысяч данных для обучения с 40 признаками. Таргет - один из 58 символов. Vocab кодирует данные по принципу LabelEncoder, то ессть каждому символу присваивается соответствующее число.

Второй вариант обучения - предсказывать следующее слово:

In [11]:
# очистка слов от лишних символов (всё, что не латинская буква или пробел)
words = re.sub(r'[^a-z\s]', ' ', text)
words = re.sub(r'[\s]+', ' ', words).split()
# проверка, что слово существует (написано без опечатки)
words = list(filter(lambda word: len(wordnet.synsets(word)) > 0, words))
vocab_words = build_vocab_from_iterator([sorted(Counter(words))], min_freq=1,
                                  specials=["<unk>"])
vocab_words.set_default_index(vocab_words["<unk>"])
print("Слов в словаре:", len(vocab_words))

maxlen = 40
step = 3
X_train_words, Y_train_words = [], []
# получение последовательности в 40 слов через каждые 3 слова
for i in range(0, len(words) - maxlen, step):
    # сохранение последовательности слов
    X_train_words.append(vocab_words(words[i: i + maxlen]))
    # сохранение первого слова после последовательности
    Y_train_words.append(vocab_words[words[i + maxlen]])


print("Всего последовательностей:", len(X_train_words))
# преобразование полученных чисел в тензоры
X_train_words = torch.tensor(X_train_words, dtype=torch.float32)
Y_train_words = torch.tensor(Y_train_words)
# дополнительное измерение для lstm слоя
X_train_words = X_train_words.reshape(X_train_words.shape[0], X_train_words.shape[1], 1)
Y_train_words = Y_train_words.reshape(Y_train_words.shape[0])
print("Размеры Тензоров:", X_train_words.shape, Y_train_words.shape)
vector_X = TensorDataset(X_train_words, Y_train_words)
train_loader_words = DataLoader(vector_X, batch_size=1024, shuffle=False)
for X, Y in train_loader_words:
    print("Размеры батчей:", X.shape, Y.shape)
    break

Слов в словаре: 9138
Всего последовательностей: 23450
Размеры Тензоров: torch.Size([23450, 40, 1]) torch.Size([23450])
Размеры батчей: torch.Size([1024, 40, 1]) torch.Size([1024])


Данных для обучения почти в 10 раз меньше, а количество таргетов гораздо больше - 9 тысяч уникальных слов. Модель нужно подготовить сложнее, но скорость обучения будет больше.

## модель LSTM для генерации текста

In [5]:
class TextGeneratorLetters(torch.nn.Module):
    """Модель для генерации текста посимвольно"""
    def __init__(self):
        super().__init__()
        self.lstm = torch.nn.LSTM(input_size=1, hidden_size=256, num_layers=2,
                                  batch_first=True)
        self.dropout = torch.nn.Dropout(0.25)
        self.linear = torch.nn.Linear(256, len(vocab))

    def forward(self, X_batch):
        hidden = torch.randn(2, len(X_batch), 256).to(device)
        carry = torch.randn(2, len(X_batch), 256).to(device)
        output, _ = self.lstm(X_batch, (hidden, carry))
        output = self.dropout(output)
        output = self.linear(output[:, -1])
        return output

In [12]:
class TextGeneratorWords(torch.nn.Module):
    """Модель для генерации текста пословно"""
    def __init__(self):
        super().__init__()
        self.lstm = torch.nn.LSTM(input_size=1, hidden_size=1024, num_layers=2,
                                  batch_first=True)
        self.dropout = torch.nn.Dropout(0.25)
        self.linear = torch.nn.Linear(1024, 4096)
        self.dropout1 = torch.nn.Dropout(0.2)
        self.linear1 = torch.nn.Linear(4096, len(vocab_words))

    def forward(self, X_batch):
        hidden = torch.randn(2, len(X_batch), 1024).to(device)
        carry = torch.randn(2, len(X_batch), 1024).to(device)
        output, _ = self.lstm(X_batch, (hidden, carry))
        output = self.dropout(output)
        output = self.linear(output[:, -1])
        output = self.dropout1(output)
        output = self.linear1(output)
        return output

## Генерация текста

In [8]:
def generate(model, X_train, vocab, sep=""):
  # seed необходим для детерменированности результата
  random.seed(10)
  # выбираем случайную последовательность символов из датасета
  idx = random.randint(0, len(X_train))
  # генерируем новый символ после группы символов
  pattern = X_train[idx].numpy().astype(int).flatten().tolist()
  print("\033[92m Стартовый текст: {}".format(sep.join(vocab.lookup_tokens(pattern))))
  text = []
  for i in range(100):
    with torch.no_grad(): # не обновляя веса модели предсказываем значение
      X_batch = torch.tensor(pattern,
                             dtype=torch.float32).reshape(1, len(pattern), 1)
      preds = model(X_batch.to(device))
      # выбираем символ с наибольшей вероятностью
      symbol = preds.argmax(dim=-1).cpu().numpy()[0]
      text.append(symbol)
      pattern.append(symbol)
      pattern = pattern[1:]
  print("\033[91m Генерация: {}".format(sep.join(vocab.lookup_tokens(text))))

### Функция обучения

In [9]:
def train(model, train_loader, epochs, lr, X_train, vocab, sep=''):
  criterion = torch.nn.CrossEntropyLoss().to(device)
  optimizer = torch.optim.Adam(model.parameters(), lr=lr)
  for i in range(1, epochs+1):
    losses = []
    for X, Y in tqdm(train_loader):
      X, Y = X.to(device), Y.to(device)
      preds = model(X) # прогон признаков через слои модели
      loss = criterion(preds, Y) # вычисление функции ошибки
      losses.append(loss.item()) # сохранение вычисленной ошибки
      optimizer.zero_grad() # очистка вычисленных градиентов
      loss.backward() # вычисление градиента функции потерь по всем параметрам модели
      optimizer.step() # оптимизация весов модели
    print("Эпоха: {}".format(i), end=' | ')
    print("Ошибка : {:.3f}".format(torch.tensor(losses).mean()))
    # генерация по отному и тому же изначальному шаблону после каждой эпохи
    generate(model, X_train, vocab, sep)

### Обучение моделей:

In [10]:
model = TextGeneratorLetters().to(device)
print(model)
train(model, train_loader, 50, 1e-3, X_train, vocab)

TextGeneratorLetters(
  (lstm): LSTM(1, 256, num_layers=2, batch_first=True)
  (dropout): Dropout(p=0.25, inplace=False)
  (linear): Linear(in_features=256, out_features=58, bias=True)
)


100%|██████████| 196/196 [00:15<00:00, 12.58it/s]


Эпоха: 1 | Ошибка : 2.923
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация:  the the the the the the the the the the the the the the the the the the the the the the the the the


100%|██████████| 196/196 [00:15<00:00, 12.84it/s]


Эпоха: 2 | Ошибка : 2.726
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация:  the the the the the the the the the the the the the the the the the the the the the the the the the


100%|██████████| 196/196 [00:14<00:00, 13.17it/s]


Эпоха: 3 | Ошибка : 2.635
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация:  the seenenn of the soene the the soen the seen the the the see the the see the tee the soee the the


100%|██████████| 196/196 [00:14<00:00, 13.09it/s]


Эпоха: 4 | Ошибка : 2.567
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: n of the soene the soier the seene the sene to the sere the seenene of the cerer the sene the soene 


100%|██████████| 196/196 [00:15<00:00, 12.60it/s]


Эпоха: 5 | Ошибка : 2.506
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion the the the seen the seenne of the seeen the sennen to the saneet of the seriens of the soere t


100%|██████████| 196/196 [00:15<00:00, 12.78it/s]


Эпоха: 6 | Ошибка : 2.446
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the seeient the seeres the sene and the sher the she of the shene the sere the sele of the s


100%|██████████| 196/196 [00:15<00:00, 12.50it/s]


Эпоха: 7 | Ошибка : 2.393
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the soerention of the sertinn of the sertie the seser of the sene the seiers the sele and th


100%|██████████| 196/196 [00:15<00:00, 12.26it/s]


Эпоха: 8 | Ошибка : 2.346
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the selierion of the srre the hand the sorision of the preer and the serse of the soini the 


100%|██████████| 196/196 [00:16<00:00, 12.10it/s]


Эпоха: 9 | Ошибка : 2.301
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the soilition of the sele the sele the poeer the seliee the sesions of the sele the sele of 


100%|██████████| 196/196 [00:16<00:00, 12.24it/s]


Эпоха: 10 | Ошибка : 2.265
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the seliots of the cerier the sele to the sele the snle of the serpence to the seser of the 


100%|██████████| 196/196 [00:16<00:00, 12.22it/s]


Эпоха: 11 | Ошибка : 2.232
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: ted the streer the self the cerinition of the soiling of the selission of the manesion of the shgit 


100%|██████████| 196/196 [00:16<00:00, 12.19it/s]


Эпоха: 12 | Ошибка : 2.200
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the eoomision of the sessinn of the sele the segentions of the mane of the sele the selst of


100%|██████████| 196/196 [00:16<00:00, 12.18it/s]


Эпоха: 13 | Ошибка : 2.172
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the prrsence of the sester of the seliined of the perserion the cerining of the peientinn of


100%|██████████| 196/196 [00:16<00:00, 12.20it/s]


Эпоха: 14 | Ошибка : 2.148
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the cerisions of the ouerent of the serte of the senslns of the sessine the sessine of the s


100%|██████████| 196/196 [00:16<00:00, 12.19it/s]


Эпоха: 15 | Ошибка : 2.126
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the surence of the stier the self of the seli the stoeinity of the consrerent of the sestine


100%|██████████| 196/196 [00:16<00:00, 12.16it/s]


Эпоха: 16 | Ошибка : 2.101
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the selition of the stiling the soace of the celintion of the orreer the sessrn of the seadi


100%|██████████| 196/196 [00:16<00:00, 12.17it/s]


Эпоха: 17 | Ошибка : 2.081
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the sentle of the sestinn of the serte of the sente of the oresing the should of the sorlit 


100%|██████████| 196/196 [00:16<00:00, 12.16it/s]


Эпоха: 18 | Ошибка : 2.058
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the sestine of the self the and of the insertion of the ooler of the cerilgnt of the self the o


100%|██████████| 196/196 [00:16<00:00, 12.22it/s]


Эпоха: 19 | Ошибка : 2.039
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tinn of the cesirinns of the seligions of the selsld of the stcl the soilithon of the cestrrion of t


100%|██████████| 196/196 [00:16<00:00, 12.22it/s]


Эпоха: 20 | Ошибка : 2.024
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the senule of the sestrnss of the self of the cestinn of the self there in the stce of the s


100%|██████████| 196/196 [00:16<00:00, 12.18it/s]


Эпоха: 21 | Ошибка : 2.006
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the surtise of the same the ooe of the self of the cesiling the serte of the senple of the s


100%|██████████| 196/196 [00:16<00:00, 12.13it/s]


Эпоха: 22 | Ошибка : 1.990
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the present and the conseqpinn of the sensibting of the hndlce of the eerierice of the srili


100%|██████████| 196/196 [00:16<00:00, 12.22it/s]


Эпоха: 23 | Ошибка : 1.978
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the senferent of the strers of the senigrent of the still of the conpence of the senferent o


100%|██████████| 196/196 [00:16<00:00, 12.12it/s]


Эпоха: 24 | Ошибка : 1.962
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the secsent of the suill of the orering of the incerention of the prier of the still of the 


100%|██████████| 196/196 [00:16<00:00, 12.18it/s]


Эпоха: 25 | Ошибка : 1.948
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tent of the streng of the senue of the still of the cristinn of the self of the still of the self th


100%|██████████| 196/196 [00:16<00:00, 12.20it/s]


Эпоха: 26 | Ошибка : 1.933
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the sensle of the strthsion of the self the wer of the cassise of the prening of the seligio


100%|██████████| 196/196 [00:16<00:00, 12.19it/s]


Эпоха: 27 | Ошибка : 1.918
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the strengts of the stch an actions of the saie that the eisst of the contestion of the eisrinn


100%|██████████| 196/196 [00:16<00:00, 12.14it/s]


Эпоха: 28 | Ошибка : 1.906
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the srrthsion of the sense of the cesiliciou of the conseqtinn of the oreer the shght of the ca


100%|██████████| 196/196 [00:16<00:00, 12.18it/s]


Эпоха: 29 | Ошибка : 1.894
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tinn of the orestion of the senfer of the strersion of the streng of the still of the senpling of th


100%|██████████| 196/196 [00:16<00:00, 12.19it/s]


Эпоха: 30 | Ошибка : 1.884
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the condition of the anxtre the strersinn of the senf of the prissicuiv of the sense of the pre


100%|██████████| 196/196 [00:16<00:00, 12.23it/s]


Эпоха: 31 | Ошибка : 1.872
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the self of the sense of the renigions of the eeeling of the selftion of the senigion of the ce


100%|██████████| 196/196 [00:16<00:00, 12.21it/s]


Эпоха: 32 | Ошибка : 1.862
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the strtiing of the seligion of the eesiln of the sense of the exery of the properuion of the o


100%|██████████| 196/196 [00:16<00:00, 12.12it/s]


Эпоха: 33 | Ошибка : 1.851
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: ture of the stch and the strersion of the streng of the concinion of the streng of the strength of t


100%|██████████| 196/196 [00:16<00:00, 12.23it/s]


Эпоха: 34 | Ошибка : 1.840
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the consiee and the soote of the most should be actions and the self of the selst of the str


100%|██████████| 196/196 [00:16<00:00, 12.18it/s]


Эпоха: 35 | Ошибка : 1.829
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the srill of the sense of the streng of the world of the seligious and in the more that and 


100%|██████████| 196/196 [00:16<00:00, 12.13it/s]


Эпоха: 36 | Ошибка : 1.822
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the sense of the still of the selation of the oreer of the certain of the self of the soinit of


100%|██████████| 196/196 [00:16<00:00, 12.14it/s]


Эпоха: 37 | Ошибка : 1.813
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the sense of the ereat dan the individual of the astertion of the iitsonne of the self of the c


100%|██████████| 196/196 [00:16<00:00, 12.24it/s]


Эпоха: 38 | Ошибка : 1.806
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the stef as a self soirit and the selsted of the strthmg of the conscience of the proserce o


100%|██████████| 196/196 [00:16<00:00, 12.21it/s]


Эпоха: 39 | Ошибка : 1.793
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the same the strpng of the self of the still of the sensenent of the ooder of the orher of the 


100%|██████████| 196/196 [00:16<00:00, 12.14it/s]


Эпоха: 40 | Ошибка : 1.787
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the stff of the seliting of the most and the self of the one the soirit of the eecrinns of the 


100%|██████████| 196/196 [00:16<00:00, 12.12it/s]


Эпоха: 41 | Ошибка : 1.782
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the consrant of the sense of the present and the self-clurdlence of the orhgin of the strtems o


100%|██████████| 196/196 [00:17<00:00, 11.13it/s]


Эпоха: 42 | Ошибка : 1.775
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the sense of the stef and the same the sensenent of the same the seligious and the eistr of 


100%|██████████| 196/196 [00:16<00:00, 11.54it/s]


Эпоха: 43 | Ошибка : 1.768
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the self and the ond man is an artists and the selfgring of the still of the strpoger of the


100%|██████████| 196/196 [00:16<00:00, 11.75it/s]


Эпоха: 44 | Ошибка : 1.758
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tive consrect of the same thing of the errinate of the still of the seliting of the plest of the sen


100%|██████████| 196/196 [00:16<00:00, 12.17it/s]


Эпоха: 45 | Ошибка : 1.752
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the proeers of the science of the spirit of the same perhaps the stffers of the self pf the sen


100%|██████████| 196/196 [00:17<00:00, 11.28it/s]


Эпоха: 46 | Ошибка : 1.745
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the exery of the surengng of the surtale of the still of the sense of the propouite of the eod 


100%|██████████| 196/196 [00:18<00:00, 10.81it/s]


Эпоха: 47 | Ошибка : 1.739
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the selse of the suef and the orher is a rilel of the most desire the one of the sensinent of t


100%|██████████| 196/196 [00:16<00:00, 12.13it/s]


Эпоха: 48 | Ошибка : 1.739
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the seni of the sense of the strength of the strength of the streng of the prieit of the world 


100%|██████████| 196/196 [00:16<00:00, 11.96it/s]


Эпоха: 49 | Ошибка : 1.733
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: t of the streng and the self of the soirit of the strength of the self of the streng and the workd o


100%|██████████| 196/196 [00:16<00:00, 12.20it/s]


Эпоха: 50 | Ошибка : 1.725
[92m Стартовый текст: nd true. here
is predicated of the effec
[91m Генерация: tion of the streritation of the will of the streng of the sense of the senfer of the self of the sam


Первая модель, обученная на символах, обучилась до довольно низкого значения ошибки, она способна генерировать целые слова из языка, на котором обучалась, но имеет свойство зацикливаться и генерировать одинаковые последовательности символов. Это может быть связано с тем, что модель запомнила, какой символ идёт после одной и той же группы символов и всегда выдаёт этот символ, так как у него наибольшая вероятность среди всех остальных.

In [13]:
model = TextGeneratorWords().to(device)
print(model)
train(model, train_loader_words, 50, 1e-3, X_train_words, vocab_words, sep=' ')

TextGeneratorWords(
  (lstm): LSTM(1, 1024, num_layers=2, batch_first=True)
  (dropout): Dropout(p=0.25, inplace=False)
  (linear): Linear(in_features=1024, out_features=4096, bias=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (linear1): Linear(in_features=4096, out_features=9138, bias=True)
)


100%|██████████| 23/23 [00:24<00:00,  1.08s/it]


Эпоха: 1 | Ошибка : 7.926
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a he is a a a a a a a a a a a a a a a a a a a a a a a a a he a a a a a a a a a a a a a a a a a a he a a a a a a a a a a a a a a a a a a a a


100%|██████████| 23/23 [00:24<00:00,  1.05s/it]


Эпоха: 2 | Ошибка : 7.356
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 3 | Ошибка : 7.201
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is


100%|██████████| 23/23 [00:24<00:00,  1.05s/it]


Эпоха: 4 | Ошибка : 7.179
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in a man in in in in in in in in in in in in in in in in in a man in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:25<00:00,  1.10s/it]


Эпоха: 5 | Ошибка : 7.084
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in is in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:24<00:00,  1.06s/it]


Эпоха: 6 | Ошибка : 7.075
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:24<00:00,  1.05s/it]


Эпоха: 7 | Ошибка : 7.067
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 8 | Ошибка : 7.062
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:24<00:00,  1.05s/it]


Эпоха: 9 | Ошибка : 7.055
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:24<00:00,  1.04s/it]


Эпоха: 10 | Ошибка : 7.047
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:24<00:00,  1.04s/it]


Эпоха: 11 | Ошибка : 7.030
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:24<00:00,  1.06s/it]


Эпоха: 12 | Ошибка : 7.012
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:24<00:00,  1.04s/it]


Эпоха: 13 | Ошибка : 6.992
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 14 | Ошибка : 6.978
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in is in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:24<00:00,  1.05s/it]


Эпоха: 15 | Ошибка : 6.966
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 16 | Ошибка : 6.945
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:24<00:00,  1.05s/it]


Эпоха: 17 | Ошибка : 6.918
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:24<00:00,  1.04s/it]


Эпоха: 18 | Ошибка : 6.942
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in a man in in in in in in in in a man in in a certain in a


100%|██████████| 23/23 [00:24<00:00,  1.04s/it]


Эпоха: 19 | Ошибка : 6.928
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in a new in is is is a man or in is in in is in in in in a man or is in in in is in a man in in in is in a man or is in in is in is in is a man in in in in a man or is is in is in in in a man is is in in in in is a man a man in is in a man or is in a man in in is in is a man or in is a man is is in


100%|██████████| 23/23 [00:24<00:00,  1.05s/it]


Эпоха: 20 | Ошибка : 6.889
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is in is is a man or is is is is is a man is is is is is a man is is is is is is is a man is is is is is is is in is is is in is in is is is in is is is is in is is is a man or is a man is in is is is is a man or is is is is is is is in a man is is in is is is is a man or is is is is is is is


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 21 | Ошибка : 6.888
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is is is is is is is is is is is in in in is is is is is is in is is is is is in is is is is is is is is is is in in in is is is is in in is is in is is is in is in is is in in is is is is is is is in in is is is is is in is is in is is in is in is is in is is in in is is is is in is is is is


100%|██████████| 23/23 [00:24<00:00,  1.04s/it]


Эпоха: 22 | Ошибка : 6.937
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is is is is is is is is is is is is is is is is is in is is is is is is is is is is in is is is is is is is is is is is is is is is is is in is is is is is is is is is is is is is is is is is in is is is is is is is is is is is is in is is is is is is is in is is is is is is is is is is is is


100%|██████████| 23/23 [00:24<00:00,  1.05s/it]


Эпоха: 23 | Ошибка : 6.923
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is in is in is is is in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:24<00:00,  1.05s/it]


Эпоха: 24 | Ошибка : 6.882
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is is is in is is in in is a false is a false in a false or is a false or in is is is is is is is is is is is is is is is is is is is is is is in is is is is is is is is is is is is is is in is is in is is is is is is a man a certain is a false a sort or is is is is in is is is is is is is is is is is in is


100%|██████████| 23/23 [00:24<00:00,  1.05s/it]


Эпоха: 25 | Ошибка : 6.862
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in is is is is is in is in is is in is in is is is in is in in is is in is is in in is is in is in in in in in is in is is in in in in in in is in in in is is is is in is is is in in in is is is in in in is is is in is is is is in is is in is in in in is in in in in in is is in in is in in in in is


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 26 | Ошибка : 6.840
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in


100%|██████████| 23/23 [00:24<00:00,  1.05s/it]


Эпоха: 27 | Ошибка : 6.803
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is is is is is is is is in in in is in in in is in is is is in in in is in is in in in in in in in in is in in is in is in in is is in in in in is in in in in is in in in in in in is is in is in in in is in in is in in is in in is is in in in in in in in in in is is is in in in in is in in in


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 28 | Ошибка : 6.820
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is as a good or in is is is is is is is is is is is is is is


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 29 | Ошибка : 6.813
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in is in is is is in is in in is in is is is is in is is in in in in in is is is is in is is is is is in in in in in in is in in in is in is in in in in in is in is in is in in in in in is is in is in is is is is is is in in in is is in in is is in in in in is in in is in in is in is in hence is is


100%|██████████| 23/23 [00:24<00:00,  1.04s/it]


Эпоха: 30 | Ошибка : 6.896
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in is is a certain defective is a man or is is is is is is is is is is is is is is is is is is is is is is is is is in is is in is is is in in in is is is is is in is is is a man or it a man consequence is is is is is is is a man or consequence a man city in is it in is a man individuality is a man or or is is is is in is be a man individuality is is


100%|██████████| 23/23 [00:24<00:00,  1.04s/it]


Эпоха: 31 | Ошибка : 6.895
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is in is is is is is is is is is in hence is is in in hence in in a man alleges a disadvantage god is is is is is as a man or imagination he is is is is is is is is is is is is is is as a man intentionally is is is is is is is is is in is is is is is he is is is is is is is is in is is is is is is is is is is is is in is hence in is is in in


100%|██████████| 23/23 [00:24<00:00,  1.04s/it]


Эпоха: 32 | Ошибка : 6.909
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is is is is is in is in is is is is in in is in in is is is in is is is a man in is is a certain or is is a certain or is in is is is is is is is is is is is is as a sort or is is is a man in in is in is is is is is is in is is is is in is is in is is is is in is in is in is is is is is is is is is is


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 33 | Ошибка : 6.913
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is is is is is is is is is is is is is is is is is is a certain or is is acts as acts occasions is a man imagination acts prompted a certain is acts hence is acts a rational classification acts pain acts prompted acts gets is acts hence acts fierceness is as a motive is acts fierceness is is is a god or a man or acts a rational destruction acts as acts occasions acts occasions is acts as acts as acts imitate acts as a motive or is a man or pleasure simply


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 34 | Ошибка : 6.869
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is a man classification he is is is a man number a man nature a man joy involuntarily is a man hence or is a man reciprocity a man individual a man individuality experiment pleasure a man tone involuntarily a man limited a man individuality involuntarily nature a man notion a man reciprocity a man nature a man is basic a function deal a man limited a man distraction involuntarily is is is a man consequence or a man reciprocity a man joy a man or who a man limited involuntarily a man reciprocity a man reciprocity involuntarily involuntarily a


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 35 | Ошибка : 6.850
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is as it it is is is is is is in in in in in in is is is in in is in in in in in in it is in in in in in in is in in in in in in in in in in in is in in in in is in in in in is in in in in in is in in in in in in in in in in in is in in in in is as a bad period is is in is is is is is is is is as a being


100%|██████████| 23/23 [00:24<00:00,  1.05s/it]


Эпоха: 36 | Ошибка : 6.815
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is as a thing or is is is is is is is is is is is is is is is is is is is in is is is is is is is is is is is in is is is is is is is is is is is is is is is is is is is is in is is in is in is is is in is is is in in hence is is is in is is is is in is is in is is is is is is is is in is is in is is in


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 37 | Ошибка : 6.870
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is as a philosopher or is is is is is is is is is is is is is is is is is is is is is is is is is is as a good is is is is is is is is is is is is is is is as a certain or is is is is is is is is is is is is is as a good or is is is is is is is is is as a certain is is is is is is is is is is is is is is is as


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 38 | Ошибка : 6.736
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is is is is in is is is in in in in in in is in in in in in in in in is in in in in in in in is is in in in in is in is in in in in in in in in in in in in in in is in in is in in in in in in is in is in in in in in in in in is in in in in is in in in as thus in is in in in in in is in in in in


100%|██████████| 23/23 [00:24<00:00,  1.05s/it]


Эпоха: 39 | Ошибка : 6.779
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is is is is is is is is in is is a certain period is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is in is is is is is is in is is is is is is is in is is is is is is is is is is is in is in is is is is is is a man imagination is is is is is is


100%|██████████| 23/23 [00:24<00:00,  1.04s/it]


Эпоха: 40 | Ошибка : 6.762
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is in is is is is in is is is is in is in is is is in is as a means is is is is is is is is is is is is is is is is is is is is is in is is is is is is is is is is is is is is is is in in in in in is in is in in is is in hence in is is in is in in in in in in is as a man bad acts a good beneficial acts as one as religion is


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 41 | Ошибка : 6.857
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is in is is is is is in is in as a good in is is is is is is is is is is is in is is is is is is is is is is is is is is is is is in is is is is is is in is in in in in in in is is as in is as a centre it is is is is as a sort it in is is is is is is is is is is is is is is is is is is is is is is is in


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 42 | Ошибка : 6.704
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is as a function is in but in is is in is is is is is is is is is is in is is in is is is is is is is is is is in is is is is is in is is in in feeling is in is in is in in is is is in in in in is is in is pleasure as a protection between in is is is is is is is is is is is is is is is is is is is is is is in is is is is


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 43 | Ошибка : 6.761
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is is is is is is is is in is is in in is is in in in is in in in in is as though is in in is is in in in is in in is is is in as a sort it impregnated is is is is is is is is is in is is is is is is is in is is is is is is is is is is is is is as regards is is in in in is in is in is in in is is in in in in in


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 44 | Ошибка : 6.676
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: as well be a certain a cash or a fortified or be a motive distraction exaggerated but a subsequent is is in is is is is is a man or is in is is is a man or or a novelty or a man a man or or a novelty or a man or is a man or is a man or a man or a man or or a man or is is a man limited is is is a false or be a man inclination interpretation as a man a man or be a man or is but


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 45 | Ошибка : 6.680
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is is is is is is is is is is is is is is is is is is is is is is is is is is is in in is is is is is is is is is is is is is is in in is in is is is is is is is is in is is is is is is is is is is in is is is in is is is is is in is is is is in is in is is is is is is is is is is is is is is


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 46 | Ошибка : 6.652
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: it is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is a world in in is in is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is a man feels in in is is is is is is is is is is is is is is is is is is is is is is is


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 47 | Ошибка : 6.547
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is is is is is is is is is is is is is is is is is is is in is is is in in is is in is in in is is in is is is is be a sort is in is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is in is is in in is is is is is is


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 48 | Ошибка : 6.438
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in in in is in is in in in is in in in is in in in in is in in in in in in in is in is in is in in in in in is in in in in in in is in in in in in in in in is is in in is in in in in so in in in in in is in in in in is in in so in in is it in is in in in in in in in in in in in in is in is in in in


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 49 | Ошибка : 6.411
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: is is is is is is is is is is is in is is is is is is in in in is is is is is is is in is is in is is is in is is is is is in is in is is is is is is is in is in is is is in is in in is in is is is is is in is is is in in is is in is is is is is in is is is is is is is is is in is is in in is is in


100%|██████████| 23/23 [00:23<00:00,  1.04s/it]


Эпоха: 50 | Ошибка : 6.385
[92m Стартовый текст: life blooms flowers only through kind feeling cheerfulness friendliness kindness a heart are unfailing sources impulse have made far more civilization other more noised manifestations it are styled sympathy benevolence sacrifice but it is customary depreciate little tokens kindly feeling
[91m Генерация: in is is is is is is is in is in in in in in in is in is in in in in is in in is in is in in in is is in is in in in in in is in is in in in is in in in is in is in is in in in is in in is in in in in is in in in is in is is in is in is in in in is in in is in in in is in in in in in is is is is is


Модель, обученная на словах имеет точность сильно ниже из-за меньшего размера датасета и большего размера целевой переменной. В основном выдаёт последовательности из наиболее частных слов, встречаемых в датасете.

## Генерация текста для РУССКОЯЗЫЧНОГО НАБОРА глав Wikibooks.

In [15]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [19]:
zf = zipfile.ZipFile('/content/drive/MyDrive/archive.zip')
for file in tqdm(zf.infolist()):
    zf.extract(file)

100%|██████████| 1/1 [01:55<00:00, 115.04s/it]


In [35]:
con = sqlite3.connect('/content/wikibooks.sqlite')
cur = con.cursor()
try:
  cur.execute("""SELECT * FROM ru""")
  fa = cur.fetchall()
  columns = [i[0] for i in cur.description]
  data = pd.DataFrame(fa, columns=columns)
  display(data.head())
except sqlite3.DatabaseError as error:
  print("Ошибка:", error)
finally:
  cur.close()
  con.close()
text = data['body_text'].str.cat(sep=' ').lower()

Unnamed: 0,title,url,abstract,body_text,body_html
0,Викиучебник: Техника и технология средств масс...,https://ru.wikibooks.org/wiki/%D0%A2%D0%B5%D1%...,* [станция|Рабочая станция];,Рабочая станция;\nСервер;\nПерсональный компью...,"<div class=""mw-parser-output""><ul><li><a href=..."
1,Викиучебник: АОН/Пилотское свидетельство,https://ru.wikibooks.org/wiki/%D0%90%D0%9E%D0%...,Гражданское пилотское свидетельство - разрешен...,В Википедии имеется статья по теме «Свидетельс...,"<div class=""mw-parser-output""><div class=""info..."
2,Викиучебник: Книга программиста/Структуры данн...,https://ru.wikibooks.org/wiki/%D0%9A%D0%BD%D0%...,К оглавлению,"К оглавлению\nВсе программы, код которых вылож...","<div class=""mw-parser-output""><p><a href=""/wik..."
3,Викиучебник: Тесты НМО/Гигиенические основы и ...,https://ru.wikibooks.org/wiki/%D0%A2%D0%B5%D1%...,Гигиенические основы и медицинский контроль за...,Гигиенические основы и медицинский контроль за...,"<div class=""mw-parser-output""><p><b>Гигиеничес..."
4,Викиучебник: Коктейли/Пенная фея,https://ru.wikibooks.org/wiki/%D0%9A%D0%BE%D0%...,Пенная фея,Пенная фея\n\nДжин Old Tom — 60 г\nАбсент — 15...,"<div class=""mw-parser-output""><p><b>Пенная фея..."


In [37]:
# очистка текста от всего, кроме кириллицы и пунктуации
text = re.sub(r'[^а-яё\s,.!?()\"\":;-]', ' ', text)
text = re.sub(r'[\s]+', ' ', text)
print("Длина текста:", len(text))
vocab = build_vocab_from_iterator([sorted(Counter(text))], min_freq=1,
                                  specials=["<unk>"])
vocab.set_default_index(vocab["<unk>"])
print(vocab.get_itos())

maxlen = 30
step = 10
X_train, Y_train = [], []
# получение последовательности в 30 символов каждые 10 символа
for i in range(0, len(text) - maxlen, step):
    # сохранение последовательности символов
    X_train.append(vocab(list(text[i: i + maxlen])))
    # сохранение первого символа после последовательности
    Y_train.append(vocab(list(text[i + maxlen])))

print("Всего последовательностей:", len(X_train))
# преобразование полученных чисел в тензоры
X_train = torch.tensor(X_train, dtype=torch.float32)
Y_train = torch.tensor(Y_train)
# дополнительное измерение для lstm слоя
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
Y_train = Y_train.reshape(Y_train.shape[0])
print("Размеры Тензоров:", X_train.shape, Y_train.shape)
vector_X = TensorDataset(X_train, Y_train)
train_loader = DataLoader(vector_X, batch_size=1024, shuffle=False)
for X, Y in train_loader:
    print("Размеры батчей:", X.shape, Y.shape)
    break

Длина текста: 41907187
['<unk>', ' ', '!', '"', '(', ')', ',', '-', '.', ':', ';', '?', 'а', 'б', 'в', 'г', 'д', 'е', 'ж', 'з', 'и', 'й', 'к', 'л', 'м', 'н', 'о', 'п', 'р', 'с', 'т', 'у', 'ф', 'х', 'ц', 'ч', 'ш', 'щ', 'ъ', 'ы', 'ь', 'э', 'ю', 'я', 'ё']
Всего последовательностей: 4190716
Размеры Тензоров: torch.Size([4190716, 30, 1]) torch.Size([4190716])
Размеры батчей: torch.Size([1024, 30, 1]) torch.Size([1024])


Будем обучать модель на 4 миллионах очищенных данных. Целевая переменная - буква кириллицы или знак пунктуации.

In [38]:
model = TextGeneratorLetters().to(device)
print(model)
train(model, train_loader, 10, 1e-3, X_train, vocab)

TextGeneratorLetters(
  (lstm): LSTM(1, 256, num_layers=2, batch_first=True)
  (dropout): Dropout(p=0.25, inplace=False)
  (linear): Linear(in_features=256, out_features=45, bias=True)
)


100%|██████████| 4093/4093 [04:18<00:00, 15.83it/s]


Эпоха: 1 | Ошибка : 2.502
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация:  оаразение процессвоение процессвенной польтой проводни и просоятия и постедние представления переме


100%|██████████| 4093/4093 [04:19<00:00, 15.77it/s]


Эпоха: 2 | Ошибка : 2.061
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: ыбора постояние прододство процесси при продолжением и продрамма просода проценски в образом и проце


100%|██████████| 4093/4093 [04:18<00:00, 15.83it/s]


Эпоха: 3 | Ошибка : 1.913
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: сем поддерживаются программерной секущественно по программерные процедуры и прогоаммирование пользов


100%|██████████| 4093/4093 [04:19<00:00, 15.76it/s]


Эпоха: 4 | Ошибка : 1.829
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: оеможность проводится в строки программы программа провраммирования просивопостивного подавает продо


100%|██████████| 4093/4093 [04:19<00:00, 15.75it/s]


Эпоха: 5 | Ошибка : 1.772
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: ысокает представление править выберите один из процесса противопосатательность пространственность пр


100%|██████████| 4093/4093 [04:19<00:00, 15.75it/s]


Эпоха: 6 | Ошибка : 1.730
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: ыборов по отношении программы программированного подробности продукты по образом программирования пр


100%|██████████| 4093/4093 [04:19<00:00, 15.75it/s]


Эпоха: 7 | Ошибка : 1.696
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: ещественных системы править после пространства программы программа править процесса править выберите


100%|██████████| 4093/4093 [04:18<00:00, 15.85it/s]


Эпоха: 8 | Ошибка : 1.671
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: елосоддо примечания противогазных степени программиров в составление по серверам по образом прогоамм


100%|██████████| 4093/4093 [04:18<00:00, 15.82it/s]


Эпоха: 9 | Ошибка : 1.649
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: ессия при помощи производственной серверах и получения программирования процесса проверки программы 


100%|██████████| 4093/4093 [04:18<00:00, 15.85it/s]


Эпоха: 10 | Ошибка : 1.631
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: елтора продажа проверяются в своих строки происходит подавать полазывающийся провериальные сервера п


In [39]:
train(model, train_loader, 10, 3e-4, X_train, vocab)

100%|██████████| 4093/4093 [04:18<00:00, 15.84it/s]


Эпоха: 1 | Ошибка : 1.592
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: ертии состояния по семуерани и процесс против продуктов править выберите один ответ: . . . . . . . .


100%|██████████| 4093/4093 [04:18<00:00, 15.85it/s]


Эпоха: 2 | Ошибка : 1.581
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: озможность простой строки править выберите один ответ: . . . . . . . . . . . . . . . . . . . . . . .


100%|██████████| 4093/4093 [04:18<00:00, 15.86it/s]


Эпоха: 3 | Ошибка : 1.574
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: сего положительные строки по общем станции при положительном строки производится в статье при послед


100%|██████████| 4093/4093 [04:18<00:00, 15.84it/s]


Эпоха: 4 | Ошибка : 1.568
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: ероятность проведения применения программирования по средство при пространстве получения полазателей


100%|██████████| 4093/4093 [04:18<00:00, 15.82it/s]


Эпоха: 5 | Ошибка : 1.563
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: ероятность производства программирования править править выберите один правильный ответ по стороными


100%|██████████| 4093/4093 [04:19<00:00, 15.77it/s]


Эпоха: 6 | Ошибка : 1.557
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: ершин проверка программы при помощи процессов проверки программирования программирования происходит 


100%|██████████| 4093/4093 [04:18<00:00, 15.82it/s]


Эпоха: 7 | Ошибка : 1.553
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: естикально при помощи компонентов проверка править выберите один правильный ответ из статья править 


100%|██████████| 4093/4093 [04:19<00:00, 15.79it/s]


Эпоха: 8 | Ошибка : 1.548
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: се от представления при помощью составляет порледовательность предположительно по строми править выб


100%|██████████| 4093/4093 [04:18<00:00, 15.84it/s]


Эпоха: 9 | Ошибка : 1.544
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: ерхней человек производственных систем и производства и проведения проверка просто процедуры простра


100%|██████████| 4093/4093 [04:17<00:00, 15.88it/s]


Эпоха: 10 | Ошибка : 1.540
[92m Стартовый текст: асштабам проект. никому в то в
[91m Генерация: ремя положения при приводит к просто образуются составляющий клетки при проведении простого процессо


Модель обучилась до достаточно низкого значения ошибки. Она генерирует осмысленные слова, но не последовательности слов.