In [3]:
#!pip3 -qq install torch==0.4.1
#!pip -qq install torchtext==0.3.1
#!pip -qq install gensim==3.6.0
# !pip install pyldavis==2.1.2
# !pip install attrs==18.2.0
# !wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=1OIU9ICMebvZXJ0Grc2SLlMep3x9EkZtz' -O perashki.txt
# !wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=1v66uAEKL3KunyylYitNKggdl2gCeYgZZ' -O poroshki.txt
# !git clone https://github.com/UniversalDependencies/UD_Russian-SynTagRus.git
# !wget https://raw.githubusercontent.com/DanAnastasyev/neuromorphy/master/neuromorphy/train/corpus_iterator.py

In [4]:
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


if torch.cuda.is_available():
    from torch.cuda import FloatTensor, LongTensor
    DEVICE = torch.device('cuda')
else:
    from torch import FloatTensor, LongTensor
    DEVICE = torch.device('cpu')

np.random.seed(42)

# Word-Level Text Generation

Сегодня занимаемся, в основном, тем, что генерируем *пирожки* и *порошки*.

*(Данные без спросу скачаны с сайта http://poetory.ru)*

Пирожки - это вот:

In [5]:
!head perashki.txt

старик вытягивает сети
они пусты и лишь в конце
записка рыба недоступна
или вне действия сети

олег адепт шизофрении
шагает бодро из окна
и жызнь летит перед глазами
да не одна а сразу две



Порошки вот:

In [6]:
!head poroshki.txt

кто любит цоя кто покушать
кто на рассвете пенье птиц
а я люблю в коротких платьях
физлиц

твой монолог так гениален
что я чуть не открыла дверь
но станиславский тихо сверху
не верь



Не перепутайте!

Вообще, пирожок - это четверостишие, написанное четырехстопным ямбом по схеме 9-8-9-8. У порошка схема 9-8-9-2.

In [7]:
vowels = 'ёуеыаоэяию'

odd_pattern = '-+-+-+-+-'
even_pattern = '-+-+-+-+'

Считываем данные:

In [8]:
def read_poem(path):
    poem = []
    with open(path, encoding='utf8') as f:
        for line in f:
            line = line.rstrip()
            if len(line) == 0:
                yield poem
                poem = []
                continue
            
            poem.extend(line.split() + ['\\n'])
            
perashki = list(read_poem('perashki.txt'))
poroshki = list(read_poem('poroshki.txt'))

Построим датасет для порошков:

In [9]:
from torchtext.data import Field, Example, Dataset, BucketIterator

text_field = Field(init_token='<s>', eos_token='</s>')
        
fields = [('text', text_field)]
examples = [Example.fromlist([poem], fields) for poem in poroshki]
dataset = Dataset(examples, fields)

text_field.build_vocab(dataset, min_freq=7)

print('Vocab size =', len(text_field.vocab))
train_dataset, test_dataset = dataset.split(split_ratio=0.9)

train_iter, test_iter = BucketIterator.splits(datasets=(train_dataset, test_dataset), batch_sizes=(32, 128), 
                                              shuffle=True, device=DEVICE, sort=False)

Vocab size = 6298


**Задание** Напишите класс языковой модели.

In [14]:
class LMModel(nn.Module):
    def __init__(self, vocab_size, emb_dim=256, lstm_hidden_dim=256, num_layers=1):
        super().__init__()

        self._emb = nn.Embedding(vocab_size, emb_dim)
        self._rnn = nn.LSTM(input_size=emb_dim, hidden_size=lstm_hidden_dim)
        
        self._out_layer = nn.Linear(lstm_hidden_dim, vocab_size)
        
        self._init_weights()

    def _init_weights(self, init_range=0.1):
        self._emb.weight.data.uniform_(-init_range, init_range)
        self._out_layer.bias.data.zero_()
        self._out_layer.weight.data.uniform_(-init_range, init_range)

    def forward(self, inputs, hidden=None):
        x = self._emb(inputs)
        x, hidden = self._rnn(x, hidden)
        x = self._out_layer(x)
        return x, hidden

In [15]:
batch = next(iter(train_iter))

In [16]:
model = LMModel(vocab_size=len(train_iter.dataset.fields['text'].vocab)).to(DEVICE)

model(batch.text)[0].shape

torch.Size([24, 32, 6298])

**Задание** Добавьте подсчет потерей с маскингом паддингов.

In [17]:
import math
from tqdm import tqdm
tqdm.get_lock().locks = []


def do_epoch(model, criterion, data_iter, unk_idx, pad_idx, optimizer=None, name=None):
    epoch_loss = 0
    
    is_train = not optimizer is None
    name = name or ''
    model.train(is_train)
    
    batches_count = len(data_iter)
    
    with torch.autograd.set_grad_enabled(is_train):
        with tqdm(total=batches_count) as progress_bar:
            for i, batch in enumerate(data_iter):                
                logits, _ = model(batch.text)

                targets = torch.cat(
                    [
                        batch.text[1:], batch.text.new_ones((1, batch.text.shape[1]))
                    ]
                )
                
                loss = criterion(logits.view(-1, logits.shape[-1]), targets.view(-1))
                
                mask = (1 - ((targets.view(-1) == unk_idx) + (targets.view(-1) == pad_idx))).float().cuda()
                
                loss = (loss * mask).sum() / mask.sum()

                epoch_loss += loss.item()

                if optimizer:
                    optimizer.zero_grad()
                    loss.backward()
                    nn.utils.clip_grad_norm_(model.parameters(), 1.)
                    optimizer.step()

                progress_bar.update()
                progress_bar.set_description('{:>5s} Loss = {:.5f}, PPX = {:.2f}'.format(name, loss.item(), 
                                                                                         math.exp(loss.item())))
                
            progress_bar.set_description('{:>5s} Loss = {:.5f}, PPX = {:.2f}'.format(
                name, epoch_loss / batches_count, math.exp(epoch_loss / batches_count))
            )
            progress_bar.refresh()

    return epoch_loss / batches_count


def fit(model, criterion, optimizer, train_iter, epochs_count=1, unk_idx=0, pad_idx=1, val_iter=None):
    best_val_loss = None
    for epoch in range(epochs_count):
        name_prefix = '[{} / {}] '.format(epoch + 1, epochs_count)
        train_loss = do_epoch(model, criterion, train_iter, unk_idx, pad_idx, optimizer, name_prefix + 'Train:')
        
        if not val_iter is None:
            val_loss = do_epoch(model, criterion, val_iter, unk_idx, pad_idx, None, name_prefix + '  Val:')
            
            if best_val_loss and val_loss > best_val_loss:
                optimizer.param_groups[0]['lr'] /= 4.
                print('Optimizer lr = {:g}'.format(optimizer.param_groups[0]['lr']))
            else:
                best_val_loss = val_loss
        print()
        generate(model)
        print()

**Задание** Напишите функцию-генератор для модели.

In [18]:
def sample(probs, temp):
    probs = F.log_softmax(probs.squeeze(), dim=0)
    probs = (probs / temp).exp()
    probs /= probs.sum()
    probs = probs.cpu().numpy()

    return np.random.choice(np.arange(len(probs)), p=probs)


def generate(model, temp=0.6):
    model.eval()
    with torch.no_grad():        
        prev_token = train_iter.dataset.fields['text'].vocab.stoi['<s>']
        end_token = train_iter.dataset.fields['text'].vocab.stoi['</s>']
        
        hidden = None
        for _ in range(150):
            probs, hidden = model(LongTensor([[prev_token]]), hidden)
            prev_token = sample(probs, temp)
            print(train_iter.dataset.fields['text'].vocab.itos[prev_token], end='')
            
            if prev_token == end_token:
                return
                
generate(model)

изольдаработойтихдарилпошёлпослалновыйвлезаетдрожитотличносталсталинступайбасёнужнысветамашинутикчапаевстатуслососьясностуквасянаденьместиписатьпоследнимвидалсвоейкассберуженепсихологсменыпомаданамногоайильивтроёмсутьзачатпрошулягутанябрюктолпаскользульфиятрудспрошукинулполкйогрубнесетсяднёмдетствонебоморковьскажемпришлосьсолнышкорастётискатьдровлбупещеребудьфомаистинанесутбылпрощеньяомлетстоиминфарктпутиссыгагаринверишьолеречьшепчетстрахамировстрастнорядыжывотсошлисьиногдапомогитедикийплодикарженоюстремясьспасгодосталсяконечнорассказэсонегинлососяжопойклассгорисъелильсиськипримеруотпечаткиполучишьприпёрсягарсонплатокворонаизвиняюсьграблиполоскикапканглазуборщаснегасоскиредутбуднейктопервыхогнёмдаёткричатбокпохмельекускироднянекстатиайфонстоныскажытенашаизъянзапомниодинокийбудтокрайнеймедведиоксанудрожа

In [20]:
model = LMModel(vocab_size=len(train_iter.dataset.fields['text'].vocab)).to(DEVICE)

pad_idx = train_iter.dataset.fields['text'].vocab.stoi['<pad>']
unk_idx = train_iter.dataset.fields['text'].vocab.stoi['<unk>']
criterion = nn.CrossEntropyLoss(reduction='none').to(DEVICE)

optimizer = optim.SGD(model.parameters(), lr=20., weight_decay=1e-6)

fit(model, criterion, optimizer, train_iter, epochs_count=300, unk_idx=unk_idx, pad_idx=pad_idx, val_iter=test_iter)

[1 / 300] Train: Loss = 4.86832, PPX = 130.10: 100%|██████████| 677/677 [00:04<00:00, 147.99it/s]
[1 / 300]   Val: Loss = 4.46477, PPX = 86.90: 100%|██████████| 19/19 [00:00<00:00, 156.19it/s]
[2 / 300] Train: Loss = 4.23776, PPX = 69.25:   3%|▎         | 23/677 [00:00<00:05, 123.79it/s]


вомневконцеколхозивдеревне\nаяневтобоюине\nявмоейегоивтомс\nлица\n</s>


[2 / 300] Train: Loss = 4.36090, PPX = 78.33: 100%|██████████| 677/677 [00:04<00:00, 153.49it/s]
[2 / 300]   Val: Loss = 4.34162, PPX = 76.83: 100%|██████████| 19/19 [00:00<00:00, 158.17it/s]
[3 / 300] Train: Loss = 4.36067, PPX = 78.31:   3%|▎         | 22/677 [00:00<00:05, 122.87it/s]


ненадояестьтамвдетстве\nакактонехватаетаянемной\nновсёжевынеможетбыло\nия\n</s>


[3 / 300] Train: Loss = 4.18086, PPX = 65.42: 100%|██████████| 677/677 [00:04<00:00, 150.29it/s]
[3 / 300]   Val: Loss = 4.19850, PPX = 66.59: 100%|██████████| 19/19 [00:00<00:00, 156.23it/s]
[4 / 300] Train: Loss = 4.05257, PPX = 57.55:   3%|▎         | 23/677 [00:00<00:05, 116.69it/s]


ятаклюблютебянепротив\nичтобскемнинаваснебог\nпотомнакухнюяпополю\nивлес\n</s>


[4 / 300] Train: Loss = 4.03099, PPX = 56.32: 100%|██████████| 677/677 [00:04<00:00, 148.89it/s]
[4 / 300]   Val: Loss = 4.14998, PPX = 63.43: 100%|██████████| 19/19 [00:00<00:00, 150.89it/s]
[5 / 300] Train: Loss = 3.79760, PPX = 44.59:   3%|▎         | 23/677 [00:00<00:05, 119.73it/s]


уваспришлапроблемы\nвеготоннеляясама\nноятоэтотдлянепомню\nнете\n</s>


[5 / 300] Train: Loss = 3.91028, PPX = 49.91: 100%|██████████| 677/677 [00:04<00:00, 149.07it/s]
[5 / 300]   Val: Loss = 4.13410, PPX = 62.43: 100%|██████████| 19/19 [00:00<00:00, 152.64it/s]
[6 / 300] Train: Loss = 3.83194, PPX = 46.15:   4%|▎         | 24/677 [00:00<00:05, 127.26it/s]


царьвдушеснестобою\nянемогутебялюблю\nаялюблютебясработы\nиты\n</s>


[6 / 300] Train: Loss = 3.88976, PPX = 48.90: 100%|██████████| 677/677 [00:04<00:00, 151.92it/s]
[6 / 300]   Val: Loss = 4.13795, PPX = 62.67: 100%|██████████| 19/19 [00:00<00:00, 159.29it/s]
[7 / 300] Train: Loss = 3.66248, PPX = 38.96:   4%|▎         | 24/677 [00:00<00:05, 125.67it/s]

Optimizer lr = 5

вконцеконцовнепонимаю\nинеткаквсамомделев\nневсмыслеауваслишьтолько\nаврот\n</s>


[7 / 300] Train: Loss = 3.56639, PPX = 35.39: 100%|██████████| 677/677 [00:04<00:00, 147.29it/s]
[7 / 300]   Val: Loss = 4.09394, PPX = 59.98: 100%|██████████| 19/19 [00:00<00:00, 148.32it/s]
[8 / 300] Train: Loss = 3.34252, PPX = 28.29:   3%|▎         | 22/677 [00:00<00:05, 115.31it/s]


ивандосталдомойсулыбкой\nитолькосталохорошо\nдатыжуженеможетпросто\nия\n</s>


[8 / 300] Train: Loss = 3.49068, PPX = 32.81: 100%|██████████| 677/677 [00:04<00:00, 148.64it/s]
[8 / 300]   Val: Loss = 4.10514, PPX = 60.65: 100%|██████████| 19/19 [00:00<00:00, 162.53it/s]
[9 / 300] Train: Loss = 3.42868, PPX = 30.84:   4%|▎         | 24/677 [00:00<00:05, 123.32it/s]

Optimizer lr = 1.25

олегподумалкаквподарок\nивдетствебыловсёвбреду\nатамнанейвдеревневпиво\nнасвет\n</s>


[9 / 300] Train: Loss = 3.37976, PPX = 29.36: 100%|██████████| 677/677 [00:04<00:00, 151.89it/s]
[9 / 300]   Val: Loss = 4.11189, PPX = 61.06: 100%|██████████| 19/19 [00:00<00:00, 154.46it/s]
[10 / 300] Train: Loss = 3.49465, PPX = 32.94:   4%|▎         | 24/677 [00:00<00:05, 127.56it/s]

Optimizer lr = 0.3125

янаписалтебеприслали\nивсердцевстарыймиг\nавамиправданевначале\nхотьраз\n</s>


[10 / 300] Train: Loss = 3.34092, PPX = 28.25: 100%|██████████| 677/677 [00:04<00:00, 153.88it/s]
[10 / 300]   Val: Loss = 4.11354, PPX = 61.16: 100%|██████████| 19/19 [00:00<00:00, 150.17it/s]
[11 / 300] Train: Loss = 3.44249, PPX = 31.26:   3%|▎         | 23/677 [00:00<00:05, 123.80it/s]

Optimizer lr = 0.078125

неспитмнепоказалосьчтоты\nнезналалияваснавас\nавымнесовестьневла\nнессу\n</s>


[11 / 300] Train: Loss = 3.33026, PPX = 27.95: 100%|██████████| 677/677 [00:04<00:00, 150.92it/s]
[11 / 300]   Val: Loss = 4.11545, PPX = 61.28: 100%|██████████| 19/19 [00:00<00:00, 156.65it/s]
[12 / 300] Train: Loss = 3.39427, PPX = 29.79:   3%|▎         | 23/677 [00:00<00:05, 124.89it/s]

Optimizer lr = 0.0195312

какбылотакнепротивженщин\nкакмыстобойнаполныйроль\nиябнамненеслишкомдаже\nнокак\n</s>


[12 / 300] Train: Loss = 3.32771, PPX = 27.87: 100%|██████████| 677/677 [00:04<00:00, 152.03it/s]
[12 / 300]   Val: Loss = 4.11526, PPX = 61.27: 100%|██████████| 19/19 [00:00<00:00, 156.00it/s]
[13 / 300] Train: Loss = 3.35028, PPX = 28.51:   3%|▎         | 23/677 [00:00<00:05, 118.34it/s]

Optimizer lr = 0.00488281

янемогупонятьреальность\nчтонемогубымнепомочь\nнонемогуинепокрайнейи\nместах\n</s>


[13 / 300] Train: Loss = 3.32684, PPX = 27.85: 100%|██████████| 677/677 [00:04<00:00, 155.07it/s]
[13 / 300]   Val: Loss = 4.11576, PPX = 61.30: 100%|██████████| 19/19 [00:00<00:00, 170.09it/s]
[14 / 300] Train: Loss = 3.41311, PPX = 30.36:   4%|▎         | 24/677 [00:00<00:05, 124.31it/s]

Optimizer lr = 0.0012207

олегневерилинаденьрожденья\nмужчинывнебонюия\nавотнаголовевпостели\nилью\n</s>


[14 / 300] Train: Loss = 3.32675, PPX = 27.85: 100%|██████████| 677/677 [00:04<00:00, 157.30it/s]
[14 / 300]   Val: Loss = 4.11491, PPX = 61.25: 100%|██████████| 19/19 [00:00<00:00, 166.57it/s]
[15 / 300] Train: Loss = 3.51649, PPX = 33.67:   4%|▎         | 24/677 [00:00<00:05, 125.87it/s]

Optimizer lr = 0.000305176

янелюблютебядосмерти\nпрошувасввасяввасо\nхотябнанейтакможно\nваду\n</s>


[15 / 300] Train: Loss = 3.32660, PPX = 27.84: 100%|██████████| 677/677 [00:04<00:00, 150.61it/s]
[15 / 300]   Val: Loss = 4.11514, PPX = 61.26: 100%|██████████| 19/19 [00:00<00:00, 151.00it/s]
[16 / 300] Train: Loss = 3.33151, PPX = 27.98:   4%|▎         | 24/677 [00:00<00:05, 126.10it/s]

Optimizer lr = 7.62939e-05

незнаючтотакоезоя\nненадонасвконцетоннеля\nпростинотысказалавыбор\nменя\n</s>


[16 / 300] Train: Loss = 3.32671, PPX = 27.85: 100%|██████████| 677/677 [00:04<00:00, 150.44it/s]
[16 / 300]   Val: Loss = 4.11615, PPX = 61.32: 100%|██████████| 19/19 [00:00<00:00, 155.29it/s]
[17 / 300] Train: Loss = 3.38646, PPX = 29.56:   3%|▎         | 23/677 [00:00<00:05, 119.48it/s]

Optimizer lr = 1.90735e-05

ачтовызнаетемнеэто\nчтомненеговоритьзагод\nянедляэтогоотваших\nменя\n</s>


[17 / 300] Train: Loss = 3.32674, PPX = 27.85: 100%|██████████| 677/677 [00:04<00:00, 150.76it/s]
[17 / 300]   Val: Loss = 4.11412, PPX = 61.20: 100%|██████████| 19/19 [00:00<00:00, 160.26it/s]
[18 / 300] Train: Loss = 3.45071, PPX = 31.52:   4%|▎         | 25/677 [00:00<00:05, 125.13it/s]

Optimizer lr = 4.76837e-06

азаокномпришлиинету\nненадомноювидиран\nинаграницегрудине\nменя\n</s>


[18 / 300] Train: Loss = 3.32650, PPX = 27.84: 100%|██████████| 677/677 [00:04<00:00, 150.40it/s]
[18 / 300]   Val: Loss = 4.11618, PPX = 61.32: 100%|██████████| 19/19 [00:00<00:00, 159.35it/s]
[19 / 300] Train: Loss = 3.38802, PPX = 29.61:   4%|▎         | 24/677 [00:00<00:05, 123.43it/s]

Optimizer lr = 1.19209e-06

влесустобоюнадиване\nяихнепомнюинебыть\nинагрудикогдаты\nнестой\n</s>


[19 / 300] Train: Loss = 3.32647, PPX = 27.84: 100%|██████████| 677/677 [00:04<00:00, 151.86it/s]
[19 / 300]   Val: Loss = 4.11505, PPX = 61.26: 100%|██████████| 19/19 [00:00<00:00, 153.59it/s]
[20 / 300] Train: Loss = 3.34706, PPX = 28.42:   3%|▎         | 23/677 [00:00<00:05, 118.81it/s]

Optimizer lr = 2.98023e-07

олегсмотрелвокнококсане\nитолькотакжеснимкней\nаможетбытьещёипросто\nнебыть\n</s>


[20 / 300] Train: Loss = 3.32660, PPX = 27.84: 100%|██████████| 677/677 [00:04<00:00, 156.10it/s]
[20 / 300]   Val: Loss = 4.11578, PPX = 61.30: 100%|██████████| 19/19 [00:00<00:00, 163.35it/s]
[21 / 300] Train: Loss = 3.36541, PPX = 28.95:   3%|▎         | 23/677 [00:00<00:05, 119.44it/s]

Optimizer lr = 7.45058e-08

янелюблюсказалевгений\nнепотомучтовжизнинет\nаянеточтооннемёртва\nвответ\n</s>


[21 / 300] Train: Loss = 3.32654, PPX = 27.84: 100%|██████████| 677/677 [00:04<00:00, 149.09it/s]
[21 / 300]   Val: Loss = 4.11691, PPX = 61.37: 100%|██████████| 19/19 [00:00<00:00, 152.75it/s]
[22 / 300] Train: Loss = 3.35301, PPX = 28.59:   3%|▎         | 23/677 [00:00<00:05, 119.93it/s]

Optimizer lr = 1.86265e-08

наэтотгодприходятвдоме\nмирназакатогромныйшок\nивнебополучилпоморде\nневтой\n</s>


[22 / 300] Train: Loss = 3.32679, PPX = 27.85: 100%|██████████| 677/677 [00:04<00:00, 150.06it/s]
[22 / 300]   Val: Loss = 4.11486, PPX = 61.24: 100%|██████████| 19/19 [00:00<00:00, 157.93it/s]
[23 / 300] Train: Loss = 3.20063, PPX = 24.55:   3%|▎         | 23/677 [00:00<00:05, 123.79it/s]

Optimizer lr = 4.65661e-09

явдетствебылсказаладагде\nнемойвменяхрустальваду\nновсеравноябылстобою\nзимы\n</s>


[23 / 300] Train: Loss = 3.32668, PPX = 27.85: 100%|██████████| 677/677 [00:04<00:00, 150.03it/s]
[23 / 300]   Val: Loss = 4.11463, PPX = 61.23: 100%|██████████| 19/19 [00:00<00:00, 150.91it/s]
[24 / 300] Train: Loss = 3.22333, PPX = 25.11:   3%|▎         | 23/677 [00:00<00:05, 124.25it/s]

Optimizer lr = 1.16415e-09

давайневклубсказалевгений\nаянезнаюкаквотвам\nмыкакбымненасамомделе\nневив\n</s>


[24 / 300] Train: Loss = 3.32665, PPX = 27.84: 100%|██████████| 677/677 [00:04<00:00, 149.30it/s]
[24 / 300]   Val: Loss = 4.11641, PPX = 61.34: 100%|██████████| 19/19 [00:00<00:00, 151.75it/s]
[25 / 300] Train: Loss = 3.31883, PPX = 27.63:   3%|▎         | 22/677 [00:00<00:05, 122.78it/s]

Optimizer lr = 2.91038e-10

намквамродилсятовдушу\nтоябтебенаэтутосутра\nаэтобылсовсемнелюбишь\nкакмог\n</s>


[25 / 300] Train: Loss = 3.32634, PPX = 27.84: 100%|██████████| 677/677 [00:04<00:00, 150.78it/s]
[25 / 300]   Val: Loss = 4.11620, PPX = 61.33: 100%|██████████| 19/19 [00:00<00:00, 157.22it/s]
[26 / 300] Train: Loss = 3.29467, PPX = 26.97:   3%|▎         | 23/677 [00:00<00:05, 118.19it/s]

Optimizer lr = 7.27596e-11

мывсеумрёмкакнистранно\nневидишьнинашуткулет\nаточтовнебеправдатоже\nвчера\n</s>


[26 / 300] Train: Loss = 3.32688, PPX = 27.85: 100%|██████████| 677/677 [00:04<00:00, 149.94it/s]
[26 / 300]   Val: Loss = 4.11545, PPX = 61.28: 100%|██████████| 19/19 [00:00<00:00, 155.94it/s]
[27 / 300] Train: Loss = 3.34686, PPX = 28.41:   3%|▎         | 23/677 [00:00<00:05, 122.70it/s]

Optimizer lr = 1.81899e-11

актоунасбываетпарень\nипеснинетувтишине\nамнетовроткакинепоколено\nавто\n</s>


[27 / 300] Train: Loss = 3.32660, PPX = 27.84: 100%|██████████| 677/677 [00:04<00:00, 151.03it/s]
[27 / 300]   Val: Loss = 4.11528, PPX = 61.27: 100%|██████████| 19/19 [00:00<00:00, 153.64it/s]
[28 / 300] Train: Loss = 3.40197, PPX = 30.02:   3%|▎         | 23/677 [00:00<00:05, 119.43it/s]

Optimizer lr = 4.54747e-12

яввасвдушевлюблёни\nяивомнеуснул\nивпрошлойжизнибылвпорядке\nлуне\n</s>


[28 / 300] Train: Loss = 3.32655, PPX = 27.84: 100%|██████████| 677/677 [00:04<00:00, 150.07it/s]
[28 / 300]   Val: Loss = 4.11587, PPX = 61.31: 100%|██████████| 19/19 [00:00<00:00, 158.01it/s]
[29 / 300] Train: Loss = 3.28806, PPX = 26.79:   4%|▎         | 24/677 [00:00<00:05, 125.19it/s]

Optimizer lr = 1.13687e-12

всвязисутрадороги\nзамнойиговоритнето\nвовремямиратихошепчет\nиплачь\n</s>


[29 / 300] Train: Loss = 3.32693, PPX = 27.85: 100%|██████████| 677/677 [00:04<00:00, 155.28it/s]
[29 / 300]   Val: Loss = 4.11705, PPX = 61.38: 100%|██████████| 19/19 [00:00<00:00, 166.64it/s]
[30 / 300] Train: Loss = 3.30083, PPX = 27.14:   4%|▎         | 24/677 [00:00<00:05, 125.17it/s]

Optimizer lr = 2.84217e-13

наденьсвятоговалентина\nянемогуаянерад\nнукакужтыихватит\nнетак\n</s>


[30 / 300] Train: Loss = 3.34989, PPX = 28.50:   6%|▌         | 41/677 [00:00<00:04, 144.32it/s]


KeyboardInterrupt: 

**Задание** Добавьте маскинг `<unk>` токенов при тренировке модели.

## Улучшаем модель

### Tying input and output embeddings

В модели есть два эмбеддинга - входной и выходной. Красивая и полезная в жизни идея - учить только одну матрицу, расшаренную между ними: [Using the Output Embedding to Improve Language Models](http://www.aclweb.org/anthology/E17-2025).

От идеи одни плюсы: получается намного меньше обучаемых параметров и при этом достаточно заметно более высокое качество.

**Задание** Реализуйте это. Достаточно написать что-то типа этого в конструкторе:

`self._out_layer.weight = self._emb.weight`

In [21]:
class LMModelv2(nn.Module):
    def __init__(self, vocab_size, emb_dim=256, lstm_hidden_dim=256, num_layers=1):
        super().__init__()

        self._emb = nn.Embedding(vocab_size, emb_dim)
        self._rnn = nn.LSTM(input_size=emb_dim, hidden_size=lstm_hidden_dim)
        
        self._out_layer = nn.Linear(lstm_hidden_dim, vocab_size)
        
        #self._out_layer.weight = self._emb.weight
        self._init_weights()
        self._out_layer.weight = self._emb.weight

    def _init_weights(self, init_range=0.1):
        self._emb.weight.data.uniform_(-init_range, init_range)
        self._out_layer.bias.data.zero_()
        self._out_layer.weight.data.uniform_(-init_range, init_range)

    def forward(self, inputs, hidden=None):
        x = self._emb(inputs)
        x, hidden = self._rnn(x, hidden)
        x = self._out_layer(x)
        return x, hidden

In [22]:
model = LMModelv2(vocab_size=len(train_iter.dataset.fields['text'].vocab)).to(DEVICE)

pad_idx = train_iter.dataset.fields['text'].vocab.stoi['<pad>']
unk_idx = train_iter.dataset.fields['text'].vocab.stoi['<unk>']
criterion = nn.CrossEntropyLoss(reduction='none').to(DEVICE)

optimizer = optim.SGD(model.parameters(), lr=20., weight_decay=1e-6)

fit(model, criterion, optimizer, train_iter, epochs_count=300, unk_idx=unk_idx, pad_idx=pad_idx, val_iter=test_iter)

[1 / 300] Train: Loss = 4.98509, PPX = 146.22: 100%|██████████| 677/677 [00:04<00:00, 158.88it/s]
[1 / 300]   Val: Loss = 4.43801, PPX = 84.61: 100%|██████████| 19/19 [00:00<00:00, 168.97it/s]
[2 / 300] Train: Loss = 4.38860, PPX = 80.53:   4%|▎         | 25/677 [00:00<00:04, 132.14it/s]


явсвоёмденьвночи\nанамиреянебуду\nсегоднябывгоститотамчтото\nвметро\n</s>


[2 / 300] Train: Loss = 4.30738, PPX = 74.25: 100%|██████████| 677/677 [00:04<00:00, 158.87it/s]
[2 / 300]   Val: Loss = 4.24582, PPX = 69.81: 100%|██████████| 19/19 [00:00<00:00, 168.29it/s]
[3 / 300] Train: Loss = 3.90480, PPX = 49.64:   4%|▎         | 24/677 [00:00<00:05, 126.29it/s]


незнаюмамачтоненадо\nвсёяпонимчтонини\nивтрёхбредунасвете\nикаждый\n</s>


[3 / 300] Train: Loss = 4.10161, PPX = 60.44: 100%|██████████| 677/677 [00:04<00:00, 158.93it/s]
[3 / 300]   Val: Loss = 4.16132, PPX = 64.16: 100%|██████████| 19/19 [00:00<00:00, 149.03it/s]
[4 / 300] Train: Loss = 3.92239, PPX = 50.52:   4%|▎         | 24/677 [00:00<00:05, 123.92it/s]


сутравтуманеинебуду\nнасветунастуттамтозамуж\nаяснимвнихкнейпоморде\nсутра\n</s>


[4 / 300] Train: Loss = 3.95735, PPX = 52.32: 100%|██████████| 677/677 [00:04<00:00, 153.41it/s]
[4 / 300]   Val: Loss = 4.11471, PPX = 61.23: 100%|██████████| 19/19 [00:00<00:00, 168.13it/s]
[5 / 300] Train: Loss = 3.73614, PPX = 41.94:   4%|▎         | 25/677 [00:00<00:05, 129.10it/s]


втишисветауменяподутро\nолегвпрудуизвсехгода\nактотамтамещёипросто\nухсын\n</s>


[5 / 300] Train: Loss = 4.02100, PPX = 55.76: 100%|██████████| 677/677 [00:04<00:00, 160.47it/s]
[5 / 300]   Val: Loss = 4.16859, PPX = 64.62: 100%|██████████| 19/19 [00:00<00:00, 170.80it/s]
[6 / 300] Train: Loss = 3.71982, PPX = 41.26:   4%|▍         | 26/677 [00:00<00:05, 126.69it/s]

Optimizer lr = 5

вчеранапляжеуолега\nисамыйгодомсталодна\nаяхочуинехочуя\nяне\n</s>


[6 / 300] Train: Loss = 3.66725, PPX = 39.14: 100%|██████████| 677/677 [00:04<00:00, 158.74it/s]
[6 / 300]   Val: Loss = 4.07586, PPX = 58.90: 100%|██████████| 19/19 [00:00<00:00, 155.32it/s]
[7 / 300] Train: Loss = 3.65198, PPX = 38.55:   3%|▎         | 23/677 [00:00<00:05, 126.29it/s]


яизсебявупорсцветами\nиестьиниколайнетрожь\n-ненадобмнебыбыбыно\nнета\n</s>


[7 / 300] Train: Loss = 3.57539, PPX = 35.71: 100%|██████████| 677/677 [00:04<00:00, 153.50it/s]
[7 / 300]   Val: Loss = 4.07891, PPX = 59.08: 100%|██████████| 19/19 [00:00<00:00, 155.22it/s]
[8 / 300] Train: Loss = 3.47446, PPX = 32.28:   4%|▎         | 24/677 [00:00<00:05, 126.77it/s]

Optimizer lr = 1.25

янелюблювассвасдухом\nнепотомучтоглебдурак\nвитогечушьчтоонвпространстве\nпетра\n</s>


[8 / 300] Train: Loss = 3.46268, PPX = 31.90: 100%|██████████| 677/677 [00:04<00:00, 155.16it/s]
[8 / 300]   Val: Loss = 4.08058, PPX = 59.18: 100%|██████████| 19/19 [00:00<00:00, 148.85it/s]
[9 / 300] Train: Loss = 3.49627, PPX = 32.99:   3%|▎         | 23/677 [00:00<00:05, 122.78it/s]

Optimizer lr = 0.3125

янелюблювасзапивом\nичтобыдажебезсоюз\nавытутвнебескрикомскриком\nисним\n</s>


[9 / 300] Train: Loss = 3.42371, PPX = 30.68: 100%|██████████| 677/677 [00:04<00:00, 162.83it/s]
[9 / 300]   Val: Loss = 4.07984, PPX = 59.14: 100%|██████████| 19/19 [00:00<00:00, 154.42it/s]
[10 / 300] Train: Loss = 3.44546, PPX = 31.36:   4%|▎         | 24/677 [00:00<00:05, 124.07it/s]

Optimizer lr = 0.078125

чеговытутмнешлисбогом\nядажепотебенебог\nтоятовцеломнепроходит\nнепрям\n</s>


[10 / 300] Train: Loss = 3.41327, PPX = 30.36: 100%|██████████| 677/677 [00:04<00:00, 154.32it/s]
[10 / 300]   Val: Loss = 4.08120, PPX = 59.22: 100%|██████████| 19/19 [00:00<00:00, 148.43it/s]
[11 / 300] Train: Loss = 3.45051, PPX = 31.52:   4%|▎         | 24/677 [00:00<00:05, 126.13it/s]

Optimizer lr = 0.0195312

былтихийвечерупоэта\nвпостелисотсутствиемтруб\nчтовэтомразитихолезет\nивдуш\n</s>


[11 / 300] Train: Loss = 3.41025, PPX = 30.27: 100%|██████████| 677/677 [00:04<00:00, 155.81it/s]
[11 / 300]   Val: Loss = 4.08304, PPX = 59.33: 100%|██████████| 19/19 [00:00<00:00, 153.54it/s]
[12 / 300] Train: Loss = 3.55009, PPX = 34.82:   4%|▎         | 25/677 [00:00<00:05, 127.31it/s]

Optimizer lr = 0.00488281

ясдетстваднёмипесенвкрасном\nапослевассудьбы\nинаберегунепара\nавём\n</s>


[12 / 300] Train: Loss = 3.41000, PPX = 30.27: 100%|██████████| 677/677 [00:04<00:00, 155.32it/s]
[12 / 300]   Val: Loss = 4.08277, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 158.81it/s]
[13 / 300] Train: Loss = 3.43530, PPX = 31.04:   3%|▎         | 22/677 [00:00<00:05, 119.84it/s]

Optimizer lr = 0.0012207

мынерабывглазанебудет\nтыкакжечортменякоза\nновынетакужмноговжизни\nнесъел\n</s>


[13 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.49it/s]
[13 / 300]   Val: Loss = 4.08230, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 150.51it/s]
[14 / 300] Train: Loss = 3.35575, PPX = 28.67:   3%|▎         | 22/677 [00:00<00:06, 107.05it/s]

Optimizer lr = 0.000305176

янеумеютакпрекрасна\nивэтомнетеёнегость\nтыпростодолженбытькакдура\nибред\n</s>


[14 / 300] Train: Loss = 3.40958, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 152.72it/s]
[14 / 300]   Val: Loss = 4.08197, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 156.52it/s]
[15 / 300] Train: Loss = 3.46920, PPX = 32.11:   3%|▎         | 23/677 [00:00<00:05, 122.50it/s]

Optimizer lr = 7.62939e-05

явваслюблютебязатридцать\nаялюблютебякаквнём\nатонанейбылбылоине\nневерь\n</s>


[15 / 300] Train: Loss = 3.40987, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.45it/s]
[15 / 300]   Val: Loss = 4.08218, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 156.89it/s]
[16 / 300] Train: Loss = 3.46925, PPX = 32.11:   4%|▎         | 24/677 [00:00<00:05, 125.91it/s]

Optimizer lr = 1.90735e-05

счеготывэтойжизнибылвночи\nнаточтобылеёнето\nавывкредититакужбольно\nтоя\n</s>


[16 / 300] Train: Loss = 3.40965, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 157.34it/s]
[16 / 300]   Val: Loss = 4.08295, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 152.90it/s]
[17 / 300] Train: Loss = 3.42955, PPX = 30.86:   3%|▎         | 22/677 [00:00<00:05, 123.03it/s]

Optimizer lr = 4.76837e-06

ниянивчёмпоэтимнето\nнезналчтодумаюотом\nчтоянезналчтоостаётся\nнаты\n</s>


[17 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.80it/s]
[17 / 300]   Val: Loss = 4.08211, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 155.01it/s]
[18 / 300] Train: Loss = 3.52533, PPX = 33.97:   3%|▎         | 23/677 [00:00<00:05, 123.97it/s]

Optimizer lr = 1.19209e-06

вокносработыкаждыйдесять\nияединственныйитру\nитолькочтоонимненужен\nамуж\n</s>


[18 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 157.47it/s]
[18 / 300]   Val: Loss = 4.08354, PPX = 59.36: 100%|██████████| 19/19 [00:00<00:00, 159.98it/s]
[19 / 300] Train: Loss = 3.49720, PPX = 33.02:   4%|▎         | 24/677 [00:00<00:05, 127.63it/s]

Optimizer lr = 2.98023e-07

чтозначитвэтоммиреможно\nспросиляпьюикактаков\nаяужеинежелает\nавнем\n</s>


[19 / 300] Train: Loss = 3.40949, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 156.60it/s]
[19 / 300]   Val: Loss = 4.08268, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 151.51it/s]
[20 / 300] Train: Loss = 3.19376, PPX = 24.38:   4%|▎         | 24/677 [00:00<00:05, 123.60it/s]

Optimizer lr = 7.45058e-08

янехочупоэтуводу\nневсилахливэтомневчом\nвотвотужехотьнаработу\nия\n</s>


[20 / 300] Train: Loss = 3.40982, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.12it/s]
[20 / 300]   Val: Loss = 4.08100, PPX = 59.20: 100%|██████████| 19/19 [00:00<00:00, 150.30it/s]
[21 / 300] Train: Loss = 3.39221, PPX = 29.73:   4%|▎         | 24/677 [00:00<00:05, 125.69it/s]

Optimizer lr = 1.86265e-08

втвоихрукенавсётовшляпе\nиэтоткрестпотомвесьдень\nанаменяниподнестанет\nнинет\n</s>


[21 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 151.86it/s]
[21 / 300]   Val: Loss = 4.08273, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 152.34it/s]
[22 / 300] Train: Loss = 3.44157, PPX = 31.24:   4%|▎         | 25/677 [00:00<00:05, 128.58it/s]

Optimizer lr = 4.65661e-09

напляжетихоопустился\nвглазахуженщинынахрам\nинехватаетмнессобой\nнева\n</s>


[22 / 300] Train: Loss = 3.40960, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.81it/s]
[22 / 300]   Val: Loss = 4.08212, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 169.83it/s]
[23 / 300] Train: Loss = 3.47803, PPX = 32.40:   4%|▍         | 26/677 [00:00<00:04, 134.60it/s]

Optimizer lr = 1.16415e-09

ядумалчтотамзаздоровье\nипоутрамвсегдасемья\nнонехватаетисместа\nсума\n</s>


[23 / 300] Train: Loss = 3.40974, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.54it/s]
[23 / 300]   Val: Loss = 4.08259, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 152.10it/s]
[24 / 300] Train: Loss = 3.31789, PPX = 27.60:   4%|▎         | 24/677 [00:00<00:05, 124.39it/s]

Optimizer lr = 2.91038e-10

янехочубытьзанами\nневиднобольшечемнето\nаточтоэтобылпоследний\nзачем\n</s>


[24 / 300] Train: Loss = 3.40978, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.35it/s]
[24 / 300]   Val: Loss = 4.08246, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 156.08it/s]
[25 / 300] Train: Loss = 3.31423, PPX = 27.50:   4%|▎         | 24/677 [00:00<00:05, 126.63it/s]

Optimizer lr = 7.27596e-11

вкостюмеотношенийнапороге\nвдругвдругнаголовевлесу\nатонеможетбытькакраньше\nнета\n</s>


[25 / 300] Train: Loss = 3.40957, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.23it/s]
[25 / 300]   Val: Loss = 4.08206, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 148.72it/s]
[26 / 300] Train: Loss = 3.46126, PPX = 31.86:   3%|▎         | 23/677 [00:00<00:05, 124.65it/s]

Optimizer lr = 1.81899e-11

увасвадуестьгдетовэтом\nсказаламамаэтобольно\nимнестобоюделатьбвночьль\nнете\n</s>


[26 / 300] Train: Loss = 3.40960, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 161.81it/s]
[26 / 300]   Val: Loss = 4.08350, PPX = 59.35: 100%|██████████| 19/19 [00:00<00:00, 151.82it/s]
[27 / 300] Train: Loss = 3.40608, PPX = 30.15:   4%|▎         | 25/677 [00:00<00:04, 135.79it/s]

Optimizer lr = 4.54747e-12

янаночьпостепенноколя\nивнёмотэтогонестал\nатакжекакбыянепомню\nничей\n</s>


[27 / 300] Train: Loss = 3.40932, PPX = 30.24: 100%|██████████| 677/677 [00:04<00:00, 154.48it/s]
[27 / 300]   Val: Loss = 4.08187, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 154.86it/s]
[28 / 300] Train: Loss = 3.30222, PPX = 27.17:   3%|▎         | 23/677 [00:00<00:05, 126.28it/s]

Optimizer lr = 1.13687e-12

онвсёбезнаснеговорили\nнавсехтехктобылводном\nаможетбытьещёнеможет\nизъян\n</s>


[28 / 300] Train: Loss = 3.40970, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.46it/s]
[28 / 300]   Val: Loss = 4.08217, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 153.90it/s]
[29 / 300] Train: Loss = 3.33672, PPX = 28.13:   4%|▎         | 24/677 [00:00<00:05, 127.59it/s]

Optimizer lr = 2.84217e-13

мысвамикакнибудьвприхожей\nявтомчтовнёмвсёпьянивней\nнукакжетыменянелюбишь\nкутру\n</s>


[29 / 300] Train: Loss = 3.40967, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.14it/s]
[29 / 300]   Val: Loss = 4.08179, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 153.88it/s]
[30 / 300] Train: Loss = 3.20728, PPX = 24.71:   4%|▎         | 25/677 [00:00<00:05, 126.13it/s]

Optimizer lr = 7.10543e-14

янелюблюваспостоянно\nненадобытьбыладавно\nатакжедниивовсене\nнеплачь\n</s>


[30 / 300] Train: Loss = 3.40967, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 152.60it/s]
[30 / 300]   Val: Loss = 4.08154, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 155.30it/s]
[31 / 300] Train: Loss = 3.35783, PPX = 28.73:   4%|▎         | 25/677 [00:00<00:05, 124.68it/s]

Optimizer lr = 1.77636e-14

когдаявнашколхозприехал\nвнутрименябольшойипуст\nауменянаэтотслучай\nвночь\n</s>


[31 / 300] Train: Loss = 3.40941, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 161.12it/s]
[31 / 300]   Val: Loss = 4.08075, PPX = 59.19: 100%|██████████| 19/19 [00:00<00:00, 166.80it/s]
[32 / 300] Train: Loss = 3.30739, PPX = 27.31:   4%|▎         | 25/677 [00:00<00:05, 128.46it/s]

Optimizer lr = 4.44089e-15

зухрасбукетомнаработу\nивэтомнетсовсехсторон\nнотыирыбувэтомтапки\nвосмысле\n</s>


[32 / 300] Train: Loss = 3.40972, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 157.96it/s]
[32 / 300]   Val: Loss = 4.08270, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 157.56it/s]
[33 / 300] Train: Loss = 3.41039, PPX = 30.28:   3%|▎         | 23/677 [00:00<00:05, 119.65it/s]

Optimizer lr = 1.11022e-15

враювидалмывсемывместе\nянемогупонятьвамгод\nонможетбытьинехватает\nвпакет\n</s>


[33 / 300] Train: Loss = 3.40963, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 157.85it/s]
[33 / 300]   Val: Loss = 4.08207, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 164.84it/s]
[34 / 300] Train: Loss = 3.35462, PPX = 28.63:   4%|▎         | 25/677 [00:00<00:04, 132.56it/s]

Optimizer lr = 2.77556e-16

начтотожуткотамприснилось\nитымнеговоришькакзнать\nамыстобойвглазахнистранно\nнивчём\n</s>


[34 / 300] Train: Loss = 3.40975, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 157.10it/s]
[34 / 300]   Val: Loss = 4.08204, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 152.70it/s]
[35 / 300] Train: Loss = 3.26702, PPX = 26.23:   4%|▎         | 24/677 [00:00<00:05, 126.85it/s]

Optimizer lr = 6.93889e-17

нучтожтызнаешьоприроде\nчтовэтомбылсовсемнерад\nионвсебяилишьпожизни\nвконе\n</s>


[35 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.48it/s]
[35 / 300]   Val: Loss = 4.08195, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 156.75it/s]
[36 / 300] Train: Loss = 3.29997, PPX = 27.11:   3%|▎         | 23/677 [00:00<00:05, 126.03it/s]

Optimizer lr = 1.73472e-17

неточтобоченьчастовжизни\nнепотомучтоянебыл\nтыбснейлитывтеплеивбелом\nнея\n</s>


[36 / 300] Train: Loss = 3.40954, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.26it/s]
[36 / 300]   Val: Loss = 4.08176, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 157.49it/s]
[37 / 300] Train: Loss = 3.37480, PPX = 29.22:   4%|▎         | 24/677 [00:00<00:05, 124.85it/s]

Optimizer lr = 4.33681e-18

накухнескемтостранно\nивдомедажекнимб\nлишьяженанеёнотоже\nнераз\n</s>


[37 / 300] Train: Loss = 3.40964, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.61it/s]
[37 / 300]   Val: Loss = 4.08271, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 160.36it/s]
[38 / 300] Train: Loss = 3.32142, PPX = 27.70:   4%|▎         | 25/677 [00:00<00:04, 132.43it/s]

Optimizer lr = 1.0842e-18

влесунадняхолегсцветами\nнанеберекииротвпальто\nнанольияиягдесоздал\nвквадрат\n</s>


[38 / 300] Train: Loss = 3.40947, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 161.86it/s]
[38 / 300]   Val: Loss = 4.08255, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 153.77it/s]
[39 / 300] Train: Loss = 3.57475, PPX = 35.69:   3%|▎         | 23/677 [00:00<00:05, 125.48it/s]

Optimizer lr = 2.71051e-19

мывсебнарынкетакнаместе\nчтонавойненетакужплох\nматьлишьнастолекнейскриком\nивпуть\n</s>


[39 / 300] Train: Loss = 3.40972, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.06it/s]
[39 / 300]   Val: Loss = 4.08146, PPX = 59.23: 100%|██████████| 19/19 [00:00<00:00, 157.10it/s]
[40 / 300] Train: Loss = 3.49006, PPX = 32.79:   4%|▎         | 24/677 [00:00<00:05, 126.66it/s]

Optimizer lr = 6.77626e-20

авотивсёжечтотоестьли\nненадобылоинето\nчтожтытакэтовсёкаквжизни\nспетром\n</s>


[40 / 300] Train: Loss = 3.40955, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.95it/s]
[40 / 300]   Val: Loss = 4.08288, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 156.45it/s]
[41 / 300] Train: Loss = 3.49503, PPX = 32.95:   3%|▎         | 23/677 [00:00<00:05, 117.57it/s]

Optimizer lr = 1.69407e-20

ясдетстваваснепонимаю\nновотивсёневнейнивчём\nнивнихниестьнипосленина\nавчём\n</s>


[41 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.88it/s]
[41 / 300]   Val: Loss = 4.08202, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 152.15it/s]
[42 / 300] Train: Loss = 3.36896, PPX = 29.05:   4%|▎         | 24/677 [00:00<00:05, 123.24it/s]

Optimizer lr = 4.23516e-21

воттыбнезнаешьчтослучилось\nиснамиялежитвпальто\nивдругвглазахивуховпарке\nвгостях\n</s>


[42 / 300] Train: Loss = 3.40981, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.56it/s]
[42 / 300]   Val: Loss = 4.08198, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 164.46it/s]
[43 / 300] Train: Loss = 3.41845, PPX = 30.52:   4%|▎         | 25/677 [00:00<00:04, 133.00it/s]

Optimizer lr = 1.05879e-21

зухрасогромнымивпостели\nзаходитвкартынастоле\nпоканетенетолькоане\nкаквсе\n</s>


[43 / 300] Train: Loss = 3.40972, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 159.38it/s]
[43 / 300]   Val: Loss = 4.08207, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 162.07it/s]
[44 / 300] Train: Loss = 3.33099, PPX = 27.97:   4%|▎         | 25/677 [00:00<00:05, 129.47it/s]

Optimizer lr = 2.64698e-22

кнамизкустовоксаны\nивужасеконецбокал\nитутжевдомевсёбезводки\nвокно\n</s>


[44 / 300] Train: Loss = 3.40976, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 159.14it/s]
[44 / 300]   Val: Loss = 4.08250, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 157.37it/s]
[45 / 300] Train: Loss = 3.42746, PPX = 30.80:   3%|▎         | 22/677 [00:00<00:06, 108.87it/s]

Optimizer lr = 6.61744e-23

воттакябьюпрочудомама\nпотомещёводинлицо\nвокновглазахменяисердце\nитут\n</s>


[45 / 300] Train: Loss = 3.40988, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 152.65it/s]
[45 / 300]   Val: Loss = 4.08156, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 154.28it/s]
[46 / 300] Train: Loss = 3.52913, PPX = 34.09:   3%|▎         | 23/677 [00:00<00:05, 125.50it/s]

Optimizer lr = 1.65436e-23

агдетовпрошломзатобою\nвоттаквотэтувотгусь\nинатаблоиванвсемтелом\nичорт\n</s>


[46 / 300] Train: Loss = 3.40964, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 153.78it/s]
[46 / 300]   Val: Loss = 4.08178, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 158.36it/s]
[47 / 300] Train: Loss = 3.33764, PPX = 28.15:   4%|▎         | 25/677 [00:00<00:05, 127.69it/s]

Optimizer lr = 4.1359e-24

однаждывпещеренепомню\nктомучтовэтотгоддругой\nитольковцеломнехватает\nвруке\n</s>


[47 / 300] Train: Loss = 3.40963, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 152.82it/s]
[47 / 300]   Val: Loss = 4.08397, PPX = 59.38: 100%|██████████| 19/19 [00:00<00:00, 153.18it/s]
[48 / 300] Train: Loss = 3.54482, PPX = 34.63:   4%|▎         | 25/677 [00:00<00:05, 126.43it/s]

Optimizer lr = 1.03398e-24

янемогутебяродная\nзачтоиянерадносекс\nаэтопростоговоритьв\nяне\n</s>


[48 / 300] Train: Loss = 3.40935, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.15it/s]
[48 / 300]   Val: Loss = 4.08227, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 157.45it/s]
[49 / 300] Train: Loss = 3.44610, PPX = 31.38:   3%|▎         | 22/677 [00:00<00:05, 117.72it/s]

Optimizer lr = 2.58494e-25

неждётмыбудеммыхотели\nянасебявтомсутьнедам\nатоскемвынетакужплохо\nатыб\n</s>


[49 / 300] Train: Loss = 3.40966, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.45it/s]
[49 / 300]   Val: Loss = 4.08221, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 152.37it/s]
[50 / 300] Train: Loss = 3.51529, PPX = 33.63:   3%|▎         | 23/677 [00:00<00:05, 126.86it/s]

Optimizer lr = 6.46235e-26

втишиженывнииивпарке\nиговоритнучтожтыж\nноонвлесуиснимидаже\nвплену\n</s>


[50 / 300] Train: Loss = 3.40990, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.35it/s]
[50 / 300]   Val: Loss = 4.08180, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 150.67it/s]
[51 / 300] Train: Loss = 3.58484, PPX = 36.05:   4%|▎         | 24/677 [00:00<00:05, 125.92it/s]

Optimizer lr = 1.61559e-26

олегкричитмнеснитсягород\nитольковтишинеполос\nянемогутебесегодня\nния\n</s>


[51 / 300] Train: Loss = 3.40952, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.19it/s]
[51 / 300]   Val: Loss = 4.08069, PPX = 59.19: 100%|██████████| 19/19 [00:00<00:00, 156.19it/s]
[52 / 300] Train: Loss = 3.43613, PPX = 31.07:   4%|▎         | 25/677 [00:00<00:05, 127.56it/s]

Optimizer lr = 4.03897e-27

авынебылотакаркадий\nвсеэтисмотрятналуну\nииззагорсработымордой\nнадне\n</s>


[52 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.57it/s]
[52 / 300]   Val: Loss = 4.08303, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 168.26it/s]
[53 / 300] Train: Loss = 3.42038, PPX = 30.58:   4%|▎         | 24/677 [00:00<00:05, 124.14it/s]

Optimizer lr = 1.00974e-27

янемогуяэтобольше\nнеоченьбылоинето\nивтомянебылбольшениза\nния\n</s>


[53 / 300] Train: Loss = 3.40939, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.90it/s]
[53 / 300]   Val: Loss = 4.08323, PPX = 59.34: 100%|██████████| 19/19 [00:00<00:00, 157.60it/s]
[54 / 300] Train: Loss = 3.35100, PPX = 28.53:   4%|▎         | 24/677 [00:00<00:05, 124.62it/s]

Optimizer lr = 2.52435e-28

вглазахуженщинытакая\nвконтактеянетакужплох\nномненедалиноначтото\nвглаза\n</s>


[54 / 300] Train: Loss = 3.40983, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 163.02it/s]
[54 / 300]   Val: Loss = 4.08189, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 167.14it/s]
[55 / 300] Train: Loss = 3.29045, PPX = 26.85:   4%|▍         | 26/677 [00:00<00:04, 133.88it/s]

Optimizer lr = 6.31089e-29

вконцеконцовмынезаметил\nинеодинниктосума\nавчёмтобылоднапотом\nкакты\n</s>


[55 / 300] Train: Loss = 3.40987, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 164.70it/s]
[55 / 300]   Val: Loss = 4.08234, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 164.52it/s]
[56 / 300] Train: Loss = 3.20253, PPX = 24.59:   4%|▍         | 26/677 [00:00<00:04, 134.86it/s]

Optimizer lr = 1.57772e-29

вмоихглазахкогдатымнебы\nнесталлинонезаспиной\nябудусталеёнемного\nдоста\n</s>


[56 / 300] Train: Loss = 3.40976, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 161.09it/s]
[56 / 300]   Val: Loss = 4.08312, PPX = 59.33: 100%|██████████| 19/19 [00:00<00:00, 166.69it/s]
[57 / 300] Train: Loss = 3.56507, PPX = 35.34:   4%|▎         | 24/677 [00:00<00:05, 119.37it/s]

Optimizer lr = 3.9443e-30

ясдетстваднёмкаквдетствевреку\nивэтомбыловсёине\nкакбудтобынепопадает\nнаты\n</s>


[57 / 300] Train: Loss = 3.40970, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 157.42it/s]
[57 / 300]   Val: Loss = 4.08320, PPX = 59.33: 100%|██████████| 19/19 [00:00<00:00, 153.03it/s]
[58 / 300] Train: Loss = 3.47298, PPX = 32.23:   4%|▎         | 24/677 [00:00<00:05, 129.25it/s]

Optimizer lr = 9.86076e-31

яждалтебявтебеработу\nитутжеясовсемнерад\nнувотисделаймненадежда\nваду\n</s>


[58 / 300] Train: Loss = 3.40986, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 161.66it/s]
[58 / 300]   Val: Loss = 4.08146, PPX = 59.23: 100%|██████████| 19/19 [00:00<00:00, 150.24it/s]
[59 / 300] Train: Loss = 3.41803, PPX = 30.51:   3%|▎         | 23/677 [00:00<00:05, 122.71it/s]

Optimizer lr = 2.46519e-31

янемогупонятьаркадий\nнеточтояужедомой\nяпростотакжемогизагроши\nваду\n</s>


[59 / 300] Train: Loss = 3.40971, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 150.63it/s]
[59 / 300]   Val: Loss = 4.08140, PPX = 59.23: 100%|██████████| 19/19 [00:00<00:00, 151.05it/s]
[60 / 300] Train: Loss = 3.44938, PPX = 31.48:   4%|▎         | 24/677 [00:00<00:05, 119.66it/s]

Optimizer lr = 6.16298e-32

явдетствебылнеходит\nчтовтишинеиложь\nтытакдавнобынехотелось\nавдруг\n</s>


[60 / 300] Train: Loss = 3.40979, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.14it/s]
[60 / 300]   Val: Loss = 4.08183, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 157.89it/s]
[61 / 300] Train: Loss = 3.41357, PPX = 30.37:   3%|▎         | 23/677 [00:00<00:05, 124.06it/s]

Optimizer lr = 1.54074e-32

янемогутеберодная\nавытакоемнеббезслов\nнеяльнезналчтоввидучтовнём\nивпах\n</s>


[61 / 300] Train: Loss = 3.40955, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 159.10it/s]
[61 / 300]   Val: Loss = 4.08216, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 170.15it/s]
[62 / 300] Train: Loss = 3.36505, PPX = 28.94:   4%|▎         | 25/677 [00:00<00:04, 132.27it/s]

Optimizer lr = 3.85186e-33

втуночькогдатымнеприснился\nшепнулмнедокторнавчера\nаоннехочетхотьсцветами\nавглаз\n</s>


[62 / 300] Train: Loss = 3.40978, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.59it/s]
[62 / 300]   Val: Loss = 4.08144, PPX = 59.23: 100%|██████████| 19/19 [00:00<00:00, 156.21it/s]
[63 / 300] Train: Loss = 3.32658, PPX = 27.84:   4%|▎         | 24/677 [00:00<00:05, 127.46it/s]

Optimizer lr = 9.62965e-34

явдетствебылидукночи\nивнихпопрежнемукакцой\nнаэтомнадобылобыло\nвтвери\n</s>


[63 / 300] Train: Loss = 3.40972, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.94it/s]
[63 / 300]   Val: Loss = 4.08230, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 157.95it/s]
[64 / 300] Train: Loss = 3.41238, PPX = 30.34:   4%|▎         | 24/677 [00:00<00:05, 126.21it/s]

Optimizer lr = 2.40741e-34

наберегупятойпоколено\nивновьнаголовуминут\nивдругнатрёхпроцентовкрикнуть\nнея\n</s>


[64 / 300] Train: Loss = 3.40991, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.07it/s]
[64 / 300]   Val: Loss = 4.08105, PPX = 59.21: 100%|██████████| 19/19 [00:00<00:00, 154.51it/s]
[65 / 300] Train: Loss = 3.43528, PPX = 31.04:   4%|▎         | 24/677 [00:00<00:05, 126.63it/s]

Optimizer lr = 6.01853e-35

янехочубытьвжизнибольше\nянемогусказатьвамплох\nнежальчтовэтомнеосталось\nнимест\n</s>


[65 / 300] Train: Loss = 3.40980, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.34it/s]
[65 / 300]   Val: Loss = 4.08214, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 154.46it/s]
[66 / 300] Train: Loss = 3.48496, PPX = 32.62:   4%|▎         | 24/677 [00:00<00:05, 126.00it/s]

Optimizer lr = 1.50463e-35

яждалвасраднонезаметил\nинемогусказатьзнакдом\nаянепротивнонебудет\nссобой\n</s>


[66 / 300] Train: Loss = 3.40973, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 163.88it/s]
[66 / 300]   Val: Loss = 4.08266, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 168.37it/s]
[67 / 300] Train: Loss = 3.44323, PPX = 31.29:   4%|▍         | 26/677 [00:00<00:04, 134.78it/s]

Optimizer lr = 3.76158e-36

янехочубытьподругому\nвчёмможнодажевдоменет\nибылнемногонемешает\nссобой\n</s>


[67 / 300] Train: Loss = 3.40956, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 151.61it/s]
[67 / 300]   Val: Loss = 4.08196, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 153.46it/s]
[68 / 300] Train: Loss = 3.33807, PPX = 28.16:   4%|▎         | 24/677 [00:00<00:05, 123.72it/s]

Optimizer lr = 9.40395e-37

явваслюблютебяжениться\nияинемогувина\nамневответещенебудет\nнораз\n</s>


[68 / 300] Train: Loss = 3.40973, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.98it/s]
[68 / 300]   Val: Loss = 4.08223, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 165.60it/s]
[69 / 300] Train: Loss = 3.24697, PPX = 25.71:   4%|▎         | 24/677 [00:00<00:05, 125.39it/s]

Optimizer lr = 2.35099e-37

втритридцатьстосемьлетненадо\nябылначтотовсянесу\nидажесразувиднобыло\nвсебя\n</s>


[69 / 300] Train: Loss = 3.40984, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 158.49it/s]
[69 / 300]   Val: Loss = 4.08214, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 156.48it/s]
[70 / 300] Train: Loss = 3.28977, PPX = 26.84:   4%|▎         | 24/677 [00:00<00:05, 125.79it/s]

Optimizer lr = 5.87747e-38

ябылнаразнавернодура\nтакиеэтотмысльнета\nинехватаетзаворота\nдодна\n</s>


[70 / 300] Train: Loss = 3.40993, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.96it/s]
[70 / 300]   Val: Loss = 4.08162, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 156.64it/s]
[71 / 300] Train: Loss = 3.41499, PPX = 30.42:   3%|▎         | 23/677 [00:00<00:05, 127.32it/s]

Optimizer lr = 1.46937e-38

янелюбилаяприехал\nиневадуянемила\nяжневтотпервойуоксаны\nивнос\n</s>


[71 / 300] Train: Loss = 3.40949, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.41it/s]
[71 / 300]   Val: Loss = 4.08205, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 154.27it/s]
[72 / 300] Train: Loss = 3.49349, PPX = 32.90:   4%|▎         | 24/677 [00:00<00:05, 122.93it/s]

Optimizer lr = 3.67342e-39

меняназваляверю\nразвчаспикеёнедели\nивоттеперьнепишетпитер\nсоси\n</s>


[72 / 300] Train: Loss = 3.40941, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.11it/s]
[72 / 300]   Val: Loss = 4.08167, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 155.22it/s]
[73 / 300] Train: Loss = 3.32227, PPX = 27.72:   4%|▎         | 24/677 [00:00<00:05, 127.51it/s]

Optimizer lr = 9.18355e-40

яухожуиззакровати\nикакдавайнанейвопрос\nивэтомместебылещёи\nдодна\n</s>


[73 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.65it/s]
[73 / 300]   Val: Loss = 4.08083, PPX = 59.19: 100%|██████████| 19/19 [00:00<00:00, 149.80it/s]
[74 / 300] Train: Loss = 3.45685, PPX = 31.72:   3%|▎         | 23/677 [00:00<00:05, 119.41it/s]

Optimizer lr = 2.29589e-40

всвязисутранакухнефото\nитамнаголовевижузал\nаялюблютебяродная\nая\n</s>


[74 / 300] Train: Loss = 3.40990, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.91it/s]
[74 / 300]   Val: Loss = 4.08296, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 155.94it/s]
[75 / 300] Train: Loss = 3.34296, PPX = 28.30:   4%|▎         | 24/677 [00:00<00:05, 123.76it/s]

Optimizer lr = 5.73972e-41

вчетвёртыйразвмоейпостели\nвобъятьяхнетиниодной\nачтоподелатьесличестно\nтыза\n</s>


[75 / 300] Train: Loss = 3.40953, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.22it/s]
[75 / 300]   Val: Loss = 4.08292, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 153.57it/s]
[76 / 300] Train: Loss = 3.25275, PPX = 25.86:   4%|▎         | 24/677 [00:00<00:05, 126.13it/s]

Optimizer lr = 1.43493e-41

аянезнаешьчтоналето\nязастоломсказалсврачу\nаяидуипопривычке\nирад\n</s>


[76 / 300] Train: Loss = 3.40970, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.96it/s]
[76 / 300]   Val: Loss = 4.08123, PPX = 59.22: 100%|██████████| 19/19 [00:00<00:00, 152.63it/s]
[77 / 300] Train: Loss = 3.40946, PPX = 30.25:   4%|▎         | 25/677 [00:00<00:05, 127.42it/s]

Optimizer lr = 3.58732e-42

явюностистобоюкаждый\nибылсогласеннастоле\nнокактовсамомделевсё\nруккх\n</s>


[77 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 160.08it/s]
[77 / 300]   Val: Loss = 4.08259, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 161.07it/s]
[78 / 300] Train: Loss = 3.43783, PPX = 31.12:   3%|▎         | 23/677 [00:00<00:05, 117.99it/s]

Optimizer lr = 8.96831e-43

агдетовтойтовверхтовверхто\nчтовдетствежизниинирук\nтуттолькоскаждымгодомдесять\nсконём\n</s>


[78 / 300] Train: Loss = 3.40988, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 161.87it/s]
[78 / 300]   Val: Loss = 4.08197, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 167.75it/s]
[79 / 300] Train: Loss = 3.28956, PPX = 26.83:   3%|▎         | 23/677 [00:00<00:05, 116.92it/s]

Optimizer lr = 2.24208e-43

нагрудьукларыссобою\nяненавижулетизрук\nаявответвконцетоннеля\nнета\n</s>


[79 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.56it/s]
[79 / 300]   Val: Loss = 4.08135, PPX = 59.23: 100%|██████████| 19/19 [00:00<00:00, 154.16it/s]
[80 / 300] Train: Loss = 3.58333, PPX = 35.99:   4%|▎         | 24/677 [00:00<00:05, 124.71it/s]

Optimizer lr = 5.60519e-44

однаждывдомсемьтысяч\nномнененадобытькакбог\nвытакмнененужныаможет\nивас\n</s>


[80 / 300] Train: Loss = 3.40972, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.82it/s]
[80 / 300]   Val: Loss = 4.08292, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 151.42it/s]
[81 / 300] Train: Loss = 3.36918, PPX = 29.05:   4%|▎         | 25/677 [00:00<00:05, 127.01it/s]

Optimizer lr = 1.4013e-44

янелюблювасзагоды\nиэтовсёравностихи\nиявесьманепонимаю\nия\n</s>


[81 / 300] Train: Loss = 3.40972, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.48it/s]
[81 / 300]   Val: Loss = 4.08177, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 154.61it/s]
[82 / 300] Train: Loss = 3.56191, PPX = 35.23:   3%|▎         | 23/677 [00:00<00:05, 117.96it/s]

Optimizer lr = 3.50325e-45

янелюблюпричинвокошко\nсвоймирнезналчтозаспиной\nнетакужбольшенехватает\nсутра\n</s>


[82 / 300] Train: Loss = 3.40953, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 153.30it/s]
[82 / 300]   Val: Loss = 4.08156, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 156.48it/s]
[83 / 300] Train: Loss = 3.45089, PPX = 31.53:   4%|▎         | 25/677 [00:00<00:05, 125.80it/s]

Optimizer lr = 8.75812e-46

явдетствебылсовсемнексчастью\nавотужеиполицу\nдаипопрежнемунелюбишь\nсконца\n</s>


[83 / 300] Train: Loss = 3.40972, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 157.52it/s]
[83 / 300]   Val: Loss = 4.08169, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 151.81it/s]
[84 / 300] Train: Loss = 3.40525, PPX = 30.12:   4%|▎         | 24/677 [00:00<00:05, 127.88it/s]

Optimizer lr = 2.18953e-46

всемьеунаспроблемыхьюстон\nитутоднанамнеодна\nатотжекактостранновреку\nнавсе\n</s>


[84 / 300] Train: Loss = 3.40989, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.07it/s]
[84 / 300]   Val: Loss = 4.08209, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 155.67it/s]
[85 / 300] Train: Loss = 3.41412, PPX = 30.39:   4%|▎         | 24/677 [00:00<00:05, 129.60it/s]

Optimizer lr = 5.47382e-47

насклонелетнадвадцатьслучай\nивнёмбылбудетхорошо\nвдругбаципрямопопривычке\nнаюг\n</s>


[85 / 300] Train: Loss = 3.40965, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.50it/s]
[85 / 300]   Val: Loss = 4.08177, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 153.17it/s]
[86 / 300] Train: Loss = 3.34367, PPX = 28.32:   3%|▎         | 23/677 [00:00<00:05, 125.58it/s]

Optimizer lr = 1.36846e-47

вкоторыйразтебявсёлюди\nсказалнамсэтимнеидёт\nатымойангелвбеломскриком\nвгробу\n</s>


[86 / 300] Train: Loss = 3.41012, PPX = 30.27: 100%|██████████| 677/677 [00:04<00:00, 160.83it/s]
[86 / 300]   Val: Loss = 4.08282, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 162.08it/s]
[87 / 300] Train: Loss = 3.33823, PPX = 28.17:   4%|▎         | 25/677 [00:00<00:05, 125.01it/s]

Optimizer lr = 3.42114e-48

оксанавтёмномпаркелето\nивсердцедажелишьворжи\nинаполднярожденья\nвговно\n</s>


[87 / 300] Train: Loss = 3.40962, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 157.23it/s]
[87 / 300]   Val: Loss = 4.08232, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 153.50it/s]
[88 / 300] Train: Loss = 3.48585, PPX = 32.65:   4%|▎         | 24/677 [00:00<00:05, 124.67it/s]

Optimizer lr = 8.55285e-49

япервыйзапахиззадверью\nтакинесмогбынетая\nмненужнобылочтостобою\nвпесок\n</s>


[88 / 300] Train: Loss = 3.40975, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.55it/s]
[88 / 300]   Val: Loss = 4.08404, PPX = 59.38: 100%|██████████| 19/19 [00:00<00:00, 153.96it/s]
[89 / 300] Train: Loss = 3.53081, PPX = 34.15:   4%|▎         | 24/677 [00:00<00:05, 124.83it/s]

Optimizer lr = 2.13821e-49

невсилахвамянезачто\nтамсдетстванебылотак\nананегонебудетпозже\nнемой\n</s>


[89 / 300] Train: Loss = 3.40965, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.08it/s]
[89 / 300]   Val: Loss = 4.08288, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 149.51it/s]
[90 / 300] Train: Loss = 3.53090, PPX = 34.15:   4%|▎         | 24/677 [00:00<00:05, 122.11it/s]

Optimizer lr = 5.34553e-50

унасвдеревневсёслучилось\nнавсеженщиныион\nаязанеюнехватает\nимозг\n</s>


[90 / 300] Train: Loss = 3.40970, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.49it/s]
[90 / 300]   Val: Loss = 4.08284, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 150.65it/s]
[91 / 300] Train: Loss = 3.35290, PPX = 28.59:   3%|▎         | 23/677 [00:00<00:05, 115.80it/s]

Optimizer lr = 1.33638e-50

ясдетствавижувшколу\nтаксразухорошо\nамытонехотелбы\nикак\n</s>


[91 / 300] Train: Loss = 3.40967, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.23it/s]
[91 / 300]   Val: Loss = 4.08236, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 170.79it/s]
[92 / 300] Train: Loss = 3.26795, PPX = 26.26:   4%|▎         | 25/677 [00:00<00:05, 127.70it/s]

Optimizer lr = 3.34096e-51

намойвопросбылестьпроблема\nивэтомсамомделелайков\nинеочемянежелаю\nнидруг\n</s>


[92 / 300] Train: Loss = 3.40959, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 163.62it/s]
[92 / 300]   Val: Loss = 4.08261, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 161.21it/s]
[93 / 300] Train: Loss = 3.41705, PPX = 30.48:   4%|▎         | 24/677 [00:00<00:05, 123.61it/s]

Optimizer lr = 8.35239e-52

тымневсемьеневиделя\nавымнедвадцатьлеткакбэ\nкактысгодамидалдовстречи\nнаюг\n</s>


[93 / 300] Train: Loss = 3.40955, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 158.46it/s]
[93 / 300]   Val: Loss = 4.08255, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 159.34it/s]
[94 / 300] Train: Loss = 3.43162, PPX = 30.93:   4%|▎         | 24/677 [00:00<00:05, 125.98it/s]

Optimizer lr = 2.0881e-52

ненадомнеянехудожник\nнеможетбылобневчом\nтыбзалюбовьменянетрожь\nнеты\n</s>


[94 / 300] Train: Loss = 3.40949, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.69it/s]
[94 / 300]   Val: Loss = 4.08168, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 157.14it/s]
[95 / 300] Train: Loss = 3.43755, PPX = 31.11:   4%|▎         | 24/677 [00:00<00:05, 125.72it/s]

Optimizer lr = 5.22024e-53

наместеуневестыречки\nнаполисголовыдоног\nидажевпаркенеосталось\nнизги\n</s>


[95 / 300] Train: Loss = 3.40988, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.68it/s]
[95 / 300]   Val: Loss = 4.08151, PPX = 59.23: 100%|██████████| 19/19 [00:00<00:00, 158.50it/s]
[96 / 300] Train: Loss = 3.33970, PPX = 28.21:   4%|▎         | 24/677 [00:00<00:05, 125.64it/s]

Optimizer lr = 1.30506e-53

играявспальнюнепроходит\nихрустичестьиречь\nивэтомяинесогласен\nнета\n</s>


[96 / 300] Train: Loss = 3.40976, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 150.41it/s]
[96 / 300]   Val: Loss = 4.08242, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 161.02it/s]
[97 / 300] Train: Loss = 3.40908, PPX = 30.24:   4%|▎         | 24/677 [00:00<00:05, 125.43it/s]

Optimizer lr = 3.26265e-54

вотвыбвпорядкебылневидно\nчтоякричуянемогу\nябвамдолженбытьзаэто\nпроцесс\n</s>


[97 / 300] Train: Loss = 3.40952, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 160.11it/s]
[97 / 300]   Val: Loss = 4.08115, PPX = 59.21: 100%|██████████| 19/19 [00:00<00:00, 166.82it/s]
[98 / 300] Train: Loss = 3.35991, PPX = 28.79:   4%|▎         | 25/677 [00:00<00:05, 127.35it/s]

Optimizer lr = 8.15663e-55

унасвдеревнедвепроцентов\nведьянесмогтеперьния\nатовраюуженезная\nивид\n</s>


[98 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.19it/s]
[98 / 300]   Val: Loss = 4.08210, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 155.80it/s]
[99 / 300] Train: Loss = 3.42180, PPX = 30.62:   4%|▎         | 24/677 [00:00<00:05, 125.39it/s]

Optimizer lr = 2.03916e-55

нувотивсёсказалаольга\nаяпросилужедавно\nивэтомяегокакпрежде\nвуме\n</s>


[99 / 300] Train: Loss = 3.40955, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 153.30it/s]
[99 / 300]   Val: Loss = 4.08126, PPX = 59.22: 100%|██████████| 19/19 [00:00<00:00, 157.73it/s]
[100 / 300] Train: Loss = 3.39936, PPX = 29.94:   3%|▎         | 23/677 [00:00<00:05, 127.23it/s]

Optimizer lr = 5.09789e-56

мысдругомсчастьювваннусмороза\nкогдавночивплащесума\nиявтебяивсеневнёмне\nукасс\n</s>


[100 / 300] Train: Loss = 3.40956, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.44it/s]
[100 / 300]   Val: Loss = 4.08228, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 156.77it/s]
[101 / 300] Train: Loss = 3.39949, PPX = 29.95:   4%|▎         | 24/677 [00:00<00:05, 127.87it/s]

Optimizer lr = 1.27447e-56

когдаятотсказаларкадий\nивотужеитотжегод\nивнёмявижувсёивсёже\nнето\n</s>


[101 / 300] Train: Loss = 3.40959, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.12it/s]
[101 / 300]   Val: Loss = 4.08235, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 159.52it/s]
[102 / 300] Train: Loss = 3.43179, PPX = 30.93:   4%|▎         | 25/677 [00:00<00:04, 132.57it/s]

Optimizer lr = 3.18618e-57

янемогуявамрешили\nнотымнебудешьневполне\nмогусказатьнонехотелось\nакак\n</s>


[102 / 300] Train: Loss = 3.40953, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 161.38it/s]
[102 / 300]   Val: Loss = 4.08148, PPX = 59.23: 100%|██████████| 19/19 [00:00<00:00, 159.28it/s]
[103 / 300] Train: Loss = 3.50620, PPX = 33.32:   4%|▎         | 24/677 [00:00<00:05, 124.52it/s]

Optimizer lr = 7.96546e-58

олегсработынарыбалку\nиемидоутраану\nатытакойчтовсёнебудет\nвмужья\n</s>


[103 / 300] Train: Loss = 3.40956, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 156.48it/s]
[103 / 300]   Val: Loss = 4.08051, PPX = 59.18: 100%|██████████| 19/19 [00:00<00:00, 157.49it/s]
[104 / 300] Train: Loss = 3.44877, PPX = 31.46:   4%|▎         | 24/677 [00:00<00:05, 125.67it/s]

Optimizer lr = 1.99136e-58

какаяжетымнеприснилось\nимыстобойкакнизаней\nаяивовсененавстречу\nикак\n</s>


[104 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.25it/s]
[104 / 300]   Val: Loss = 4.08082, PPX = 59.19: 100%|██████████| 19/19 [00:00<00:00, 154.08it/s]
[105 / 300] Train: Loss = 3.35920, PPX = 28.77:   4%|▎         | 25/677 [00:00<00:05, 126.73it/s]

Optimizer lr = 4.97841e-59

японимаювваснебуду\nатывсюжизньнедодыр\nкакразтебявсебенеможет\nкакзнать\n</s>


[105 / 300] Train: Loss = 3.40983, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.88it/s]
[105 / 300]   Val: Loss = 4.08272, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 157.89it/s]
[106 / 300] Train: Loss = 3.38419, PPX = 29.49:   3%|▎         | 22/677 [00:00<00:05, 114.63it/s]

Optimizer lr = 1.2446e-59

втотденькогдатымнесказали\nневсилахянетакужплох\nябневсюжизнькакнистранно\nневшутку\n</s>


[106 / 300] Train: Loss = 3.40953, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 156.25it/s]
[106 / 300]   Val: Loss = 4.08265, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 150.55it/s]
[107 / 300] Train: Loss = 3.36488, PPX = 28.93:   4%|▎         | 24/677 [00:00<00:05, 125.39it/s]

Optimizer lr = 3.11151e-60

янехочумогудетейя\nнотакионсовсемнерад\nявообщенепонимаю\nнонеожиданно\n</s>


[107 / 300] Train: Loss = 3.40970, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 160.17it/s]
[107 / 300]   Val: Loss = 4.08110, PPX = 59.21: 100%|██████████| 19/19 [00:00<00:00, 161.31it/s]
[108 / 300] Train: Loss = 3.44904, PPX = 31.47:   4%|▎         | 25/677 [00:00<00:05, 127.54it/s]

Optimizer lr = 7.77877e-61

втвоихглазахугробапоэты\nиначалянетакужпрост\nакакжевсётакитакое\nназла\n</s>


[108 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.65it/s]
[108 / 300]   Val: Loss = 4.08189, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 156.13it/s]
[109 / 300] Train: Loss = 3.44559, PPX = 31.36:   4%|▎         | 24/677 [00:00<00:05, 124.86it/s]

Optimizer lr = 1.94469e-61

ввасбыловсёнаэтомсвете\nотомчтотынетакужплох\nимыстобойкакнистранно\nвниесть\n</s>


[109 / 300] Train: Loss = 3.40970, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 152.19it/s]
[109 / 300]   Val: Loss = 4.08149, PPX = 59.23: 100%|██████████| 19/19 [00:00<00:00, 154.58it/s]
[110 / 300] Train: Loss = 3.48768, PPX = 32.71:   4%|▎         | 25/677 [00:00<00:05, 124.37it/s]

Optimizer lr = 4.86173e-62

мывсеумрёмнапланете\nтыснимидоутра\nамненаскладенесцветами\nичто\n</s>


[110 / 300] Train: Loss = 3.40964, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 162.88it/s]
[110 / 300]   Val: Loss = 4.08184, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 167.66it/s]
[111 / 300] Train: Loss = 3.28258, PPX = 26.64:   4%|▎         | 24/677 [00:00<00:04, 135.13it/s]

Optimizer lr = 1.21543e-62

немноговбабполокотьнужно\nнеможетяневиноват\nяжнелюблюябудустану\nнеточтобянезаместа\nмынемогупонятьженубы\nнепьян\n</s>


[111 / 300] Train: Loss = 3.40972, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 164.99it/s]
[111 / 300]   Val: Loss = 4.08209, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 183.10it/s]
[112 / 300] Train: Loss = 3.40134, PPX = 30.00:   4%|▎         | 25/677 [00:00<00:04, 134.95it/s]

Optimizer lr = 3.03858e-63

изтехктотольковэтоммире\nстехпоркаквнебегусьдвабыл\nатывозьмименяобратно\nвнёму\n</s>


[112 / 300] Train: Loss = 3.40944, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 158.42it/s]
[112 / 300]   Val: Loss = 4.08098, PPX = 59.20: 100%|██████████| 19/19 [00:00<00:00, 153.11it/s]
[113 / 300] Train: Loss = 3.41354, PPX = 30.37:   3%|▎         | 23/677 [00:00<00:05, 116.87it/s]

Optimizer lr = 7.59645e-64

тынемоглибыябы\nнеточтобвынетакужплох\nнуладнодайбынонеслишком\nая\n</s>


[113 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.39it/s]
[113 / 300]   Val: Loss = 4.08178, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 158.42it/s]
[114 / 300] Train: Loss = 3.42085, PPX = 30.60:   4%|▎         | 24/677 [00:00<00:05, 125.39it/s]

Optimizer lr = 1.89911e-64

неверюясказалгеннадий\nвынемоглибымненатреть\nияинежноключназавтрак\nсгубх\n</s>


[114 / 300] Train: Loss = 3.40978, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.86it/s]
[114 / 300]   Val: Loss = 4.08067, PPX = 59.19: 100%|██████████| 19/19 [00:00<00:00, 157.49it/s]
[115 / 300] Train: Loss = 3.37092, PPX = 29.11:   4%|▎         | 24/677 [00:00<00:05, 125.17it/s]

Optimizer lr = 4.74778e-65

яехалвротнамгновенье\nменявконцеконцовярад\nинемогутебеответить\nнея\n</s>


[115 / 300] Train: Loss = 3.40980, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.76it/s]
[115 / 300]   Val: Loss = 4.08248, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 164.74it/s]
[116 / 300] Train: Loss = 3.41295, PPX = 30.35:   4%|▎         | 25/677 [00:00<00:05, 129.51it/s]

Optimizer lr = 1.18695e-65

аянепотвоимкриков\nненужносделатьлишьвлюбви\nявнёмиянепростосекса\nнельзя\n</s>


[116 / 300] Train: Loss = 3.40966, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.61it/s]
[116 / 300]   Val: Loss = 4.08037, PPX = 59.17: 100%|██████████| 19/19 [00:00<00:00, 163.03it/s]
[117 / 300] Train: Loss = 3.47780, PPX = 32.39:   4%|▎         | 25/677 [00:00<00:05, 129.95it/s]

Optimizer lr = 2.96736e-66

унасопятьпроблемыхьюстон\nнепотомучтоячтокот\nичтотохочетсяневсилах\nния\n</s>


[117 / 300] Train: Loss = 3.40986, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 160.24it/s]
[117 / 300]   Val: Loss = 4.08089, PPX = 59.20: 100%|██████████| 19/19 [00:00<00:00, 150.84it/s]
[118 / 300] Train: Loss = 3.44207, PPX = 31.25:   3%|▎         | 23/677 [00:00<00:05, 119.50it/s]

Optimizer lr = 7.41841e-67

явдетствебылнемногомного\nинелюблюкуданивчём\nяпростотакхочутудаи\nнея\n</s>


[118 / 300] Train: Loss = 3.40977, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.57it/s]
[118 / 300]   Val: Loss = 4.08126, PPX = 59.22: 100%|██████████| 19/19 [00:00<00:00, 154.23it/s]
[119 / 300] Train: Loss = 3.45623, PPX = 31.70:   4%|▎         | 24/677 [00:00<00:05, 125.90it/s]

Optimizer lr = 1.8546e-67

тыпомнишькакнакухнеморе\nивпоискахлюдейвысокя\nауолегатольковкосмос\nнеон\n</s>


[119 / 300] Train: Loss = 3.40980, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.46it/s]
[119 / 300]   Val: Loss = 4.08131, PPX = 59.22: 100%|██████████| 19/19 [00:00<00:00, 150.69it/s]
[120 / 300] Train: Loss = 3.44765, PPX = 31.43:   4%|▎         | 25/677 [00:00<00:05, 125.74it/s]

Optimizer lr = 4.63651e-68

нанебесахминутнапляже\nиоченьжеоднапримат\nитутжесталопоквартире\nнаюг\n</s>


[120 / 300] Train: Loss = 3.40963, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.71it/s]
[120 / 300]   Val: Loss = 4.08183, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 156.04it/s]
[121 / 300] Train: Loss = 3.31941, PPX = 27.64:   4%|▎         | 25/677 [00:00<00:05, 127.14it/s]

Optimizer lr = 1.15913e-68

актоунастутсамыйумный\nоннемогупонятьвамнет\nавывответятакужлучше\nивпуть\n</s>


[121 / 300] Train: Loss = 3.40954, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.62it/s]
[121 / 300]   Val: Loss = 4.08232, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 157.79it/s]
[122 / 300] Train: Loss = 3.45886, PPX = 31.78:   4%|▎         | 24/677 [00:00<00:05, 126.95it/s]

Optimizer lr = 2.89782e-69

янехочучтобэтолетом\nтоятотамнетотоесть\nавсмыслеснимибниподвсяких\nнивив\n</s>


[122 / 300] Train: Loss = 3.40938, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.55it/s]
[122 / 300]   Val: Loss = 4.08264, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 151.93it/s]
[123 / 300] Train: Loss = 3.39536, PPX = 29.83:   4%|▎         | 25/677 [00:00<00:05, 129.24it/s]

Optimizer lr = 7.24454e-70

какмноговэтоммиремире\nянемогупонятьместа\nиялишьчайвнанемуглеба\nневся\n</s>


[123 / 300] Train: Loss = 3.40970, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 160.52it/s]
[123 / 300]   Val: Loss = 4.08218, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 168.27it/s]
[124 / 300] Train: Loss = 3.50226, PPX = 33.19:   4%|▍         | 26/677 [00:00<00:04, 134.27it/s]

Optimizer lr = 1.81114e-70

явдетствебылчутьневпостели\nинеумелнастоле\nштобзамнойянежелаю\nнеты\n</s>


[124 / 300] Train: Loss = 3.40972, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 151.37it/s]
[124 / 300]   Val: Loss = 4.08276, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 150.37it/s]
[125 / 300] Train: Loss = 3.34812, PPX = 28.45:   3%|▎         | 23/677 [00:00<00:05, 121.84it/s]

Optimizer lr = 4.52784e-71

аеслибянинежелаю\nневсилахоттогочтоб\nнелюбятнетуданебудет\nивплач\n</s>


[125 / 300] Train: Loss = 3.40991, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.17it/s]
[125 / 300]   Val: Loss = 4.08052, PPX = 59.18: 100%|██████████| 19/19 [00:00<00:00, 153.12it/s]
[126 / 300] Train: Loss = 3.44357, PPX = 31.30:   4%|▎         | 25/677 [00:00<00:05, 125.07it/s]

Optimizer lr = 1.13196e-71

какхорошовадуятолько\nчтовасотэтогодослёз\nаяхочутебясегодня\nия\n</s>


[126 / 300] Train: Loss = 3.40977, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 158.77it/s]
[126 / 300]   Val: Loss = 4.08213, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 156.41it/s]
[127 / 300] Train: Loss = 3.33420, PPX = 28.06:   3%|▎         | 23/677 [00:00<00:05, 119.76it/s]

Optimizer lr = 2.8299e-72

унаспопоясбродитутром\nпокругуистрехутра\nимневраюиграюнадо\nвглаза\n</s>


[127 / 300] Train: Loss = 3.40970, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.19it/s]
[127 / 300]   Val: Loss = 4.08166, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 166.71it/s]
[128 / 300] Train: Loss = 3.45834, PPX = 31.76:   4%|▎         | 24/677 [00:00<00:05, 129.12it/s]

Optimizer lr = 7.07475e-73

язалюбовьтебяневидел\nчтоможновтомчтомненебог\nаяхочубынехотелось\nния\n</s>


[128 / 300] Train: Loss = 3.40998, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 160.45it/s]
[128 / 300]   Val: Loss = 4.08225, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 161.84it/s]
[129 / 300] Train: Loss = 3.43120, PPX = 30.91:   4%|▎         | 25/677 [00:00<00:05, 127.44it/s]

Optimizer lr = 1.76869e-73

явдетстверазтебянебуду\nясамневерюбыдослёз\nауменяестьсполовиной\nсомной\n</s>


[129 / 300] Train: Loss = 3.40992, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 157.58it/s]
[129 / 300]   Val: Loss = 4.08291, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 158.38it/s]
[130 / 300] Train: Loss = 3.46863, PPX = 32.09:   4%|▎         | 24/677 [00:00<00:05, 126.67it/s]

Optimizer lr = 4.42172e-74

вметросутрапроснулсяутром\nчтовсевтеплеитеневком\nавсмыслекакбыневернулся\nкаксыр\n</s>


[130 / 300] Train: Loss = 3.40960, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.72it/s]
[130 / 300]   Val: Loss = 4.08246, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 154.65it/s]
[131 / 300] Train: Loss = 3.38725, PPX = 29.58:   4%|▎         | 24/677 [00:00<00:05, 125.50it/s]

Optimizer lr = 1.10543e-74

когдатымедленнозателом\nнадачетвойпокойирот\nивнёмтеперьянесогласен\nнея\n</s>


[131 / 300] Train: Loss = 3.40960, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.54it/s]
[131 / 300]   Val: Loss = 4.07976, PPX = 59.13: 100%|██████████| 19/19 [00:00<00:00, 152.10it/s]
[132 / 300] Train: Loss = 3.28001, PPX = 26.58:   4%|▎         | 25/677 [00:00<00:05, 129.32it/s]

Optimizer lr = 2.76357e-75

явдетствебылневиделсмысла\nаявответтебенирад\nиненанейавотисвете\nнея\n</s>


[132 / 300] Train: Loss = 3.40960, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 156.06it/s]
[132 / 300]   Val: Loss = 4.08230, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 153.56it/s]
[133 / 300] Train: Loss = 3.31675, PPX = 27.57:   3%|▎         | 23/677 [00:00<00:05, 119.71it/s]

Optimizer lr = 6.90893e-76

вуглусемьлетспустясемьикс\nитывменясовсемнерад\nавэтомчтомнеостаётся\nнавы\n</s>


[133 / 300] Train: Loss = 3.40973, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.86it/s]
[133 / 300]   Val: Loss = 4.08258, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 149.70it/s]
[134 / 300] Train: Loss = 3.32310, PPX = 27.75:   4%|▍         | 26/677 [00:00<00:05, 127.31it/s]

Optimizer lr = 1.72723e-76

влесуподпоясназаборе\nивэтомнетупроник\nмнеговорятчтомненеможем\nотвас\n</s>


[134 / 300] Train: Loss = 3.40963, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 159.68it/s]
[134 / 300]   Val: Loss = 4.08145, PPX = 59.23: 100%|██████████| 19/19 [00:00<00:00, 169.92it/s]
[135 / 300] Train: Loss = 3.48007, PPX = 32.46:   4%|▎         | 25/677 [00:00<00:05, 128.15it/s]

Optimizer lr = 4.31808e-77

апочемувыбезвсегото\nябснимповезлоненой\nнетакужмногонопохоже\nнете\n</s>


[135 / 300] Train: Loss = 3.40978, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 158.81it/s]
[135 / 300]   Val: Loss = 4.08058, PPX = 59.18: 100%|██████████| 19/19 [00:00<00:00, 155.88it/s]
[136 / 300] Train: Loss = 3.54897, PPX = 34.78:   4%|▎         | 25/677 [00:00<00:05, 125.94it/s]

Optimizer lr = 1.07952e-77

янаработенезаметил\nаяведьинепротив\nвсети\n</s>


[136 / 300] Train: Loss = 3.40965, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 156.64it/s]
[136 / 300]   Val: Loss = 4.08199, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 160.25it/s]
[137 / 300] Train: Loss = 3.48292, PPX = 32.55:   3%|▎         | 23/677 [00:00<00:04, 131.43it/s]

Optimizer lr = 2.6988e-78

мывсебнакухнеэтодело\nивэтомвремянамвсети\nвтуденьстобойвмоюпалаты\nикрест\n</s>


[137 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.94it/s]
[137 / 300]   Val: Loss = 4.08200, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 152.88it/s]
[138 / 300] Train: Loss = 3.38924, PPX = 29.64:   4%|▎         | 24/677 [00:00<00:05, 124.55it/s]

Optimizer lr = 6.74701e-79

аможномненаскладеземлю\nнеточтобтыменяхотьраз\nинеизнихотэтихслови\nябна\n</s>


[138 / 300] Train: Loss = 3.40965, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.22it/s]
[138 / 300]   Val: Loss = 4.08272, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 148.79it/s]
[139 / 300] Train: Loss = 3.37440, PPX = 29.21:   3%|▎         | 23/677 [00:00<00:05, 118.55it/s]

Optimizer lr = 1.68675e-79

неважноснаминиконцанимеры\nниногневшуткунинагрудь\nавотимамаэтопросто\nвдекрет\n</s>


[139 / 300] Train: Loss = 3.40976, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.14it/s]
[139 / 300]   Val: Loss = 4.08177, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 155.30it/s]
[140 / 300] Train: Loss = 3.40407, PPX = 30.09:   4%|▎         | 24/677 [00:00<00:05, 127.41it/s]

Optimizer lr = 4.21688e-80

явовторыхучорнойкровати\nитутжеяуженерад\nтычтокпримерунемужчина\nнесмог\n</s>


[140 / 300] Train: Loss = 3.40966, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.78it/s]
[140 / 300]   Val: Loss = 4.08199, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 156.04it/s]
[141 / 300] Train: Loss = 3.45439, PPX = 31.64:   4%|▎         | 25/677 [00:00<00:04, 131.88it/s]

Optimizer lr = 1.05422e-80

насъездеюныхи\nявжизнитолькоговорит\nиговоритнучтотытоже\nая\n</s>


[141 / 300] Train: Loss = 3.40972, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 159.94it/s]
[141 / 300]   Val: Loss = 4.08253, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 153.69it/s]
[142 / 300] Train: Loss = 3.36270, PPX = 28.87:   4%|▎         | 24/677 [00:00<00:05, 129.09it/s]

Optimizer lr = 2.63555e-81

явдетствебылнастолькомного\nикаквсегдавовсёмвозьми\nитолькомыегонепара\nигод\n</s>


[142 / 300] Train: Loss = 3.40977, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.18it/s]
[142 / 300]   Val: Loss = 4.08298, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 165.42it/s]
[143 / 300] Train: Loss = 3.35506, PPX = 28.65:   4%|▍         | 26/677 [00:00<00:04, 136.14it/s]

Optimizer lr = 6.58887e-82

явдетствебылнепонимаю\nатыпришёлвовсейкрасе\nатактодумалчтостобою\nисдох\n</s>


[143 / 300] Train: Loss = 3.40971, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 166.11it/s]
[143 / 300]   Val: Loss = 4.08252, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 168.21it/s]
[144 / 300] Train: Loss = 3.45941, PPX = 31.80:   4%|▎         | 25/677 [00:00<00:05, 126.72it/s]

Optimizer lr = 1.64722e-82

янескажусказалевгений\nивнёмполноитакилень\nвсёпотомучтовчёрномслица\nнемой\n</s>


[144 / 300] Train: Loss = 3.40965, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.27it/s]
[144 / 300]   Val: Loss = 4.08276, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 166.06it/s]
[145 / 300] Train: Loss = 3.40815, PPX = 30.21:   4%|▍         | 26/677 [00:00<00:05, 128.75it/s]

Optimizer lr = 4.11805e-83

насценусчастьевсёпрекрасно\nкакбудтонеидёткакойто\nтамяинебылвэтовремя\nнивте\n</s>


[145 / 300] Train: Loss = 3.40962, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 157.84it/s]
[145 / 300]   Val: Loss = 4.08181, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 170.21it/s]
[146 / 300] Train: Loss = 3.32277, PPX = 27.74:   4%|▎         | 25/677 [00:00<00:05, 129.95it/s]

Optimizer lr = 1.02951e-83

оксанаянепонимаю\nяпоследвухнетакгруб\nиневдушексебеавсмысле\nнивчём\n</s>


[146 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 160.44it/s]
[146 / 300]   Val: Loss = 4.08235, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 161.45it/s]
[147 / 300] Train: Loss = 3.30863, PPX = 27.35:   4%|▎         | 24/677 [00:00<00:05, 128.17it/s]

Optimizer lr = 2.57378e-84

яобожаювкусвтумане\nтакинебылоинеесть\nатутивэтомтовитоге\nваду\n</s>


[147 / 300] Train: Loss = 3.40953, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 160.68it/s]
[147 / 300]   Val: Loss = 4.08333, PPX = 59.34: 100%|██████████| 19/19 [00:00<00:00, 153.83it/s]
[148 / 300] Train: Loss = 3.41048, PPX = 30.28:   4%|▎         | 24/677 [00:00<00:05, 126.87it/s]

Optimizer lr = 6.43445e-85

вамчтотовглубинемнеруки\nянемогувамдосмертинет\nнезналчтоявсеголишьпушкин\nтои\n</s>


[148 / 300] Train: Loss = 3.40971, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.62it/s]
[148 / 300]   Val: Loss = 4.08193, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 151.39it/s]
[149 / 300] Train: Loss = 3.31297, PPX = 27.47:   4%|▎         | 25/677 [00:00<00:05, 129.91it/s]

Optimizer lr = 1.60861e-85

какойвывсемчеготовморе\nятакинемогусказать\nновсёжевэтомнехватает\nивморг\n</s>


[149 / 300] Train: Loss = 3.40953, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 160.00it/s]
[149 / 300]   Val: Loss = 4.08281, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 156.62it/s]
[150 / 300] Train: Loss = 3.26775, PPX = 26.25:   4%|▎         | 25/677 [00:00<00:05, 124.80it/s]

Optimizer lr = 4.02153e-86

влесусоксанойнапороге\nрешиливочередиза\nвнёмтридцатьпятьплюсдвадцатьтри\nвсебя\n</s>


[150 / 300] Train: Loss = 3.40975, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.17it/s]
[150 / 300]   Val: Loss = 4.08165, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 150.20it/s]
[151 / 300] Train: Loss = 3.52352, PPX = 33.90:   3%|▎         | 23/677 [00:00<00:05, 124.78it/s]

Optimizer lr = 1.00538e-86

незнаюкакяневернулся\nнезнаючтолинастоле\nавынескемягдеигдето\nтыза\n</s>


[151 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 152.74it/s]
[151 / 300]   Val: Loss = 4.08257, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 150.51it/s]
[152 / 300] Train: Loss = 3.46346, PPX = 31.93:   4%|▎         | 24/677 [00:00<00:05, 124.00it/s]

Optimizer lr = 2.51346e-87

неможетяпроснулсявгости\nвговненасемьдесятминут\nядамвамбудетнезаденьги\nвкусты\n</s>


[152 / 300] Train: Loss = 3.40976, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 150.87it/s]
[152 / 300]   Val: Loss = 4.08355, PPX = 59.36: 100%|██████████| 19/19 [00:00<00:00, 153.97it/s]
[153 / 300] Train: Loss = 3.40061, PPX = 29.98:   4%|▎         | 24/677 [00:00<00:05, 123.54it/s]

Optimizer lr = 6.28364e-88

мывсеумрёмвконцетоннеля\nспорогадажевживоте\nатыпридётсязасобою\nвдали\n</s>


[153 / 300] Train: Loss = 3.40983, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.44it/s]
[153 / 300]   Val: Loss = 4.08167, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 154.67it/s]
[154 / 300] Train: Loss = 3.32159, PPX = 27.70:   4%|▎         | 25/677 [00:00<00:05, 126.70it/s]

Optimizer lr = 1.57091e-88

когдаидиястанулетом\nянемогусказатьвамтру\nнояжнебылинехватает\nборща\n</s>


[154 / 300] Train: Loss = 3.40971, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.04it/s]
[154 / 300]   Val: Loss = 4.08161, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 157.00it/s]
[155 / 300] Train: Loss = 3.43185, PPX = 30.93:   4%|▎         | 24/677 [00:00<00:05, 127.37it/s]

Optimizer lr = 3.92727e-89

янетогочтомывсказки\nяиневиноватине\nнояжнепонимаюмнеостаться\nнете\n</s>


[155 / 300] Train: Loss = 3.40945, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.15it/s]
[155 / 300]   Val: Loss = 4.08339, PPX = 59.35: 100%|██████████| 19/19 [00:00<00:00, 152.12it/s]
[156 / 300] Train: Loss = 3.37603, PPX = 29.25:   4%|▎         | 25/677 [00:00<00:05, 127.84it/s]

Optimizer lr = 9.81819e-90

врукахогромныйдвенедели\nсутраглядитвконцетоннеля\nивсамомделевсёивсето\nналбу\n</s>


[156 / 300] Train: Loss = 3.40954, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.70it/s]
[156 / 300]   Val: Loss = 4.08287, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 158.10it/s]
[157 / 300] Train: Loss = 3.53211, PPX = 34.20:   3%|▎         | 23/677 [00:00<00:05, 119.17it/s]

Optimizer lr = 2.45455e-90

вашмозгнадниминеродился\nненадобылобненакухне\nноеслиесливыненужно\nяжна\n</s>


[157 / 300] Train: Loss = 3.40973, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.54it/s]
[157 / 300]   Val: Loss = 4.08222, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 161.02it/s]
[158 / 300] Train: Loss = 3.28128, PPX = 26.61:   4%|▎         | 24/677 [00:00<00:05, 126.32it/s]

Optimizer lr = 6.13637e-91

насценумненабеломсвете\nивотвамхочетсяхотьвчесть\nичтотохочетсяидаже\nнея\n</s>


[158 / 300] Train: Loss = 3.40960, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.60it/s]
[158 / 300]   Val: Loss = 4.08270, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 154.16it/s]
[159 / 300] Train: Loss = 3.25530, PPX = 25.93:   4%|▎         | 25/677 [00:00<00:05, 125.42it/s]

Optimizer lr = 1.53409e-91

ачтовызнаетенаморе\nянанегоневшуткуни\nидажевнейнеговорила\nвтебе\n</s>


[159 / 300] Train: Loss = 3.40967, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.30it/s]
[159 / 300]   Val: Loss = 4.08184, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 159.07it/s]
[160 / 300] Train: Loss = 3.39187, PPX = 29.72:   3%|▎         | 23/677 [00:00<00:05, 124.25it/s]

Optimizer lr = 3.83523e-92

ачтозавасянезнакомженщин\nнивчёмкакниоднойнизги\nапослеваснеостаётся\nяза\n</s>


[160 / 300] Train: Loss = 3.40965, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.16it/s]
[160 / 300]   Val: Loss = 4.08156, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 153.94it/s]
[161 / 300] Train: Loss = 3.44850, PPX = 31.45:   4%|▎         | 24/677 [00:00<00:05, 123.77it/s]

Optimizer lr = 9.58807e-93

янехочуяалкоголик\nаявчераилиневы\nнокакобычновсёслучилось\nивпуть\n</s>


[161 / 300] Train: Loss = 3.40960, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 156.85it/s]
[161 / 300]   Val: Loss = 4.08240, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 153.56it/s]
[162 / 300] Train: Loss = 3.52746, PPX = 34.04:   4%|▎         | 24/677 [00:00<00:05, 125.30it/s]

Optimizer lr = 2.39702e-93

мнетакненуженчтотохочешь\nяжнепоймуянезаднём\nавнейвотэтотвотипросто\nненой\n</s>


[162 / 300] Train: Loss = 3.40959, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.11it/s]
[162 / 300]   Val: Loss = 4.08202, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 157.90it/s]
[163 / 300] Train: Loss = 3.38273, PPX = 29.45:   4%|▎         | 24/677 [00:00<00:05, 128.05it/s]

Optimizer lr = 5.99255e-94

мыпилиночьюврусскойплатье\nавылиневнутриль\nаяневерилчтооднажды\nнемуж\n</s>


[163 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 158.92it/s]
[163 / 300]   Val: Loss = 4.08181, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 172.79it/s]
[164 / 300] Train: Loss = 3.50148, PPX = 33.16:   4%|▎         | 25/677 [00:00<00:04, 132.03it/s]

Optimizer lr = 1.49814e-94

насъездеюныхипесен\nневсилахмнеинесней\nанаглазахбылтакжеделать\nая\n</s>


[164 / 300] Train: Loss = 3.40987, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.25it/s]
[164 / 300]   Val: Loss = 4.08241, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 157.50it/s]
[165 / 300] Train: Loss = 3.41317, PPX = 30.36:   4%|▎         | 25/677 [00:00<00:05, 127.08it/s]

Optimizer lr = 3.74534e-95

поднувконцетоннелялето\nвсепередмиромгдекупить\nивотужтоженеуверен\nикто\n</s>


[165 / 300] Train: Loss = 3.40957, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.32it/s]
[165 / 300]   Val: Loss = 4.07991, PPX = 59.14: 100%|██████████| 19/19 [00:00<00:00, 153.48it/s]
[166 / 300] Train: Loss = 3.38630, PPX = 29.56:   4%|▎         | 24/677 [00:00<00:05, 122.37it/s]

Optimizer lr = 9.36335e-96

накоймнечтотакоедело\nятоженелюблюкогда\nнотынепростотакужлучше\nнея\n</s>


[166 / 300] Train: Loss = 3.40963, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 158.94it/s]
[166 / 300]   Val: Loss = 4.08212, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 163.06it/s]
[167 / 300] Train: Loss = 3.37500, PPX = 29.22:   4%|▍         | 26/677 [00:00<00:04, 134.77it/s]

Optimizer lr = 2.34084e-96

уольгивэтойпятомвселенной\nивчёмтобылсовсемнето\nивдругниктонепредвещало\nвзагсли\n</s>


[167 / 300] Train: Loss = 3.40944, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 157.21it/s]
[167 / 300]   Val: Loss = 4.08093, PPX = 59.20: 100%|██████████| 19/19 [00:00<00:00, 170.13it/s]
[168 / 300] Train: Loss = 3.42106, PPX = 30.60:   4%|▎         | 25/677 [00:00<00:05, 128.80it/s]

Optimizer lr = 5.8521e-97

оксаналебедьколегу\nподвечерточтомывпрах\nчтовнихтакбольшенехватает\nнегрех\n</s>


[168 / 300] Train: Loss = 3.40967, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.32it/s]
[168 / 300]   Val: Loss = 4.08278, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 150.24it/s]
[169 / 300] Train: Loss = 3.25719, PPX = 25.98:   3%|▎         | 23/677 [00:00<00:05, 117.29it/s]

Optimizer lr = 1.46302e-97

апомнишьвдетствебродитбыло\nсказалонвэтомнеспеша\nпохожеяуженебуду\nкарман\n</s>


[169 / 300] Train: Loss = 3.40971, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.08it/s]
[169 / 300]   Val: Loss = 4.08272, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 159.76it/s]
[170 / 300] Train: Loss = 3.39686, PPX = 29.87:   3%|▎         | 23/677 [00:00<00:05, 125.87it/s]

Optimizer lr = 3.65756e-98

ачтовызнаетеочомто\nмненемогусказатьсказать\nавымневотмойдругнасвете\nиты\n</s>


[170 / 300] Train: Loss = 3.40971, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 152.66it/s]
[170 / 300]   Val: Loss = 4.08263, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 165.54it/s]
[171 / 300] Train: Loss = 3.44350, PPX = 31.30:   4%|▎         | 24/677 [00:00<00:05, 126.80it/s]

Optimizer lr = 9.1439e-99

однаждывморгенепроходит\nябнатебенетакужплох\nнетянезнаянасаяне\nнесмог\n</s>


[171 / 300] Train: Loss = 3.40973, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.33it/s]
[171 / 300]   Val: Loss = 4.08096, PPX = 59.20: 100%|██████████| 19/19 [00:00<00:00, 156.62it/s]
[172 / 300] Train: Loss = 3.39048, PPX = 29.68:   3%|▎         | 23/677 [00:00<00:05, 119.24it/s]

Optimizer lr = 2.28597e-99

янелюблювасзатобою\nсказалавсёжеямилее\nаонвсёэтобылипросто\nилень\n</s>


[172 / 300] Train: Loss = 3.40953, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 156.57it/s]
[172 / 300]   Val: Loss = 4.08295, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 156.10it/s]
[173 / 300] Train: Loss = 3.38510, PPX = 29.52:   4%|▎         | 24/677 [00:00<00:05, 125.19it/s]

Optimizer lr = 5.71494e-100

янехочунемноголет\nнекаждыйденьвсвоейженой\nибылнавсюночьстарыйтыкать\nвменя\n</s>


[173 / 300] Train: Loss = 3.40965, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.15it/s]
[173 / 300]   Val: Loss = 4.08138, PPX = 59.23: 100%|██████████| 19/19 [00:00<00:00, 151.75it/s]
[174 / 300] Train: Loss = 3.52938, PPX = 34.10:   3%|▎         | 23/677 [00:00<00:05, 122.74it/s]

Optimizer lr = 1.42873e-100

явасмогучегожеболе\nивотивсёнетакужплох\nябылстобойкакиетолько\nсоси\n</s>


[174 / 300] Train: Loss = 3.40978, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.28it/s]
[174 / 300]   Val: Loss = 4.08277, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 155.86it/s]
[175 / 300] Train: Loss = 3.42138, PPX = 30.61:   3%|▎         | 23/677 [00:00<00:05, 124.18it/s]

Optimizer lr = 3.57184e-101

ачтовызнаетеобоже\nчтояинемогусказать\nаянепомнючтотакое\nмнебсднём\n</s>


[175 / 300] Train: Loss = 3.40939, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.83it/s]
[175 / 300]   Val: Loss = 4.08287, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 150.51it/s]
[176 / 300] Train: Loss = 3.14605, PPX = 23.24:   4%|▎         | 24/677 [00:00<00:05, 122.97it/s]

Optimizer lr = 8.92959e-102

какхорошочтомывпорядке\nноневыходитничерта\nнотуттовжизнивзяливсердце\nжена\n</s>


[176 / 300] Train: Loss = 3.40970, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.28it/s]
[176 / 300]   Val: Loss = 4.08209, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 166.18it/s]
[177 / 300] Train: Loss = 3.41350, PPX = 30.37:   4%|▎         | 24/677 [00:00<00:05, 125.81it/s]

Optimizer lr = 2.2324e-102

влюбвивглазахувасвмашине\nвсёвремявадкаквиноват\nкругомдачтоещёнебудет\nнизги\n</s>


[177 / 300] Train: Loss = 3.40985, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 162.52it/s]
[177 / 300]   Val: Loss = 4.08233, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 169.26it/s]
[178 / 300] Train: Loss = 3.50686, PPX = 33.34:   4%|▎         | 25/677 [00:00<00:05, 126.13it/s]

Optimizer lr = 5.58099e-103

вытакжекактоговорите\nаввасподзвукинивчём\nтамгдетовсердцепревратилась\nвдругой\n</s>


[178 / 300] Train: Loss = 3.40956, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.27it/s]
[178 / 300]   Val: Loss = 4.08243, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 156.88it/s]
[179 / 300] Train: Loss = 3.20588, PPX = 24.68:   4%|▎         | 24/677 [00:00<00:05, 124.31it/s]

Optimizer lr = 1.39525e-103

унасвглазахувасвпариже\nстобоюивплену\nапросточтобычтотакпросто\nктоесть\n</s>


[179 / 300] Train: Loss = 3.40975, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 159.92it/s]
[179 / 300]   Val: Loss = 4.08278, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 169.48it/s]
[180 / 300] Train: Loss = 3.49076, PPX = 32.81:   4%|▎         | 24/677 [00:00<00:05, 129.07it/s]

Optimizer lr = 3.48812e-104

менянетакужэтоплохо\nневымневдушуневговно\nноянакухненескулаками\nниесть\n</s>


[180 / 300] Train: Loss = 3.40976, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.09it/s]
[180 / 300]   Val: Loss = 4.08182, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 153.22it/s]
[181 / 300] Train: Loss = 3.69756, PPX = 40.35:   3%|▎         | 23/677 [00:00<00:05, 122.60it/s]

Optimizer lr = 8.7203e-105

явваслюблючегожеболе\nитутжемнепролобичто\nотнихменянеговорите\nнестой\n</s>


[181 / 300] Train: Loss = 3.40961, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.01it/s]
[181 / 300]   Val: Loss = 4.08277, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 156.47it/s]
[182 / 300] Train: Loss = 3.34060, PPX = 28.24:   4%|▎         | 24/677 [00:00<00:05, 123.31it/s]

Optimizer lr = 2.18008e-105

ямеждупрочимвбане\nсказалсулыбкойнакраю\nивдругснимвнатуренебудет\nния\n</s>


[182 / 300] Train: Loss = 3.40946, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.63it/s]
[182 / 300]   Val: Loss = 4.08191, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 154.84it/s]
[183 / 300] Train: Loss = 3.39393, PPX = 29.78:   4%|▎         | 24/677 [00:00<00:05, 125.89it/s]

Optimizer lr = 5.45019e-106

нетненужнытвоиячайка\nгдежеуменядавно\nнеточтобоченьнужентолько\nвменя\n</s>


[183 / 300] Train: Loss = 3.40957, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 153.57it/s]
[183 / 300]   Val: Loss = 4.08256, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 152.68it/s]
[184 / 300] Train: Loss = 3.34586, PPX = 28.38:   3%|▎         | 23/677 [00:00<00:05, 122.76it/s]

Optimizer lr = 1.36255e-106

вынемоглибывыбы\nневсмыслеоттогочтосё\nновыпроститевдругчтоэто\nикрест\n</s>


[184 / 300] Train: Loss = 3.40990, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.53it/s]
[184 / 300]   Val: Loss = 4.08200, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 157.38it/s]
[185 / 300] Train: Loss = 3.51658, PPX = 33.67:   4%|▎         | 24/677 [00:00<00:05, 126.89it/s]

Optimizer lr = 3.40637e-107

чтоэтовсёнепоприроде\nвстранельвпятьразнасветени\nтонемоглибыкакбымнебы\nнея\n</s>


[185 / 300] Train: Loss = 3.40954, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.22it/s]
[185 / 300]   Val: Loss = 4.08238, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 155.49it/s]
[186 / 300] Train: Loss = 3.47570, PPX = 32.32:   4%|▎         | 24/677 [00:00<00:05, 125.15it/s]

Optimizer lr = 8.51592e-108

ясталумрёмвработусмужем\nнезнаючтонибудьдомой\nинебылбольшенесомною\nния\n</s>


[186 / 300] Train: Loss = 3.40963, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.22it/s]
[186 / 300]   Val: Loss = 4.08049, PPX = 59.17: 100%|██████████| 19/19 [00:00<00:00, 153.98it/s]
[187 / 300] Train: Loss = 3.31116, PPX = 27.42:   4%|▎         | 24/677 [00:00<00:05, 124.60it/s]

Optimizer lr = 2.12898e-108

вынемогуянежелаю\nядумалчтоеёнето\nанатебеужелетела\nсутра\n</s>


[187 / 300] Train: Loss = 3.40974, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.48it/s]
[187 / 300]   Val: Loss = 4.08122, PPX = 59.22: 100%|██████████| 19/19 [00:00<00:00, 156.45it/s]
[188 / 300] Train: Loss = 3.44970, PPX = 31.49:   4%|▎         | 24/677 [00:00<00:05, 126.22it/s]

Optimizer lr = 5.32245e-109

безголовыунассегодня\nивнемпопрежнемукаквстарь\nненравитсятеперьнасбудет\nвменя\n</s>


[188 / 300] Train: Loss = 3.40979, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.42it/s]
[188 / 300]   Val: Loss = 4.08284, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 154.14it/s]
[189 / 300] Train: Loss = 3.44026, PPX = 31.20:   3%|▎         | 23/677 [00:00<00:05, 119.49it/s]

Optimizer lr = 1.33061e-109

янеизтехктоневернулся\nвсёвремявжизнинетнельзя\nяпонялчтокнемунебуду\nдыша\n</s>


[189 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.51it/s]
[189 / 300]   Val: Loss = 4.08195, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 155.81it/s]
[190 / 300] Train: Loss = 3.39017, PPX = 29.67:   4%|▎         | 24/677 [00:00<00:05, 126.26it/s]

Optimizer lr = 3.32653e-110

ячастоговорючтодома\nневсмыслечтолинесмешно\nатынеможетчтозаэто\nвответ\n</s>


[190 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.68it/s]
[190 / 300]   Val: Loss = 4.08277, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 163.70it/s]
[191 / 300] Train: Loss = 3.46046, PPX = 31.83:   4%|▎         | 25/677 [00:00<00:05, 129.68it/s]

Optimizer lr = 8.31633e-111

унасвглазахвсёвремявсердце\nспросилягдетотамдрузья\nчтодажевнёмнеполучалось\nнислов\n</s>


[191 / 300] Train: Loss = 3.40977, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 157.40it/s]
[191 / 300]   Val: Loss = 4.08149, PPX = 59.23: 100%|██████████| 19/19 [00:00<00:00, 152.53it/s]
[192 / 300] Train: Loss = 3.59456, PPX = 36.40:   3%|▎         | 23/677 [00:00<00:05, 119.53it/s]

Optimizer lr = 2.07908e-111

янелюблюапочемубы\nневсилахльвтомчтоянерад\nяпростождунолучшелучше\nнезнал\n</s>


[192 / 300] Train: Loss = 3.40971, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.41it/s]
[192 / 300]   Val: Loss = 4.08176, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 157.13it/s]
[193 / 300] Train: Loss = 3.44794, PPX = 31.44:   4%|▎         | 24/677 [00:00<00:05, 124.09it/s]

Optimizer lr = 5.1977e-112

зачеммыбылитакпрекрасно\nяженщинавтебенерад\nипустьвответнанихжениться\nдовас\n</s>


[193 / 300] Train: Loss = 3.40953, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.02it/s]
[193 / 300]   Val: Loss = 4.08213, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 154.18it/s]
[194 / 300] Train: Loss = 3.44922, PPX = 31.48:   4%|▎         | 24/677 [00:00<00:05, 124.61it/s]

Optimizer lr = 1.29943e-112

явдетствебылнемногосчастья\nянемогумогумогу\nаятебенепонимаю\nничо\n</s>


[194 / 300] Train: Loss = 3.40965, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.13it/s]
[194 / 300]   Val: Loss = 4.08225, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 154.51it/s]
[195 / 300] Train: Loss = 3.37098, PPX = 29.11:   3%|▎         | 23/677 [00:00<00:05, 125.12it/s]

Optimizer lr = 3.24857e-113

воттымоймилыйчтозачудо\nивэтотразябылвплену\nноябынехотелбыябы\nиза\n</s>


[195 / 300] Train: Loss = 3.40960, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 156.85it/s]
[195 / 300]   Val: Loss = 4.08231, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 156.09it/s]
[196 / 300] Train: Loss = 3.42713, PPX = 30.79:   4%|▎         | 24/677 [00:00<00:05, 125.50it/s]

Optimizer lr = 8.12141e-114

вотснамивышелподглазами\nинесмотрикаквбреду\nпокругудушиегоидуи\nвкровать\n</s>


[196 / 300] Train: Loss = 3.40998, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.72it/s]
[196 / 300]   Val: Loss = 4.08174, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 156.72it/s]
[197 / 300] Train: Loss = 3.41109, PPX = 30.30:   4%|▎         | 24/677 [00:00<00:05, 124.08it/s]

Optimizer lr = 2.03035e-114

комнеодинвсёвремявжизни\nивсердцесмотрятнабобра\nичтобнестанетинестанет\nавлоб\n</s>


[197 / 300] Train: Loss = 3.40985, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.90it/s]
[197 / 300]   Val: Loss = 4.08176, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 159.44it/s]
[198 / 300] Train: Loss = 3.51683, PPX = 33.68:   4%|▎         | 24/677 [00:00<00:05, 126.87it/s]

Optimizer lr = 5.07588e-115

насклонелетятотпростите\nикаквтебяиванспешат\nдаябыраднооказалось\nнаты\n</s>


[198 / 300] Train: Loss = 3.40987, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 152.15it/s]
[198 / 300]   Val: Loss = 4.08202, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 147.03it/s]
[199 / 300] Train: Loss = 3.36201, PPX = 28.85:   3%|▎         | 23/677 [00:00<00:05, 123.38it/s]

Optimizer lr = 1.26897e-115

успеладопорыдокрая\nивмыслисмертьивнёмвпруду\nмнеговоритчтонеузнали\nион\n</s>


[199 / 300] Train: Loss = 3.40984, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 152.67it/s]
[199 / 300]   Val: Loss = 4.08186, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 148.31it/s]
[200 / 300] Train: Loss = 3.53936, PPX = 34.44:   3%|▎         | 22/677 [00:00<00:05, 109.81it/s]

Optimizer lr = 3.17243e-116

любовьпохожаянаужин\nивуходверьитемноте\nивнёмнасамомделевышло\nвменя\n</s>


[200 / 300] Train: Loss = 3.40955, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 151.02it/s]
[200 / 300]   Val: Loss = 4.08120, PPX = 59.22: 100%|██████████| 19/19 [00:00<00:00, 157.21it/s]
[201 / 300] Train: Loss = 3.44476, PPX = 31.34:   4%|▎         | 24/677 [00:00<00:05, 129.05it/s]

Optimizer lr = 7.93107e-117

нучтовамтакоеосмыслежизни\nневсилахвшуткуниколай\nивполделебыловсёжекнейс\nивгроб\n</s>


[201 / 300] Train: Loss = 3.40970, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.29it/s]
[201 / 300]   Val: Loss = 4.08331, PPX = 59.34: 100%|██████████| 19/19 [00:00<00:00, 162.17it/s]
[202 / 300] Train: Loss = 3.28249, PPX = 26.64:   3%|▎         | 23/677 [00:00<00:05, 118.47it/s]

Optimizer lr = 1.98277e-117

впустомквартиревсёивполе\nневсилахльвчёмтутутуту\nаялишьвчёрномнонакухне\nнаты\n</s>


[202 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.91it/s]
[202 / 300]   Val: Loss = 4.08165, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 157.52it/s]
[203 / 300] Train: Loss = 3.40531, PPX = 30.12:   3%|▎         | 23/677 [00:00<00:05, 125.68it/s]

Optimizer lr = 4.95692e-118

какбудтобыбыжитьвпорядке\nтотамналбутонето\nавэтитридцатьвечербылсегодня\nвборще\n</s>


[203 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.09it/s]
[203 / 300]   Val: Loss = 4.08181, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 158.48it/s]
[204 / 300] Train: Loss = 3.57131, PPX = 35.56:   4%|▎         | 24/677 [00:00<00:05, 126.52it/s]

Optimizer lr = 1.23923e-118

нучтоужтынезнаешьчтоли\nябвасвсвойстихнепьюине\nненадомноюзаокошком\nия\n</s>


[204 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 159.91it/s]
[204 / 300]   Val: Loss = 4.08119, PPX = 59.22: 100%|██████████| 19/19 [00:00<00:00, 166.06it/s]
[205 / 300] Train: Loss = 3.34501, PPX = 28.36:   4%|▎         | 25/677 [00:00<00:05, 129.03it/s]

Optimizer lr = 3.09807e-119

олегберётотсчастьяпью\nаточтогоднемогине\nтыжнанегонеестьлиэто\nнея\n</s>


[205 / 300] Train: Loss = 3.40971, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 163.78it/s]
[205 / 300]   Val: Loss = 4.08256, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 159.03it/s]
[206 / 300] Train: Loss = 3.40494, PPX = 30.11:   3%|▎         | 23/677 [00:00<00:05, 127.17it/s]

Optimizer lr = 7.74518e-120

янехочупогороскопу\nанатовремябылбыя\nинебытьможетотпоэта\nния\n</s>


[206 / 300] Train: Loss = 3.40991, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.36it/s]
[206 / 300]   Val: Loss = 4.08219, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 153.17it/s]
[207 / 300] Train: Loss = 3.43605, PPX = 31.06:   4%|▎         | 24/677 [00:00<00:05, 124.97it/s]

Optimizer lr = 1.9363e-120

стоюсцветамиивпостели\nосмыслежизниктосильней\nичтотовэтойпервойнету\nнезнал\n</s>


[207 / 300] Train: Loss = 3.40962, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 152.73it/s]
[207 / 300]   Val: Loss = 4.08165, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 150.48it/s]
[208 / 300] Train: Loss = 3.69847, PPX = 40.39:   4%|▎         | 24/677 [00:00<00:05, 126.87it/s]

Optimizer lr = 4.84074e-121

чемменьшеженщинумылюбим\nтехктостобойвконцеконцов\nавынезнаючтозаэто\nзабудь\n</s>


[208 / 300] Train: Loss = 3.40959, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.75it/s]
[208 / 300]   Val: Loss = 4.08302, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 153.76it/s]
[209 / 300] Train: Loss = 3.23529, PPX = 25.41:   3%|▎         | 23/677 [00:00<00:05, 124.02it/s]

Optimizer lr = 1.21018e-121

янемогупрошувасдома\nичтобнеможетбытьсмогли\nябудусамегонемного\nихдве\n</s>


[209 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 149.83it/s]
[209 / 300]   Val: Loss = 4.08241, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 143.46it/s]
[210 / 300] Train: Loss = 3.41791, PPX = 30.51:   4%|▎         | 24/677 [00:00<00:05, 123.30it/s]

Optimizer lr = 3.02546e-122

явэтойжизнивсёзамечаю\nимнеотодногодетей\nаточтовэтомпервый\nикак\n</s>


[210 / 300] Train: Loss = 3.40985, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 151.74it/s]
[210 / 300]   Val: Loss = 4.08135, PPX = 59.23: 100%|██████████| 19/19 [00:00<00:00, 152.20it/s]
[211 / 300] Train: Loss = 3.34387, PPX = 28.33:   3%|▎         | 23/677 [00:00<00:05, 125.63it/s]

Optimizer lr = 7.56366e-123

ястаринелюблювасболе\nнапамятьнелюблювасвнём\nчтоможноскемнемогипрежде\nакто\n</s>


[211 / 300] Train: Loss = 3.40942, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 149.92it/s]
[211 / 300]   Val: Loss = 4.08188, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 150.26it/s]
[212 / 300] Train: Loss = 3.59432, PPX = 36.39:   3%|▎         | 23/677 [00:00<00:05, 119.56it/s]

Optimizer lr = 1.89091e-123

увсехбываетчтоаркадий\nнаполеснейнеспеша\nатытакойвоттакичтоже\nнея\n</s>


[212 / 300] Train: Loss = 3.40993, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.95it/s]
[212 / 300]   Val: Loss = 4.08243, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 153.72it/s]
[213 / 300] Train: Loss = 3.49664, PPX = 33.00:   4%|▎         | 24/677 [00:00<00:05, 124.57it/s]

Optimizer lr = 4.72729e-124

навсехсвятоговалентина\nондумалсветаонвплену\nатынанеместьговорите\nвокно\n</s>


[213 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.64it/s]
[213 / 300]   Val: Loss = 4.08314, PPX = 59.33: 100%|██████████| 19/19 [00:00<00:00, 161.18it/s]
[214 / 300] Train: Loss = 3.29074, PPX = 26.86:   4%|▎         | 25/677 [00:00<00:04, 132.46it/s]

Optimizer lr = 1.18182e-124

всельпоидоутрадогроба\nлишьвжизнинетниодного\nаялюблюсовсемнепомню\nневсе\n</s>


[214 / 300] Train: Loss = 3.40960, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 156.87it/s]
[214 / 300]   Val: Loss = 4.08155, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 152.31it/s]
[215 / 300] Train: Loss = 3.37543, PPX = 29.24:   3%|▎         | 22/677 [00:00<00:05, 114.52it/s]

Optimizer lr = 2.95455e-125

янемогуяверювморе\nноздесьнебудетинесо\nномынакухненевнихто\nнобрат\n</s>


[215 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.07it/s]
[215 / 300]   Val: Loss = 4.08136, PPX = 59.23: 100%|██████████| 19/19 [00:00<00:00, 157.10it/s]
[216 / 300] Train: Loss = 3.28751, PPX = 26.78:   3%|▎         | 23/677 [00:00<00:05, 124.43it/s]

Optimizer lr = 7.38638e-126

апомнишьмыстобойполужам\nивдомедажебезума\nаянемогбыбылбыбылбы\nвсебя\n</s>


[216 / 300] Train: Loss = 3.40971, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.90it/s]
[216 / 300]   Val: Loss = 4.08306, PPX = 59.33: 100%|██████████| 19/19 [00:00<00:00, 152.48it/s]
[217 / 300] Train: Loss = 3.32832, PPX = 27.89:   4%|▎         | 24/677 [00:00<00:05, 124.20it/s]

Optimizer lr = 1.8466e-126

зачемтыкдвериневтвоиприроде\nнаэтойновыйтричаса\nавыневерюкакнасвете\nиза\n</s>


[217 / 300] Train: Loss = 3.40954, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.53it/s]
[217 / 300]   Val: Loss = 4.08194, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 152.12it/s]
[218 / 300] Train: Loss = 3.50820, PPX = 33.39:   4%|▎         | 24/677 [00:00<00:05, 122.65it/s]

Optimizer lr = 4.61649e-127

япомнюкакнаэтикосмос\nнакойнасветевсехмилее\nувасжеяиневернулся\nнея\n</s>


[218 / 300] Train: Loss = 3.40983, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.67it/s]
[218 / 300]   Val: Loss = 4.08218, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 156.73it/s]
[219 / 300] Train: Loss = 3.41667, PPX = 30.47:   4%|▎         | 24/677 [00:00<00:05, 127.51it/s]

Optimizer lr = 1.15412e-127

янемогупонятьродная\nмнечтоточтозанейне\nобожечтоянесогласен\nния\n</s>


[219 / 300] Train: Loss = 3.40987, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.87it/s]
[219 / 300]   Val: Loss = 4.08073, PPX = 59.19: 100%|██████████| 19/19 [00:00<00:00, 172.22it/s]
[220 / 300] Train: Loss = 3.39736, PPX = 29.89:   4%|▎         | 25/677 [00:00<00:04, 131.72it/s]

Optimizer lr = 2.88531e-128

янемогуястануутром\nунастутнетуиобид\nаэтобылсмоейлюбовью\nнавкус\n</s>


[220 / 300] Train: Loss = 3.40963, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 156.94it/s]
[220 / 300]   Val: Loss = 4.08180, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 154.31it/s]
[221 / 300] Train: Loss = 3.38017, PPX = 29.38:   4%|▎         | 24/677 [00:00<00:05, 123.64it/s]

Optimizer lr = 7.21326e-129

ятаклюблючегожеболе\nиямогудлявастут\nиявначалелетнебуду\nнея\n</s>


[221 / 300] Train: Loss = 3.40963, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.01it/s]
[221 / 300]   Val: Loss = 4.08194, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 159.59it/s]
[222 / 300] Train: Loss = 3.40997, PPX = 30.26:   3%|▎         | 23/677 [00:00<00:05, 116.00it/s]

Optimizer lr = 1.80332e-129

иябоюсьвсебеоксане\nидрузьнадваносканету\nяневсвоеймереэточайка\nнея\n</s>


[222 / 300] Train: Loss = 3.40972, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.74it/s]
[222 / 300]   Val: Loss = 4.08294, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 156.19it/s]
[223 / 300] Train: Loss = 3.54885, PPX = 34.77:   4%|▎         | 24/677 [00:00<00:05, 124.78it/s]

Optimizer lr = 4.50829e-130

когданибудьуженевзяли\nидажевэтомнемогу\nаниколайвответстобою\nтыправ\n</s>


[223 / 300] Train: Loss = 3.40983, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.89it/s]
[223 / 300]   Val: Loss = 4.08233, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 156.65it/s]
[224 / 300] Train: Loss = 3.56662, PPX = 35.40:   4%|▎         | 24/677 [00:00<00:05, 126.64it/s]

Optimizer lr = 1.12707e-130

всеговорятчтомывроссии\nяговорювамнемогу\nинелюблюятожебуду\nнеты\n</s>


[224 / 300] Train: Loss = 3.40974, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.46it/s]
[224 / 300]   Val: Loss = 4.08291, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 152.24it/s]
[225 / 300] Train: Loss = 3.45809, PPX = 31.76:   4%|▎         | 25/677 [00:00<00:05, 124.41it/s]

Optimizer lr = 2.81768e-131

вгипотетическойвселенной\nиначалхорошо\nатыприходишьвамнелюбишь\nивдруг\n</s>


[225 / 300] Train: Loss = 3.40971, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.20it/s]
[225 / 300]   Val: Loss = 4.08186, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 153.99it/s]
[226 / 300] Train: Loss = 3.39217, PPX = 29.73:   3%|▎         | 23/677 [00:00<00:05, 116.67it/s]

Optimizer lr = 7.0442e-132

враюниразунепоцели\nхотялюблюявамнетру\nаятебевтебежениться\nсутра\n</s>


[226 / 300] Train: Loss = 3.40964, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 157.44it/s]
[226 / 300]   Val: Loss = 4.08210, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 154.87it/s]
[227 / 300] Train: Loss = 3.36706, PPX = 28.99:   4%|▎         | 25/677 [00:00<00:05, 128.38it/s]

Optimizer lr = 1.76105e-132

явиделвамвначалемая\nаясутранеодинок\nидосихпорнедоутрано\nссобой\n</s>


[227 / 300] Train: Loss = 3.40978, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 165.00it/s]
[227 / 300]   Val: Loss = 4.08260, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 169.19it/s]
[228 / 300] Train: Loss = 3.32856, PPX = 27.90:   4%|▎         | 24/677 [00:00<00:05, 127.57it/s]

Optimizer lr = 4.40263e-133

глебговоритнемноговжизни\nноеслинетневиноват\nияпожизнинезанихне\nненой\n</s>


[228 / 300] Train: Loss = 3.40983, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 159.46it/s]
[228 / 300]   Val: Loss = 4.08254, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 153.48it/s]
[229 / 300] Train: Loss = 3.36377, PPX = 28.90:   4%|▎         | 24/677 [00:00<00:05, 125.66it/s]

Optimizer lr = 1.10066e-133

яухожуссебявпостели\nвсёпотомучтотакидаже\nавнёмбылбудеточеньплохо\nвменя\n</s>


[229 / 300] Train: Loss = 3.40957, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.38it/s]
[229 / 300]   Val: Loss = 4.07999, PPX = 59.15: 100%|██████████| 19/19 [00:00<00:00, 151.32it/s]
[230 / 300] Train: Loss = 3.50906, PPX = 33.42:   4%|▎         | 25/677 [00:00<00:05, 128.34it/s]

Optimizer lr = 2.75164e-134

яваслюблютебяродная\nчтонемогусказатьвамрад\nизасобойещёиподпись\nнемой\n</s>


[230 / 300] Train: Loss = 3.40971, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 151.35it/s]
[230 / 300]   Val: Loss = 4.08260, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 159.74it/s]
[231 / 300] Train: Loss = 3.13186, PPX = 22.92:   4%|▎         | 25/677 [00:00<00:05, 128.57it/s]

Optimizer lr = 6.87911e-135

янепоймусегоднявгород\nябябылтебявсебепальто\nаэтопростонебывает\nбезсил\n</s>


[231 / 300] Train: Loss = 3.40957, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 158.72it/s]
[231 / 300]   Val: Loss = 4.08207, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 154.07it/s]
[232 / 300] Train: Loss = 3.55350, PPX = 34.94:   3%|▎         | 23/677 [00:00<00:05, 118.57it/s]

Optimizer lr = 1.71978e-135

янехочуячайкавидел\nибольшетакневиноват\nанатебеихдвадцатьвосемь\nневрот\n</s>


[232 / 300] Train: Loss = 3.40962, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 161.02it/s]
[232 / 300]   Val: Loss = 4.08102, PPX = 59.21: 100%|██████████| 19/19 [00:00<00:00, 156.03it/s]
[233 / 300] Train: Loss = 3.42515, PPX = 30.73:   4%|▎         | 25/677 [00:00<00:05, 125.95it/s]

Optimizer lr = 4.29944e-136

когдаябылнестанучто\nяпростосаммогуженой\nаяврагуинехотелось\nнета\n</s>


[233 / 300] Train: Loss = 3.40958, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 157.97it/s]
[233 / 300]   Val: Loss = 4.08161, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 167.40it/s]
[234 / 300] Train: Loss = 3.49158, PPX = 32.84:   4%|▎         | 25/677 [00:00<00:04, 132.75it/s]

Optimizer lr = 1.07486e-136

янемогупрошувасвидел\nнехватитвгородеяваду\nаеслибяинемогуя\nпросмерть\n</s>


[234 / 300] Train: Loss = 3.40984, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.80it/s]
[234 / 300]   Val: Loss = 4.08222, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 150.25it/s]
[235 / 300] Train: Loss = 3.30610, PPX = 27.28:   3%|▎         | 23/677 [00:00<00:05, 123.77it/s]

Optimizer lr = 2.68715e-137

всемьенекаждыйкакнасвете\nнонетакаямнесума\nноскрикомвдругегододома\nнавы\n</s>


[235 / 300] Train: Loss = 3.40970, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.47it/s]
[235 / 300]   Val: Loss = 4.08286, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 153.79it/s]
[236 / 300] Train: Loss = 3.46040, PPX = 31.83:   4%|▎         | 25/677 [00:00<00:05, 128.03it/s]

Optimizer lr = 6.71788e-138

опятькакойтомнеоксана\nинезаметилвэтомход\nянетебеаэтозначит\nния\n</s>


[236 / 300] Train: Loss = 3.40986, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.72it/s]
[236 / 300]   Val: Loss = 4.08245, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 167.01it/s]
[237 / 300] Train: Loss = 3.37609, PPX = 29.26:   4%|▍         | 26/677 [00:00<00:05, 128.12it/s]

Optimizer lr = 1.67947e-138

аркадийхочетбынепонял\nиямогутебемогу\nятакусталиненастолько\nирад\n</s>


[237 / 300] Train: Loss = 3.40956, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 153.70it/s]
[237 / 300]   Val: Loss = 4.08264, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 165.70it/s]
[238 / 300] Train: Loss = 3.27296, PPX = 26.39:   4%|▍         | 26/677 [00:00<00:04, 134.42it/s]

Optimizer lr = 4.19867e-139

тынеповеритеаэто\nсказалвыжнедлянас\nипочемутутнеуверен\nнеда\n</s>


[238 / 300] Train: Loss = 3.40988, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 159.42it/s]
[238 / 300]   Val: Loss = 4.08286, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 156.23it/s]
[239 / 300] Train: Loss = 3.32305, PPX = 27.74:   4%|▎         | 24/677 [00:00<00:05, 124.97it/s]

Optimizer lr = 1.04967e-139

ясдетстваднёмипомервсердце\nневерювдоменемужик\nаяужеинежелаю\nнебрат\n</s>


[239 / 300] Train: Loss = 3.40954, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 156.82it/s]
[239 / 300]   Val: Loss = 4.08193, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 152.95it/s]
[240 / 300] Train: Loss = 3.43800, PPX = 31.12:   4%|▍         | 26/677 [00:00<00:04, 134.87it/s]

Optimizer lr = 2.62417e-140

ясдетстваднейвконцетоннеля\nипонатуренетакуж\nнепьётнебудетвэтомместе\nвговно\n</s>


[240 / 300] Train: Loss = 3.40977, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 160.73it/s]
[240 / 300]   Val: Loss = 4.08185, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 156.25it/s]
[241 / 300] Train: Loss = 3.45570, PPX = 31.68:   4%|▎         | 24/677 [00:00<00:05, 123.04it/s]

Optimizer lr = 6.56043e-141

аэточтотакоестранный\nянемогупонятькруги\nисамссебянетакужбольше\nненой\n</s>


[241 / 300] Train: Loss = 3.40955, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 152.97it/s]
[241 / 300]   Val: Loss = 4.08269, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 149.94it/s]
[242 / 300] Train: Loss = 3.26323, PPX = 26.13:   4%|▎         | 24/677 [00:00<00:05, 122.57it/s]

Optimizer lr = 1.64011e-141

когданибудьиянебуду\nнивчёмможнопростобог\nатовменяужененадо\nсменя\n</s>


[242 / 300] Train: Loss = 3.40977, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 152.94it/s]
[242 / 300]   Val: Loss = 4.08315, PPX = 59.33: 100%|██████████| 19/19 [00:00<00:00, 151.20it/s]
[243 / 300] Train: Loss = 3.18593, PPX = 24.19:   4%|▎         | 24/677 [00:00<00:05, 127.54it/s]

Optimizer lr = 4.10027e-142

накухнечтотоскулаками\nиямогутебекаквины\nчтоневнеёаянепомню\nничем\n</s>


[243 / 300] Train: Loss = 3.40967, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.95it/s]
[243 / 300]   Val: Loss = 4.08218, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 171.05it/s]
[244 / 300] Train: Loss = 3.28267, PPX = 26.65:   4%|▎         | 25/677 [00:00<00:05, 128.59it/s]

Optimizer lr = 1.02507e-142

явзеркаленепонимаю\nнонемогучтоббольшевчом\nавывпорядкенетудабы\nнайду\n</s>


[244 / 300] Train: Loss = 3.40981, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 157.12it/s]
[244 / 300]   Val: Loss = 4.08153, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 155.27it/s]
[245 / 300] Train: Loss = 3.47474, PPX = 32.29:   3%|▎         | 23/677 [00:00<00:05, 125.85it/s]

Optimizer lr = 2.56267e-143

яговорюочёмтоочень\nияхочутебялюблю\nночемтоненачтотозначит\nжена\n</s>


[245 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 160.16it/s]
[245 / 300]   Val: Loss = 4.08148, PPX = 59.23: 100%|██████████| 19/19 [00:00<00:00, 165.89it/s]
[246 / 300] Train: Loss = 3.36396, PPX = 28.90:   4%|▎         | 25/677 [00:00<00:04, 131.76it/s]

Optimizer lr = 6.40667e-144

янелюблювасзакемдесять\nиянанейтаммышьирот\nакакжехочетсявночной\nвокно\n</s>


[246 / 300] Train: Loss = 3.40954, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.52it/s]
[246 / 300]   Val: Loss = 4.08122, PPX = 59.22: 100%|██████████| 19/19 [00:00<00:00, 153.72it/s]
[247 / 300] Train: Loss = 3.45610, PPX = 31.69:   4%|▎         | 24/677 [00:00<00:05, 124.37it/s]

Optimizer lr = 1.60167e-144

каккнамизваснезацели\nянепоймутебялюблю\nиначалячтобылвначале\nвменя\n</s>


[247 / 300] Train: Loss = 3.40958, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 158.23it/s]
[247 / 300]   Val: Loss = 4.08235, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 149.81it/s]
[248 / 300] Train: Loss = 3.46261, PPX = 31.90:   4%|▎         | 24/677 [00:00<00:05, 122.86it/s]

Optimizer lr = 4.00417e-145

извсехизречкивышелвышел\nичтобемунеестнесмог\nномнебынепожизнинужно\nана\n</s>


[248 / 300] Train: Loss = 3.40956, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 149.96it/s]
[248 / 300]   Val: Loss = 4.08202, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 145.93it/s]
[249 / 300] Train: Loss = 3.33857, PPX = 28.18:   4%|▎         | 24/677 [00:00<00:05, 119.68it/s]

Optimizer lr = 1.00104e-145

явдетствебылсовсемнепомню\nивсамомделеянемог\nинемогучтобмнененадо\nвобед\n</s>


[249 / 300] Train: Loss = 3.40958, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 151.62it/s]
[249 / 300]   Val: Loss = 4.08281, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 156.55it/s]
[250 / 300] Train: Loss = 3.36790, PPX = 29.02:   4%|▎         | 25/677 [00:00<00:05, 124.04it/s]

Optimizer lr = 2.5026e-146

апомнишьтанябылокакто\nаснимижизньамнеодна\nаяхотелипонимаю\nваду\n</s>


[250 / 300] Train: Loss = 3.40951, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.74it/s]
[250 / 300]   Val: Loss = 4.08242, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 166.04it/s]
[251 / 300] Train: Loss = 3.34839, PPX = 28.46:   4%|▎         | 24/677 [00:00<00:05, 122.39it/s]

Optimizer lr = 6.25651e-147

олегсработынаработу\nивсёжеиногдадавно\nичтотовобщемтовсказке\nсженой\n</s>


[251 / 300] Train: Loss = 3.40975, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.71it/s]
[251 / 300]   Val: Loss = 4.08250, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 160.57it/s]
[252 / 300] Train: Loss = 3.37038, PPX = 29.09:   4%|▎         | 25/677 [00:00<00:05, 129.40it/s]

Optimizer lr = 1.56413e-147

вшкафуукрадкойснебакрыши\nснатурыивглазахмозгу\nилишьвглазахубабытоже\nструдом\n</s>


[252 / 300] Train: Loss = 3.40990, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 159.80it/s]
[252 / 300]   Val: Loss = 4.08194, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 164.28it/s]
[253 / 300] Train: Loss = 3.45326, PPX = 31.60:   4%|▎         | 25/677 [00:00<00:05, 128.65it/s]

Optimizer lr = 3.91032e-148

явваслюблютебяпостроил\nноянезнаючтотысам\nнемнеужбольшенеосталось\nнете\n</s>


[253 / 300] Train: Loss = 3.40962, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 163.93it/s]
[253 / 300]   Val: Loss = 4.08242, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 169.89it/s]
[254 / 300] Train: Loss = 3.36495, PPX = 28.93:   4%|▍         | 26/677 [00:00<00:04, 133.03it/s]

Optimizer lr = 9.7758e-149

янемогучемпередвами\nикнеймедведьмояжена\nискаждымднёмеёикцели\nсебя\n</s>


[254 / 300] Train: Loss = 3.40955, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 158.85it/s]
[254 / 300]   Val: Loss = 4.08197, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 155.09it/s]
[255 / 300] Train: Loss = 3.47130, PPX = 32.18:   3%|▎         | 23/677 [00:00<00:05, 121.24it/s]

Optimizer lr = 2.44395e-149

явваслюблючегожеболе\nинемогусказатьсмотреть\nтогдавотиупаловшколу\nнащи\n</s>


[255 / 300] Train: Loss = 3.40942, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.29it/s]
[255 / 300]   Val: Loss = 4.08035, PPX = 59.17: 100%|██████████| 19/19 [00:00<00:00, 155.09it/s]
[256 / 300] Train: Loss = 3.50573, PPX = 33.31:   4%|▎         | 24/677 [00:00<00:05, 124.37it/s]

Optimizer lr = 6.10987e-150

наберегуунасвроссии\nянемогучемпотому\nагдежетыменянаужин\nнепей\n</s>


[256 / 300] Train: Loss = 3.40951, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.67it/s]
[256 / 300]   Val: Loss = 4.08194, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 151.46it/s]
[257 / 300] Train: Loss = 3.50615, PPX = 33.32:   4%|▎         | 24/677 [00:00<00:05, 127.51it/s]

Optimizer lr = 1.52747e-150

ачтоподелатьеслиэто\nсутраитакнемог\nнаэтихднейянесогласен\nвтебе\n</s>


[257 / 300] Train: Loss = 3.40979, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.13it/s]
[257 / 300]   Val: Loss = 4.08250, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 156.05it/s]
[258 / 300] Train: Loss = 3.34249, PPX = 28.29:   4%|▎         | 24/677 [00:00<00:05, 127.26it/s]

Optimizer lr = 3.81867e-151

вотмойлетитнапляже\nаэтобылятакдавно\nноясовздохомвашусчастье\nнезря\n</s>


[258 / 300] Train: Loss = 3.40959, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 153.63it/s]
[258 / 300]   Val: Loss = 4.08260, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 151.67it/s]
[259 / 300] Train: Loss = 3.44345, PPX = 31.29:   4%|▎         | 25/677 [00:00<00:05, 125.37it/s]

Optimizer lr = 9.54668e-152

влучахквартирекаждыйсердце\nабылвокнобольшойипуст\nаможномненасамомделе\nсходил\n</s>


[259 / 300] Train: Loss = 3.40946, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 162.65it/s]
[259 / 300]   Val: Loss = 4.08259, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 157.31it/s]
[260 / 300] Train: Loss = 3.41484, PPX = 30.41:   3%|▎         | 23/677 [00:00<00:05, 118.48it/s]

Optimizer lr = 2.38667e-152

явваслюблючегожеболе\nясдетстванепоймунепойму\nхотябыжитьидоболине\nтыбес\n</s>


[260 / 300] Train: Loss = 3.40953, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 157.55it/s]
[260 / 300]   Val: Loss = 4.08132, PPX = 59.22: 100%|██████████| 19/19 [00:00<00:00, 161.80it/s]
[261 / 300] Train: Loss = 3.41517, PPX = 30.42:   4%|▎         | 25/677 [00:00<00:05, 128.80it/s]

Optimizer lr = 5.96667e-153

вменялунавкостюмемая\nнавсёчтосдавнихпор\nаявесьденьбылвпервыйвечер\nиза\n</s>


[261 / 300] Train: Loss = 3.40993, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 159.41it/s]
[261 / 300]   Val: Loss = 4.08291, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 153.87it/s]
[262 / 300] Train: Loss = 3.35392, PPX = 28.61:   4%|▎         | 24/677 [00:00<00:05, 124.82it/s]

Optimizer lr = 1.49167e-153

отомчтоянепонимаю\nчтоянетакужмноголет\nятакинехотелосьзамуж\nнея\n</s>


[262 / 300] Train: Loss = 3.40973, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.65it/s]
[262 / 300]   Val: Loss = 4.08181, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 156.45it/s]
[263 / 300] Train: Loss = 3.45329, PPX = 31.60:   4%|▎         | 25/677 [00:00<00:05, 126.63it/s]

Optimizer lr = 3.72917e-154

люблювоттаквотвотвотраньше\nавотуменячетвёртыйгод\nнеточтобпостоянновжизни\nстоски\n</s>


[263 / 300] Train: Loss = 3.40979, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.53it/s]
[263 / 300]   Val: Loss = 4.08294, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 150.66it/s]
[264 / 300] Train: Loss = 3.17030, PPX = 23.81:   4%|▎         | 24/677 [00:00<00:05, 124.88it/s]

Optimizer lr = 9.32293e-155

намойвопросбылвэтойжизни\nолегссобойнаберегу\nисталчитатькакмноговжизни\nвгостях\n</s>


[264 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.49it/s]
[264 / 300]   Val: Loss = 4.08179, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 156.78it/s]
[265 / 300] Train: Loss = 3.47096, PPX = 32.17:   4%|▎         | 24/677 [00:00<00:05, 125.32it/s]

Optimizer lr = 2.33073e-155

аеслитыдавнонеможет\nябудурадноонвтебя\nавотивсёотэтихделать\nвтебя\n</s>


[265 / 300] Train: Loss = 3.40962, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 152.44it/s]
[265 / 300]   Val: Loss = 4.08255, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 154.33it/s]
[266 / 300] Train: Loss = 3.45648, PPX = 31.71:   4%|▍         | 27/677 [00:00<00:05, 123.82it/s]

Optimizer lr = 5.82683e-156

сутрапораньшескрикомна</s>


[266 / 300] Train: Loss = 3.40976, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 152.26it/s]
[266 / 300]   Val: Loss = 4.08290, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 155.81it/s]
[267 / 300] Train: Loss = 3.48290, PPX = 32.55:   3%|▎         | 23/677 [00:00<00:05, 125.57it/s]

Optimizer lr = 1.45671e-156

нучтожможнобезсмерти\nяпростонехочубытьрад\nвитогеянетенетолько\nния\n</s>


[267 / 300] Train: Loss = 3.40978, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.19it/s]
[267 / 300]   Val: Loss = 4.08304, PPX = 59.33: 100%|██████████| 19/19 [00:00<00:00, 152.94it/s]
[268 / 300] Train: Loss = 3.42620, PPX = 30.76:   4%|▎         | 24/677 [00:00<00:05, 124.78it/s]

Optimizer lr = 3.64177e-157

унасвпалатеуоксаны\nчтовнервныйразвсеголишьгод\nауменявдругойконцовне\nнесмог\n</s>


[268 / 300] Train: Loss = 3.40961, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.17it/s]
[268 / 300]   Val: Loss = 4.08248, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 154.26it/s]
[269 / 300] Train: Loss = 3.46549, PPX = 31.99:   4%|▎         | 25/677 [00:00<00:05, 128.41it/s]

Optimizer lr = 9.10442e-158

влучахгрудивселенной\nкаквтонмедведьидёткврачу\nнаужинятамподногами\nвметро\n</s>


[269 / 300] Train: Loss = 3.40963, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.14it/s]
[269 / 300]   Val: Loss = 4.08243, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 150.45it/s]
[270 / 300] Train: Loss = 3.35191, PPX = 28.56:   4%|▎         | 24/677 [00:00<00:05, 124.64it/s]

Optimizer lr = 2.2761e-158

мыстольколетсовсемнемною\nнукакжевынемнель\nвмоюль\n</s>


[270 / 300] Train: Loss = 3.40977, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 150.19it/s]
[270 / 300]   Val: Loss = 4.08191, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 145.44it/s]
[271 / 300] Train: Loss = 3.48063, PPX = 32.48:   3%|▎         | 22/677 [00:00<00:05, 112.87it/s]

Optimizer lr = 5.69026e-159

вмоихглазахседыелето\nизгодавгодизнихневчём\nятольколишьнаэтомсвете\nнеты\n</s>


[271 / 300] Train: Loss = 3.40967, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 152.95it/s]
[271 / 300]   Val: Loss = 4.08095, PPX = 59.20: 100%|██████████| 19/19 [00:00<00:00, 160.14it/s]
[272 / 300] Train: Loss = 3.31324, PPX = 27.47:   4%|▎         | 24/677 [00:00<00:05, 123.06it/s]

Optimizer lr = 1.42257e-159

вчасденькогдатымнеприснился\nисразусталохорошо\nясталнеголовуавотне\nнуда\n</s>


[272 / 300] Train: Loss = 3.40961, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 153.42it/s]
[272 / 300]   Val: Loss = 4.08152, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 152.47it/s]
[273 / 300] Train: Loss = 3.49847, PPX = 33.06:   3%|▎         | 23/677 [00:00<00:05, 124.40it/s]

Optimizer lr = 3.55641e-160

ктотамгдераньшевэтоммире\nябвамнемогусказатьбно\nнезналчтохочешьжизньневсилах\nитак\n</s>


[273 / 300] Train: Loss = 3.40978, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.35it/s]
[273 / 300]   Val: Loss = 4.08201, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 152.14it/s]
[274 / 300] Train: Loss = 3.35679, PPX = 28.70:   4%|▎         | 24/677 [00:00<00:05, 122.87it/s]

Optimizer lr = 8.89103e-161

влесунаольгувсёслучилось\nивэтомнужнобыловсё\nивашиснаминебывает\nваду\n</s>


[274 / 300] Train: Loss = 3.40964, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.74it/s]
[274 / 300]   Val: Loss = 4.08282, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 154.59it/s]
[275 / 300] Train: Loss = 3.31175, PPX = 27.43:   3%|▎         | 22/677 [00:00<00:06, 107.74it/s]

Optimizer lr = 2.22276e-161

отсловибудетнехватает\nивэтотразеётайком\nисновапроситнехватает\nвгостях\n</s>


[275 / 300] Train: Loss = 3.40961, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 156.64it/s]
[275 / 300]   Val: Loss = 4.08257, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 172.70it/s]
[276 / 300] Train: Loss = 3.49910, PPX = 33.09:   4%|▎         | 25/677 [00:00<00:05, 128.84it/s]

Optimizer lr = 5.5569e-162

яналунуиванпетрович\nяпонялчтостобойногой\nипостепенноначинает\nвводе\n</s>


[276 / 300] Train: Loss = 3.40944, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 162.97it/s]
[276 / 300]   Val: Loss = 4.08215, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 169.26it/s]
[277 / 300] Train: Loss = 3.64114, PPX = 38.14:   4%|▍         | 26/677 [00:00<00:04, 133.44it/s]

Optimizer lr = 1.38922e-162

янемогуамывэтовремя\nвсеэтиносомвмагазин\nанаглазахтакиебабы\nввине\n</s>


[277 / 300] Train: Loss = 3.40963, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 164.13it/s]
[277 / 300]   Val: Loss = 4.08109, PPX = 59.21: 100%|██████████| 19/19 [00:00<00:00, 171.37it/s]
[278 / 300] Train: Loss = 3.35900, PPX = 28.76:   4%|▎         | 25/677 [00:00<00:05, 129.95it/s]

Optimizer lr = 3.47306e-163

женаидетиэтомама\nядаженемогудетей\nноятосороктысячгода\nвпальто\n</s>


[278 / 300] Train: Loss = 3.40957, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 163.37it/s]
[278 / 300]   Val: Loss = 4.08284, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 170.11it/s]
[279 / 300] Train: Loss = 3.32977, PPX = 27.93:   4%|▎         | 25/677 [00:00<00:05, 128.74it/s]

Optimizer lr = 8.68265e-164

извсехменяроднаятолько\nненадобылочтоестьвтом\nневбровьянебылпервый\nвтоске\n</s>


[279 / 300] Train: Loss = 3.40963, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 160.08it/s]
[279 / 300]   Val: Loss = 4.08198, PPX = 59.26: 100%|██████████| 19/19 [00:00<00:00, 151.50it/s]
[280 / 300] Train: Loss = 3.46385, PPX = 31.94:   4%|▎         | 24/677 [00:00<00:05, 125.67it/s]

Optimizer lr = 2.17066e-164

накухнескрышиневернулся\nинепоймучтоонзаней\nнеточтобразнемогмнетоже\nнея\n</s>


[280 / 300] Train: Loss = 3.40975, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.49it/s]
[280 / 300]   Val: Loss = 4.08314, PPX = 59.33: 100%|██████████| 19/19 [00:00<00:00, 153.83it/s]
[281 / 300] Train: Loss = 3.38943, PPX = 29.65:   4%|▎         | 24/677 [00:00<00:05, 128.64it/s]

Optimizer lr = 5.42666e-165

наденьсвятоговалентина\nвночиупетраитакужплох\nчтоонменязаточтобольше\nнеты\n</s>


[281 / 300] Train: Loss = 3.40959, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 153.34it/s]
[281 / 300]   Val: Loss = 4.08235, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 150.72it/s]
[282 / 300] Train: Loss = 3.40904, PPX = 30.24:   4%|▎         | 24/677 [00:00<00:05, 124.46it/s]

Optimizer lr = 1.35666e-165

апомнишькактутходит\nясмысльюнехватало\nнонемогутеперьнаместе\nавпуть\n</s>


[282 / 300] Train: Loss = 3.40964, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 152.50it/s]
[282 / 300]   Val: Loss = 4.08348, PPX = 59.35: 100%|██████████| 19/19 [00:00<00:00, 154.47it/s]
[283 / 300] Train: Loss = 3.56943, PPX = 35.50:   4%|▎         | 24/677 [00:00<00:05, 123.37it/s]

Optimizer lr = 3.39166e-166

втотденькогдатымнеприснился\nиясогласензаспиной\nночтотовэтотвиде\nвокно\n</s>


[283 / 300] Train: Loss = 3.40988, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.59it/s]
[283 / 300]   Val: Loss = 4.08238, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 157.21it/s]
[284 / 300] Train: Loss = 3.59220, PPX = 36.31:   4%|▎         | 24/677 [00:00<00:05, 126.40it/s]

Optimizer lr = 8.47915e-167

стобойимыегонестало\nиневответнизасобой\nиниоднойизнихнестанет\nнида\n</s>


[284 / 300] Train: Loss = 3.40967, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.08it/s]
[284 / 300]   Val: Loss = 4.08239, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 151.76it/s]
[285 / 300] Train: Loss = 3.44204, PPX = 31.25:   4%|▎         | 25/677 [00:00<00:05, 124.40it/s]

Optimizer lr = 2.11979e-167

впорядкевсёпрекраснодело\nмнепростодолженбытькаклох\nатысовсемкактакипервый\nкакжаль\n</s>


[285 / 300] Train: Loss = 3.40966, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.31it/s]
[285 / 300]   Val: Loss = 4.08204, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 155.21it/s]
[286 / 300] Train: Loss = 3.49679, PPX = 33.01:   3%|▎         | 23/677 [00:00<00:05, 124.34it/s]

Optimizer lr = 5.29947e-168

тыпомнишькаквымнесказали\nненадоснейнетакужплох\nнокакобычноонвответна\nменя\n</s>


[286 / 300] Train: Loss = 3.40959, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 155.72it/s]
[286 / 300]   Val: Loss = 4.08299, PPX = 59.32: 100%|██████████| 19/19 [00:00<00:00, 155.66it/s]
[287 / 300] Train: Loss = 3.54291, PPX = 34.57:   4%|▎         | 24/677 [00:00<00:05, 124.09it/s]

Optimizer lr = 1.32487e-168

нетакужмноговжизнинужно\nянемогутебесказать\nкакякричуявижувнебо\nоттить\n</s>


[287 / 300] Train: Loss = 3.40949, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 154.54it/s]
[287 / 300]   Val: Loss = 4.08205, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 157.11it/s]
[288 / 300] Train: Loss = 3.37296, PPX = 29.16:   4%|▎         | 24/677 [00:00<00:05, 124.35it/s]

Optimizer lr = 3.31217e-169

аэточтозачтоаркадий\nнеможетдобрыйвотужгод\nнотыжнебылотакинеот\nизагс\n</s>


[288 / 300] Train: Loss = 3.40971, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 152.45it/s]
[288 / 300]   Val: Loss = 4.08276, PPX = 59.31: 100%|██████████| 19/19 [00:00<00:00, 174.72it/s]
[289 / 300] Train: Loss = 3.45477, PPX = 31.65:   4%|▎         | 25/677 [00:00<00:05, 127.69it/s]

Optimizer lr = 8.28042e-170

намигявижумирвпорядке\nкаквдругтычтоещёнемог\nавасвитогебылоб\nнепей\n</s>


[289 / 300] Train: Loss = 3.40972, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.63it/s]
[289 / 300]   Val: Loss = 4.08336, PPX = 59.34: 100%|██████████| 19/19 [00:00<00:00, 157.87it/s]
[290 / 300] Train: Loss = 3.49783, PPX = 33.04:   4%|▎         | 24/677 [00:00<00:05, 123.76it/s]

Optimizer lr = 2.07011e-170

япредтобойвчератывшколу\nнетакужбольшенелюблю\nянемогусебенапамять\nсижу\n</s>


[290 / 300] Train: Loss = 3.40971, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 154.83it/s]
[290 / 300]   Val: Loss = 4.08168, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 153.20it/s]
[291 / 300] Train: Loss = 3.51224, PPX = 33.52:   4%|▎         | 25/677 [00:00<00:05, 125.90it/s]

Optimizer lr = 5.17526e-171

оксанавечеромкоксане\nивэтомсамомделестих\nиначаллучшейотоксаны\nвкровать\n</s>


[291 / 300] Train: Loss = 3.40956, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 159.95it/s]
[291 / 300]   Val: Loss = 4.08246, PPX = 59.29: 100%|██████████| 19/19 [00:00<00:00, 155.76it/s]
[292 / 300] Train: Loss = 3.26614, PPX = 26.21:   4%|▎         | 25/677 [00:00<00:05, 125.08it/s]

Optimizer lr = 1.29382e-171

ненужноинечеловеком\nненадобылобневхрам\nатамстобойвпостели\nивпах\n</s>


[292 / 300] Train: Loss = 3.40968, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 156.61it/s]
[292 / 300]   Val: Loss = 4.08126, PPX = 59.22: 100%|██████████| 19/19 [00:00<00:00, 170.10it/s]
[293 / 300] Train: Loss = 3.39321, PPX = 29.76:   4%|▎         | 25/677 [00:00<00:05, 127.24it/s]

Optimizer lr = 3.23454e-172

аркадийвкартысоксаной\nиговоритондоутра\nавыкакразбымнестобою\nия\n</s>


[293 / 300] Train: Loss = 3.40970, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 157.78it/s]
[293 / 300]   Val: Loss = 4.08161, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 154.85it/s]
[294 / 300] Train: Loss = 3.34242, PPX = 28.29:   4%|▎         | 24/677 [00:00<00:05, 122.92it/s]

Optimizer lr = 8.08635e-173

мневдетствеснегещенебойся\nдлянасженикогданесмог\nаянаихродился\nсутра\n</s>


[294 / 300] Train: Loss = 3.40976, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 150.56it/s]
[294 / 300]   Val: Loss = 4.08153, PPX = 59.24: 100%|██████████| 19/19 [00:00<00:00, 149.97it/s]
[295 / 300] Train: Loss = 3.40839, PPX = 30.22:   4%|▎         | 24/677 [00:00<00:05, 126.86it/s]

Optimizer lr = 2.02159e-173

яваспрошунепонимаю\nдатыжесамтакойиэти\nномневсёэтовсётаки\nнатреть\n</s>


[295 / 300] Train: Loss = 3.40963, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 157.52it/s]
[295 / 300]   Val: Loss = 4.08232, PPX = 59.28: 100%|██████████| 19/19 [00:00<00:00, 160.11it/s]
[296 / 300] Train: Loss = 3.38923, PPX = 29.64:   4%|▍         | 26/677 [00:00<00:04, 134.03it/s]

Optimizer lr = 5.05397e-174

ятакхочубытьстануутром\nвесьденьиникогоневтот\nпишувчёмразвначалелета\nвсети\n</s>


[296 / 300] Train: Loss = 3.40966, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.45it/s]
[296 / 300]   Val: Loss = 4.08182, PPX = 59.25: 100%|██████████| 19/19 [00:00<00:00, 167.54it/s]
[297 / 300] Train: Loss = 3.38780, PPX = 29.60:   4%|▎         | 25/677 [00:00<00:05, 128.13it/s]

Optimizer lr = 1.26349e-174

вметронаместесмотритвполночь\nзанимябылсказалчтомы\nкаквдругнавидтыэтотам\nвменя\n</s>


[297 / 300] Train: Loss = 3.40994, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.96it/s]
[297 / 300]   Val: Loss = 4.08207, PPX = 59.27: 100%|██████████| 19/19 [00:00<00:00, 162.23it/s]
[298 / 300] Train: Loss = 3.43564, PPX = 31.05:   4%|▎         | 25/677 [00:00<00:05, 129.98it/s]

Optimizer lr = 3.15873e-175

янехочусегоднявкосмос\nвнёмбылооченьвсёравно\nичтостобоюделатьдаже\nкакты\n</s>


[298 / 300] Train: Loss = 3.40969, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 155.32it/s]
[298 / 300]   Val: Loss = 4.08331, PPX = 59.34: 100%|██████████| 19/19 [00:00<00:00, 154.96it/s]
[299 / 300] Train: Loss = 3.36825, PPX = 29.03:   4%|▎         | 24/677 [00:00<00:05, 124.70it/s]

Optimizer lr = 7.89683e-176

ачтотымненевидел\nявнихвассутранесталпоэт\nязатебяяпонимаю\nисвой\n</s>


[299 / 300] Train: Loss = 3.40956, PPX = 30.25: 100%|██████████| 677/677 [00:04<00:00, 156.39it/s]
[299 / 300]   Val: Loss = 4.08120, PPX = 59.22: 100%|██████████| 19/19 [00:00<00:00, 158.45it/s]
[300 / 300] Train: Loss = 3.27135, PPX = 26.35:   4%|▎         | 24/677 [00:00<00:05, 124.71it/s]

Optimizer lr = 1.97421e-176

тымнесовсемнесомужчины\nаяопятьневиноват\nнотынеможеттакстобою\nая\n</s>


[300 / 300] Train: Loss = 3.40970, PPX = 30.26: 100%|██████████| 677/677 [00:04<00:00, 153.70it/s]
[300 / 300]   Val: Loss = 4.08263, PPX = 59.30: 100%|██████████| 19/19 [00:00<00:00, 150.12it/s]


Optimizer lr = 4.93552e-177

втотденькогдатымнеприснился\nянемогувамнемогу\nянеостатьсябытьнеможет\nнея\n</s>


### Добавление информации в выборку

Сейчас у нас каждое слово предствляется одним индексом. Модели очень сложно узнать, сколько в нем слогов - а значит, сложно генерировать корректное стихотворение.

На самом деле к каждому слову можно приписать кусочек из метрического шаблона:

![](https://hsto.org/web/59a/b39/bd0/59ab39bd020c49a78a12cbab62c80181.png " ")

**Задание** Обновите функцию `read_poem`, пусть она генерирует два списка - список слов и список кусков шаблона.  
Добавьте в модель вход - последовательности шаблонов, конкатенируйте их эмбеддинги со словами.  
Дополнительная идея - заставьте модель угадывать, какой шаблон должен идти следующим (где-то половина будет подходящими, остальные - нет). Добавьте дополнительные потери от угадывания шаблона.

In [317]:
import numpy as np
def split_pattern(p, _list):
    #assert len(p) >= sum(_list)
    _list = np.concatenate([[0], np.cumsum(_list)])
    return [p[_list[i]: _list[i+1]] for i in range(len(_list) - 1)]

split_pattern("sfwdfwfwe", [2,3,4])

['sf', 'wdf', 'wfwe']

In [318]:
def vowels_cout(word):
    return sum([1 if c in vowels else 0 for c in word])

def read_poem(path, P):
    poem = []
    parts = []
    with open(path, encoding='utf8') as f:
        i = 0
        for line in f:
            line = line.rstrip()
            if len(line) == 0:
                yield poem, parts
                poem = []
                parts = []
                continue
            
            pattern = P
            pattern_split = split_pattern(
                pattern, 
                [
                    vowels_cout(word) 
                    for word in line.split()
                ]
            )
            #pattern_split = [x if x != '' else '-' for x in pattern_split]
            parts.extend(pattern_split + ['\\n'])
            poem.extend(line.split() + ['\\n'])
            
perashki = list(read_poem('perashki.txt', odd_pattern))
poroshki = list(read_poem('poroshki.txt', even_pattern))

In [319]:
from torchtext.data import Field, Example, Dataset, BucketIterator

text_field = Field(init_token='<s>', eos_token='</s>')
pattern_field = Field(init_token='<s>', eos_token='</s>')
        
fields = [('text', text_field), ('pattern', pattern_field)]
examples = [Example.fromlist([poem[0], poem[1]], fields) for poem in poroshki]
dataset = Dataset(examples, fields)

text_field.build_vocab(dataset, min_freq=7)
pattern_field.build_vocab(dataset, min_freq = 1)

print('Vocab size =', len(text_field.vocab))
print('Vocab size =', len(pattern_field.vocab))
train_dataset, test_dataset = dataset.split(split_ratio=0.9)

train_iter, test_iter = BucketIterator.splits(datasets=(train_dataset, test_dataset), batch_sizes=(32, 128), 
                                              shuffle=True, device=DEVICE, sort=False)

Vocab size = 6298
Vocab size = 21


In [320]:
i = 1
batch.pattern.shape
seq = batch.pattern[:,i]
print([pattern_field.vocab.itos[x] for x in seq])
seq = batch.text[:,i]
print([text_field.vocab.itos[x] for x in seq])

['<s>', '-+', '-', '+-', '+', '-+', '\\n', '', '-+-', '+', '-+-+', '\\n', '-', '+', '-+-', '+-+', '\\n', '', '-+', '\\n', '</s>', '<pad>', '<pad>', '<pad>']
['<s>', 'илья', 'не', 'хочет', 'быть', '<unk>', '\\n', 'в', '<unk>', '<unk>', '<unk>', '\\n', 'и', 'не', 'подходят', '<unk>', '\\n', 'к', '<unk>', '\\n', '</s>', '<pad>', '<pad>', '<pad>']


In [321]:
class LMModelv3(nn.Module):
    def __init__(self, vocab_size, vocab_size_pattern, emb_dim=256, emb_dim_pattern=256, lstm_hidden_dim=256, num_layers=1):
        super().__init__()

        self._emb = nn.Embedding(vocab_size, emb_dim)
        self._emb_pattern = nn.Embedding(vocab_size_pattern, emb_dim_pattern)
        self._rnn = nn.LSTM(input_size=emb_dim + emb_dim_pattern, hidden_size=lstm_hidden_dim)
        self._out_layer = nn.Linear(lstm_hidden_dim, vocab_size)
        self._out_layer_pattern = nn.Linear(lstm_hidden_dim, vocab_size_pattern)
        
        self._init_weights()
        #self._out_layer.weight = self._emb.weight

    def _init_weights(self, init_range=0.1):
        self._emb.weight.data.uniform_(-init_range, init_range)
        self._out_layer.bias.data.zero_()
        self._out_layer.weight.data.uniform_(-init_range, init_range)
        
        self._emb_pattern.weight.data.uniform_(-init_range, init_range)
        self._out_layer_pattern.bias.data.zero_()
        self._out_layer.weight.data.uniform_(-init_range, init_range)

    def forward(self, inputs, patterns, hidden=None):
        x = self._emb(inputs)
        y = self._emb_pattern(patterns)
        x = torch.cat([x,y], dim = 2)
        x, hidden = self._rnn(x, hidden)
        out = self._out_layer(x)
        out1 = self._out_layer_pattern(x)
        return out, out1, hidden

In [322]:
batch = next(iter(train_iter))
model = LMModelv3(
    vocab_size=len(train_iter.dataset.fields['text'].vocab),
    vocab_size_pattern=len(train_iter.dataset.fields['pattern'].vocab)).to(DEVICE)

x = model(batch.text, batch.pattern)
print(x[0].shape, x[1].shape)

torch.Size([24, 32, 6298]) torch.Size([24, 32, 21])


In [323]:
def generate(model, temp=0.6):
    model.eval()
    with torch.no_grad():        
        prev_token = train_iter.dataset.fields['text'].vocab.stoi['<s>']
        prev_pattern = train_iter.dataset.fields['pattern'].vocab.stoi['<s>']
        
        end_token = train_iter.dataset.fields['text'].vocab.stoi['</s>']
        end_pattern = train_iter.dataset.fields['pattern'].vocab.stoi['</s>']
        #print(prev_token)
        
        hidden = None
        for _ in range(150):
            probs_text, probs_pattern, hidden = model(LongTensor([[prev_token]]), LongTensor([[prev_pattern]]), hidden)
            prev_token = sample(probs_text, temp)
            prev_pattern = sample(probs_pattern, temp)
            print(train_iter.dataset.fields['text'].vocab.itos[prev_token], end='')
            #print(train_iter.dataset.fields['pattern'].vocab.itos[prev_pattern], end='')
            
            if prev_token == end_token:
                return
#             if prev_pattern == end_pattern:
#                 return
                
generate(model)

тайнзачатьшапкизнайуткалучшекаждымджонзлодейлюдьмичетвёртыйнязениттоварищипридётсякрышеобщемребятбуратиноневесткнигаособопромолвилбобрыжеланьенебетёщабюджетрассветеосланеплохострахаположилбровейцеоткрытыйдвиженьязапойпроводитзабралигакингпрохожихпредставитьступайтэконтрольныйдругомукустонесинатюрмортминусоказалсявоетворонстарухулыбкойеврейоткрытымвпрочемлисувинилзаявляетглазамрельсахайпадшучуидёшьнебесныйиграянадоеламилыйвдвойнеполюртыбольнасомненьяанфастенейотраженьепилаповарполялитраженщиныутромподрядврагсдаётсяглухойухомпианисткарядоммытьсемёнаписалскажешьрубашкаитогволчоквызвалкняжначестипредлагаетчасыутюгстохамвойтизаводитсвязьфомарусикоролевачелоплывётстучитмеломштирлицзнайнадёжнотудаохотугрекисидятатуполапожарколялюбуюсьнервноушейоксанасугробнебесныйвасяночнойвначалеэтомузлодейждетделаюбровиподписьпредлагаетпиратстатуссыгралтамбовзять

In [324]:
import math
from tqdm import tqdm
tqdm.get_lock().locks = []


def do_epoch(model, criterion, data_iter, unk_idx, pad_idx, optimizer=None, name=None):
    epoch_loss = 0
    
    is_train = not optimizer is None
    name = name or ''
    model.train(is_train)
    
    batches_count = len(data_iter)
    
    pad_idx_p = train_iter.dataset.fields['pattern'].vocab.stoi['<pad>']
    unk_idx_p = train_iter.dataset.fields['pattern'].vocab.stoi['<unk>']
    
    with torch.autograd.set_grad_enabled(is_train):
        with tqdm(total=batches_count) as progress_bar:
            for i, batch in enumerate(data_iter):                
                logits, patterns, _ = model(batch.text, batch.pattern)

                targets_text = torch.cat(
                    [
                        batch.text[1:], batch.text.new_ones((1, batch.text.shape[1]))
                    ]
                )
                
                targets_pat = torch.cat(
                    [
                        batch.pattern[1:], batch.pattern.new_ones((1, batch.pattern.shape[1]))
                    ]
                )
                
                loss1 = criterion(logits.view(-1, logits.shape[-1]), targets_text.view(-1))
                loss2 = criterion(patterns.view(-1, patterns.shape[-1]), targets_pat.view(-1))
                
                mask1 = (1 - ((targets_text.view(-1) == unk_idx) + (targets_text.view(-1) == pad_idx))).float().cuda()
                mask2 = (1 - ((targets_pat.view(-1) == unk_idx_p) + (targets_pat.view(-1) == pad_idx_p))).float().cuda()
                
                loss1 = (loss1 * mask1).sum() / mask1.sum()
                loss2 = (loss2 * mask2).sum() / mask2.sum()
                loss = loss1 #+ loss2

                epoch_loss += loss.item()
                if optimizer:
                    optimizer.zero_grad()
                    loss.backward()
                    nn.utils.clip_grad_norm_(model.parameters(), 1.)
                    optimizer.step()

                progress_bar.update()
                progress_bar.set_description('{:>5s} Loss = {:.5f}, PPX = {:.2f}'.format(name, loss.item(), 
                                                                                         math.exp(loss.item())))
                
            progress_bar.set_description('{:>5s} Loss = {:.5f}, PPX = {:.2f}'.format(
                name, 
                epoch_loss / batches_count, 
                math.exp(epoch_loss / batches_count)
            )
            )
            progress_bar.refresh()

    return epoch_loss / batches_count


def fit(model, criterion, optimizer, train_iter, epochs_count=1, unk_idx=0, pad_idx=1, val_iter=None):
    best_val_loss = None
    for epoch in range(epochs_count):
        name_prefix = '[{} / {}] '.format(epoch + 1, epochs_count)
        train_loss = do_epoch(model, criterion, train_iter, unk_idx, pad_idx, optimizer, name_prefix + 'Train:')
        
        if not val_iter is None:
            val_loss = do_epoch(model, criterion, val_iter, unk_idx, pad_idx, None, name_prefix + '  Val:')
            
            if best_val_loss and val_loss > best_val_loss:
                optimizer.param_groups[0]['lr'] /= 4.
                print('Optimizer lr = {:g}'.format(optimizer.param_groups[0]['lr']))
            else:
                best_val_loss = val_loss
        print()
        generate(model)
        print()

In [325]:
model = LMModelv3(
    vocab_size=len(train_iter.dataset.fields['text'].vocab),
    vocab_size_pattern=len(train_iter.dataset.fields['pattern'].vocab)).to(DEVICE)

pad_idx = train_iter.dataset.fields['text'].vocab.stoi['<pad>']
unk_idx = train_iter.dataset.fields['text'].vocab.stoi['<unk>']
criterion = nn.CrossEntropyLoss(reduction='none').to(DEVICE)

optimizer = optim.SGD(model.parameters(), lr=20., weight_decay=1e-6)

fit(model, criterion, optimizer, train_iter, epochs_count=30, unk_idx=unk_idx, pad_idx=pad_idx, val_iter=test_iter)

[1 / 30] Train: Loss = 5.00039, PPX = 148.47: 100%|██████████| 677/677 [00:05<00:00, 129.34it/s]
[1 / 30]   Val: Loss = 4.41071, PPX = 82.33: 100%|██████████| 19/19 [00:00<00:00, 125.54it/s]
[2 / 30] Train: Loss = 4.72636, PPX = 112.88:   3%|▎         | 21/677 [00:00<00:06, 105.55it/s]


аввасвночь\nвресторане\n</s>


[2 / 30] Train: Loss = 4.29092, PPX = 73.03: 100%|██████████| 677/677 [00:05<00:00, 131.32it/s]
[2 / 30]   Val: Loss = 4.14415, PPX = 63.06: 100%|██████████| 19/19 [00:00<00:00, 125.17it/s]
[3 / 30] Train: Loss = 3.87826, PPX = 48.34:   3%|▎         | 21/677 [00:00<00:06, 109.01it/s]


кактыэто\nатоябзначит\nза\nивдень\nне\n</s>


[3 / 30] Train: Loss = 4.01747, PPX = 55.56: 100%|██████████| 677/677 [00:05<00:00, 129.76it/s]
[3 / 30]   Val: Loss = 4.02892, PPX = 56.20: 100%|██████████| 19/19 [00:00<00:00, 125.71it/s]
[4 / 30] Train: Loss = 3.95667, PPX = 52.28:   2%|▏         | 14/677 [00:00<00:06, 105.31it/s]


тынемог\nияне\nиразвдрузьяянето\nкакявгод\nвгодты\nвгодвнёмночью\nскем\nвнём\nяв\nтеловвнём\nчтоочень\nвгодчтоя\nвльто\nне\nявмирнезнаю\nвнём\n</s>


[4 / 30] Train: Loss = 4.03682, PPX = 56.65: 100%|██████████| 677/677 [00:05<00:00, 131.28it/s]
[4 / 30]   Val: Loss = 4.00713, PPX = 54.99: 100%|██████████| 19/19 [00:00<00:00, 130.46it/s]
[5 / 30] Train: Loss = 3.91788, PPX = 50.29:   3%|▎         | 21/677 [00:00<00:06, 108.34it/s]


вденьв\nивдругв\nивденьдомспор\nи\n</s>


[5 / 30] Train: Loss = 3.79535, PPX = 44.49: 100%|██████████| 677/677 [00:05<00:00, 129.85it/s]
[5 / 30]   Val: Loss = 3.96058, PPX = 52.49: 100%|██████████| 19/19 [00:00<00:00, 128.95it/s]
[6 / 30] Train: Loss = 3.56802, PPX = 35.45:   3%|▎         | 20/677 [00:00<00:06, 106.54it/s]


аясгодамиглеба\nсвас\nчтотозаэто\nнеякогда\n</s>


[6 / 30] Train: Loss = 3.74739, PPX = 42.41: 100%|██████████| 677/677 [00:05<00:00, 131.91it/s]
[6 / 30]   Val: Loss = 3.96189, PPX = 52.56: 100%|██████████| 19/19 [00:00<00:00, 128.83it/s]
[7 / 30] Train: Loss = 3.67279, PPX = 39.36:   2%|▏         | 13/677 [00:00<00:06, 108.12it/s]

Optimizer lr = 5

мыздесьэтокашу\nдавестьсемью\nмытутмышь\nчтоячайсней\nновгодне\nдобраспросилхреналетабред\nновотсвотнетне\nновротнет\nяненезмеябжальбввгодучас\nивпятьлет\nнесчтовразяввзором\nнамать\n</s>


[7 / 30] Train: Loss = 3.45810, PPX = 31.76: 100%|██████████| 677/677 [00:05<00:00, 129.96it/s]
[7 / 30]   Val: Loss = 3.92578, PPX = 50.69: 100%|██████████| 19/19 [00:00<00:00, 121.97it/s]
[8 / 30] Train: Loss = 3.40247, PPX = 30.04:   3%|▎         | 18/677 [00:00<00:06, 105.35it/s]


иныеу\nлишьочень\nвнашвопрос\nкогдавремя\nлишьонавтригодаив\nсказалапришвиншок\nто\nикак\nвдругбосс\nяналучшийсорок\n</s>


[8 / 30] Train: Loss = 3.39395, PPX = 29.78: 100%|██████████| 677/677 [00:05<00:00, 131.66it/s]
[8 / 30]   Val: Loss = 3.92822, PPX = 50.82: 100%|██████████| 19/19 [00:00<00:00, 128.40it/s]
[9 / 30] Train: Loss = 3.29619, PPX = 27.01:   1%|          | 7/677 [00:00<00:07, 91.63it/s]

Optimizer lr = 1.25

когдамывсебаба\nвотииснимпорвгодслон\nвбульончтоб\nиснасбылв\nвострый\nнасамомделебреддней\nавгодзанимиватсон\nвв\nвот\nнаи\nвгодделевотнет\nвотсчего\nвысвасвнас\nи\nсвотвамвотв\nявнев\nнавал\nвдорогу\nатовэтомкаквтригода\nвтото\nвлесуневиктотамвденьмамыможно\nсмать\nвтебя\n</s>


[9 / 30] Train: Loss = 3.29199, PPX = 26.90: 100%|██████████| 677/677 [00:05<00:00, 129.04it/s]
[9 / 30]   Val: Loss = 3.93226, PPX = 51.02: 100%|██████████| 19/19 [00:00<00:00, 129.25it/s]
[10 / 30] Train: Loss = 3.17163, PPX = 23.85:   1%|          | 4/677 [00:00<00:08, 79.94it/s]

Optimizer lr = 0.3125

яверюктебераньшесвамивсамомделе\nизподтолстымрешилналице\nтымнебыты\nневшутку\nятварьчесть\nябскулаками\nвявгоднезрясгод\nнагрудьнедамужин\nвавгоджизнив\nтония\nнамыаневлюдис\nводновсехтехктотоже\nяимвнейлужерот\nнанёмтыввечер\nвтебесбожевсказке\nиз\nначтовдень\nвходит\nчтоянена\nчтоясивчетыре\nнальду\n\n\nсутра\nвденьсвязьсльейод\nвчаснев\nвот\nмыинавесь\nидушавдень


[10 / 30] Train: Loss = 3.25635, PPX = 25.95: 100%|██████████| 677/677 [00:05<00:00, 133.12it/s]
[10 / 30]   Val: Loss = 3.93554, PPX = 51.19: 100%|██████████| 19/19 [00:00<00:00, 133.13it/s]
[11 / 30] Train: Loss = 3.31279, PPX = 27.46:   2%|▏         | 13/677 [00:00<00:06, 107.58it/s]

Optimizer lr = 0.078125

насердцелетогоды\nтынаужин\nиврот\nаяднём\nчто\nназло\nтакчтоснас\nвтриявгости\nвденьвсе\n\nзачтонеза\nвчассточки\nпо\nпотриднядругиимо\nвчас\nситригодаввечер\nвмигвбредув\nвденьтридцать\nв\n</s>


[11 / 30] Train: Loss = 3.24615, PPX = 25.69: 100%|██████████| 677/677 [00:05<00:00, 134.29it/s]
[11 / 30]   Val: Loss = 3.93661, PPX = 51.24: 100%|██████████| 19/19 [00:00<00:00, 135.20it/s]
[12 / 30] Train: Loss = 3.27433, PPX = 26.43:   2%|▏         | 13/677 [00:00<00:06, 104.93it/s]

Optimizer lr = 0.0195312

вденьолегс\nвтрине\n\nночтоневгодестьбог\nнетопокавселетасорокгубыввосемь\nвдень\nтоянетоянатри\n\nвденьсоль\nияневснеграз\nвденьмнебвморевечер\nявгости\nтаквотониненицшедамымамававгодпьян\nчтоподним</s>


[12 / 30] Train: Loss = 3.24381, PPX = 25.63: 100%|██████████| 677/677 [00:05<00:00, 132.57it/s]
[12 / 30]   Val: Loss = 3.93672, PPX = 51.25: 100%|██████████| 19/19 [00:00<00:00, 125.57it/s]
[13 / 30] Train: Loss = 3.36022, PPX = 28.80:   3%|▎         | 19/677 [00:00<00:06, 103.60it/s]

Optimizer lr = 0.00488281

якакна\nты\nбежитна\nчтониябэтовсёчтоможно\nвбожекак\nврот\n</s>


[13 / 30] Train: Loss = 3.24317, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 133.82it/s]
[13 / 30]   Val: Loss = 3.93749, PPX = 51.29: 100%|██████████| 19/19 [00:00<00:00, 133.13it/s]
[14 / 30] Train: Loss = 3.20652, PPX = 24.69:   2%|▏         | 13/677 [00:00<00:06, 108.30it/s]

Optimizer lr = 0.0012207

вотвотпуск\nявточку\nив\nчтотовнивчёмвремя\nтебе\nя\nневденьадомавс\nвотсчемдаложьматьбред\nвотватсон\nвкаквсетивв\nиневя\nнаось\nвомне\nввасвненадопростонужно\nинивгоднеизруквгодвуза\nбезруквдень\n</s>


[14 / 30] Train: Loss = 3.24329, PPX = 25.62: 100%|██████████| 677/677 [00:05<00:00, 133.70it/s]
[14 / 30]   Val: Loss = 3.93629, PPX = 51.23: 100%|██████████| 19/19 [00:00<00:00, 130.11it/s]
[15 / 30] Train: Loss = 3.21960, PPX = 25.02:   1%|          | 5/677 [00:00<00:07, 84.33it/s]

Optimizer lr = 0.000305176

ямогувжизни\nчтоневремясчасбог\nсвотвотнети\nвотвотвотвотсивотвотвотвотвоти\nтеперьбыловнёмв\nввотвлодке\nисвасявасв\nсдуши\nнесмогтойльвчассж\nскакойнелень\nзачемда\nявчасвденьгодыстрингивиз\nаяв\nчтояматьслонслон\nноуж\nтымневсевто\nнарядсвет\nнафонеаятамвдень\nвгодгдеэтобыловпраздник\nвтом\nчтода\nтоестьвчасв\nвденьмнев\n\n</s>


[15 / 30] Train: Loss = 3.24298, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 131.20it/s]
[15 / 30]   Val: Loss = 3.93643, PPX = 51.24: 100%|██████████| 19/19 [00:00<00:00, 130.01it/s]
[16 / 30] Train: Loss = 3.21944, PPX = 25.01:   2%|▏         | 16/677 [00:00<00:06, 104.44it/s]

Optimizer lr = 7.62939e-05

яитывдень\nневсилахвглазавчасчассожаленьюсвасвгодты\nсдоутра\nневрайраз\nтеперьбыунасв\nвнанёмвдень\nвлюк\nвдушемне\n</s>


[16 / 30] Train: Loss = 3.24309, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 130.15it/s]
[16 / 30]   Val: Loss = 3.93716, PPX = 51.27: 100%|██████████| 19/19 [00:00<00:00, 123.32it/s]
[17 / 30] Train: Loss = 3.29661, PPX = 27.02:   1%|          | 4/677 [00:00<00:09, 73.97it/s]

Optimizer lr = 1.90735e-05

наоксанеу\nкнигавморе\nвденьпланывчастыв\nналицеим\nясмотрюспахдамбредкто\nдавносвотвотнетявотнетнетнетнет\nяжнехочупрочтолицойсхлебомбред\nивотявотвотвденьвденьвчас\nвглазахвчас\nкакто\nвсе\nвбредизрукав\nнапол\nиктеперьонневжвчасввчас\nнеталькаквдва\nвтриктактакже\nизподгрудиногивжекак\nнагрудь\n\nвот\nтеперьонмнебы\nвсутьначесть\nнедотой\nнетыбудешь\n\n</s>


[17 / 30] Train: Loss = 3.24303, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 130.22it/s]
[17 / 30]   Val: Loss = 3.93585, PPX = 51.21: 100%|██████████| 19/19 [00:00<00:00, 126.29it/s]
[18 / 30] Train: Loss = 3.12346, PPX = 22.72:   2%|▏         | 13/677 [00:00<00:06, 99.86it/s]

Optimizer lr = 4.76837e-06

мненесейчасневчассутьдел\nвтвойраз\nнаработувточтотовчас\nмысваснаоксанебыловчас\nвкрасноминаполсыра\nсневсон\nвденьдень\nяидояваснедосмерти\nневойкостьчтовысбольшойдо\nсикрикомдетства\nвне\nтычо</s>


[18 / 30] Train: Loss = 3.24314, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 130.16it/s]
[18 / 30]   Val: Loss = 3.93726, PPX = 51.28: 100%|██████████| 19/19 [00:00<00:00, 132.99it/s]
[19 / 30] Train: Loss = 3.30519, PPX = 27.25:   3%|▎         | 17/677 [00:00<00:06, 108.94it/s]

Optimizer lr = 1.19209e-06

когдаявгодявгод\nкактяжеловонвтригода\nвденьневдень\nнетонкак\nновсёжевчоммог\nспросилмнев\nвотвотвначалесмертьсивдень\nсвот\nназемле</s>


[19 / 30] Train: Loss = 3.24279, PPX = 25.60: 100%|██████████| 677/677 [00:05<00:00, 130.46it/s]
[19 / 30]   Val: Loss = 3.93666, PPX = 51.25: 100%|██████████| 19/19 [00:00<00:00, 130.64it/s]
[20 / 30] Train: Loss = 3.27818, PPX = 26.53:   2%|▏         | 16/677 [00:00<00:06, 104.98it/s]

Optimizer lr = 2.98023e-07

янаэтоммногомиресмамывгодыснами\nмненеивотпятьлетдней\nноматьесть\nявсвас\nчтожизньсчемедетбогвстрасть\nнедохера\nвместесвашей\n\n</s>


[20 / 30] Train: Loss = 3.24303, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 130.84it/s]
[20 / 30]   Val: Loss = 3.93696, PPX = 51.26: 100%|██████████| 19/19 [00:00<00:00, 126.06it/s]
[21 / 30] Train: Loss = 3.33416, PPX = 28.05:   3%|▎         | 20/677 [00:00<00:06, 106.89it/s]

Optimizer lr = 7.45058e-08

вденьнасуткискому\nвземлюсутра\nвденьмнеплачивденьлето\nяи\n</s>


[21 / 30] Train: Loss = 3.24309, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 132.32it/s]
[21 / 30]   Val: Loss = 3.93828, PPX = 51.33: 100%|██████████| 19/19 [00:00<00:00, 129.94it/s]
[22 / 30] Train: Loss = 3.06833, PPX = 21.51:   3%|▎         | 18/677 [00:00<00:06, 107.47it/s]

Optimizer lr = 1.86265e-08

настонучтото\nтымынектебенезапогода\nвот\nзаночьна\nвотвот\nвотвотслон\nи\nя\n</s>


[22 / 30] Train: Loss = 3.24305, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 130.83it/s]
[22 / 30]   Val: Loss = 3.93599, PPX = 51.21: 100%|██████████| 19/19 [00:00<00:00, 131.38it/s]
[23 / 30] Train: Loss = 3.18912, PPX = 24.27:   2%|▏         | 16/677 [00:00<00:06, 107.37it/s]

Optimizer lr = 4.65661e-09

ясказалмиг\nявтишиввсёчтовя\nвотноменянеправ\nянемог\nаякактонебылонадевятьпраздник\nялюблюс\nвотсдвухнамивгодвунас\nвчасвкровати\nпрости\n\n</s>


[23 / 30] Train: Loss = 3.24306, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 128.00it/s]
[23 / 30]   Val: Loss = 3.93536, PPX = 51.18: 100%|██████████| 19/19 [00:00<00:00, 131.65it/s]
[24 / 30] Train: Loss = 3.20314, PPX = 24.61:   2%|▏         | 16/677 [00:00<00:06, 105.44it/s]

Optimizer lr = 1.16415e-09

яклюдямна\nатоочень\nзаивнебовдомевсамом\n\nисв\nкактыльяльбвдругбудусс\n\nияраб\nвнихявасржал\nвстенку\n</s>


[24 / 30] Train: Loss = 3.24318, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 130.86it/s]
[24 / 30]   Val: Loss = 3.93756, PPX = 51.29: 100%|██████████| 19/19 [00:00<00:00, 136.98it/s]
[25 / 30] Train: Loss = 3.08161, PPX = 21.79:   0%|          | 3/677 [00:00<00:09, 69.13it/s]

Optimizer lr = 2.91038e-10

явденьстенувморге\nвсвойяненапамять\nато\nпотомвденьэтих\nивсонгдепростотак\nтамза\nненасветев\nвшкафудни\nизивто\nножомивжея\nвнейуснусбожеглядяв\nввжизнивечер\nнамневбольно\nвтригода\nнетовдругизсердцав\nвэтатыивдень\nне\nвненинивто\nвденьвденьсместавчасосеньс\nвоткто\nяльбтамвденьчас\nвмойчасбыло\nвточтобнеот\nподкраюи\nвивнём\nвотты\nсейчасвасне\n\nнавесь\n


[25 / 30] Train: Loss = 3.24324, PPX = 25.62: 100%|██████████| 677/677 [00:05<00:00, 131.63it/s]
[25 / 30]   Val: Loss = 3.93718, PPX = 51.27: 100%|██████████| 19/19 [00:00<00:00, 128.96it/s]
[26 / 30] Train: Loss = 3.33055, PPX = 27.95:   3%|▎         | 18/677 [00:00<00:06, 105.45it/s]

Optimizer lr = 7.27596e-11

мыстобойвнаш\nввотя\nвневхвостивсебе\nчтовгод\nчтотывмозгая\nвлишь\nкакне\n</s>


[26 / 30] Train: Loss = 3.24282, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 128.51it/s]
[26 / 30]   Val: Loss = 3.93676, PPX = 51.25: 100%|██████████| 19/19 [00:00<00:00, 122.44it/s]
[27 / 30] Train: Loss = 3.12122, PPX = 22.67:   2%|▏         | 14/677 [00:00<00:06, 98.28it/s]

Optimizer lr = 1.81899e-11

олегналбусновасбогомв\nянесодоски\nвденьиз\nвмигместмноговгодбред\nчтобневточтоб\nнемнеавдень\nноввподарок\nкаквденьявгодслон\nтынея\nнев\n</s>


[27 / 30] Train: Loss = 3.24303, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 131.16it/s]
[27 / 30]   Val: Loss = 3.93667, PPX = 51.25: 100%|██████████| 19/19 [00:00<00:00, 128.75it/s]
[28 / 30] Train: Loss = 3.35721, PPX = 28.71:   1%|          | 4/677 [00:00<00:09, 74.19it/s]

Optimizer lr = 4.54747e-12

аяправданеочень\nянезнаюянонетнет\nхотябсчаскто\nясвасбвполне\nночто\nнетда\nневнезапноячтотолько\nянетварьбредзрясвотсчемжизнь\nвденьмнеотом\nдаявамс\nссобой\nвбокал\nипоуму\nвот\nвотв\nопятьпокинулнасемь\nивнёмявчас\nснимившколу\nи\nна\nвдушевивденьтымненетак\nвтри\nсва\nтывмесяцчащевычтожневвчас\nвокновсеот\nтри\nлишь\nкогдавстого\nвявасвсказке\nвтрив


[28 / 30] Train: Loss = 3.24287, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 131.93it/s]
[28 / 30]   Val: Loss = 3.93688, PPX = 51.26: 100%|██████████| 19/19 [00:00<00:00, 129.04it/s]
[29 / 30] Train: Loss = 3.27225, PPX = 26.37:   0%|          | 3/677 [00:00<00:09, 68.82it/s]

Optimizer lr = 1.13687e-12

онпостаройморювбанесутравчасвдень\nавяхочучтов\nявдвагодасчтовчас\nатоткаконвыбыть\nиводнодвагода\nотводки\nивотявгодчестьсна\nнеатобоже\nвденьнев\nяо\nвотивотвечервроткак\nвгрудьвсёвпорядке\n\nавотвотвтригода\nв\nвотвотнет\nвоттыснова\nвот\nноже\nиснова\nавнёмвсётакнету\n\nикактов\nвотвденьспол\nвденьновых\nяаявсев\nссмертьбредместа\nсдуши\nвдне\nизнедоя


[29 / 30] Train: Loss = 3.24303, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 133.21it/s]
[29 / 30]   Val: Loss = 3.93818, PPX = 51.33: 100%|██████████| 19/19 [00:00<00:00, 130.39it/s]
[30 / 30] Train: Loss = 3.35244, PPX = 28.57:   1%|          | 7/677 [00:00<00:06, 103.37it/s]

Optimizer lr = 2.84217e-13

унеё\nянасутки\nвденьнетывдушу\nсказалчтоты\nкаквотпуск\nвденьсутраявраз\nисулыбкой\nв\nвденьвденьсвчасместа\nяв\nчтовгодвдень\nвденьдве\nвотвчасвнём\nневденьчас\nвнёмнаокневдень\nмыбыяноскемтывечно\nчтояненадо\nтамявяжнеживу\nно\nатыкаквче\nспросилякактовтоямогу\nвчасна\nяденьгиотпуск\nя\nаизподпятнасдевятьдетства\nвденьточки\nдуракктоя\n</s>


[30 / 30] Train: Loss = 3.24317, PPX = 25.61: 100%|██████████| 677/677 [00:05<00:00, 132.30it/s]
[30 / 30]   Val: Loss = 3.93708, PPX = 51.27: 100%|██████████| 19/19 [00:00<00:00, 122.36it/s]


Optimizer lr = 7.10543e-14

какрыбавынеякактостранно\nнаработу\nнетявамне\nжелаюдамзлотытварь\nкакты\nчто\n\nводной\nянежты\nкак\nсвасвнет\nвжизничаслень\nвразневгодввденьмне\nнадне\nвне\nвротсмух\nно\nтынаночь\nвтишивтруп\nввотвотвотвчасиз\nизподбой\nзлоскак\nснимвамив\nнедодна\nвгодстобойс\nнавсехявтовтовечер\nвденьдрожь\n\nвчасивдень\nвденьлетоместавосемьденегза\nнеточтоя\n</s>


### Увеличиваем выборку

У нас есть выборка для пирожков, которая заметно больше.

**Задание** Обучитесь на ней.

### Transfer learning

Простой и приятный способ улучшения модели - сделать перенос обученной на большом корпусе модели на меньшего объема датасет.

Популярен этот способ больше в компьютерном зрении: [Transfer learning, cs231n](http://cs231n.github.io/transfer-learning/) - там есть огромный ImageNet, на котором предобучают модель, чтобы потом заморозить нижние слои и заменить выходные. В итоге модель использует универсальные представления данных, выученные на большом корпусе, но для предсказания совсем других меток - и качество очень здорово растет.

Нам такие извращения пока не нужны (хотя потом пригодятся, ключевые слова: ULMFiT, ELMo и компания). Просто возьмем обученную на большем корпусе модель и поучим ее на меньшем корпусе. Ей всего-то нужно новый матрический шаблон последней строки выучить.

**Задание** Обученную в прошлом пункте модель дообучите на порошки.

### Conditional language model

Ещё лучше - просто учиться на обоих корпусах сразу. Объедините пирожки и порошки, для каждого храните индекс 0/1 - был ли это пирожок или порошок. Добавьте вход - этот индекс и конкатенируйте его либо к каждому эмбеддингу слов, либо к каждому выходу из LSTM.

**Задание** Научите единую модель, у которой можно просить сгенерировать пирожок или порошок.

### Variational & word dropout

**Задание** На прошлом занятии приводились примеры более приспособленных к RNN'ам dropout'ов. Добавьте их.

**Задание** Кроме этого, попробуйте увеличивать размер модели или количество слоев в ней, чтобы улучшить качество.

## Multi-task learning

Ещё один важный способ улучшения модели - multi-task learning. Это когда одна модель учится делать предсказания сразу для нескольких задач.

В нашем случае это может быть предсказанием отдельно леммы слова и отдельно - его грамматического значения:
![](https://hsto.org/web/e97/8a8/6e8/e978a86e8a874d8d946bb15e6a49a713.png =x350)

В итоге модель выучивает как языковую модель по леммам, так и модель POS tagging'а. Одновременно!

Возьмем корпус из universal dependencies - он уже размечен, как нужно.

Почитаем его:

In [None]:
from corpus_iterator import Token, CorpusIterator

fields = [('word', Field()), ('lemma', Field()), ('gram_val', Field())]
examples = []

with CorpusIterator('UD_Russian-SynTagRus/ru_syntagrus-ud-train.conllu') as corpus_iter:
    for sent in corpus_iter:
        words = ['<s>'] + [tok.token.lower() for tok in sent] + ['</s>']
        lemmas = ['<s>'] + [tok.lemma.lower() for tok in sent] + ['</s>']
        gr_vals = ['<s>'] + [tok.grammar_value for tok in sent] + ['</s>']
        examples.append(Example.fromlist([words, lemmas, gr_vals], fields))

In [None]:
print('Words:', examples[1].word)
print('Lemmas:', examples[1].lemma)
print('Grammar vals:', examples[1].gram_val)

Таким образом, размер словаря может быть существенно сокращен - лемм меньше, чем слов, а предсказание грамматики вынуждает модель быть более осведомленной о согласовании слов.

In [None]:
dataset = Dataset(examples, fields)

dataset.fields['word'].build_vocab(dataset, min_freq=3)
print('Word vocab size =', len(dataset.fields['word'].vocab))
dataset.fields['lemma'].build_vocab(dataset, min_freq=3)
print('Lemma vocab size =', len(dataset.fields['lemma'].vocab))
dataset.fields['gram_val'].build_vocab(dataset)
print('Grammar val vocab size =', len(dataset.fields['gram_val'].vocab))

train_dataset, test_dataset = dataset.split(split_ratio=0.75)

train_iter, test_iter = BucketIterator.splits(datasets=(train_dataset, test_dataset), batch_sizes=(32, 128), 
                                              shuffle=True, device=DEVICE, sort=False)

Построим маппинг из пары (лемма, грамматическое значение) в слово - если бы у нас под рукой был морфологический словарь, маппинг можно было бы пополнить, добавить слова для лемм из корпуса, которые не встретились в обучении.

In [None]:
dictionary = {
    (lemma, gr_val): word
    for example in train_iter.dataset.examples 
    for word, lemma, gr_val in zip(example.word, example.lemma, example.gram_val)
}

**Задание**  Обновите генератор - например, можно сэмплировать лемму и находить самое вероятное грамматическое значение, которое встречается  в паре с этой леммой в `dictionary`.

In [None]:
def generate(model, temp=0.7):
    ...

**Задание** Обновите модель и функцию обучения.

Модель должна принимать пары `lemma, gr_val`, конкатенировать их эмбеддинги и предсказывать следующие `lemma, gr_val` по выходу из LSTM.

Функция `do_epoch` должна суммировать потери по предсказанию леммы (делая маскинг для `<unk>` и `<pad>`) + потери по предсказанию грамматического значения (с маскингом по `<pad>`).

## Контролируемая генерация

Хочется сделать генерацию более контролируемой - в идеале, задавать тему.

Простой способ - сделать тематическое моделирование и найти в текстах какие-то темы - а потом передавать вектор тем вместе с эмбеддингом слова, чтобы модель училась генерировать тематически-согласованный текст.

In [None]:
from gensim import corpora, models

docs = [[word for word in poem if word != '\\n'] for poem in perashki]

dictionary = corpora.Dictionary(docs)
dictionary.filter_n_most_frequent(100)

bow_corpus = [dictionary.doc2bow(doc) for doc in docs]

lda_model = models.LdaModel(bow_corpus, num_topics=5, id2word=dictionary, passes=5)

Посмотреть, что выучилось, можно так:

In [None]:
import pyLDAvis
import pyLDAvis.gensim

pyLDAvis.enable_notebook()
pyLDAvis.gensim.prepare(lda_model, bow_corpus, dictionary)

Предсказывает распределение модель как-то так:

In [None]:
for word in perashki[10]:
    if word == '\\n':
        print()
    else:
        print(word, end=' ')

In [None]:
lda_model.get_document_topics(bow_corpus[10])

**Задание** Посчитайте для всех текстов вектора тем, передавайте их вместе со словами (конкатенируя к эмбеддингам). Посмотрите, вдруг чего получится.

# Дополнительные материалы

## Статьи

Regularizing and Optimizing LSTM Language Models, 2017 [[arxiv]](https://arxiv.org/abs/1708.02182), [[github]](https://github.com/salesforce/awd-lstm-lm) - одна из самых полезных статей про языковые модели + репозиторий, в котором реализовано много полезного, стоит заглянуть

Exploring the Limits of Language Modeling, 2016 [[arxiv]](https://arxiv.org/abs/1602.02410)

Using the Output Embedding to Improve Language Models, 2017 [[pdf]](http://www.aclweb.org/anthology/E17-2025)


## Transfer learning
[Transfer learning, cs231n](http://cs231n.github.io/transfer-learning/)  
[Transfer learning, Ruder](http://ruder.io/transfer-learning/) - очень подробная статья от чувака из NLP

## Multi-task learning
[An Overview of Multi-Task Learning in Deep Neural Networks, Ruder](http://ruder.io/multi-task/)  
[Multi-Task Learning Objectives for Natural Language Processing, Ruder](http://ruder.io/multi-task-learning-nlp/)

# Сдача

[Форма для сдачи](https://goo.gl/forms/ASLLjYncKUcIHmuO2)  
[Feedback](https://goo.gl/forms/9aizSzOUrx7EvGlG3)