## Импорт необходимых зависимостей

In [1]:
import pandas as pd
import nltk
import torch
import torch.nn as nn
import torch.optim
import time
import pickle

from random import random, sample
from typing import List
from collections import Counter
from itertools import chain
from functools import reduce
from tqdm.auto import tqdm
from sklearn import model_selection
from torch.utils.data import DataLoader, TensorDataset

## Подготовка данных

In [2]:
df = pd.read_csv('../input/seq2seq-dataset/dataset.csv')

In [3]:
df

Unnamed: 0,lemm_texts,orig_texts,nsubj,gender,tense,number
0,я предлагать оригинальный подарок для малыш!,я предлагаю оригинальный подарок для малыша!,я,undefined,pres,sing
1,я обезательный перезвонить в любой случай.,я обезательно перезвоню в любом случае.,я,undefined,fut,sing
2,цена на память я не помнить.,цены на память я не помню.,я,undefined,pres,sing
3,"я не помнить , где находиться.","я не помню, где находились.",я,undefined,pres,sing
4,я работать на высококачественный американский ...,я работаю на высококачественных американских м...,я,undefined,pres,sing
...,...,...,...,...,...,...
356967,другой ящерица медленно подбрести к свой товарка.,другая ящерица медленно подбрела к своей товарке.,ящерица,fem,past,sing
356968,зелёный ящерица застылый на мраморный ступень.,зеленая ящерица застыла на мраморной ступени.,ящерица,fem,past,sing
356969,больший ящерица шмыгнуть по песок.,большая ящерица шмыгнула по песку.,ящерица,fem,past,sing
356970,домашний ящерица быстро пробежать вдоль штора.,домашняя ящерица быстро пробежала вдоль штор.,ящерица,fem,past,sing


### Определение классов словаря и трансформера текста

In [5]:
class Vocab:
    def __init__(self, tokens: List[str], unk_idx: int):
        self._tokens = tokens
        self._token_to_idx = {token: idx for idx, token in enumerate(tokens)}
        self._unk_idx = unk_idx
        
    def token_to_idx(self, token: str) -> int:
        return self._token_to_idx.get(token, self._unk_idx)
    
    def idx_to_token(self, idx: int) -> str:
        return self._tokens[idx]

In [79]:
class TextTransformer:
    def __init__(self, vocab_size: int):
        self.vocab = None
        self.vocab_size = vocab_size
        self.special_tokens_to_idx = {'<UNK>': 0, '<PAD>': 1, '<SOS>': 2, '<EOS>': 3}
        self._tokenizer = nltk.tokenize.wordpunct_tokenize
    
    def tokenize(self, text) -> List[str]:
        return self._tokenizer(text.lower())
    
    def build_vocab(self, tokens: List[str]):
        tokens_ = [special_token for special_token in self.special_tokens_to_idx.keys()]
        special_tokens_amount = len(self.special_tokens_to_idx)
        
        for token, _ in Counter(tokens).most_common(self.vocab_size - special_tokens_amount):
            tokens_.append(token)
        
        unk_idx = self.special_tokens_to_idx.get('<UNK>')
        self.vocab = Vocab(tokens_, unk_idx)
        
    def transform_text(self, text: str) -> List[int]:
        tokenized_text = self.tokenize(text)
        transformed = [self.vocab.token_to_idx(token) for token in tokenized_text]
        return transformed
    
    def fit_transform(self, texts: List[str]) -> None:
        transformed_texts = []
        
        tokenized_texts = [self.tokenize(text) for text in tqdm(texts, 'Tokenizing texts')]
        tokens = chain(*tokenized_texts)
        self.build_vocab(tokens)
        
        for tokenized_text in tqdm(tokenized_texts, 'Transforming texts'):
            transformed = [self.vocab.token_to_idx(token) for token in tokenized_text]
            transformed_texts.append(transformed)
    
    def transform_texts(self, texts: List[str]) -> List[List[int]]:
        transformed_texts = [transform_text(text) for text in tqdm(texts, 'Transforming texts')]
        return transformed_texts
    
    def text_to_tensor(self, text: str, max_seq_len=8) -> torch.tensor:
        transformed_text = self.transform_text(text)
        pad_idx = self.special_tokens_to_idx.get('<PAD>')
        sos_idx = self.special_tokens_to_idx.get('<SOS>')
        eos_idx = self.special_tokens_to_idx.get('<EOS>')
        
        pad_size = 0
        if len(transformed_text) >= max_seq_len:
            transformed_text = transformed_text[:max_seq_len]
        else:
            pad_size = max_seq_len - len(transformed_text)
            transformed_text.extend([pad_idx] * pad_size)   
        transformed_text.insert(0, sos_idx)
        transformed_text.insert(len(transformed_text) - pad_size, eos_idx)
        
        tensor = torch.tensor(transformed_text, dtype=torch.long)
        return tensor.unsqueeze(0)
    
    def texts_to_tensor(self, texts: List[str], max_seq_len=8) -> torch.tensor:
        pad_idx = self.special_tokens_to_idx.get('<PAD>')
        sos_idx = self.special_tokens_to_idx.get('<SOS>')
        eos_idx = self.special_tokens_to_idx.get('<EOS>')
        transformed_texts = []
        
        for text in tqdm(texts, 'Building tensor'):
            transformed_text = self.transform_text(text)
            pad_size = 0
            if len(transformed_text) >= max_seq_len:
                transformed_text = transformed_text[:max_seq_len]
            else:
                pad_size = max_seq_len - len(transformed_text)
                transformed_text.extend([pad_idx] * pad_size)   
            transformed_text.insert(0, sos_idx)
            transformed_text.insert(len(transformed_text) - pad_size, eos_idx)
            transformed_texts.append(transformed_text)
        
        tensor = torch.tensor(transformed_texts, dtype=torch.long).permute(1, 0)
        return tensor

### Разбиение данных на обучающую, тестовую и валидационную выборки

In [7]:
train_df, test_df = model_selection.train_test_split(df, test_size=0.1)

In [8]:
test_df, val_df = model_selection.train_test_split(test_df, test_size=0.25)

### Токенизация текстов и индексация токенов

In [9]:
lemm_vocab_size = 35000
orig_vocab_size = 60000

In [80]:
lemm_text_transformer = TextTransformer(lemm_vocab_size)
orig_text_transformer = TextTransformer(orig_vocab_size)

In [81]:
lemm_text_transformer.fit_transform(train_df.lemm_texts)

Tokenizing texts:   0%|          | 0/321274 [00:00<?, ?it/s]

Transforming texts:   0%|          | 0/321274 [00:00<?, ?it/s]

In [82]:
orig_text_transformer.fit_transform(train_df.orig_texts)

Tokenizing texts:   0%|          | 0/321274 [00:00<?, ?it/s]

Transforming texts:   0%|          | 0/321274 [00:00<?, ?it/s]

### Перевод данных в тензоры

In [13]:
train_lemm_tensor = lemm_text_transformer.texts_to_tensor(train_df.lemm_texts.to_list())
test_lemm_tensor = lemm_text_transformer.texts_to_tensor(test_df.lemm_texts.to_list())
val_lemm_tensor = lemm_text_transformer.texts_to_tensor(val_df.lemm_texts.to_list())

Building tensor:   0%|          | 0/321274 [00:00<?, ?it/s]

Building tensor:   0%|          | 0/17849 [00:00<?, ?it/s]

Building tensor:   0%|          | 0/17849 [00:00<?, ?it/s]

In [14]:
train_orig_tensor = orig_text_transformer.texts_to_tensor(train_df.orig_texts.to_list())
test_orig_tensor = orig_text_transformer.texts_to_tensor(test_df.orig_texts.to_list())
val_orig_tensor = orig_text_transformer.texts_to_tensor(val_df.orig_texts.to_list())

Building tensor:   0%|          | 0/321274 [00:00<?, ?it/s]

Building tensor:   0%|          | 0/17849 [00:00<?, ?it/s]

Building tensor:   0%|          | 0/17849 [00:00<?, ?it/s]

In [15]:
def cut_to_fit_batch(tensor: torch.Tensor, batch_size: int):
    n_samples = tensor.shape[1]
    new_n_samples = (n_samples // batch_size) * batch_size
    result, _ = tensor.split(new_n_samples, dim=1)
    return torch.transpose(result, 1, 0)

## Построение модели

In [16]:
class EncoderRNN(nn.Module):
    def __init__(self, vocab_size: int, embedding_size: int, hidden_size: int, pad_idx: int,
                 device, num_layers, dropout_p: float):
        super(EncoderRNN, self).__init__()
        
        self.device = device
        self.num_layers = num_layers
        
        self.hidden_size = hidden_size
        
        self.embedding = nn.Sequential(
            nn.Embedding(vocab_size, embedding_size, pad_idx),
            nn.Dropout(dropout_p)
        )
        self.lstm = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=dropout_p)
        
    def forward(self, x):
        # x_shape: (seq_len, batch_size)
        embedding = self.embedding(x)
        # embedding_shape: (seq_len, batch_size, embedding_size)
        output, (hidden, cell) = self.lstm(embedding)
        # output_shape: (seq_len, batch_size, hidden_size)
        # hidden_shape: (num_layers, batch_size, hidden_size)
        # cell_shape: (num_layers, batch_size, hidden_size)
        return hidden, cell
    
    def init_hidden_state(self, batch_size: int):
        hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(self.device)
        cell = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(self.device)
        return hidden, cell

In [17]:
class DecoderRNN(nn.Module):
    def __init__(self, vocab_size: int, embedding_size: int, hidden_size: int, output_size: int, pad_idx: int,
                 device, num_layers, dropout_p: float):
        super(DecoderRNN, self).__init__()
        
        self.device = device
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.embedding = nn.Sequential(
            nn.Embedding(vocab_size, embedding_size, pad_idx),
            nn.Dropout(dropout_p)
        )
        self.lstm = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=dropout_p)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=2)
        
    def forward(self, x, hidden, cell):
        x = x.unsqueeze(0)
        # x_shape: (seq_len=1, batch_size)
        # hidden_shape: (num_layers, batch_size, hidden_size)
        # cell_shape: (num_layers, batch_size, hidden_size)
        embedding = self.embedding(x)
        # embedding_shape: (seq_len=1, batch_size, embedding_size)
        lstm_out, (hidden, cell) = self.lstm(embedding, (hidden, cell))
        # lstm_out_shape: (seq_len=1, batch_size, hidden_size)
        fc_out = self.fc(lstm_out)
        # fc_out_shape: (seq_len=1, batch_size, output_size)
        output = self.softmax(fc_out)
        # output_shape: (seq_len=1, batch_size, output_size)
        
        return output, hidden, cell

In [18]:
class Seq2SeqModel(nn.Module):
    def __init__(self, encoder_vocab_size: int, decoder_vocab_size: int, embedding_size: int, hidden_size: int, output_size: int,
                 pad_idx: int, device, num_layers, dropout_p: float):
        super(Seq2SeqModel, self).__init__()
        
        self.device = device
        
        self.encoder = EncoderRNN(encoder_vocab_size, embedding_size, hidden_size, pad_idx, device, num_layers, dropout_p).to(device)
        self.decoder = DecoderRNN(decoder_vocab_size, embedding_size, hidden_size, output_size, pad_idx, device, num_layers, dropout_p).to(device)
        self.decoder_vocab_size = decoder_vocab_size
        
    def forward(self, input, target, teacher_forcing_ratio=0.5):
        batch_size = input.shape[1]
        target_len = target.shape[0]
        target_vocab_size = self.decoder_vocab_size
        
        outputs = torch.zeros(target_len, batch_size, target_vocab_size, device=self.device)
        
        hidden, cell = self.encoder(input)
        # hidden, cell shapes: (num_layers, batch_size, hidden_size)
        
        prev_token_idx = target[0]
        # prev_token_shape: (batch_size)
        
        for t in range(target_len):
            output, hidden, cell = self.decoder(prev_token_idx, hidden, cell)
            outputs[t] = output.squeeze(0)
            
            best_prediction = outputs[t].argmax(dim=1)
            # best_prediction_shape: (batch_size)
            prev_token_idx = target[t] if random() < teacher_forcing_ratio else best_prediction
        
        return outputs

## Обучение модели

### Функция сохранения текущего состояния модели

In [19]:
def save_model(model: Seq2SeqModel, optimizer, epoch, path):
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'criterion': criterion,
        'epoch': epoch
    }
    
#     torch.save(checkpoint, path)
    with open(path, mode='wb') as f:
        pickle.dump(checkpoint, f)

### Функция загрузки уже тренировавшейся модели

In [20]:
def load_model(model: Seq2SeqModel, optimizer, criterion, path):
    with open(path, mode='rb') as f:
        checkpoint = pickle.load(f)
#     checkpoint = torch.load(path)
        
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    criterion = checkpoint['criterion']
    
    return epoch

### Инициализация гиперпараметров

In [21]:
learning_rate = 0.001
batch_size = 32
epochs_amount = 15
lemm_vocab_size = 35000
orig_vocab_size = 60000
hidden_size = 1024
embedding_size = 300
num_layers = 2
max_norm = 1.0
dropout_p = 0.4
patience = 3
output_size = orig_vocab_size
pad_idx = lemm_text_transformer.special_tokens_to_idx.get('<PAD>')
model_path = './models/'
model_name = 'simple_seq2seq.model'

In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [23]:
model = Seq2SeqModel(lemm_vocab_size, orig_vocab_size, embedding_size, hidden_size, output_size, pad_idx, device, num_layers, dropout_p).to(device)

In [24]:
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [25]:
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

In [27]:
try:
    epoch = load_model(model, optimizer, criterion, model_path + model_name)
    print(f'Loaded model from {model_path}')
except:
    print(f'No models found at {model_path}')
    epoch = 1

No models found at ./seq2seq.model


### Урезание данных для соответствия размеру батча

In [28]:
train_lemm_tensor_f = cut_to_fit_batch(train_lemm_tensor, batch_size)
train_orig_tensor_f = cut_to_fit_batch(train_orig_tensor, batch_size)

test_lemm_tensor_f = cut_to_fit_batch(test_lemm_tensor, batch_size)
test_orig_tensor_f = cut_to_fit_batch(test_orig_tensor, batch_size)

val_lemm_tensor_f = cut_to_fit_batch(val_lemm_tensor, batch_size)
val_orig_tensor_f = cut_to_fit_batch(val_orig_tensor, batch_size)

### Инициализация данных итерируемых по батчам

In [29]:
train_dataset = TensorDataset(train_lemm_tensor_f, train_orig_tensor_f)
test_dataset = TensorDataset(test_lemm_tensor_f, test_orig_tensor_f)
val_dataset = TensorDataset(val_lemm_tensor_f, val_orig_tensor_f)

In [30]:
train_loader = DataLoader(train_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=1)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

### Определение функции проверки работы сети между эпохами обучения

In [31]:
def test_evaluate(model, input, target_len=8):
    input = input.to(device)
    sos_idx = lemm_text_transformer.special_tokens_to_idx.get('<SOS>')
    eos_idx = lemm_text_transformer.special_tokens_to_idx.get('<EOS>')
    
    with torch.no_grad():
        model.eval()
        hidden, cell = model.encoder(input)
        
        predicted_indexes = [sos_idx]
        
        for _ in range(1, target_len):
            prev_idx = torch.tensor([predicted_indexes[-1]], dtype=torch.long, device=device)
            
            output, hidden, cell = model.decoder(prev_idx, hidden, cell)
            output = output.squeeze(0)
            
            best_prediction = output.argmax(dim=1).item()
            
            if best_prediction == eos_idx:
                break
                
            predicted_indexes.append(best_prediction)
                        
        
    predicted_tokens = [orig_text_transformer.vocab.idx_to_token(idx) for idx in predicted_indexes]
    return predicted_tokens[1:]

### Определение функции обучения сети

In [32]:
def train(model, optimizer, criterion, train_data, val_data, test_data, epochs_amount, max_norm, patience=3, current_epoch=1, n_prints=5):
    min_mean_val_loss = float('+inf')
    initial_patiece = patience
    print_every = len(train_data) // n_prints
    
    for epoch in tqdm(range(current_epoch, epochs_amount + 1), 'Epochs'):
        print(f'\nEpoch [{epoch} / {epochs_amount}]')
        epoch_start_time = torch.cuda.Event(enable_timing=True)
        epoch_end_time = torch.cuda.Event(enable_timing=True)
        epoch_start_time.record()
        
        model.train()
        for iteration, (input, target) in enumerate(tqdm(train_data, 'Epoch training iterations')):
            optimizer.zero_grad()
            # input = lemm_texts, target = orig_texts
            input = torch.transpose(input, 1, 0).to(device)
            # input_shape: (seq_len, batch_size)
            target = torch.transpose(target, 1, 0).to(device)
            # target_shape: (seq_len, batch_size)
            output = model(input, target)
            # output_shape: (seq_len, batch_size, orig_vocab_size) but need (N, orig_vocab_size)
            target = target[1:].reshape(-1)
            # now target_shape is (seq_len * batch_size)
            orig_vocab_size = output.shape[2]
            output = output[1:].reshape(-1, orig_vocab_size)
            # now output_shape is (seq_len * batch_size, orig_vocab_size)
            
            loss = criterion(output, target)
            loss.backward()
            
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_norm)
            
            optimizer.step()
            
            if iteration % print_every == 0:
                print(f'\tIteration #{iteration}: training loss = {loss.item()}')
            elif iteration == len(train_data):
                print(f'\tIteration #{iteration}: training loss = {loss.item()}')
            
            
        with torch.no_grad():
            model.eval()
            val_loss = []
            
            for input, target in tqdm(val_data, 'Epoch validating iterations'):
                input = torch.transpose(input, 1, 0).to(device)
                target = torch.transpose(target, 1, 0).to(device)
                
                output = model(input, target)
                orig_vocab_size = output.shape[2]
                output = output[1:].reshape(-1, orig_vocab_size)
                target = target[1:].reshape(-1)
                
                val_loss.append(criterion(output, target).item())
            
            mean_val_loss = sum(val_loss) / len(val_loss)
            print(f'\tValidation loss = {mean_val_loss}')
            if mean_val_loss < min_mean_val_loss:
                try:
                    save_model(model, optimizer, epoch, model_path)
                    min_mean_val_loss = mean_val_loss
                    patience = initial_patiece
                except Exception as exc:
                    print(exc)
            else:
                patience -= 1
            
            test_data = DataLoader(test_data.dataset, batch_size=1, shuffle=True)
            for input, target in test_data:
                target = target.squeeze(0).to(device)
                
                input = torch.transpose(input, 1, 0)
                target_len = target.shape[0]
                
                output = test_evaluate(model, input, target_len)
                decoded_input = [lemm_text_transformer.vocab.idx_to_token(idx.item()) for idx in input]
                decoded_target = [orig_text_transformer.vocab.idx_to_token(idx.item()) for idx in target]
                
                print(f'\tInput: {decoded_input}')
                print(f'\tOutput: {output}')
                print(f'\tTarget: {decoded_target}')
                break
                
        torch.cuda.synchronize()
        epoch_end_time.record()
        torch.cuda.synchronize()
        elapsed_epoch_time = round(epoch_start_time.elapsed_time(epoch_end_time) / 60000, 3)
        print(f'\tEpoch elapsed time: {elapsed_epoch_time} minutes')
        
        if patience == 0:
            print(f'\nModel learning finished due to early stopping')
            break


### Определение функции эксплуатации обученной модели

In [314]:
def evaluate(model: Seq2SeqModel, sentence: str, max_seq_len=10):
    input_tensor = lemm_text_transformer.text_to_tensor(sentence, max_seq_len).to(device)
    input_tensor = torch.transpose(input_tensor, 1, 0)
    sos_idx = lemm_text_transformer.special_tokens_to_idx.get('<SOS>')
    eos_idx = lemm_text_transformer.special_tokens_to_idx.get('<EOS>')
    
    with torch.no_grad():
        model.eval()
        hidden, cell = model.encoder(input_tensor)
        
        predicted_indexes = [sos_idx]
        
#         while True:
#             prev_idx = torch.tensor([predicted_indexes[-1]], dtype=torch.long, device=device)
            
#             output, hidden, cell = model.decoder(prev_idx, hidden, cell)
#             output = output.squeeze(0)
            
#             best_prediction = output.argmax(dim=1).item()
            
#             if best_prediction == eos_idx:
#                 break
            
#             predicted_indexes.append(best_prediction)
                       
        
        for _ in range(1, max_seq_len):
            prev_idx = torch.tensor([predicted_indexes[-1]], dtype=torch.long, device=device)
            
            output, hidden, cell = model.decoder(prev_idx, hidden, cell)
            output = output.squeeze(0)
            
            best_prediction = output.argmax(dim=1).item()
            
            if best_prediction == eos_idx:
                break
                
            predicted_indexes.append(best_prediction)
        
    predicted_tokens = [orig_text_transformer.vocab.idx_to_token(idx) for idx in predicted_indexes]
    return predicted_tokens[1:]

In [34]:
# train(model, optimizer, criterion, train_loader, val_loader, test_loader, epochs_amount, max_norm, patience, epoch)

Epochs:   0%|          | 0/15 [00:00<?, ?it/s]


Epoch [1 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 11.00084400177002
	Iteration #250: training loss = 4.2196044921875
	Iteration #500: training loss = 3.2808210849761963
	Iteration #750: training loss = 2.5662546157836914
	Iteration #1000: training loss = 2.184664726257324
	Iteration #1250: training loss = 1.822027325630188


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 1.7900535351988198
	Input: ['<SOS>', 'я', 'всегда', 'ударять', 'по', 'мяч', '.', '<EOS>', '<PAD>', '<PAD>']
	Output: ['я', 'всегда', 'скучаю', 'по', '<ДАННЫЕ_УДАЛЕНЫ>', '.']
	Target: ['<SOS>', 'я', 'всегда', 'ударяю', 'по', 'мячу', '.', '<EOS>', '<PAD>', '<PAD>']
	Epoch elapsed time: 5.984 minutes

Epoch [2 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 2.244800090789795
	Iteration #250: training loss = 1.7715039253234863
	Iteration #500: training loss = 1.6209949254989624
	Iteration #750: training loss = 1.2364271879196167
	Iteration #1000: training loss = 1.2012498378753662
	Iteration #1250: training loss = 1.0096397399902344


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 1.0527670988138171
	Input: ['<SOS>', 'девушка', 'зажмуриться', 'от', 'ужас', '.', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Output: ['девушка', 'зажмурилась', 'от', 'ужаса', '.']
	Target: ['<SOS>', 'девушка', 'зажмурилась', 'от', 'ужаса', '.', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Epoch elapsed time: 6.182 minutes

Epoch [3 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 1.3237749338150024
	Iteration #250: training loss = 0.9848141074180603
	Iteration #500: training loss = 0.9563255310058594
	Iteration #750: training loss = 0.7145868539810181
	Iteration #1000: training loss = 0.741412341594696
	Iteration #1250: training loss = 0.6275113224983215


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 0.7826366234516752
	Input: ['<SOS>', 'я', 'присоединиться', 'к', 'масса', '.', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Output: ['я', 'присоединился', 'к', '<ДАННЫЕ_УДАЛЕНЫ>', '.']
	Target: ['<SOS>', 'я', 'присоединился', 'к', 'массе', '.', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Epoch elapsed time: 6.195 minutes

Epoch [4 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 0.8233298063278198
	Iteration #250: training loss = 0.5715072154998779
	Iteration #500: training loss = 0.6232591867446899
	Iteration #750: training loss = 0.4912452697753906
	Iteration #1000: training loss = 0.4703558683395386
	Iteration #1250: training loss = 0.4086615741252899


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 0.665241299332052
	Input: ['<SOS>', 'сашка', 'взять', 'в', 'лиловый', 'оправа', '.', '<EOS>', '<PAD>', '<PAD>']
	Output: ['сашка', 'взял', 'в', '<ДАННЫЕ_УДАЛЕНЫ>', 'техникуме', '.']
	Target: ['<SOS>', 'сашка', 'взял', 'в', 'лиловой', '<ДАННЫЕ_УДАЛЕНЫ>', '.', '<EOS>', '<PAD>', '<PAD>']
	Epoch elapsed time: 6.191 minutes

Epoch [5 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 0.5101946592330933
	Iteration #250: training loss = 0.3305533826351166
	Iteration #500: training loss = 0.4313153326511383
	Iteration #750: training loss = 0.326799601316452
	Iteration #1000: training loss = 0.3459184169769287
	Iteration #1250: training loss = 0.2728794813156128


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 0.6086651566235916
	Input: ['<SOS>', 'капитан', 'повернуться', 'к', 'энн', '.', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Output: ['капитан', 'повернулся', 'к', 'энн', '.']
	Target: ['<SOS>', 'капитан', 'повернулся', 'к', 'энн', '.', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Epoch elapsed time: 6.165 minutes

Epoch [6 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 0.33056730031967163
	Iteration #250: training loss = 0.20200130343437195
	Iteration #500: training loss = 0.2887400686740875
	Iteration #750: training loss = 0.247162863612175
	Iteration #1000: training loss = 0.23237352073192596
	Iteration #1250: training loss = 0.2136102318763733


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 0.5787783321263134
	Input: ['<SOS>', 'он', 'искоса', 'поглядеть', 'на', 'гермиона', '.', '<EOS>', '<PAD>', '<PAD>']
	Output: ['он', 'искоса', 'поглядел', 'на', 'гермиону', '.']
	Target: ['<SOS>', 'он', 'искоса', 'поглядел', 'на', 'гермиону', '.', '<EOS>', '<PAD>', '<PAD>']
	Epoch elapsed time: 6.171 minutes

Epoch [7 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 0.22271345555782318
	Iteration #250: training loss = 0.1454756259918213
	Iteration #500: training loss = 0.2203332483768463
	Iteration #750: training loss = 0.1709129810333252
	Iteration #1000: training loss = 0.20406152307987213
	Iteration #1250: training loss = 0.1571664810180664


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 0.5687042699343916
	Input: ['<SOS>', 'браун', 'резко', 'повернуться', 'к', 'алексис', '.', '<EOS>', '<PAD>', '<PAD>']
	Output: ['браун', 'резко', 'повернулся', 'к', '<ДАННЫЕ_УДАЛЕНЫ>', '.']
	Target: ['<SOS>', 'браун', 'резко', 'повернулся', 'к', '<ДАННЫЕ_УДАЛЕНЫ>', '.', '<EOS>', '<PAD>', '<PAD>']
	Epoch elapsed time: 6.18 minutes

Epoch [8 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 0.1591860055923462
	Iteration #250: training loss = 0.12383783608675003
	Iteration #500: training loss = 0.1682780385017395
	Iteration #750: training loss = 0.14022408425807953
	Iteration #1000: training loss = 0.162378191947937
	Iteration #1250: training loss = 0.11923165619373322


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 0.557683726583702
	Input: ['<SOS>', 'саманта', 'побежать', 'в', 'лаборатория', '.', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Output: ['саманта', 'побежала', 'в', 'лабораторию', '.']
	Target: ['<SOS>', 'саманта', 'побежала', 'в', 'лабораторию', '.', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Epoch elapsed time: 6.175 minutes

Epoch [9 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 0.14510950446128845
	Iteration #250: training loss = 0.08425789326429367
	Iteration #500: training loss = 0.16126611828804016
	Iteration #750: training loss = 0.12940996885299683
	Iteration #1000: training loss = 0.13750624656677246
	Iteration #1250: training loss = 0.10021355748176575


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 0.555608161981555
	Input: ['<SOS>', 'я', 'медленно', 'идти', 'по', 'городок', '.', '<EOS>', '<PAD>', '<PAD>']
	Output: ['я', 'медленно', 'шел', 'по', 'городку', '.']
	Target: ['<SOS>', 'я', 'медленно', 'шел', 'по', 'городку', '.', '<EOS>', '<PAD>', '<PAD>']
	Epoch elapsed time: 6.163 minutes

Epoch [10 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 0.1281607300043106
	Iteration #250: training loss = 0.08394954353570938
	Iteration #500: training loss = 0.12871237099170685
	Iteration #750: training loss = 0.10635174065828323
	Iteration #1000: training loss = 0.1069166287779808
	Iteration #1250: training loss = 0.10388542711734772


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 0.5538733795933102
	Input: ['<SOS>', 'он', 'повернуться', 'в', 'седло', '.', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Output: ['он', 'повернулся', 'в', 'седле', '.']
	Target: ['<SOS>', 'он', 'повернулся', 'в', 'седле', '.', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Epoch elapsed time: 6.174 minutes

Epoch [11 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 0.0969495102763176
	Iteration #250: training loss = 0.06817984580993652
	Iteration #500: training loss = 0.0891454815864563
	Iteration #750: training loss = 0.09971724450588226
	Iteration #1000: training loss = 0.10060471296310425
	Iteration #1250: training loss = 0.09673718363046646


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 0.5534522308819536
	Input: ['<SOS>', 'он', 'смотреть', 'в', 'темнота', ';', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Output: ['он', 'смотрел', 'в', 'темноту', ';']
	Target: ['<SOS>', 'он', 'смотрел', 'в', 'темноту', ';', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Epoch elapsed time: 6.136 minutes

Epoch [12 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 0.10112450271844864
	Iteration #250: training loss = 0.060384444892406464
	Iteration #500: training loss = 0.0989241972565651
	Iteration #750: training loss = 0.09463158249855042
	Iteration #1000: training loss = 0.09263592213392258
	Iteration #1250: training loss = 0.09083976596593857


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 0.5520763064640156
	Input: ['<SOS>', 'виктор', 'решиться', 'на', 'крайний', 'средство', '.', '<EOS>', '<PAD>', '<PAD>']
	Output: ['виктор', 'решился', 'на', 'крайнее', 'средство', '.']
	Target: ['<SOS>', 'виктор', 'решился', 'на', 'крайнее', 'средство', '.', '<EOS>', '<PAD>', '<PAD>']
	Epoch elapsed time: 6.17 minutes

Epoch [13 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 0.08408451080322266
	Iteration #250: training loss = 0.058484990149736404
	Iteration #500: training loss = 0.10459940135478973
	Iteration #750: training loss = 0.08478805422782898
	Iteration #1000: training loss = 0.09069288522005081
	Iteration #1250: training loss = 0.08381370455026627


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 0.5527546777241472
	Input: ['<SOS>', 'он', 'взглянуть', 'на', 'нож', '.', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Output: ['он', 'взглянул', 'на', 'нож', '.']
	Target: ['<SOS>', 'он', 'взглянул', 'на', 'нож', '.', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Epoch elapsed time: 5.876 minutes

Epoch [14 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 0.07135278731584549
	Iteration #250: training loss = 0.04725491255521774
	Iteration #500: training loss = 0.08378446102142334
	Iteration #750: training loss = 0.07651790976524353
	Iteration #1000: training loss = 0.07493138313293457
	Iteration #1250: training loss = 0.07616628706455231


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 0.5558792408825695
	Input: ['<SOS>', 'они', 'двигаться', 'вслед', 'за', 'лодка', '.', '<EOS>', '<PAD>', '<PAD>']
	Output: ['они', 'двигались', 'вслед', 'за', 'лодкой', '.']
	Target: ['<SOS>', 'они', 'двигались', 'вслед', 'за', 'лодкой', '.', '<EOS>', '<PAD>', '<PAD>']
	Epoch elapsed time: 5.873 minutes

Epoch [15 / 15]


Epoch training iterations:   0%|          | 0/1254 [00:00<?, ?it/s]

	Iteration #0: training loss = 0.07581671327352524
	Iteration #250: training loss = 0.04735279455780983
	Iteration #500: training loss = 0.08201677352190018
	Iteration #750: training loss = 0.07472237199544907
	Iteration #1000: training loss = 0.07938290387392044
	Iteration #1250: training loss = 0.05841830000281334


Epoch validating iterations:   0%|          | 0/69 [00:00<?, ?it/s]

	Validation loss = 0.5610537719035494
	Input: ['<SOS>', 'саша', 'отойти', 'в', 'сторона', '.', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Output: ['саша', 'отошла', 'в', 'сторону', '.']
	Target: ['<SOS>', 'саша', 'отошел', 'в', 'сторону', '.', '<EOS>', '<PAD>', '<PAD>', '<PAD>']
	Epoch elapsed time: 5.871 minutes

Model learning finished due to early stopping


In [35]:
# load_model(model, optimizer, criterion, model_path + model_name)
# model.eval()

In [36]:
# import gc
# del model
# del optimizer
# gc.collect()
# torch.cuda.empty_cache()
# gc.collect()

In [319]:
# test_sample = train_df.sample(100)
# test_input = test_sample.lemm_texts.to_list()
# test_target = test_sample.orig_texts.to_list()
# test_pair = list(zip(test_input, test_target))

In [327]:
# for input_sentence, target_sentence in test_pair[:10]:
#     model_output = evaluate(model, input_sentence)
#     print(f'Input: {input_sentence}')
#     print(f'Output: {model_output}')
#     print(f'Target: {target_sentence}')
#     print('\n')

Input: он нуждаться в дракон.
Output: ['он', 'нуждался', 'в', 'драконе', '.']
Target: он нуждался в драконе.


Input: я вспотеть от один мысль.
Output: ['я', 'вспотел', 'от', 'одной', 'мысли', '.']
Target: я вспотел от одной мысли.


Input: мы стоить на место!
Output: ['мы', 'стоим', 'на', 'месте', '!']
Target: мы стоим на месте!


Input: она идти вдоль берег.
Output: ['она', 'шла', 'вдоль', 'берега', '.']
Target: она шла вдоль берега.


Input: он выйти из убежище.
Output: ['он', 'вышел', 'из', 'убежища', '.']
Target: он вышел из убежища.


Input: она заворачиваться в сторона.
Output: ['она', '<ДАННЫЕ_УДАЛЕНЫ>', 'в', 'сторону', '.']
Target: она заворачивалась в сторону.


Input: они никогда не появляться в школа.
Output: ['они', 'никогда', 'не', 'появлялись', 'в', 'школе', '.']
Target: они никогда не появлялись в школе.


Input: разведчик растаять в небо.
Output: ['разведчик', 'растаял', 'в', 'небе', '.']
Target: разведчик растаял в небе.


Input: мы работать с копия.
Output: ['мы', 'р