In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
from sklearn.decomposition import PCA
from sklearn.decomposition import IncrementalPCA
import torch.nn.functional as F
import os
import re
import nltk
from nltk.tokenize import sent_tokenize
nltk.download('punkt')
nltk.download('punkt_tab')
from nltk.tokenize import word_tokenize
from navec import Navec
import scipy.spatial.distance

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\petro\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\petro\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [14]:
# Load the two pre-computed vector arrays from .npy files in the working directory.
# NOTE(review): presumably one row per sentence (literary corpus vs. Telegram chats) — confirm against the vectorisation step.
lit_vectors_np = np.load("vectorized_lit.npy")
conv_vectors_np = np.load("vectorized_tg.npy")

# Первая версия модели и гиперпараметров

In [29]:
# ---- Version 1 configuration ------------------------------------------------

# Data and run setup
vector_dimension = 400            # input/output size handed to Generator and Discriminator
batch_size = 64                   # DataLoader batch size
val_split_ratio = 0.2             # test_size passed to train_test_split
random_seed = 42                  # random_state passed to train_test_split
num_dataloader_workers = 0        # DataLoader num_workers
num_epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Generator architecture
lstm_hidden_dim_generator = 512
lstm_num_layers_generator = 1

# Discriminator architecture
lstm_hidden_dim_discriminator = 256
lstm_num_layers_discriminator = 1
discriminator_layer_size_hidden = 128
leaky_relu_negative_slope = 0.2
discriminator_dropout_rate = 0.2  # the version-1 Discriminator defines no dropout layer, so it does not read this

# Optimisation (Adam)
learning_rate_generators = 0.0002
learning_rate_discriminators = 0.0001
beta1_adam = 0.5
beta2_adam = 0.999
discriminator_weight_decay = 1e-5  # weight_decay of both discriminator optimizers

# Weights of the cycle and identity terms inside the generator loss
lambda_cycle = 5
lambda_identity = 5

# Logging and artefacts
print_batch_interval = 500        # a progress line is printed every this many batches
plot_loss_interval_epochs = 5     # not referenced by the training loop visible in this notebook section
model_output_dir = "version_1"    # default target of save_models
output_dir = model_output_dir     # default target of plot_losses

In [33]:
class Generator(nn.Module):
    """Version-1 generator: an LSTM followed by a linear projection.

    Args:
        input_dim: Feature size of each incoming vector.
        output_dim: Feature size of each produced vector.
        lstm_hidden_dim: Hidden size of the LSTM. The default is read from the
            ``lstm_hidden_dim_generator`` global when this cell is executed.
        num_layers: Number of stacked LSTM layers. The default is read from the
            ``lstm_num_layers_generator`` global when this cell is executed.
    """

    def __init__(self, input_dim, output_dim, lstm_hidden_dim=lstm_hidden_dim_generator, num_layers=lstm_num_layers_generator):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=lstm_hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear_out = nn.Linear(in_features=lstm_hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Run ``x`` through the LSTM and project the last time step.

        An axis of length 1 is inserted at position 1 before the LSTM call.
        NOTE(review): assumes ``x`` is (batch, input_dim), so the new axis is a
        one-step sequence — confirm against the caller.
        """
        one_step_sequence = x.unsqueeze(dim=1)
        sequence_features = self.lstm(one_step_sequence)[0]
        final_step = sequence_features[:, -1, :]
        return self.linear_out(final_step)


class Discriminator(nn.Module):
    """Version-1 discriminator: LSTM -> linear -> LeakyReLU -> linear -> sigmoid.

    Args:
        input_dim: Feature size of each incoming vector.
        lstm_hidden_dim: Hidden size of the LSTM.
        num_layers: Number of stacked LSTM layers.
        negative_slope: Negative slope of the LeakyReLU activation.
        layer_size_hidden: Width of the hidden linear layer.

    All defaults are read from the corresponding module-level settings when
    this cell is executed. This version has no dropout layer.
    """

    def __init__(self, input_dim, lstm_hidden_dim=lstm_hidden_dim_discriminator, num_layers=lstm_num_layers_discriminator, negative_slope=leaky_relu_negative_slope, layer_size_hidden=discriminator_layer_size_hidden):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=lstm_hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear_hidden = nn.Linear(in_features=lstm_hidden_dim, out_features=layer_size_hidden)
        self.leaky_relu = nn.LeakyReLU(negative_slope)
        self.linear_out = nn.Linear(in_features=layer_size_hidden, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one sigmoid score per row of ``x``.

        NOTE(review): assumes ``x`` is (batch, input_dim); an axis of length 1
        is inserted at position 1 before the LSTM call.
        """
        one_step_sequence = x.unsqueeze(dim=1)
        sequence_features = self.lstm(one_step_sequence)[0]
        final_step = sequence_features[:, -1, :]
        activated = self.leaky_relu(self.linear_hidden(final_step))
        logits = self.linear_out(activated)
        return self.sigmoid(logits)

In [5]:
# Version 1: build both generators and both discriminators on `device`.
# The version-1 classes take their remaining sizes from their default arguments.
generator_LitToConv = Generator(input_dim=vector_dimension, output_dim=vector_dimension).to(device)
generator_ConvToLit = Generator(input_dim=vector_dimension, output_dim=vector_dimension).to(device)
discriminator_Literary = Discriminator(input_dim=vector_dimension).to(device)
discriminator_Conversational = Discriminator(input_dim=vector_dimension).to(device)

# A single Adam optimizer updates the parameters of both generators.
optimizer_G = optim.Adam(
    params=[*generator_LitToConv.parameters(), *generator_ConvToLit.parameters()],
    lr=learning_rate_generators,
    betas=(beta1_adam, beta2_adam),
)
# Each discriminator gets its own Adam optimizer, with weight decay.
optimizer_D_lit = optim.Adam(
    params=discriminator_Literary.parameters(),
    lr=learning_rate_discriminators,
    betas=(beta1_adam, beta2_adam),
    weight_decay=discriminator_weight_decay,
)
optimizer_D_conv = optim.Adam(
    params=discriminator_Conversational.parameters(),
    lr=learning_rate_discriminators,
    betas=(beta1_adam, beta2_adam),
    weight_decay=discriminator_weight_decay,
)

# Вторая версия модели и гиперпараметров

In [6]:
# ---- Version 2 configuration ------------------------------------------------

# Data and run setup
vector_dimension = 400            # input/output size handed to Generator and Discriminator
batch_size = 64                   # DataLoader batch size
val_split_ratio = 0.2             # test_size passed to train_test_split
random_seed = 42                  # random_state passed to train_test_split
num_dataloader_workers = 0        # DataLoader num_workers
num_epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Generator architecture
lstm_hidden_dim_generator = 256
lstm_num_layers_generator = 2

# Discriminator architecture
lstm_hidden_dim_discriminator = 256
lstm_num_layers_discriminator = 1
discriminator_layer_size_hidden = 128
leaky_relu_negative_slope = 0.2
discriminator_dropout_rate = 0.2  # probability of the discriminator's dropout layer

# Optimisation (Adam)
learning_rate_generators = 0.00025
learning_rate_discriminators = 0.00005
beta1_adam = 0.5
beta2_adam = 0.999
discriminator_weight_decay = 1e-5  # weight_decay of both discriminator optimizers

# Weights of the cycle and identity terms inside the generator loss
lambda_cycle = 7
lambda_identity = 2

# Logging and artefacts
print_batch_interval = 500        # a progress line is printed every this many batches
plot_loss_interval_epochs = 5     # not referenced by the training loop visible in this notebook section
model_output_dir = "version_2"    # default target of save_models
output_dir = model_output_dir     # default target of plot_losses

In [7]:
class Generator(nn.Module):
    """LSTM-based generator for the CycleGAN (version 2).

    Args:
        input_dim: Feature size of each incoming vector.
        output_dim: Feature size of each produced vector.
        lstm_hidden_dim: Hidden size of the LSTM. The default is read from the
            ``lstm_hidden_dim_generator`` global when this cell is executed.
        num_layers: Number of stacked LSTM layers. The default is read from the
            ``lstm_num_layers_generator`` global when this cell is executed.
    """

    def __init__(self, input_dim, output_dim, lstm_hidden_dim=lstm_hidden_dim_generator, num_layers=lstm_num_layers_generator):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=lstm_hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear_out = nn.Linear(in_features=lstm_hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Project the LSTM output of the last time step to ``output_dim``.

        NOTE(review): assumes ``x`` is (batch, input_dim); an axis of length 1
        is inserted at position 1 before the LSTM call.
        """
        as_sequence = x.unsqueeze(dim=1)
        lstm_features, _state = self.lstm(as_sequence)
        return self.linear_out(lstm_features[:, -1, :])


class Discriminator(nn.Module):
    """Version-2 discriminator: LSTM -> linear -> LeakyReLU -> dropout -> linear -> sigmoid.

    Args:
        input_dim: Feature size of each incoming vector.
        lstm_hidden_dim: Hidden size of the LSTM.
        num_layers: Number of stacked LSTM layers.
        negative_slope: Negative slope of the LeakyReLU activation.
        layer_size_hidden: Width of the hidden linear layer.
        dropout_rate: Probability used by the dropout layer after the activation.
    """

    def __init__(self, input_dim, lstm_hidden_dim, num_layers, negative_slope, layer_size_hidden, dropout_rate):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=lstm_hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear_hidden = nn.Linear(in_features=lstm_hidden_dim, out_features=layer_size_hidden)
        self.leaky_relu = nn.LeakyReLU(negative_slope)
        self.dropout = nn.Dropout(dropout_rate)
        self.linear_out = nn.Linear(in_features=layer_size_hidden, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one sigmoid score per row of ``x``.

        NOTE(review): assumes ``x`` is (batch, input_dim); an axis of length 1
        is inserted at position 1 before the LSTM call.
        """
        one_step_sequence = x.unsqueeze(dim=1)
        sequence_features = self.lstm(one_step_sequence)[0]
        final_step = sequence_features[:, -1, :]
        activated = self.leaky_relu(self.linear_hidden(final_step))
        regularised = self.dropout(activated)
        return self.sigmoid(self.linear_out(regularised))

In [47]:
# Version 2: build both generators and both discriminators on `device`,
# passing every architecture setting explicitly.
generator_LitToConv = Generator(
    input_dim=vector_dimension,
    output_dim=vector_dimension,
    lstm_hidden_dim=lstm_hidden_dim_generator,
    num_layers=lstm_num_layers_generator,
).to(device)
generator_ConvToLit = Generator(
    input_dim=vector_dimension,
    output_dim=vector_dimension,
    lstm_hidden_dim=lstm_hidden_dim_generator,
    num_layers=lstm_num_layers_generator,
).to(device)
discriminator_Literary = Discriminator(
    input_dim=vector_dimension,
    lstm_hidden_dim=lstm_hidden_dim_discriminator,
    num_layers=lstm_num_layers_discriminator,
    negative_slope=leaky_relu_negative_slope,
    layer_size_hidden=discriminator_layer_size_hidden,
    dropout_rate=discriminator_dropout_rate,
).to(device)
discriminator_Conversational = Discriminator(
    input_dim=vector_dimension,
    lstm_hidden_dim=lstm_hidden_dim_discriminator,
    num_layers=lstm_num_layers_discriminator,
    negative_slope=leaky_relu_negative_slope,
    layer_size_hidden=discriminator_layer_size_hidden,
    dropout_rate=discriminator_dropout_rate,
).to(device)

# A single Adam optimizer updates the parameters of both generators.
optimizer_G = optim.Adam(
    params=[*generator_LitToConv.parameters(), *generator_ConvToLit.parameters()],
    lr=learning_rate_generators,
    betas=(beta1_adam, beta2_adam),
)
# Each discriminator gets its own Adam optimizer, with weight decay.
optimizer_D_lit = optim.Adam(
    params=discriminator_Literary.parameters(),
    lr=learning_rate_discriminators,
    betas=(beta1_adam, beta2_adam),
    weight_decay=discriminator_weight_decay,
)
optimizer_D_conv = optim.Adam(
    params=discriminator_Conversational.parameters(),
    lr=learning_rate_discriminators,
    betas=(beta1_adam, beta2_adam),
    weight_decay=discriminator_weight_decay,
)

# Третья версия модели и гиперпараметров

In [6]:
# ---- Version 3 configuration ------------------------------------------------

# Data and run setup
vector_dimension = 400            # input/output size handed to Generator and Discriminator
batch_size = 64                   # DataLoader batch size
val_split_ratio = 0.2             # test_size passed to train_test_split
random_seed = 42                  # random_state passed to train_test_split
num_dataloader_workers = 0        # DataLoader num_workers
num_epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Generator architecture
lstm_hidden_dim_generator = 256
lstm_num_layers_generator = 2

# Discriminator architecture
lstm_hidden_dim_discriminator = 256
lstm_num_layers_discriminator = 1
discriminator_layer_size_hidden = 128
leaky_relu_negative_slope = 0.2
discriminator_dropout_rate = 0.2  # probability of the discriminator's dropout layer

# Optimisation (Adam)
learning_rate_generators = 0.00025
learning_rate_discriminators = 0.00005
beta1_adam = 0.5
beta2_adam = 0.999
discriminator_weight_decay = 1e-5  # weight_decay of both discriminator optimizers

# Loss weights
lambda_cycle = 10
lambda_identity = 3.5
# NOTE(review): presumably the L1/cosine mixing weights of the cycle and identity
# terms in the version 3/4 training loop — their use is outside this section, confirm there.
lambda_cycle_l1 = 0.5
lambda_cycle_cosine = 0.5
lambda_identity_l1 = 0.5
lambda_identity_cosine = 0.5

# Logging and artefacts
print_batch_interval = 500
plot_loss_interval_epochs = 5
model_output_dir = "version_3"    # default target of save_models
output_dir = model_output_dir     # default target of plot_losses

In [7]:
class Generator(nn.Module):
    """LSTM-based generator for the CycleGAN (version 3).

    Args:
        input_dim: Feature size of each incoming vector.
        output_dim: Feature size of each produced vector.
        lstm_hidden_dim: Hidden size of the LSTM. The default is read from the
            ``lstm_hidden_dim_generator`` global when this cell is executed.
        num_layers: Number of stacked LSTM layers. The default is read from the
            ``lstm_num_layers_generator`` global when this cell is executed.
    """

    def __init__(self, input_dim, output_dim, lstm_hidden_dim=lstm_hidden_dim_generator, num_layers=lstm_num_layers_generator):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=lstm_hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear_out = nn.Linear(in_features=lstm_hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Project the LSTM output of the last time step to ``output_dim``.

        NOTE(review): assumes ``x`` is (batch, input_dim); an axis of length 1
        is inserted at position 1 before the LSTM call.
        """
        as_sequence = x.unsqueeze(dim=1)
        lstm_features, _state = self.lstm(as_sequence)
        last_step_features = lstm_features[:, -1, :]
        return self.linear_out(last_step_features)


class Discriminator(nn.Module):
    """Version-3 discriminator: LSTM -> linear -> LeakyReLU -> dropout -> linear -> sigmoid.

    Args:
        input_dim: Feature size of each incoming vector.
        lstm_hidden_dim: Hidden size of the LSTM.
        num_layers: Number of stacked LSTM layers.
        negative_slope: Negative slope of the LeakyReLU activation.
        layer_size_hidden: Width of the hidden linear layer.
        dropout_rate: Probability used by the dropout layer after the activation.
    """

    def __init__(self, input_dim, lstm_hidden_dim, num_layers, negative_slope, layer_size_hidden, dropout_rate):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=lstm_hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear_hidden = nn.Linear(in_features=lstm_hidden_dim, out_features=layer_size_hidden)
        self.leaky_relu = nn.LeakyReLU(negative_slope)
        self.dropout = nn.Dropout(dropout_rate)
        self.linear_out = nn.Linear(in_features=layer_size_hidden, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one sigmoid score per row of ``x``.

        NOTE(review): assumes ``x`` is (batch, input_dim); an axis of length 1
        is inserted at position 1 before the LSTM call.
        """
        lstm_features, _state = self.lstm(x.unsqueeze(dim=1))
        hidden = self.linear_hidden(lstm_features[:, -1, :])
        hidden = self.dropout(self.leaky_relu(hidden))
        score = self.linear_out(hidden)
        return self.sigmoid(score)

In [8]:
# Version 3: build both generators and both discriminators on `device`,
# passing every architecture setting explicitly.
generator_LitToConv = Generator(
    input_dim=vector_dimension,
    output_dim=vector_dimension,
    lstm_hidden_dim=lstm_hidden_dim_generator,
    num_layers=lstm_num_layers_generator,
).to(device)
generator_ConvToLit = Generator(
    input_dim=vector_dimension,
    output_dim=vector_dimension,
    lstm_hidden_dim=lstm_hidden_dim_generator,
    num_layers=lstm_num_layers_generator,
).to(device)
discriminator_Literary = Discriminator(
    input_dim=vector_dimension,
    lstm_hidden_dim=lstm_hidden_dim_discriminator,
    num_layers=lstm_num_layers_discriminator,
    negative_slope=leaky_relu_negative_slope,
    layer_size_hidden=discriminator_layer_size_hidden,
    dropout_rate=discriminator_dropout_rate,
).to(device)
discriminator_Conversational = Discriminator(
    input_dim=vector_dimension,
    lstm_hidden_dim=lstm_hidden_dim_discriminator,
    num_layers=lstm_num_layers_discriminator,
    negative_slope=leaky_relu_negative_slope,
    layer_size_hidden=discriminator_layer_size_hidden,
    dropout_rate=discriminator_dropout_rate,
).to(device)

# A single Adam optimizer updates the parameters of both generators.
optimizer_G = optim.Adam(
    params=[*generator_LitToConv.parameters(), *generator_ConvToLit.parameters()],
    lr=learning_rate_generators,
    betas=(beta1_adam, beta2_adam),
)
# Each discriminator gets its own Adam optimizer, with weight decay.
optimizer_D_lit = optim.Adam(
    params=discriminator_Literary.parameters(),
    lr=learning_rate_discriminators,
    betas=(beta1_adam, beta2_adam),
    weight_decay=discriminator_weight_decay,
)
optimizer_D_conv = optim.Adam(
    params=discriminator_Conversational.parameters(),
    lr=learning_rate_discriminators,
    betas=(beta1_adam, beta2_adam),
    weight_decay=discriminator_weight_decay,
)

# Четвёртая версия модели и гиперпараметров

In [22]:
# ---- Version 4 configuration ------------------------------------------------

# Data and run setup
vector_dimension = 400            # input/output size handed to Generator and Discriminator
batch_size = 64                   # DataLoader batch size
val_split_ratio = 0.2             # test_size passed to train_test_split
random_seed = 42                  # random_state passed to train_test_split
num_dataloader_workers = 0        # DataLoader num_workers
num_epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Generator architecture
lstm_hidden_dim_generator = 400
lstm_num_layers_generator = 3

# Discriminator architecture
lstm_hidden_dim_discriminator = 160
lstm_num_layers_discriminator = 1
discriminator_layer_size_hidden = 40
leaky_relu_negative_slope = 0.2
discriminator_dropout_rate = 0.4  # probability of the discriminator's dropout layer

# Optimisation (Adam)
learning_rate_generators = 0.0002
learning_rate_discriminators = 0.00005
beta1_adam = 0.5
beta2_adam = 0.999
discriminator_weight_decay = 1e-3  # weight_decay of both discriminator optimizers

# Loss weights
lambda_cycle = 4
lambda_identity = 4
# NOTE(review): presumably the L1/cosine mixing weights of the cycle and identity
# terms in the version 3/4 training loop — their use is outside this section, confirm there.
lambda_cycle_l1 = 0.5
lambda_cycle_cosine = 0.5
lambda_identity_l1 = 0.5
lambda_identity_cosine = 0.5

# Logging and artefacts
print_batch_interval = 500
plot_loss_interval_epochs = 5
model_output_dir = "version_4"    # default target of save_models
output_dir = model_output_dir     # default target of plot_losses

In [23]:
class Generator(nn.Module):
    """Version-4 generator: an LSTM followed by a linear projection.

    Args:
        input_dim: Feature size of each incoming vector.
        output_dim: Feature size of each produced vector.
        lstm_hidden_dim: Hidden size of the LSTM. The default is read from the
            ``lstm_hidden_dim_generator`` global when this cell is executed.
        num_layers: Number of stacked LSTM layers. The default is read from the
            ``lstm_num_layers_generator`` global when this cell is executed.
    """

    def __init__(self, input_dim, output_dim, lstm_hidden_dim=lstm_hidden_dim_generator, num_layers=lstm_num_layers_generator):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=lstm_hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear_out = nn.Linear(in_features=lstm_hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Project the LSTM output of the last time step to ``output_dim``.

        NOTE(review): assumes ``x`` is (batch, input_dim); an axis of length 1
        is inserted at position 1 before the LSTM call.
        """
        one_step_sequence = x.unsqueeze(dim=1)
        sequence_features = self.lstm(one_step_sequence)[0]
        return self.linear_out(sequence_features[:, -1, :])


class Discriminator(nn.Module):
    """Version-4 discriminator: LSTM -> linear -> LeakyReLU -> dropout -> linear -> sigmoid.

    Args:
        input_dim: Feature size of each incoming vector.
        lstm_hidden_dim: Hidden size of the LSTM.
        num_layers: Number of stacked LSTM layers.
        negative_slope: Negative slope of the LeakyReLU activation.
        layer_size_hidden: Width of the hidden linear layer.
        dropout_rate: Probability used by the dropout layer after the activation.
    """

    def __init__(self, input_dim, lstm_hidden_dim, num_layers, negative_slope, layer_size_hidden, dropout_rate):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=lstm_hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear_hidden = nn.Linear(in_features=lstm_hidden_dim, out_features=layer_size_hidden)
        self.leaky_relu = nn.LeakyReLU(negative_slope)
        self.dropout = nn.Dropout(dropout_rate)
        self.linear_out = nn.Linear(in_features=layer_size_hidden, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one sigmoid score per row of ``x``.

        NOTE(review): assumes ``x`` is (batch, input_dim); an axis of length 1
        is inserted at position 1 before the LSTM call.
        """
        as_sequence = x.unsqueeze(dim=1)
        lstm_features, _state = self.lstm(as_sequence)
        last_step_features = lstm_features[:, -1, :]
        hidden = self.leaky_relu(self.linear_hidden(last_step_features))
        return self.sigmoid(self.linear_out(self.dropout(hidden)))

In [24]:
# Version 4: build both generators and both discriminators on `device`,
# passing every architecture setting explicitly.
generator_LitToConv = Generator(
    input_dim=vector_dimension,
    output_dim=vector_dimension,
    lstm_hidden_dim=lstm_hidden_dim_generator,
    num_layers=lstm_num_layers_generator,
).to(device)
generator_ConvToLit = Generator(
    input_dim=vector_dimension,
    output_dim=vector_dimension,
    lstm_hidden_dim=lstm_hidden_dim_generator,
    num_layers=lstm_num_layers_generator,
).to(device)
discriminator_Literary = Discriminator(
    input_dim=vector_dimension,
    lstm_hidden_dim=lstm_hidden_dim_discriminator,
    num_layers=lstm_num_layers_discriminator,
    negative_slope=leaky_relu_negative_slope,
    layer_size_hidden=discriminator_layer_size_hidden,
    dropout_rate=discriminator_dropout_rate,
).to(device)
discriminator_Conversational = Discriminator(
    input_dim=vector_dimension,
    lstm_hidden_dim=lstm_hidden_dim_discriminator,
    num_layers=lstm_num_layers_discriminator,
    negative_slope=leaky_relu_negative_slope,
    layer_size_hidden=discriminator_layer_size_hidden,
    dropout_rate=discriminator_dropout_rate,
).to(device)

# A single Adam optimizer updates the parameters of both generators.
optimizer_G = optim.Adam(
    params=[*generator_LitToConv.parameters(), *generator_ConvToLit.parameters()],
    lr=learning_rate_generators,
    betas=(beta1_adam, beta2_adam),
)
# Each discriminator gets its own Adam optimizer, with weight decay.
optimizer_D_lit = optim.Adam(
    params=discriminator_Literary.parameters(),
    lr=learning_rate_discriminators,
    betas=(beta1_adam, beta2_adam),
    weight_decay=discriminator_weight_decay,
)
optimizer_D_conv = optim.Adam(
    params=discriminator_Conversational.parameters(),
    lr=learning_rate_discriminators,
    betas=(beta1_adam, beta2_adam),
    weight_decay=discriminator_weight_decay,
)

# Функции сохранения моделей и графиков

In [25]:
def _draw_loss_curves(ax, history, curves, title):
    """Plot every ``(history_key, legend_label)`` pair of ``curves`` on ``ax``."""
    for key, label in curves:
        values = history[key]
        # The x axis follows each series' own length, so a history that is longer
        # or shorter than the current epoch number cannot raise a length mismatch.
        sns.lineplot(x=list(range(1, len(values) + 1)), y=values, label=label, ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Эпоха')
    ax.set_ylabel('Loss')
    ax.legend()


def _save_and_close(fig, output_dir, filename):
    """Tighten the layout, write ``fig`` to ``output_dir/filename`` and release it."""
    fig.tight_layout()
    fig.savefig(os.path.join(output_dir, filename))
    plt.close(fig)


def plot_losses(history, epoch, output_dir=output_dir):
    """Save the train/validation loss curves as three PNG files.

    Files written (overwritten on every call):
    ``discriminators_losses.png``, ``generators_and_cycle_losses.png`` and
    ``identity_losses.png``.

    Args:
        history: Mapping with the keys ``D_lit_*``, ``D_conv_*``, ``G_*``,
            ``cycle_*`` and ``identity_*`` (``*`` is ``train`` or ``val``), each
            holding one loss value per epoch.
        epoch: Number of the last completed epoch. It is only used to report a
            mismatch with the history lengths; every curve is drawn over its own
            length.
        output_dir: Directory for the images; created when missing. The default
            is captured from the ``output_dir`` global when this cell is executed,
            so re-run the cell after changing that global.

    Raises:
        KeyError: If ``history`` lacks one of the expected keys.
    """
    os.makedirs(output_dir, exist_ok=True)

    # A mismatch usually means `history` was not reset before training was re-run.
    mismatched = {key: len(values) for key, values in history.items() if len(values) != epoch}
    if mismatched:
        print(f"plot_losses: history lengths differ from epoch={epoch}: {mismatched}")

    fig, (ax_lit, ax_conv) = plt.subplots(1, 2, figsize=(12, 5))
    _draw_loss_curves(ax_lit, history,
                      [('D_lit_train', 'D_lit_train'), ('D_lit_val', 'D_lit_val')],
                      'Loss дискриминатора литературного стиля')
    _draw_loss_curves(ax_conv, history,
                      [('D_conv_train', 'D_conv_train'), ('D_conv_val', 'D_conv_val')],
                      'Loss дискриминатора разговорного стиля')
    _save_and_close(fig, output_dir, 'discriminators_losses.png')

    fig, (ax_gen, ax_cycle) = plt.subplots(1, 2, figsize=(12, 5))
    _draw_loss_curves(ax_gen, history,
                      [('G_train', 'G_train'), ('G_val', 'G_val')],
                      'Loss генераторов')
    _draw_loss_curves(ax_cycle, history,
                      [('cycle_train', 'Cycle_train'), ('cycle_val', 'Cycle_val')],
                      'Cycle Consistency Loss')
    _save_and_close(fig, output_dir, 'generators_and_cycle_losses.png')

    fig, ax_identity = plt.subplots(figsize=(6, 5))
    _draw_loss_curves(ax_identity, history,
                      [('identity_train', 'Identity_train'), ('identity_val', 'Identity_val')],
                      'Identity Loss')
    _save_and_close(fig, output_dir, 'identity_losses.png')


def save_models(epoch, generator_lit_to_conv, generator_conv_to_lit, discriminator_literary, discriminator_conversational, optimizer_g, optimizer_d_lit, optimizer_d_conv, output_dir=model_output_dir):
    """Write the state dicts of all four networks and three optimizers to ``output_dir``.

    File names are fixed, so every call overwrites the previous checkpoint.

    Args:
        epoch: Epoch number, used only in the confirmation message.
        generator_lit_to_conv, generator_conv_to_lit: Generator modules.
        discriminator_literary, discriminator_conversational: Discriminator modules.
        optimizer_g, optimizer_d_lit, optimizer_d_conv: Their optimizers.
        output_dir: Target directory; created when missing. The default is
            captured from the ``model_output_dir`` global when this cell is
            executed, so re-run the cell after changing that global.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(output_dir, exist_ok=True)

    checkpoints = {
        'generator_lit_to_conv.pth': generator_lit_to_conv,
        'generator_conv_to_lit.pth': generator_conv_to_lit,
        'discriminator_literary.pth': discriminator_literary,
        'discriminator_conversational.pth': discriminator_conversational,
        'optimizer_g.pth': optimizer_g,
        'optimizer_d_lit.pth': optimizer_d_lit,
        'optimizer_d_conv.pth': optimizer_d_conv,
    }
    for filename, stateful in checkpoints.items():
        torch.save(stateful.state_dict(), os.path.join(output_dir, filename))

    print(f"Модели перезаписаны в {output_dir} на {epoch} эпохе")

# Общая подготовка к обучению

In [26]:
class SentenceDataset(torch.utils.data.Dataset):
    """Dataset yielding index-aligned (literary, conversational) vector pairs.

    Both inputs are converted to ``float32`` tensors. Item ``i`` is the pair
    ``(literary[i], conversational[i])``.

    The dataset length is the length of the shorter input. Using only the
    literary length made iteration fail with ``IndexError`` whenever the
    conversational input had fewer rows.

    Args:
        literary_vectors: Array-like accepted by ``torch.tensor``.
        conversational_vectors: Array-like accepted by ``torch.tensor``.
    """

    def __init__(self, literary_vectors, conversational_vectors):
        self.literary_data = torch.tensor(literary_vectors, dtype=torch.float32)
        self.conversational_data = torch.tensor(conversational_vectors, dtype=torch.float32)
        # Every index below this bound is valid for both tensors.
        self.dataset_len = min(len(self.literary_data), len(self.conversational_data))

    def __len__(self):
        """Return the number of available pairs."""
        return self.dataset_len

    def __getitem__(self, idx):
        """Return the ``(literary, conversational)`` pair stored at ``idx``."""
        return self.literary_data[idx], self.conversational_data[idx]

In [27]:
# Split both arrays into train/validation parts in a single call.
# train_test_split applies the same row selection to every array it is given,
# so both arrays must have the same number of rows and stay paired by index.
lit_train, lit_val, conv_train, conv_val = train_test_split(
    lit_vectors_np, conv_vectors_np, test_size=val_split_ratio, random_state=random_seed
)

In [28]:
# Wrap each split in a SentenceDataset, which serves (literary, conversational) pairs by index.
train_dataset = SentenceDataset(lit_train, conv_train)
val_dataset = SentenceDataset(lit_val, conv_val)

In [29]:
# Training batches are reshuffled every epoch; validation batches keep a fixed order.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_dataloader_workers,
)
val_dataloader = DataLoader(
    dataset=val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_dataloader_workers,
)

In [30]:
# Per-epoch loss history: one empty list for every metric/phase combination.
# Key order: all "*_train" keys first, then all "*_val" keys.
history = {
    f"{metric}_{phase}": []
    for phase in ("train", "val")
    for metric in ("D_lit", "D_conv", "G", "cycle", "identity")
}

# Подготовка и обучение для 1 и 2 версий

In [53]:
# Loss functions for versions 1 and 2, with the (default) mean reduction spelled out.
# Adversarial loss: binary cross-entropy on the discriminators' sigmoid outputs.
criterion_GAN = nn.BCELoss(reduction='mean')
# Cycle-consistency and identity losses: mean absolute error.
criterion_cycle = nn.L1Loss(reduction='mean')
criterion_identity = nn.L1Loss(reduction='mean')

In [None]:
# Training loop for versions 1 and 2 (L1 cycle/identity losses).
# Each epoch: train on every batch, evaluate on the validation loader, append the
# epoch averages to `history`, overwrite the checkpoint files and redraw the plots.
# NOTE(review): `history` is created in a separate cell; re-running this cell without
# resetting it keeps appending, while plot_losses is called with `epoch + 1`.
for epoch in range(num_epochs):
    generator_LitToConv.train()
    generator_ConvToLit.train()
    discriminator_Literary.train()
    discriminator_Conversational.train()

    # Running sums of the per-batch losses for this epoch.
    train_loss_D_lit_accum = 0.0
    train_loss_D_conv_accum = 0.0
    train_loss_G_accum = 0.0
    train_loss_cycle_accum = 0.0
    train_loss_identity_accum = 0.0
    num_train_batches = 0

    for i, (real_literary, real_conversational) in enumerate(train_dataloader):
        real_literary = real_literary.to(device)
        real_conversational = real_conversational.to(device)

        # Target labels for the BCE loss: 1 for "real", 0 for "generated".
        valid = torch.ones(real_literary.size(0), 1).to(device)
        fake = torch.zeros(real_literary.size(0), 1).to(device)

        # ---- Discriminator updates ----
        optimizer_D_lit.zero_grad()
        optimizer_D_conv.zero_grad()

        # Literary discriminator: real literary vectors vs. generated ones.
        # detach() keeps this loss from backpropagating into the generator.
        loss_D_lit_real = criterion_GAN(discriminator_Literary(real_literary), valid)
        fake_literary = generator_ConvToLit(real_conversational)
        loss_D_lit_fake = criterion_GAN(discriminator_Literary(fake_literary.detach()), fake)
        loss_D_lit = (loss_D_lit_real + loss_D_lit_fake) / 2
        loss_D_lit.backward()
        optimizer_D_lit.step()

        # Conversational discriminator: same scheme in the other direction.
        loss_D_conv_real = criterion_GAN(discriminator_Conversational(real_conversational), valid)
        fake_conversational = generator_LitToConv(real_literary)
        loss_D_conv_fake = criterion_GAN(discriminator_Conversational(fake_conversational.detach()), fake)
        loss_D_conv = (loss_D_conv_real + loss_D_conv_fake) / 2
        loss_D_conv.backward()
        optimizer_D_conv.step()

        # ---- Generator update ----
        optimizer_G.zero_grad()

        # Adversarial terms: the generators are run again and scored by the
        # just-updated discriminators against the "real" label.
        fake_conversational = generator_LitToConv(real_literary)
        loss_GAN_LitToConv = criterion_GAN(discriminator_Conversational(fake_conversational), valid)

        fake_literary = generator_ConvToLit(real_conversational)
        loss_GAN_ConvToLit = criterion_GAN(discriminator_Literary(fake_literary), valid)

        # Cycle consistency: translating there and back should reproduce the input.
        recovered_literary = generator_ConvToLit(fake_conversational)
        loss_cycle_lit = criterion_cycle(recovered_literary, real_literary)

        recovered_conversational = generator_LitToConv(fake_literary)
        loss_cycle_conv = criterion_cycle(recovered_conversational, real_conversational)

        loss_cycle = (loss_cycle_lit + loss_cycle_conv) / 2

        # Identity: a generator fed a vector already in its target style should return it unchanged.
        identity_conversational = generator_LitToConv(real_conversational)
        loss_identity_conv = criterion_identity(identity_conversational, real_conversational)

        identity_literary = generator_ConvToLit(real_literary)
        loss_identity_lit = criterion_identity(identity_literary, real_literary)

        loss_identity = (loss_identity_conv + loss_identity_lit) / 2

        # Total generator loss: both adversarial terms plus weighted cycle and identity terms.
        loss_G =  (loss_GAN_LitToConv + loss_GAN_ConvToLit) + \
                  lambda_cycle * loss_cycle + \
                  lambda_identity * loss_identity
        loss_G.backward()
        optimizer_G.step()

        train_loss_D_lit_accum += loss_D_lit.item()
        train_loss_D_conv_accum += loss_D_conv.item()
        train_loss_G_accum += loss_G.item()
        train_loss_cycle_accum += loss_cycle.item()
        train_loss_identity_accum += loss_identity.item()
        num_train_batches += 1

        # Progress line every `print_batch_interval` batches (including batch 0).
        if i % print_batch_interval == 0:
            print(f"Эпоха [{epoch + 1}/{num_epochs}] Батч [{i}/{len(train_dataloader)}] "
                  f"D_lit_loss: {loss_D_lit.item():.4f} D_conv_loss: {loss_D_conv.item():.4f} "
                  f"G_loss: {loss_G.item():.4f}")

    # ---- Validation: same losses, eval mode, no gradient tracking, no optimizer steps ----
    generator_LitToConv.eval()
    generator_ConvToLit.eval()
    discriminator_Literary.eval()
    discriminator_Conversational.eval()

    val_loss_D_lit_accum = 0.0
    val_loss_D_conv_accum = 0.0
    val_loss_G_accum = 0.0
    val_loss_cycle_accum = 0.0
    val_loss_identity_accum = 0.0
    num_val_batches = 0

    with torch.no_grad():
        for i_val, (val_literary, val_conversational) in enumerate(val_dataloader):
            val_literary = val_literary.to(device)
            val_conversational = val_conversational.to(device)
            val_valid = torch.ones(val_literary.size(0), 1).to(device)
            val_fake = torch.zeros(val_literary.size(0), 1).to(device)

            # Discriminator losses on validation data.
            val_loss_D_lit_real = criterion_GAN(discriminator_Literary(val_literary), val_valid)
            val_fake_literary = generator_ConvToLit(val_conversational)
            val_loss_D_lit_fake = criterion_GAN(discriminator_Literary(val_fake_literary), val_fake)
            val_loss_D_lit_val = (val_loss_D_lit_real + val_loss_D_lit_fake) / 2

            val_loss_D_conv_real = criterion_GAN(discriminator_Conversational(val_conversational), val_valid)
            val_fake_conversational = generator_LitToConv(val_literary)
            val_loss_D_conv_fake = criterion_GAN(discriminator_Conversational(val_fake_conversational), val_fake)
            val_loss_D_conv_val = (val_loss_D_conv_real + val_loss_D_conv_fake) / 2

            # Generator adversarial terms reuse the generated vectors from above.
            val_loss_GAN_LitToConv = criterion_GAN(discriminator_Conversational(val_fake_conversational), val_valid)
            val_loss_GAN_ConvToLit = criterion_GAN(discriminator_Literary(val_fake_literary), val_valid)

            val_recovered_literary = generator_ConvToLit(val_fake_conversational)
            val_loss_cycle_lit = criterion_cycle(val_recovered_literary, val_literary)
            val_recovered_conversational = generator_LitToConv(val_fake_literary)
            val_loss_cycle_conv = criterion_cycle(val_recovered_conversational, val_conversational)
            val_loss_cycle_val = (val_loss_cycle_lit + val_loss_cycle_conv) / 2

            val_identity_conversational = generator_LitToConv(val_conversational)
            val_loss_identity_conv = criterion_identity(val_identity_conversational, val_conversational)
            val_identity_literary = generator_ConvToLit(val_literary)
            val_loss_identity_lit = criterion_identity(val_identity_literary, val_literary)
            val_loss_identity_val = (val_loss_identity_conv + val_loss_identity_lit) / 2

            val_loss_G_val = (val_loss_GAN_LitToConv + val_loss_GAN_ConvToLit) + \
                            lambda_cycle * val_loss_cycle_val + \
                            lambda_identity * val_loss_identity_val

            val_loss_D_lit_accum += val_loss_D_lit_val.item()
            val_loss_D_conv_accum += val_loss_D_conv_val.item()
            val_loss_G_accum += val_loss_G_val.item()
            val_loss_cycle_accum += val_loss_cycle_val.item()
            val_loss_identity_accum += val_loss_identity_val.item()
            num_val_batches += 1

    # Epoch averages; these divisions raise ZeroDivisionError if the validation loader yields no batches.
    avg_val_loss_D_lit = val_loss_D_lit_accum / num_val_batches
    avg_val_loss_D_conv = val_loss_D_conv_accum / num_val_batches
    avg_val_loss_G = val_loss_G_accum / num_val_batches
    avg_val_loss_cycle = val_loss_cycle_accum / num_val_batches
    avg_val_loss_identity = val_loss_identity_accum / num_val_batches

    # Back to train mode (the start of the next epoch sets it again).
    generator_LitToConv.train()
    generator_ConvToLit.train()
    discriminator_Literary.train()
    discriminator_Conversational.train()

    # Record the epoch averages.
    history['D_lit_train'].append(train_loss_D_lit_accum / num_train_batches)
    history['D_conv_train'].append(train_loss_D_conv_accum / num_train_batches)
    history['G_train'].append(train_loss_G_accum / num_train_batches)
    history['cycle_train'].append(train_loss_cycle_accum / num_train_batches)
    history['identity_train'].append(train_loss_identity_accum / num_train_batches)

    history['D_lit_val'].append(avg_val_loss_D_lit)
    history['D_conv_val'].append(avg_val_loss_D_conv)
    history['G_val'].append(avg_val_loss_G)
    history['cycle_val'].append(avg_val_loss_cycle)
    history['identity_val'].append(avg_val_loss_identity)

    print(f"Эпоха [{epoch + 1}/{num_epochs}] Обучение--- "
          f"D_lit_loss: {history['D_lit_train'][-1]:.4f} D_conv_loss: {history['D_conv_train'][-1]:.4f} "
          f"G_loss: {history['G_train'][-1]:.4f} cycle_loss: {history['cycle_train'][-1]:.4f} identity_loss: {history['identity_train'][-1]:.4f}")
    print(f"Эпоха [{epoch + 1}/{num_epochs}] Валидация --- "
          f"Val_D_lit_loss: {history['D_lit_val'][-1]:.4f} Val_D_conv_loss: {history['D_conv_val'][-1]:.4f} "
          f"Val_G_loss: {history['G_val'][-1]:.4f} Val_cycle_loss: {history['cycle_val'][-1]:.4f} Val_identity_loss: {history['identity_val'][-1]:.4f}")

    # Checkpoint and plots are refreshed after every epoch; `plot_loss_interval_epochs` is not consulted here.
    save_models(epoch + 1, generator_LitToConv, generator_ConvToLit, discriminator_Literary, discriminator_Conversational, optimizer_G, optimizer_D_lit, optimizer_D_conv)
    plot_losses(history, epoch + 1)

print("Обучение завершено!")

# Final redraw of the plots once all epochs are done.
plot_losses(history, num_epochs)

# Подготовка и обучение для 3 и 4 версий

In [31]:
def cosine_distance_loss(output, target):
    """Return the mean cosine distance between paired vectors, scaled to [0, 1].

    The cosine similarity is computed with the default ``dim`` of
    ``F.cosine_similarity`` and mapped from [-1, 1] to a distance via
    ``(1 - similarity) / 2``, so identical directions give 0 and opposite
    directions give 1. The per-pair distances are averaged into one scalar.

    Args:
        output: Tensor of predicted vectors.
        target: Tensor of reference vectors, compatible in shape with ``output``.

    Returns:
        Scalar tensor holding the averaged distance.
    """
    similarity = F.cosine_similarity(output, target)
    return ((1 - similarity) / 2).mean()

In [32]:
# Adversarial loss: binary cross-entropy between discriminator outputs and the real/fake labels.
criterion_GAN = nn.BCELoss()
# Cycle-consistency is measured with two complementary criteria: element-wise L1 and
# cosine distance (see cosine_distance_loss). The training loop combines them with weights.
criterion_cycle_l1 = nn.L1Loss() 
criterion_cycle_cosine = cosine_distance_loss 
# The identity loss uses the same pair of criteria.
criterion_identity_l1 = nn.L1Loss() 
criterion_identity_cosine = cosine_distance_loss

In [33]:
# Training loop for model versions 3 and 4. Every epoch makes one pass over train_dataloader
# (both discriminators are updated, then the generators, for each batch), one gradient-free
# pass over val_dataloader, appends the epoch averages to `history`, and calls save_models.
# The cycle and identity terms are weighted sums of an L1 loss and a cosine-distance loss.
# The models, optimizers, dataloaders, `history`, the lambda_* weights, plot_losses and
# save_models are expected to be defined by cells outside this one.
for epoch in range(num_epochs):
    generator_LitToConv.train()
    generator_ConvToLit.train()
    discriminator_Literary.train()
    discriminator_Conversational.train()

    train_loss_D_lit_accum = 0.0
    train_loss_D_conv_accum = 0.0
    train_loss_G_accum = 0.0
    train_loss_cycle_accum = 0.0
    train_loss_identity_accum = 0.0
    num_train_batches = 0

    for i, (real_literary, real_conversational) in enumerate(train_dataloader):
        real_literary = real_literary.to(device)
        real_conversational = real_conversational.to(device)

        # Discriminator targets (1 = real, 0 = generated), allocated directly on the device
        # instead of being built on the CPU and copied over for every batch.
        valid = torch.ones(real_literary.size(0), 1, device=device)
        fake = torch.zeros(real_literary.size(0), 1, device=device)

        optimizer_D_lit.zero_grad()
        optimizer_D_conv.zero_grad()

        # Literary discriminator step. The generated batch is detached so that this backward
        # pass stops at the discriminator and does not reach the generator.
        loss_D_lit_real = criterion_GAN(discriminator_Literary(real_literary), valid)
        fake_literary = generator_ConvToLit(real_conversational)
        loss_D_lit_fake = criterion_GAN(discriminator_Literary(fake_literary.detach()), fake)
        loss_D_lit = (loss_D_lit_real + loss_D_lit_fake) / 2
        loss_D_lit.backward()
        optimizer_D_lit.step()

        # Conversational discriminator step, same scheme.
        loss_D_conv_real = criterion_GAN(discriminator_Conversational(real_conversational), valid)
        fake_conversational = generator_LitToConv(real_literary)
        loss_D_conv_fake = criterion_GAN(discriminator_Conversational(fake_conversational.detach()), fake)
        loss_D_conv = (loss_D_conv_real + loss_D_conv_fake) / 2
        loss_D_conv.backward()
        optimizer_D_conv.step()

        optimizer_G.zero_grad()

        # GAN Loss
        # fake_conversational and fake_literary from the discriminator steps are reused here:
        # the generators have not been updated since they were produced, so running them
        # again on the same inputs would only repeat the forward pass. The (non-detached)
        # tensors still carry the generator graph needed for this backward pass.
        loss_GAN_LitToConv = criterion_GAN(discriminator_Conversational(fake_conversational), valid)
        loss_GAN_ConvToLit = criterion_GAN(discriminator_Literary(fake_literary), valid)

        # Cycle consistency: literary -> conversational -> literary.
        recovered_literary = generator_ConvToLit(fake_conversational)
        loss_cycle_lit_l1 = criterion_cycle_l1(recovered_literary, real_literary)
        loss_cycle_lit_cosine = criterion_cycle_cosine(recovered_literary, real_literary)
        loss_cycle_lit = lambda_cycle_l1 * loss_cycle_lit_l1 + lambda_cycle_cosine * loss_cycle_lit_cosine

        # Cycle consistency: conversational -> literary -> conversational.
        recovered_conversational = generator_LitToConv(fake_literary)
        loss_cycle_conv_l1 = criterion_cycle_l1(recovered_conversational, real_conversational)
        loss_cycle_conv_cosine = criterion_cycle_cosine(recovered_conversational, real_conversational)
        loss_cycle_conv = lambda_cycle_l1 * loss_cycle_conv_l1 + lambda_cycle_cosine * loss_cycle_conv_cosine

        loss_cycle = (loss_cycle_lit + loss_cycle_conv) / 2

        # Identity: a generator fed a sample that is already in its target domain is
        # penalised for changing it.
        identity_conversational = generator_LitToConv(real_conversational)
        loss_identity_conv_l1 = criterion_identity_l1(identity_conversational, real_conversational)
        loss_identity_conv_cosine = criterion_identity_cosine(identity_conversational, real_conversational)
        loss_identity_conv = lambda_identity_l1 * loss_identity_conv_l1 + lambda_identity_cosine * loss_identity_conv_cosine

        identity_literary = generator_ConvToLit(real_literary)
        loss_identity_lit_l1 = criterion_identity_l1(identity_literary, real_literary)
        loss_identity_lit_cosine = criterion_identity_cosine(identity_literary, real_literary)
        loss_identity_lit = lambda_identity_l1 * loss_identity_lit_l1 + lambda_identity_cosine * loss_identity_lit_cosine

        loss_identity = (loss_identity_conv + loss_identity_lit) / 2

        # NOTE(review): loss_cycle / loss_identity are already weighted by the *_l1 / *_cosine
        # lambdas above and are scaled again by lambda_cycle / lambda_identity here —
        # confirm that the double weighting is intended.
        loss_G =  (loss_GAN_LitToConv + loss_GAN_ConvToLit) + \
                  lambda_cycle * loss_cycle + \
                  lambda_identity * loss_identity
        loss_G.backward()
        optimizer_G.step()

        train_loss_D_lit_accum += loss_D_lit.item()
        train_loss_D_conv_accum += loss_D_conv.item()
        train_loss_G_accum += loss_G.item()
        train_loss_cycle_accum += loss_cycle.item()
        train_loss_identity_accum += loss_identity.item()
        num_train_batches += 1

        if i % print_batch_interval == 0:
            print(f"Эпоха [{epoch + 1}/{num_epochs}] Батч [{i}/{len(train_dataloader)}] "
                  f"D_lit_loss: {loss_D_lit.item():.4f} D_conv_loss: {loss_D_conv.item():.4f} "
                  f"G_loss: {loss_G.item():.4f}")

    generator_LitToConv.eval()
    generator_ConvToLit.eval()
    discriminator_Literary.eval()
    discriminator_Conversational.eval()

    val_loss_D_lit_accum = 0.0
    val_loss_D_conv_accum = 0.0
    val_loss_G_accum = 0.0
    val_loss_cycle_accum = 0.0
    val_loss_identity_accum = 0.0
    num_val_batches = 0

    # Validation mirrors the training losses without any parameter update.
    with torch.no_grad():
        for val_literary, val_conversational in val_dataloader:
            val_literary = val_literary.to(device)
            val_conversational = val_conversational.to(device)
            val_valid = torch.ones(val_literary.size(0), 1, device=device)
            val_fake = torch.zeros(val_literary.size(0), 1, device=device)

            val_loss_D_lit_real = criterion_GAN(discriminator_Literary(val_literary), val_valid)
            val_fake_literary = generator_ConvToLit(val_conversational)
            val_loss_D_lit_fake = criterion_GAN(discriminator_Literary(val_fake_literary), val_fake)
            val_loss_D_lit_val = (val_loss_D_lit_real + val_loss_D_lit_fake) / 2

            val_loss_D_conv_real = criterion_GAN(discriminator_Conversational(val_conversational), val_valid)
            val_fake_conversational = generator_LitToConv(val_literary)
            val_loss_D_conv_fake = criterion_GAN(discriminator_Conversational(val_fake_conversational), val_fake)
            val_loss_D_conv_val = (val_loss_D_conv_real + val_loss_D_conv_fake) / 2

            val_loss_GAN_LitToConv = criterion_GAN(discriminator_Conversational(val_fake_conversational), val_valid)
            val_loss_GAN_ConvToLit = criterion_GAN(discriminator_Literary(val_fake_literary), val_valid)

            # The translations computed above are reused for the cycle terms; nothing has
            # changed since, so a second forward pass would give the same tensors.
            val_recovered_literary = generator_ConvToLit(val_fake_conversational)
            val_recovered_conversational = generator_LitToConv(val_fake_literary)

            val_loss_cycle_lit_l1 = criterion_cycle_l1(val_recovered_literary, val_literary)
            val_loss_cycle_lit_cosine = criterion_cycle_cosine(val_recovered_literary, val_literary)
            val_loss_cycle_lit_val = lambda_cycle_l1 * val_loss_cycle_lit_l1 + lambda_cycle_cosine * val_loss_cycle_lit_cosine

            val_loss_cycle_conv_l1 = criterion_cycle_l1(val_recovered_conversational, val_conversational)
            val_loss_cycle_conv_cosine = criterion_cycle_cosine(val_recovered_conversational, val_conversational)
            val_loss_cycle_conv_val = lambda_cycle_l1 * val_loss_cycle_conv_l1 + lambda_cycle_cosine * val_loss_cycle_conv_cosine

            val_loss_cycle_val = (val_loss_cycle_lit_val + val_loss_cycle_conv_val) / 2

            val_identity_conversational = generator_LitToConv(val_conversational)
            val_identity_literary = generator_ConvToLit(val_literary)

            val_loss_identity_conv_l1 = criterion_identity_l1(val_identity_conversational, val_conversational)
            val_loss_identity_conv_cosine = criterion_identity_cosine(val_identity_conversational, val_conversational)
            val_loss_identity_conv_val = lambda_identity_l1 * val_loss_identity_conv_l1 + lambda_identity_cosine * val_loss_identity_conv_cosine

            val_loss_identity_lit_l1 = criterion_identity_l1(val_identity_literary, val_literary)
            val_loss_identity_lit_cosine = criterion_identity_cosine(val_identity_literary, val_literary)
            val_loss_identity_lit_val = lambda_identity_l1 * val_loss_identity_lit_l1 + lambda_identity_cosine * val_loss_identity_lit_cosine

            val_loss_identity_val = (val_loss_identity_conv_val + val_loss_identity_lit_val) / 2

            val_loss_G_val = (val_loss_GAN_LitToConv + val_loss_GAN_ConvToLit) + \
                            lambda_cycle * val_loss_cycle_val + \
                            lambda_identity * val_loss_identity_val

            val_loss_D_lit_accum += val_loss_D_lit_val.item()
            val_loss_D_conv_accum += val_loss_D_conv_val.item()
            val_loss_G_accum += val_loss_G_val.item()
            val_loss_cycle_accum += val_loss_cycle_val.item()
            val_loss_identity_accum += val_loss_identity_val.item()
            num_val_batches += 1

    # A loader that yields no batches would make the averages divide by zero; dividing by
    # NaN instead records the epoch as "no data" without aborting the run.
    train_batch_divisor = num_train_batches or float("nan")
    val_batch_divisor = num_val_batches or float("nan")

    avg_val_loss_D_lit = val_loss_D_lit_accum / val_batch_divisor
    avg_val_loss_D_conv = val_loss_D_conv_accum / val_batch_divisor
    avg_val_loss_G = val_loss_G_accum / val_batch_divisor
    avg_val_loss_cycle = val_loss_cycle_accum / val_batch_divisor
    avg_val_loss_identity = val_loss_identity_accum / val_batch_divisor

    generator_LitToConv.train()
    generator_ConvToLit.train()
    discriminator_Literary.train()
    discriminator_Conversational.train()

    # Record and print the losses for each epoch
    history['D_lit_train'].append(train_loss_D_lit_accum / train_batch_divisor)
    history['D_conv_train'].append(train_loss_D_conv_accum / train_batch_divisor)
    history['G_train'].append(train_loss_G_accum / train_batch_divisor)
    history['cycle_train'].append(train_loss_cycle_accum / train_batch_divisor)
    history['identity_train'].append(train_loss_identity_accum / train_batch_divisor)

    history['D_lit_val'].append(avg_val_loss_D_lit)
    history['D_conv_val'].append(avg_val_loss_D_conv)
    history['G_val'].append(avg_val_loss_G)
    history['cycle_val'].append(avg_val_loss_cycle)
    history['identity_val'].append(avg_val_loss_identity)

    print(f"Эпоха [{epoch + 1}/{num_epochs}] Обучение--- "
          f"D_lit_loss: {history['D_lit_train'][-1]:.4f} D_conv_loss: {history['D_conv_train'][-1]:.4f} "
          f"G_loss: {history['G_train'][-1]:.4f} cycle_loss: {history['cycle_train'][-1]:.4f} identity_loss: {history['identity_train'][-1]:.4f}")
    print(f"Эпоха [{epoch + 1}/{num_epochs}] Валидация --- "
          f"Val_D_lit_loss: {history['D_lit_val'][-1]:.4f} Val_D_conv_loss: {history['D_conv_val'][-1]:.4f} "
          f"Val_G_loss: {history['G_val'][-1]:.4f} Val_cycle_loss: {history['cycle_val'][-1]:.4f} Val_identity_loss: {history['identity_val'][-1]:.4f}")

    # Loss curves are drawn only every plot_loss_interval_epochs epochs; save_models is
    # called after every epoch.
    if (epoch + 1) % plot_loss_interval_epochs == 0:
        plot_losses(history, epoch + 1)

    save_models(epoch + 1, generator_LitToConv, generator_ConvToLit, discriminator_Literary, discriminator_Conversational, optimizer_G, optimizer_D_lit, optimizer_D_conv)


print("Обучение завершено!")

Эпоха [1/100] Батч [0/17229] D_lit_loss: 0.6966 D_conv_loss: 0.6978 G_loss: 4.2750
Эпоха [1/100] Батч [500/17229] D_lit_loss: 0.6845 D_conv_loss: 0.6897 G_loss: 2.3325
Эпоха [1/100] Батч [1000/17229] D_lit_loss: 0.6862 D_conv_loss: 0.6944 G_loss: 2.1245
Эпоха [1/100] Батч [1500/17229] D_lit_loss: 0.6918 D_conv_loss: 0.6914 G_loss: 2.0802
Эпоха [1/100] Батч [2000/17229] D_lit_loss: 0.7088 D_conv_loss: 0.6886 G_loss: 1.9754
Эпоха [1/100] Батч [2500/17229] D_lit_loss: 0.6810 D_conv_loss: 0.6826 G_loss: 2.0314
Эпоха [1/100] Батч [3000/17229] D_lit_loss: 0.6884 D_conv_loss: 0.6829 G_loss: 1.9735
Эпоха [1/100] Батч [3500/17229] D_lit_loss: 0.6950 D_conv_loss: 0.6933 G_loss: 1.9470
Эпоха [1/100] Батч [4000/17229] D_lit_loss: 0.6943 D_conv_loss: 0.6894 G_loss: 1.8855
Эпоха [1/100] Батч [4500/17229] D_lit_loss: 0.6888 D_conv_loss: 0.6882 G_loss: 1.8435
Эпоха [1/100] Батч [5000/17229] D_lit_loss: 0.6891 D_conv_loss: 0.6828 G_loss: 1.8170
Эпоха [1/100] Батч [5500/17229] D_lit_loss: 0.6982 D_conv_

# Ручные тесты

In [34]:
# Build the token <-> vector tables used by the manual tests: Navec word embeddings are
# projected to target_dim components with PCA, and punctuation marks get hand-made vectors
# that differ only in their last component.
path = 'navec_hudlit_v1_12B_500K_300d_100q.tar'
navec = Navec.load(path)

# One row per vocabulary word, in navec.vocab.words order.
embeddings = np.array([navec[word] for word in navec.vocab.words])

target_dim = 10
max_vector_length = 40
# A fixed random_state keeps the projection reproducible across kernel restarts when
# scikit-learn selects a randomized SVD solver; deterministic solvers ignore it.
# NOTE(review): decoding is only meaningful if this projection matches the one used to
# produce the training vectors — confirm against the vectorization code.
pca_random_state = 42
pca = PCA(n_components=target_dim, random_state=pca_random_state)
reduced_embeddings = pca.fit_transform(embeddings)

# Cast the whole matrix to float16 once; each dictionary value is one row of it.
reduced_embeddings_f16 = reduced_embeddings.astype(np.float16)
reduced_navec = dict(zip(navec.vocab.words, reduced_embeddings_f16))

# Value stored in the last component of each punctuation vector (all other components are 0).
# The three dash variants deliberately share the same value, as in the original table.
punkt_last_component = {
    ".": 1.0,
    "!": 0.9,
    "?": 0.8,
    ",": 0.5,
    ":": 0.6,
    "–": 0.4,
    "-": 0.4,
    "—": 0.4,
    "«": 0.2,
    "»": 0.3,
}
punkt_vectors = {
    mark: np.array([0.0] * (target_dim - 1) + [value], dtype=np.float16)
    for mark, value in punkt_last_component.items()
}

# Merged lookup tables for nearest-neighbour decoding; punctuation entries override any
# word entry with the same key.
combined_vectors = {**reduced_navec, **punkt_vectors}
combined_items = list(combined_vectors.keys())
combined_embeddings = np.array(list(combined_vectors.values()))

In [54]:
def text_to_vec(text):
    """Convert a sentence into the flat, left-padded vector that is fed to the generators.

    The text is stripped, filtered through a character whitelist, stripped of leading
    punctuation and spaces, tokenized with ``word_tokenize`` and truncated to
    ``max_vector_length`` tokens. Each token is looked up in ``reduced_navec``, then in
    ``punkt_vectors``, and otherwise replaced by the ``<unk>`` vector. Zero vectors are
    placed in front so the result always holds ``max_vector_length * target_dim`` values.

    Args:
        text: Input sentence as a string.

    Returns:
        float32 ``torch.Tensor`` of shape ``(1, max_vector_length * target_dim)`` on ``device``.
    """
    sentence = text.strip()
    # NOTE(review): inside the character class, ``–-—`` is a range (en dash..em dash), so a
    # plain hyphen "-" is removed; "ё"/"Ё" lie outside а-я/А-Я and are removed as well, and
    # letter case is kept as is. The pattern is left untouched because inference has to
    # match whatever preprocessing produced the training vectors — confirm before changing.
    sentence = re.sub("[^а-яА-Я–-—,.!?:«» ]", "", sentence)
    # Remove the whole leading run of punctuation/spaces in one call. A character-by-character
    # loop that keeps testing sentence[0] never terminates on such input.
    sentence = sentence.lstrip("–-—,.!?:«» ")

    tokens = word_tokenize(sentence)[:max_vector_length]
    padding_for = max_vector_length - len(tokens)
    vector = [np.zeros(target_dim)] * padding_for
    for word in tokens:
        # Word embeddings take precedence over the punctuation table.
        if word in reduced_navec:
            vector.append(reduced_navec[word])
        elif word in punkt_vectors:
            vector.append(punkt_vectors[word])
        else:
            vector.append(reduced_navec["<unk>"])

    vector = np.array(vector, dtype=np.float16).reshape(max_vector_length * target_dim)
    return torch.tensor(vector, dtype=torch.float32).unsqueeze(0).to(device)


def vec_to_text(vector):
    """Decode a flat sequence vector back into text by nearest-neighbour lookup.

    ``vector`` is reshaped to ``(max_vector_length, target_dim)``. Rows that are exactly
    zero (the padding written by ``text_to_vec``) are skipped; every other row is replaced
    by the token whose embedding in ``combined_embeddings`` is closest in Euclidean
    distance. Punctuation tokens are appended without a space, all other tokens with a
    leading space.

    Args:
        vector: ``torch.Tensor`` (on any device, with or without autograd history) or
            array-like holding ``max_vector_length * target_dim`` values.

    Returns:
        The reconstructed text as a string.
    """
    if isinstance(vector, torch.Tensor):
        # detach() lets tensors that still carry autograd history be converted too.
        vector = vector.detach().cpu().numpy()
    rows = np.asarray(vector).reshape(max_vector_length, target_dim)

    # The module-level combined_items / combined_embeddings tables are used as they are;
    # rebuilding the merged vocabulary on every call repeats the same work each time.
    tokens = []
    for row in rows:
        if np.all(row == 0):
            continue
        distances = scipy.spatial.distance.cdist(row.reshape(1, -1), combined_embeddings, metric='euclidean')[0]
        tokens.append(combined_items[np.argmin(distances)])

    pieces = [token if token in punkt_vectors else " " + token for token in tokens]
    return "".join(pieces).strip()

In [71]:
# Instantiate the version-3 networks on `device`. Both discriminators share one
# positional argument list; the generators rely on the default LSTM sizes of Generator.
_v3_discriminator_args = (
    vector_dimension,
    lstm_hidden_dim_discriminator,
    lstm_num_layers_discriminator,
    leaky_relu_negative_slope,
    discriminator_layer_size_hidden,
    discriminator_dropout_rate,
)

v3_generator_LitToConv = Generator(vector_dimension, vector_dimension).to(device)
v3_generator_ConvToLit = Generator(vector_dimension, vector_dimension).to(device)
v3_discriminator_Literary = Discriminator(*_v3_discriminator_args).to(device)
v3_discriminator_Conversational = Discriminator(*_v3_discriminator_args).to(device)

In [72]:
# Load the saved generator weights from output_dir; map_location remaps the stored tensors
# onto the current device.
# NOTE(review): output_dir must point at the directory of the version being tested —
# confirm it was reassigned after the version-1 configuration cell.
v3_generator_LitToConv.load_state_dict(torch.load(os.path.join(output_dir, 'generator_lit_to_conv.pth'), map_location=device))
v3_generator_ConvToLit.load_state_dict(torch.load(os.path.join(output_dir, 'generator_conv_to_lit.pth'), map_location=device))
# Switch both generators to evaluation mode for the manual tests. The last call is also
# the cell's displayed value (the module repr).
v3_generator_LitToConv.eval()
v3_generator_ConvToLit.eval()

Generator(
  (lstm): LSTM(400, 400, num_layers=3, batch_first=True)
  (linear_out): Linear(in_features=400, out_features=400, bias=True)
)

In [76]:
# Sample literary sentence for a manual check, encoded into the generator input format.
lit_text = "У лукоморья дуб зелёный Златая цепь на дубе том И днём и ночью кот учёный Всё ходит по цепи кругом"
lit_vec = text_to_vec(lit_text)

In [77]:
# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    # Literary -> conversational, then back to literary to inspect the round trip.
    v3_fake_conv = v3_generator_LitToConv(lit_vec)
    v3_fake_conv_to_lit = v3_generator_ConvToLit(v3_fake_conv)

In [78]:
# Show the source sentence, its decoded conversational translation, and the decoded
# round trip back to the literary domain.
print(lit_text)
print(vec_to_text(v3_fake_conv))
print(vec_to_text(v3_fake_conv_to_lit))

У лукоморья дуб зелёный Златая цепь на дубе том И днём и ночью кот учёный Всё ходит по цепи кругом
«««««««««««««««««««« <unk> лукоморья дуб <unk> <unk> цепь на дубе том <unk> <unk> и ночью кот <unk> <unk> ходит по цепи кругом
«««««««««««««««««««« <unk> лукоморья дуб <unk> <unk> цепь на дубе том <unk> <unk> и ночью кот <unk> <unk> ходит по пленки–
