# Начальная инициализация

In [1]:
import lightning
import pickle
from tqdm import tqdm
import pandas as pd
import torch
import sklearn.model_selection
import torchmetrics
import IPython.display

random_state = 42

In [2]:
lightning.seed_everything(random_state, workers=True)

Global seed set to 42


42

In [3]:
PATH = 'dumps/'
with open(PATH + 'X_train_ft_pre_trained.pkl', mode='rb') as file:
    X_train_ft_pre_trained = pickle.load(file)

with open(PATH + 'X_train_w2v_pre_trained.pkl', mode='rb') as file:
    X_train_w2v_pre_trained = pickle.load(file)

with open(PATH + 'X_test_w2v_pre_trained.pkl', mode='rb') as file:
    X_test_w2v_pre_trained = pickle.load(file)

with open(PATH + 'X_test_w2v_self_trained.pkl', mode='rb') as file:
    X_test_w2v_self_trained = pickle.load(file)

with open(PATH + 'X_test_ft_pre_trained.pkl', mode='rb') as file:
    X_test_ft_pre_trained = pickle.load(file)

with open(PATH + 'X_train_ft_self_trained.pkl', mode='rb') as file:
    X_train_ft_self_trained = pickle.load(file)

with open(PATH + 'X_test_ft_self_trained.pkl', mode='rb') as file:
    X_test_ft_self_trained = pickle.load(file)

with open(PATH + 'X_train_w2v_self_trained.pkl', mode='rb') as file:
    X_train_w2v_self_trained = pickle.load(file)

with open(PATH + 'y_train.pkl', mode='rb') as file:
    y_train = pickle.load(file)

with open(PATH + 'y_test.pkl', mode='rb') as file:
    y_test = pickle.load(file)

Сетка перебираемых гиперпараметров

In [4]:
param_grid = {
    'embeddings': ['w2v_pretrained', 'ft_pretrained', 'w2v_selftrained', 'ft_selftrained'],
    'layers_count': [2, 3],
    'learning_rate': [0.001],
    'layers_type': ['different', 'equal'],
    'batch_size': [64],
    'optimizer': [torch.optim.Adam],
    'epochs': [10],
    'activation_function': [torch.nn.functional.relu],
    "initialization": [None, torch.nn.init.kaiming_uniform_, torch.nn.init.xavier_uniform_],
    "regularization": ['None', 'dropout', 'l2_reg'],
    "normalization": [None, torch.nn.LayerNorm, torch.nn.BatchNorm1d],
    "scheduler": [None, torch.optim.lr_scheduler.ExponentialLR, torch.optim.lr_scheduler.MultiStepLR]
}

params_list = sklearn.model_selection.ParameterGrid(param_grid)
len(params_list)

1296

Класс для работы с данными

In [5]:
class NetData(lightning.LightningDataModule):
    def __init__(self, train_features=None, test_features=None, train_targets=None, test_targets=None, batch_size=None, random_state=None):
        super().__init__()
        
        self.batch_size = batch_size
        self.X_train, self.X_val, self.y_train, self.y_val = sklearn.model_selection.train_test_split(train_features, train_targets, random_state=random_state)
        self.X_test, self.y_test = test_features, test_targets

    def setup(self, stage=None):
        features_train = torch.tensor(self.X_train, dtype=torch.float32)
        targets_train = torch.tensor(self.y_train, dtype=torch.int32)
    
        features_val = torch.tensor(self.X_val, dtype=torch.float32)
        targets_val = torch.tensor(self.y_val, dtype=torch.int32)
    
        features_test = torch.tensor(self.X_test, dtype=torch.float32)
        targets_test = torch.tensor(self.y_test, dtype=torch.int32)
    
        self.trainset = torch.utils.data.TensorDataset(features_train, targets_train)
        self.valset = torch.utils.data.TensorDataset(features_val, targets_val)
        self.testset = torch.utils.data.TensorDataset(features_test, targets_test)
        
    def train_dataloader(self):
        return torch.utils.data.DataLoader(self.trainset, batch_size=self.batch_size)
    def val_dataloader(self):
        return torch.utils.data.DataLoader(self.valset, batch_size=self.batch_size)
    def test_dataloader(self):
        return torch.utils.data.DataLoader(self.testset, batch_size=self.batch_size)

Нейронная сеть

In [6]:
#Архитектура нейронной сети
class Net(lightning.LightningModule):
    def __init__(self,
                 learning_rate=None,
                 layers_count=None,
                 layers_type=None,
                 optimizer=None,
                 activation=None,
                 initialization=None,
                 regularization=None,
                 normalization=None,
                 scheduler=None
                 ):
        
        super().__init__()
        self.layers = torch.nn.ModuleList()
        input_dim = 2**(2 + layers_count)
        
        self.layers.append( self.add_linear_layer(input=300,
                                                  output=input_dim,
                                                  normalization=normalization,
                                                  regularization=regularization) )
        for _ in range(layers_count):
            if layers_type == 'equal':
                self.layers.append( self.add_linear_layer(input=input_dim, 
                                                          output=input_dim, 
                                                          normalization=normalization, 
                                                          regularization=regularization) )
            elif layers_type == "different":
                self.layers.append( self.add_linear_layer(input=input_dim, 
                                                          output=input_dim // 2, 
                                                          normalization=normalization, 
                                                          regularization=regularization) )
                input_dim //= 2
        self.layers.append( self.add_linear_layer(input=input_dim,
                                                  output=2,
                                                  normalization=normalization,
                                                  regularization=regularization) )

        self.activation = activation

        self.learning_rate = learning_rate
        self.optimizer = optimizer
        self.f1 = torchmetrics.classification.F1Score(task='multiclass', num_classes=2, multidim_average='global', average='weighted')
        if initialization:
            self.iw(torch.nn.Linear, initialization)
        self.regularization = regularization
        self.scheduler = scheduler

    def iw(self, ModuleClass, weights_initializator):
        for module in self.modules():
            if isinstance(module, ModuleClass):
                weights_initializator(module.weight)
    
    def add_linear_layer(self, input=None, output=None, normalization=None, regularization=None) -> torch.nn.Sequential:
        layer = torch.nn.Sequential()
        layer.append(torch.nn.Linear(in_features=input, out_features=output))
        if normalization:
            layer.append( normalization(output) )
        if regularization == 'dropout':
            layer.append( torch.nn.Dropout1d(p=0.2) )
        return layer

    def forward(self, x):
        for layer in self.layers[:-1]:
            x = self.activation(layer(x))
        x = self.layers[-1](x)
        return torch.nn.functional.log_softmax(x, dim=-1)
    
    def configure_optimizers(self):
        optimizer = self.optimizer(self.parameters(), lr=self.learning_rate, weight_decay=(0.01 if self.regularization == 'l2_reg' else 0))
        if self.scheduler is None:
            return optimizer
        else:
            if self.scheduler.__name__ == 'ExponentialLR':
                scheduler = self.scheduler(optimizer, gamma=0.9)
            elif self.scheduler.__name__ == 'MultiStepLR':
                scheduler = self.scheduler(optimizer, milestones=list(range(1, 15, 3)), gamma=0.9)
            return [optimizer], [scheduler]
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = torch.nn.functional.cross_entropy(y_hat, y.long())
        self.log('loss', loss, on_epoch=True)
        self.log("train_f1", self.f1(y_hat, y), on_epoch=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        pred = self(x)
        self.log("val_f1", self.f1(pred, y), on_epoch=True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        pred = self(x)
        self.log("test_f1", self.f1(pred, y), on_epoch=True)

In [7]:
total = []

for params in tqdm(params_list): 

    embeddings = params['embeddings']
    learningRate = params['learning_rate']
    optimizer_type = params['optimizer']
    layers_count = params['layers_count']
    batch_size = params['batch_size']
    epochs = params['epochs']
    activation_function = params['activation_function']
    layers_type = params['layers_type']
    initialization = params['initialization']
    regularization = params['regularization']
    normalization = params['normalization']
    scheduler = params['scheduler']


    net = Net(learning_rate=learningRate,
          layers_count=layers_count,
          layers_type=layers_type,
          optimizer=optimizer_type,
          activation=activation_function,
          initialization=initialization,
          regularization=regularization,
          normalization=normalization,
          scheduler=scheduler,
        )
    
    dm = None
    if embeddings == 'w2v_pretrained':
        dm = NetData(
            train_features=X_train_w2v_pre_trained,
            test_features=X_test_w2v_pre_trained,
            train_targets=y_train,
            test_targets=y_test,
            batch_size=batch_size,
            random_state=random_state
        )
    elif embeddings == 'w2v_selftrained':
        dm = NetData(
            train_features=X_train_w2v_self_trained,
            test_features=X_test_w2v_self_trained,
            train_targets=y_train,
            test_targets=y_test,
            batch_size=batch_size,
            random_state=random_state
        )
    elif embeddings == 'ft_pretrained':
        dm = NetData(
            train_features=X_train_ft_pre_trained,
            test_features=X_test_ft_pre_trained,
            train_targets=y_train,
            test_targets=y_test,
            batch_size=batch_size,
            random_state=random_state
        )
    elif embeddings == 'ft_selftrained':
        dm = NetData(
            train_features=X_train_ft_self_trained,
            test_features=X_test_ft_self_trained,
            train_targets=y_train,
            test_targets=y_test,
            batch_size=batch_size,
            random_state=random_state
        )
    

    # CPU is better for FCNN than GPU
    trainer = lightning.Trainer(logger=False, 
                                max_epochs=epochs, 
                                enable_progress_bar=True, 
                                deterministic=True, 
                                inference_mode=True, 
                                enable_checkpointing=False, 
                                accelerator='cpu')

    trainer.fit(net, datamodule=dm)
    f1_train = trainer.logged_metrics['train_f1_epoch'].item()
    f1_val = trainer.logged_metrics['val_f1'].item()
    f1_test = trainer.test(net, datamodule=dm)[-1]['test_f1']    
    
    
    total.append({
        'embeddings': embeddings,
        'Layers type': layers_type,
        'Optimizer': optimizer_type.__name__,
        'Batch': batch_size,
        'Count of layers': layers_count,
        'Epochs': epochs,
        'Learning rate': learningRate,
        'Activation function': activation_function.__name__,
        'Initialization': initialization.__name__ if initialization else None,
        'Regularization': regularization,
        'Normalization': normalization.__name__ if normalization else None,
        'Scheduler': scheduler.__name__ if scheduler else None,
        'F1-train': round(f1_train, 4),
        'F1-val': round(f1_val, 4),
        'F1-test': round(f1_test, 4)
    })
    
    IPython.display.clear_output(wait=True) 

100%|█████████████████████████████████████████████████████████████████████████████████████████| 1296/1296 [9:38:22<00:00, 26.78s/it]


# Сводная таблица

In [8]:
pd.set_option('display.max_rows', None)
summary = pd.DataFrame.from_dict(total)
summary_sort = summary.sort_values(by='F1-val', ascending=False)
summary_sort

Unnamed: 0,embeddings,Layers type,Optimizer,Batch,Count of layers,Epochs,Learning rate,Activation function,Initialization,Regularization,Normalization,Scheduler,F1-train,F1-val,F1-test
1055,ft_selftrained,equal,Adam,64,3,10,0.001,relu,,,,MultiStepLR,0.7235,0.6919,0.6974
1082,ft_selftrained,different,Adam,64,2,10,0.001,relu,kaiming_uniform_,,,MultiStepLR,0.7027,0.6914,0.6192
1054,ft_selftrained,equal,Adam,64,3,10,0.001,relu,,,,ExponentialLR,0.7314,0.6914,0.6
1028,ft_selftrained,different,Adam,64,3,10,0.001,relu,,,,MultiStepLR,0.7007,0.6907,0.6406
1189,ft_selftrained,different,Adam,64,2,10,0.001,relu,xavier_uniform_,,,ExponentialLR,0.71,0.6895,0.68
1109,ft_selftrained,equal,Adam,64,2,10,0.001,relu,kaiming_uniform_,,,MultiStepLR,0.7139,0.689,0.6805
1053,ft_selftrained,equal,Adam,64,3,10,0.001,relu,,,,,0.7331,0.6883,0.5314
999,ft_selftrained,equal,Adam,64,2,10,0.001,relu,,,,,0.7089,0.688,0.68
1271,ft_selftrained,equal,Adam,64,3,10,0.001,relu,xavier_uniform_,,,MultiStepLR,0.7507,0.6878,0.6784
972,ft_selftrained,different,Adam,64,2,10,0.001,relu,,,,,0.6954,0.6874,0.6006


# Выводы

Наиболее хороший результат был получен при использовании стохастического градиентного спуска. Также в пятерку лучших результатов попали 3 стохастический градиентных спуска и 2 AdamW оптимизатора, что может объясняться большим количеством эпох и относительно малым рамзером пакета.