# Начальная инициализация

In [1]:
import datetime
print(datetime.datetime.now().strftime('%d-%B-%Y %H:%M:%S'))

26-May-2023 18:38:29


In [2]:
# Подключение библиотек

# !pip install torch torchvision torchaudio
# !pip install lightning
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data_utils
import lightning.pytorch as pl
import lightning as L

# !pip install scikit-learn
from sklearn.model_selection import ParameterGrid

import torch.optim as optim
# !pip install pandas
import pandas as pd
import numpy as np


import os
import random

In [3]:
random_state = 42

torch.use_deterministic_algorithms(True)

%env CUBLAS_WORKSPACE_CONFIG=:4096:8
%env PYTHONHASHSEED=42

def set_random_state(random_state):
    torch.manual_seed(random_state)
    random.seed(random_state)
    np.random.seed(random_state)

    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(random_state)
        torch.cuda.manual_seed(random_state)

        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        
set_random_state(random_state)

pl.seed_everything(random_state, workers=True)

Global seed set to 42


env: CUBLAS_WORKSPACE_CONFIG=:4096:8
env: PYTHONHASHSEED=42


42

In [2]:
print(datetime.datetime.now().strftime('%d-%B-%Y %H:%M:%S'))

26-May-2023 18:38:39


# Обучение нейронных сетей

In [5]:
import pickle
PATH = 'dumps/'
with open(PATH + 'X_train_ft_pre_trained.pkl', mode='rb') as file:
    X_train_ft_pre_trained = pickle.load(file)

with open(PATH + 'X_train_w2v_pre_trained.pkl', mode='rb') as file:
    X_train_w2v_pre_trained = pickle.load(file)

with open(PATH + 'X_test_w2v_pre_trained.pkl', mode='rb') as file:
    X_test_w2v_pre_trained = pickle.load(file)

with open(PATH + 'X_test_w2v_self_trained.pkl', mode='rb') as file:
    X_test_w2v_self_trained = pickle.load(file)

with open(PATH + 'WordsIdPad_train.pkl', mode='rb') as file:
    WordsIdPad_train = pickle.load(file)

with open(PATH + 'X_test_ft_pre_trained.pkl', mode='rb') as file:
    X_test_ft_pre_trained = pickle.load(file)

with open(PATH + 'X_train_ft_self_trained.pkl', mode='rb') as file:
    X_train_ft_self_trained = pickle.load(file)

with open(PATH + 'WordsIdPad_test.pkl', mode='rb') as file:
    WordsIdPad_test = pickle.load(file)

with open(PATH + 'X_test_ft_self_trained.pkl', mode='rb') as file:
    X_test_ft_self_trained = pickle.load(file)

with open(PATH + 'X_train_w2v_self_trained.pkl', mode='rb') as file:
    X_train_w2v_self_trained = pickle.load(file)

with open(PATH + 'y_train.pkl', mode='rb') as file:
    y_train = pickle.load(file)

with open(PATH + 'y_test.pkl', mode='rb') as file:
    y_test = pickle.load(file)



In [6]:
param_grid = {
    "embeddings": ['w2v_pretrained', 'ft_pretrained', 'w2v_selftrained', 'ft_selftrained', 'torch.nn'],
    "conv_layers": [3],
    "activation_fn": [F.relu],
    "learning_rate": [0.001],
    "epochs": [5, 20],
    "batch_size": [2048],
    "optimizer": [optim.AdamW],
    "kernel_size": [3, 4],
    "stride": [1, 2],
    "layers_count": [1],
    "type_rnn": [torch.nn.RNN, torch.nn.LSTM, torch.nn.GRU],
    "hidden_size": [100],
    "bidirectional": [False, True],
    "initialization": [None, torch.nn.init.kaiming_uniform_, torch.nn.init.xavier_uniform_]
}

params_list = ParameterGrid(param_grid)
len(params_list)

720

In [7]:
from sklearn.model_selection import train_test_split

class NetData(pl.LightningDataModule):
    def __init__(self, train_features=None, test_features=None, train_targets=None, test_targets=None, batch_size=None, random_state=None, internal=None):
        super().__init__()
        
        self.batch_size = batch_size
        self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(train_features, train_targets, random_state=random_state)
        self.X_test, self.y_test = test_features, test_targets
        self.datatype = torch.int if internal else torch.float

    def setup(self, stage=None):

        features_train = torch.tensor(self.X_train, dtype=self.datatype)
        targets_train = torch.tensor(self.y_train, dtype=torch.int32)
    
        features_val = torch.tensor(self.X_val, dtype=self.datatype)
        targets_val = torch.tensor(self.y_val, dtype=torch.int32)
    
        features_test = torch.tensor(self.X_test, dtype=self.datatype)
        targets_test = torch.tensor(self.y_test, dtype=torch.int32)
    
        self.trainset = data_utils.TensorDataset(features_train, targets_train)
        self.valset = data_utils.TensorDataset(features_val, targets_val)
        self.testset = data_utils.TensorDataset(features_test, targets_test)
        
    def train_dataloader(self):
        return data_utils.DataLoader(self.trainset, batch_size=self.batch_size)
    def val_dataloader(self):
        return data_utils.DataLoader(self.valset, batch_size=self.batch_size)
    def test_dataloader(self):
        return data_utils.DataLoader(self.testset, batch_size=self.batch_size)

In [8]:
from torchmetrics.classification import F1Score

class Net(pl.LightningModule):
    def __init__(self,
                 layers_count=None, 
                 activation_function=None,
                 type_rnn=None,
                 hidden_size=None,
                 bidirectional=None,
                 optimizer=None,
                 learningRate=None,
                 internal=None,
                 conv_layers_count=None,
                 conv_kernel=None,
                 conv_stride=None,
                 initialization=None
                ):
        
        super().__init__()

        
        self.embedding_size = 300
        self.num_words = 10000
        self.seq_len = 20 if internal else 1
        self.optim = optimizer
        self.learningRate = learningRate

        self.f1 = F1Score(task='multiclass', num_classes=2, multidim_average='global', average='weighted')
        
        self.embedding = nn.Embedding(self.num_words + 1, self.embedding_size, padding_idx=0) if internal else None
        
        self.fa = activation_function

        output_shape = self.seq_len
        conv_padding = conv_kernel // 2
        self.conv_layers = nn.ModuleList()
        
        for i in range(conv_layers_count):
            self.conv_layers.append(nn.Conv1d(in_channels = self.embedding_size, out_channels = self.embedding_size, kernel_size = conv_kernel, stride = conv_stride, padding = conv_padding))
            output_shape = 1 + (output_shape + 2*conv_padding - conv_kernel) // conv_stride
        
        self.rnn = type_rnn(
                input_size = self.embedding_size,
                hidden_size = hidden_size,
                num_layers = layers_count,
                bidirectional = bidirectional,
                batch_first = True
        )
        
        self.fc = nn.Linear(hidden_size * output_shape * (1+bidirectional), 2)
        
        if initialization:
            self.iw(nn.Linear, initialization)
            self.iw(nn.Conv1d, initialization)
        

    def iw(self, ModuleClass, weights_initializator):
        for module in self.modules():
            if isinstance(module, ModuleClass):
                weights_initializator(module.weight)

    def forward(self, x):

        if self.embedding:
            x = self.embedding(x)
            x = x.reshape(len(x), self.embedding_size, self.seq_len)
        else:
            x = x[:, :, None]

        
        for layer in self.conv_layers:
            x = layer(x)
            x = self.fa(x)
        
        x = x.reshape(len(x), -1, self.embedding_size)

        x, size = self.rnn(x)

        x = self.fc(x.reshape(x.shape[0], -1))
        
        return F.log_softmax(x, dim=-1)
    
    def configure_optimizers(self):
        optimizer = self.optim(self.parameters(), lr=self.learningRate)
        return optimizer
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y.long())
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        pred = self(x)
        self.log("val_f1", self.f1(pred, y), on_epoch=True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        pred = self(x)
        self.log("test_f1", self.f1(pred, y), on_epoch=True)

In [9]:
from IPython.display import clear_output
from tqdm import tqdm
total = []

for params in tqdm(params_list):
    
    clear_output(wait=True)  

    learningRate = params['learning_rate']
    optimizer_type = params['optimizer']
    layers_count = params['layers_count']
    batch_size = params['batch_size']
    epochs = params['epochs']
    fa = params['activation_fn']
    type_rnn = params['type_rnn']
    hidden_size = params['hidden_size']
    bidirectional = params['bidirectional']
    conv_layers_count = params['conv_layers']
    embeddings = params['embeddings']
    kernel_size = params['kernel_size']
    stride = params['stride']
    initialization = params['initialization']


    net = Net(
        layers_count=layers_count,
        activation_function=fa,
        type_rnn=type_rnn,
        hidden_size=hidden_size,
        bidirectional=bidirectional,
        optimizer=optimizer_type,
        learningRate=learningRate,
        conv_kernel=kernel_size,
        conv_layers_count=conv_layers_count,
        conv_stride=stride,
        internal=(embeddings=='torch.nn'),
        initialization=initialization
        )
    dm = None
    if embeddings == 'w2v_pretrained':
        dm = NetData(
            train_features=X_train_w2v_pre_trained,
            test_features=X_test_w2v_pre_trained,
            train_targets=y_train,
            test_targets=y_test,
            batch_size=batch_size,
            random_state=random_state,
            internal=(embeddings=='torch.nn')
        )
    elif embeddings == 'w2v_selftrained':
        dm = NetData(
            train_features=X_train_w2v_self_trained,
            test_features=X_test_w2v_self_trained,
            train_targets=y_train,
            test_targets=y_test,
            batch_size=batch_size,
            random_state=random_state,
            internal=(embeddings=='torch.nn')
        )
    elif embeddings == 'ft_pretrained':
        dm = NetData(
            train_features=X_train_ft_pre_trained,
            test_features=X_test_ft_pre_trained,
            train_targets=y_train,
            test_targets=y_test,
            batch_size=batch_size,
            random_state=random_state,
            internal=(embeddings=='torch.nn')
        )
    elif embeddings == 'ft_selftrained':
        dm = NetData(
            train_features=X_train_ft_self_trained,
            test_features=X_test_ft_self_trained,
            train_targets=y_train,
            test_targets=y_test,
            batch_size=batch_size,
            random_state=random_state,
            internal=(embeddings=='torch.nn')
        )
    elif embeddings == 'torch.nn':
        dm = NetData(
            train_features=WordsIdPad_train,
            test_features=WordsIdPad_test,
            train_targets=y_train,
            test_targets=y_test,
            batch_size=batch_size,
            random_state=random_state,
            internal=(embeddings=='torch.nn')
        )
        
    trainer = L.Trainer(logger=False, max_epochs=epochs, enable_progress_bar=True, deterministic=True, inference_mode=True, enable_checkpointing=False)
    trainer.fit(net, datamodule=dm)
    f1_val = trainer.validate(net, datamodule=dm)[-1]['val_f1']
    f1_test = trainer.test(net, datamodule=dm)[-1]['test_f1']
    
    total.append({
        'embeddings': embeddings,
        'kernel_size': kernel_size,
        'stride': stride,
        'conv layers': conv_layers_count,
        'Type RNN': type_rnn.__name__,
        'hidden_size': hidden_size,
        'bidirectional': bidirectional,
        'rrn layers': layers_count,
        'optimizer': optimizer_type.__name__,
        'lr': learningRate,
        'epochs': epochs,
        'batch_size': batch_size,
        'initialization': initialization.__name__ if initialization else None,
        'F1-val': round(f1_val, 4),
        'F1-test': round(f1_test, 4)
    })

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type              | Params
--------------------------------------------------
0 | f1          | MulticlassF1Score | 0     
1 | embedding   | Embedding         | 3.0 M 
2 | conv_layers | ModuleList        | 1.1 M 
3 | rnn         | GRU               | 241 K 
4 | fc          | Linear            | 1.6 K 
--------------------------------------------------
4.3 M     Trainable params
0         Non-trainable params
4.3 M     Total params
17.296    Total estimated model params size (MB)


Epoch 19: 100%|██████████| 9/9 [00:00<00:00, 10.52it/s]                    

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 9/9 [00:00<00:00, 10.51it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Validation DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 51.24it/s]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 171.48it/s]


100%|██████████| 720/720 [1:02:09<00:00,  5.18s/it]


In [3]:
print(datetime.datetime.now().strftime('%d-%B-%Y %H:%M:%S'))

26-May-2023 18:38:43


# Сводная таблица

In [11]:
pd.set_option('display.max_rows', None)
summary = pd.DataFrame.from_dict(total)
summary_sort = summary.sort_values(by='F1-val', ascending=False)
summary_sort

Unnamed: 0,embeddings,kernel_size,stride,conv layers,Type RNN,hidden_size,bidirectional,rrn layers,optimizer,lr,epochs,batch_size,initialization,F1-val,F1-test
621,ft_selftrained,4,2,3,RNN,100,True,1,AdamW,0.001,20,2048,,0.7274,0.7004
613,ft_selftrained,3,1,3,LSTM,100,True,1,AdamW,0.001,20,2048,,0.7266,0.7004
256,ft_selftrained,3,2,3,LSTM,100,False,1,AdamW,0.001,20,2048,,0.7237,0.68
614,ft_selftrained,3,1,3,GRU,100,True,1,AdamW,0.001,20,2048,,0.7223,0.6593
595,ft_selftrained,4,1,3,LSTM,100,True,1,AdamW,0.001,5,2048,kaiming_uniform_,0.7202,0.7403
616,ft_selftrained,3,2,3,LSTM,100,True,1,AdamW,0.001,20,2048,,0.7199,0.6604
263,ft_selftrained,4,2,3,GRU,100,False,1,AdamW,0.001,20,2048,,0.7197,0.6785
286,ft_selftrained,4,2,3,LSTM,100,False,1,AdamW,0.001,20,2048,xavier_uniform_,0.7193,0.6785
606,ft_selftrained,4,1,3,RNN,100,True,1,AdamW,0.001,5,2048,xavier_uniform_,0.7182,0.799
255,ft_selftrained,3,2,3,RNN,100,False,1,AdamW,0.001,20,2048,,0.7174,0.6974
