# Начальная инициализация

In [1]:
import datetime
print(datetime.datetime.now().strftime('%d-%B-%Y %H:%M:%S'))

26-May-2023 18:37:34


In [2]:
# Подключение библиотек

# !pip install torch torchvision torchaudio
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data_utils

# !pip install scikit-learn
from sklearn.model_selection import ParameterGrid

import torch.optim as optim
# !pip install pandas
import pandas as pd
import numpy as np


import os
import random

In [3]:
# Подключение вычислений на видеокарту, если доступна
if torch.cuda.is_available():
    dev = "cuda:0"
else:
    dev = "cpu"
device = torch.device(dev)
print(device)

cuda:0


In [4]:
cpu_count = os.cpu_count()

num_workers = cpu_count if device == torch.device("cpu") else 0
num_workers, cpu_count

(0, 4)

In [5]:
random_state = 42

torch.use_deterministic_algorithms(True)

%env CUBLAS_WORKSPACE_CONFIG=:4096:8
%env PYTHONHASHSEED=42

def set_random_state(random_state):
    torch.manual_seed(random_state)
    random.seed(random_state)
    np.random.seed(random_state)

    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(random_state)
        torch.cuda.manual_seed(random_state)

        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        
set_random_state(42)

env: CUBLAS_WORKSPACE_CONFIG=:4096:8
env: PYTHONHASHSEED=42


In [2]:
print(datetime.datetime.now().strftime('%d-%B-%Y %H:%M:%S'))

26-May-2023 18:37:49


# Обучение нейронных сетей

In [7]:
import pickle
PATH = 'dumps/'
with open(PATH + 'X_train_ft_pre_trained.pkl', mode='rb') as file:
    X_train_ft_pre_trained = pickle.load(file)

with open(PATH + 'X_train_w2v_pre_trained.pkl', mode='rb') as file:
    X_train_w2v_pre_trained = pickle.load(file)

with open(PATH + 'X_test_w2v_pre_trained.pkl', mode='rb') as file:
    X_test_w2v_pre_trained = pickle.load(file)

with open(PATH + 'X_test_w2v_self_trained.pkl', mode='rb') as file:
    X_test_w2v_self_trained = pickle.load(file)

with open(PATH + 'WordsIdPad_train.pkl', mode='rb') as file:
    WordsIdPad_train = pickle.load(file)

with open(PATH + 'X_test_ft_pre_trained.pkl', mode='rb') as file:
    X_test_ft_pre_trained = pickle.load(file)

with open(PATH + 'X_train_ft_self_trained.pkl', mode='rb') as file:
    X_train_ft_self_trained = pickle.load(file)

with open(PATH + 'WordsIdPad_test.pkl', mode='rb') as file:
    WordsIdPad_test = pickle.load(file)

with open(PATH + 'X_test_ft_self_trained.pkl', mode='rb') as file:
    X_test_ft_self_trained = pickle.load(file)

with open(PATH + 'X_train_w2v_self_trained.pkl', mode='rb') as file:
    X_train_w2v_self_trained = pickle.load(file)

with open(PATH + 'y_train.pkl', mode='rb') as file:
    y_train = pickle.load(file)

with open(PATH + 'y_test.pkl', mode='rb') as file:
    y_test = pickle.load(file)

# X_train, X_val, y_train, y_val = train_test_split(WordsIdPad_train, y_train, test_size=0.2, random_state=42)

In [8]:
param_grid = {
    "embeddings": ['w2v_pretrained', 'ft_pretrained', 'w2v_selftrained', 'ft_selftrained', 'torch.nn'],
    "activation_fn": [F.relu],
    "learning_rate": [0.001],
    "epochs": [20, 10],
    "optimizer": [optim.Adam],
    "batch_size": [2048],
    "layers_count": [3],
    "kernel_size": [3],
    "stride": [1],
    "initialization": [None, torch.nn.init.kaiming_uniform_, torch.nn.init.xavier_uniform_],
    "regularization": ['None', 'dropout', 'l2_reg'],
    "normalization": [None, torch.nn.LayerNorm, torch.nn.BatchNorm1d],
    "scheduler": [None, torch.optim.lr_scheduler.ExponentialLR, torch.optim.lr_scheduler.MultiStepLR]
}

params_list = ParameterGrid(param_grid)
len(params_list)

810

In [9]:
class Net(nn.Module):
    def __init__(self, 
                 device=None, 
                 layers_count=None, 
                 activation_function=None, 
                 kernel=None, 
                 stride=None,
                 internal=None,
                 initialization=None,
                 regularization=None,
                 normalization=None,
                 batch_size=None
                ):
        
        super().__init__()
        
        self.embedding_size = 300
        self.num_words = 10000
        self.seq_len = 20 if internal else 1
        
        self.embedding = nn.Embedding(self.num_words + 1, self.embedding_size, padding_idx=0, device=device) if internal else None
        
        self.fa = activation_function
        
        self.layers = nn.ModuleList([])
        
        self.drop = nn.Dropout1d(p=0.2) if regularization == 'dropout' else nn.Dropout1d(p=0)
        self.norm = None
        if normalization:
            if normalization.__name__ == 'LayerNorm':
                 self.norm = normalization((self.embedding_size, self.seq_len), device=device)
            elif normalization.__name__ == 'BatchNorm1d':
                self.norm = normalization(self.embedding_size, device=device)
        
        output_shape = self.seq_len
        padding = kernel // 2
        
        for i in range(layers_count):
            self.layers.append(nn.Conv1d(in_channels = self.embedding_size, out_channels = self.embedding_size, kernel_size = kernel, stride = stride, padding = padding, device=device))
            output_shape = 1 + (output_shape + 2*padding - kernel) // stride
        
        
        self.fc = nn.Linear(output_shape * self.embedding_size, 2, device=device)
        
        if initialization:
            self.init_weights(nn.Linear, initialization)
            self.init_weights(nn.Conv1d, initialization)

    def init_weights(self, ModuleClass, weights_initializator):
        for module in self.modules():
            if isinstance(module, ModuleClass):
                weights_initializator(module.weight)
    
    def forward(self, x):
        
        if self.embedding:
            x = self.embedding(x)
            x = x.reshape(len(x), self.embedding_size, self.seq_len)
        else:
            x = x[:, :, None]
        
        for conv_layer in self.layers:
            x = conv_layer(x)
            x = self.fa(x)
            if self.norm:
                x = self.norm(x)
            x = self.drop(x)
        
        x = self.fc(x.reshape(x.shape[0], -1))
        
        return F.log_softmax(x, dim=-1)

In [10]:
# !pip install torchmetrics
from torchmetrics.classification import Accuracy, F1Score
from tqdm import tqdm
from sklearn.model_selection import train_test_split

total = []

# accuracy_criterion = Accuracy(task='multiclass', num_classes=2, multidim_average='global', average='weighted').to(device)
f1_criterion = F1Score(task='multiclass', num_classes=2, multidim_average='global', average='weighted').to(device)

for params in tqdm(params_list):
    
    learningRate = params['learning_rate']
    optimizer_type = params['optimizer']
    layers_count = params['layers_count']
    batch_size = params['batch_size']
    epochs = params['epochs']
    fa = params['activation_fn']
    kernel_size = params['kernel_size']
    stride = params['stride']
    embeddings = params['embeddings']
    initialization = params['initialization']
    regularization = params['regularization']
    normalization = params['normalization']
    scheduler = params['scheduler']

    net = Net(device=device, 
              layers_count=layers_count, 
              activation_function=fa, 
              kernel=kernel_size, 
              stride=stride, 
              internal=embeddings and (embeddings=='torch.nn'),
              regularization=regularization,
              initialization=initialization,
              normalization=normalization,
              batch_size=batch_size
             ).to(device)
    optimizer = optimizer_type(net.parameters(), lr=learningRate, weight_decay=(0.01 if regularization == 'l2_reg' else 0))
    if scheduler:
        if scheduler.__name__ == 'ExponentialLR':
            scheduler = scheduler(optimizer, gamma=0.9)
        elif scheduler.__name__ == 'MultiStepLR':
            scheduler = scheduler(optimizer, milestones=list(range(0, epochs)), gamma=0.9)
    
    
    XX_train, yy_train, XX_val, yy_val, XX_test, yy_test = [], [], [], [], [], []
    
    # divide into different embeddings
    
    if embeddings == 'w2v_pretrained':
        XX_train, XX_val, yy_train, yy_val = train_test_split(X_train_w2v_pre_trained, y_train, test_size=0.2, random_state=random_state)
        XX_test, yy_test = X_test_w2v_pre_trained, y_test
    elif embeddings == 'w2v_selftrained':
        XX_train, XX_val, yy_train, yy_val = train_test_split(X_train_w2v_self_trained, y_train, test_size=0.2, random_state=random_state)
        XX_test, yy_test = X_test_w2v_self_trained, y_test
    elif embeddings == 'ft_pretrained':
        XX_train, XX_val, yy_train, yy_val = train_test_split(X_train_ft_pre_trained, y_train, test_size=0.2, random_state=random_state)
        XX_test, yy_test = X_test_ft_pre_trained, y_test
    elif embeddings == 'ft_selftrained':
        XX_train, XX_val, yy_train, yy_val = train_test_split(X_train_ft_self_trained, y_train, test_size=0.2, random_state=random_state)
        XX_test, yy_test = X_test_ft_self_trained, y_test
    elif embeddings == 'torch.nn':
        XX_train, XX_val, yy_train, yy_val = train_test_split(WordsIdPad_train, y_train, test_size=0.2, random_state=random_state)
        XX_test, yy_test = WordsIdPad_test, y_test
    
    features_train = torch.tensor(XX_train, device=device, dtype=(torch.int32 if embeddings == 'torch.nn' else torch.float))
    targets_train = torch.tensor(yy_train, device=device, dtype=torch.int32)
    
    features_val = torch.tensor(XX_val, device=device, dtype=(torch.int32 if embeddings == 'torch.nn' else torch.float))
    targets_val = torch.tensor(yy_val, device=device, dtype=torch.int32)
    
    features_test = torch.tensor(XX_test, device=device, dtype=(torch.int32 if embeddings == 'torch.nn' else torch.float))
    targets_test = torch.tensor(yy_test, device=device, dtype=torch.int32)
    
    trainset = data_utils.TensorDataset(features_train, targets_train)
    valset = data_utils.TensorDataset(features_val, targets_val)
    testset = data_utils.TensorDataset(features_test, targets_test)
    
    train_loader = data_utils.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_loader = data_utils.DataLoader(valset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_loader = data_utils.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    for epoch in range(epochs):
        net.train()
        
        for X, Y in train_loader:
            output = net(X)
            loss = F.cross_entropy(output, Y.long())
            
            net.zero_grad()
            
            loss.backward()
            optimizer.step()
            
            if scheduler:
                scheduler.step()
            
    
    train_f1 = 0
    val_f1 = 0
    test_f1 = 0
    
    with torch.no_grad():
        net.eval()
        
        # evaluate the model on train
        targets = torch.tensor([], device=device)
        predictions = torch.tensor([], device=device)

        for X, Y in train_loader:
            output = net(X)
            
            targets = torch.cat((targets, Y))
            predictions = torch.cat((predictions, output.argmax(dim=-1)))
        
        train_f1 = f1_criterion(predictions, targets).item()

        # evaluate the model on validation
        targets = torch.tensor([], device=device)
        predictions = torch.tensor([], device=device)

        for X, Y in val_loader:
            output = net(X)
            
            targets = torch.cat((targets, Y))
            predictions = torch.cat((predictions, output.argmax(dim=-1)))
        
        val_f1 = f1_criterion(predictions, targets).item()

        # evaluate the model on test
        targets = torch.tensor([], device=device) 
        predictions = torch.tensor([], device=device)

        for X, Y in test_loader:
            output = net(X)
            
            targets = torch.cat((targets, Y))
            predictions = torch.cat((predictions, output.argmax(dim=-1)))
        
        test_f1 = f1_criterion(predictions, targets).item()
            
        
    
    total.append({
        'Модель': embeddings,
        'Размер ядра': kernel_size,
        "Отступ": stride,
        'Количество слоев': layers_count,
        'Инициализация': initialization.__name__ if initialization else None,
        'Регуляризация': regularization,
        'Нормализация': normalization.__name__ if normalization else None,
        'Планировщик': type(scheduler).__name__ if scheduler else None,
        'Количество эпох': epochs,
        'F1-train': round(train_f1, 2),
        'F1-val': round(val_f1, 2),
        'F1-test': round(test_f1, 2)
    })



100%|██████████| 810/810 [48:55<00:00,  3.62s/it]


In [3]:
print(datetime.datetime.now().strftime('%d-%B-%Y %H:%M:%S'))

26-May-2023 18:37:53


# Сводная таблица

In [12]:
pd.set_option('display.max_rows', None)
summary = pd.DataFrame.from_dict(total)
summary_sort = summary.sort_values(by='F1-val', ascending=False)
summary_sort

Unnamed: 0,Модель,Размер ядра,Отступ,Количество слоев,Инициализация,Регуляризация,Нормализация,Планировщик,Количество эпох,F1-train,F1-val,F1-test
543,ft_selftrained,3,1,3,xavier_uniform_,dropout,,,20,0.83,0.73,0.72
624,ft_selftrained,3,1,3,xavier_uniform_,dropout,,,10,0.75,0.73,0.7
525,ft_selftrained,3,1,3,kaiming_uniform_,dropout,LayerNorm,,20,0.75,0.73,0.7
540,ft_selftrained,3,1,3,xavier_uniform_,,,,20,0.81,0.73,0.7
597,ft_selftrained,3,1,3,kaiming_uniform_,dropout,,,10,0.75,0.72,0.7
584,ft_selftrained,3,1,3,,l2_reg,LayerNorm,MultiStepLR,10,0.71,0.72,0.84
491,ft_selftrained,3,1,3,,dropout,,MultiStepLR,20,0.72,0.72,0.84
380,w2v_selftrained,3,1,3,xavier_uniform_,,,MultiStepLR,20,0.73,0.72,0.8
489,ft_selftrained,3,1,3,,dropout,,,20,0.78,0.72,0.74
552,ft_selftrained,3,1,3,xavier_uniform_,dropout,LayerNorm,,20,0.74,0.72,0.8


# Выводы

Как можно увидеть по таблице - самого лучшего результата (0.84) достигла непредеробученная модель на признаках fastText с оптимизатором AdamW, функцией активации LeakyReLU, Dropout регуляризацией, "планировщиком" MultiStepLR с начальной скоростью 0.01 с LayerNorm нормализацией.