In [1]:
# Подавление предупреждений
import warnings
for warn in [UserWarning, FutureWarning]: warnings.filterwarnings("ignore", category = warn)

# Импорт необходимых библиотек
import os
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
from tqdm import tqdm

from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
from torch import Tensor
from einops import rearrange
from typing import Tuple, Callable
from torch.autograd import Function
import gc
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
pd.set_option('display.max_columns', None)

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Данные

In [3]:
from torch.utils.data import Dataset, DataLoader 
import numpy as np 
import math 

class Dataset_MELD_RESD(): 
    def __init__(self, part='train', transform=None): 
        if part == 'train':
            df_meld = pd.read_csv("train_sent_emo.csv")[['Utterance', 'Emotion']]
            df_meld.columns = ['text', 'emotion']
            df_resd = pd.read_csv("train.csv")[['text', 'emotion']]
            df = pd.concat([df_meld, df_resd[0:int(len(df_resd)*0.7)]], axis=0)
        elif part == 'dev_meld':
            df = pd.read_csv("dev_sent_emo.csv")[['Utterance', 'Emotion']]
            df.columns = ['text', 'emotion']
        elif part == 'dev_resd':
            df = pd.read_csv("train.csv")
            df = df[int(len(df)*0.7):]
        elif part == 'test_resd':
            df = pd.read_csv("test.csv")
        elif part == 'test_meld':
            df = pd.read_csv("test_sent_emo.csv")[['Utterance', 'Emotion']]
            df.columns = ['text', 'emotion']
        elif part == 'test_resd':
            df = pd.read_csv("test.csv")
        else:
            raise ValueError('Unknown part of Dataset (train / test_meld / test_resd)')
        self.x = list(df['text'].values)
        emotion_mapping = {
            'anger': 0,
            'disgust': 1,
            'fear': 2,
            'joy': 3,
            'happiness': 3,
            'neutral': 4,
            'sadness': 5,
            'surprise': 6,
            'enthusiasm': 6
        }

        self.y = torch.tensor(df['emotion'].apply(lambda x : emotion_mapping[x]).values).to(device)
        self.n_samples = df.shape[0]

    def __getitem__(self, index): 
        return self.x[index], self.y[index] 
        
    def __len__(self): 
        return self.n_samples 

In [4]:
BATCH_SIZE = 32
train_dataloader = DataLoader(dataset=Dataset_MELD_RESD('train'), batch_size=BATCH_SIZE, shuffle=True)
dev_meld_dataloader = DataLoader(dataset=Dataset_MELD_RESD('dev_meld'), batch_size=BATCH_SIZE, shuffle=False)
dev_resd_dataloader = DataLoader(dataset=Dataset_MELD_RESD('dev_resd'), batch_size=BATCH_SIZE, shuffle=False)
test_meld_dataloader = DataLoader(dataset=Dataset_MELD_RESD('test_meld'), batch_size=BATCH_SIZE, shuffle=False)
test_resd_dataloader = DataLoader(dataset=Dataset_MELD_RESD('test_resd'), batch_size=BATCH_SIZE, shuffle=False)

### Feature Extractor

In [5]:
class Embedding():
    def __init__(self, model_name='jina', pooling=None):
        self.model_name = model_name
        self.pooling = pooling
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if model_name == 'jina':
            self.tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v3", code_revision='da863dd04a4e5dce6814c6625adfba87b83838aa', trust_remote_code=True)
            self.model = AutoModel.from_pretrained("jinaai/jina-embeddings-v3", code_revision='da863dd04a4e5dce6814c6625adfba87b83838aa', trust_remote_code=True).to(self.device)
        elif model_name == 'xlm-roberta-base':
            self.tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')
            self.model = AutoModel.from_pretrained('xlm-roberta-base').to(self.device)
        elif model_name == 'canine-c':
            self.tokenizer = AutoTokenizer.from_pretrained('google/canine-c')
            self.model = AutoModel.from_pretrained('google/canine-c').to(self.device)
        else:
            raise ValueError('Unknown name of Embedding')
    def _mean_pooling(self, X):
        def mean_pooling(model_output, attention_mask):
            token_embeddings = model_output[0]
            input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
            return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
        encoded_input = self.tokenizer(X, padding=True, truncation=True, return_tensors='pt').to(self.device)
        with torch.no_grad():
            model_output = self.model(**encoded_input)
        sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
        sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
        return sentence_embeddings.unsqueeze(1)
    
    def get_embeddings(self, X):
        if self.pooling is None:
            if self.model_name == 'canine-c':
                max_len = 329
            else:
                max_len = 95
            encoded_input = self.tokenizer(X, padding=True, truncation=True, return_tensors='pt').to(self.device)
            with torch.no_grad():
                features = self.model(**encoded_input)[0].detach().cpu().float().numpy()
            res = np.pad(features[:, :max_len, :], ((0, 0), (0, max(0, max_len - features.shape[1])), (0, 0)), "constant")
            return torch.tensor(res)
        elif self.pooling == 'mean':
            return self._mean_pooling(X)
        else:
            raise ValueError('Unknown type of pooling')

### Метрики

In [6]:
def evaluate_metrics(model, test_dataloader):
    model.eval()
    y_test = []
    y_predict = []
    with torch.no_grad():
        for batch, (batch_X, targets) in enumerate(test_dataloader, 1):
            y_test.extend(list(map(int, targets)))
            output = model(batch_X)
            _, predictions = torch.max(output, dim=1)
            y_predict.extend(list(map(int, predictions)))
        # Unweighted Average Recall (UAR)
        uar = recall_score(y_test, y_predict, average='macro')
        # Weighted Average Recall (WAR)
        war = recall_score(y_test, y_predict, average='weighted')
        # Macro F1-score (MF1)
        mf1 = f1_score(y_test, y_predict, average='macro')
        # Weighted F1-score (WF1)
        wf1 = f1_score(y_test, y_predict, average='weighted')
    return {'uar': 100.0 * uar, 'war': 100.0 * war, 'mf1': 100.0 * mf1, 'wf1': 100.0 * wf1}

# Обучение

In [7]:
from dataclasses import dataclass
from typing import ClassVar
from typing import List, Dict, Any, Tuple, Optional
@dataclass
class ModelTrainer:
    model: 'typing.Any'
    train_dataloader: DataLoader
    dev_meld_dataloader: DataLoader
    dev_resd_dataloader: DataLoader
    test_meld_dataloader: DataLoader
    test_resd_dataloader: DataLoader
    device: torch.device
    epochs: int
    round_loss: int
    round_acc: int

    optimizer: torch.optim
    loss_fn: 'typing.Any'
    
    patience: int = 10 # Ранняя остановка обучения

    class_names: ClassVar[Optional[List[str]]] = None # Список имен классов

    def __post_init__(self):
        
        # История обучения и тестирования
        self.__history = pd.DataFrame({
            "train_avg": [], # Средние метрики на тренировочной выборке
            "dev_avg": [], # Средние метрики на валидационной выборке
            "train_loss": [], # Loss на тренировочной выборке
            "dev_loss": [], # Loss на валидационной выборке
        })

        # Количество шагов в одной эпохе
        self.__train_steps = len(self.train_dataloader)
        self.__dev_steps = len(self.dev_meld_dataloader) + len(self.dev_resd_dataloader)
        self.__test_steps = len(self.test_meld_dataloader) + len(self.test_resd_dataloader)

        self.__best_dev_avg = 0
        self.__no_improvement_count = 0
        
        self.loss_fn = self.loss_fn

    @property
    def history(self) -> pd.DataFrame:
        """Получение DataFrame историей обучения и тестирования

        Returns:
            pd.DataFrame: **DataFrame** c историей обучения и тестирования
        """

        return self.__history

    @classmethod
    def get_model_logits(cls, logits: torch.Tensor) -> torch.Tensor:
        """Получение логитов модели в зависимости от функции потерь

        Args:
            logits (torch.Tensor): Входные логиты

        Returns:
            torch.Tensor: Обработанные логиты
        """

        if isinstance(cls.loss_fn, nn.NLLLoss):
            log_softmax = nn.LogSoftmax(dim = 1)
            return log_softmax(logits)
        elif isinstance(cls.loss_fn, nn.CrossEntropyLoss):
            return logits

    def _is_best_model(self, dev_avg: float) -> bool:
        """Проверка, является ли текущая модель лучшей на основе метрик валидации

        Args:
            test_accuracy (float): Текущая точность тестирования

        Returns:
            bool: True, если текущая модель лучшая, иначе False
        """

        try:
            max_dev_avg = max(self.__history["dev_avg"])
        except ValueError:
            max_dev_avg = 0
        return dev_avg > max_dev_avg

    def _save_model(self, epoch: int, path_to_model: str, test_accuracy: float, loss: torch.Tensor) -> None:
        """Сохранение модели

        Args:
            epoch (int): Текущая эпоха
            path_to_model (str): Путь для сохранения модели
            test_accuracy (float): Точность на тестовой выборке
            loss (torch.Tensor): Значение потерь
        """
        
        os.makedirs(path_to_model, exist_ok = True)
        self._best_model_name = f"{self.model.__class__.__name__}_{self.model.model_name}_{epoch}_{test_accuracy}_checkpoint.pth"

        torch.save({
            "epoch": epoch,
            "model_state_dict": self.model.state_dict(),
            "optimizer_state_dict": self.optimizer.state_dict(),
            "test_loss": loss,
        }, os.path.join(path_to_model, f"{self.model.__class__.__name__}_{self.model.model_name}_{epoch}_{test_accuracy}_checkpoint.pth"))
    
    # Процесс обучения
    def train(self, path_to_model: str) -> None:
        """Процесс обучения

        Args:
            path_to_model (str): Путь для сохранения моделей

        Returns:
            None
        """
        
        losses_train_list = []
        losses_dev_list = []
        accuracy_train_list = []
        accuracy_dev_list = []

        for epoch in range(1, self.epochs + 1):
            with torch.no_grad():
                torch.cuda.empty_cache()
            self.model.train() # Установка модели в режим обучения
            # Сумма Loss
            total_train_loss = 0
            total_dev_loss = 0
            total_dev_loss_meld = 0
            total_dev_loss_resd = 0
            # Сумма точности
            train_accuracy = 0
            dev_accuracy = 0
            dev_accuracy_meld = 0
            dev_accuracy_resd = 0
            # Сумма метрик
            train_uar = 0
            train_war = 0
            train_mf1 = 0
            train_wf1 = 0
            dev_uar_meld = 0
            dev_war_meld = 0
            dev_mf1_meld = 0
            dev_wf1_meld = 0
            dev_uar_resd = 0
            dev_war_resd = 0
            dev_mf1_resd = 0
            dev_wf1_resd = 0

            # Проход по всем тренировочным пакетам
            with tqdm(total = self.__train_steps, desc = f"Эпоха {epoch}", unit = "batch") as pbar_train:
                for batch, (batch_X, targets) in enumerate(self.train_dataloader, 1):
                    targets = targets.to(device)
                    logits = self.model(batch_X)
                    loss = self.loss_fn(logits, targets) # Ошибка предсказаний

                    # Обратное распространение для обновления весов
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
        
                    total_train_loss += loss.item() # Потеря
                    # Метрики
                    train_uar += 100.0 * recall_score(targets.cpu(), logits.argmax(1).cpu(), average='macro')
                    train_war += 100.0 * recall_score(targets.cpu(), logits.argmax(1).cpu(), average='weighted')
                    train_mf1 += 100.0 * f1_score(targets.cpu(), logits.argmax(1).cpu(), average='macro')
                    train_wf1 += 100.0 * f1_score(targets.cpu(), logits.argmax(1).cpu(), average='weighted')
                    train_accuracy += (logits.argmax(1) == targets).type(torch.float).sum().item()
        
                    pbar_train.update(1)
                    with torch.no_grad():
                        torch.cuda.empty_cache()

                # Средняя потеря
                avg_train_loss = round(total_train_loss / batch, self.round_loss)
                losses_train_list.append(avg_train_loss)
        
                # Точность
                train_accuracy = round(train_accuracy / len(self.train_dataloader.dataset) * 100, self.round_acc)
                
                
                train_uar = round(train_uar / len(self.train_dataloader), self.round_acc)
                train_war = round(train_war / len(self.train_dataloader), self.round_acc)
                train_mf1 = round(train_mf1 / len(self.train_dataloader), self.round_acc)
                train_wf1 = round(train_wf1 / len(self.train_dataloader), self.round_acc)
                
                train_avg_metrics = 0.25 * (train_uar + train_war + train_mf1 + train_wf1)
                accuracy_train_list.append(train_avg_metrics)
        
                pbar_train.set_postfix({
                    "uar": train_uar,
                    "war" : train_war,
                    "mf1" : train_mf1,
                    "wf1" : train_wf1,
                    "avg" : train_avg_metrics,
                    "Средняя потеря": avg_train_loss
                })
            
            
            # Установка модели в режим предсказаний
            self.model.eval()
        
            # Предсказания на валидационной выборке
            with torch.no_grad():
                with tqdm(total = self.__dev_steps, desc = f"Тестирование {epoch}", unit = "batch") as pbar_dev:
                    num_batches = 0
                    for batch, (batch_X, targets) in enumerate(self.dev_meld_dataloader, 1):
                        targets = targets.to(device)
                        logits = self.model(batch_X)
                        loss = self.loss_fn(logits, targets) # Ошибка предсказаний
                        
                        total_dev_loss += loss.item() # Потеря
                        total_dev_loss_meld += loss.item()
                        dev_accuracy_meld += (logits.argmax(1) == targets).type(torch.float).sum().item()
                        # Метрики
                        dev_uar_meld += 100.0 * recall_score(targets.cpu(), logits.argmax(1).cpu(), average='macro')
                        dev_war_meld += 100.0 * recall_score(targets.cpu(), logits.argmax(1).cpu(), average='weighted')
                        dev_mf1_meld += 100.0 * f1_score(targets.cpu(), logits.argmax(1).cpu(), average='macro')
                        dev_wf1_meld += 100.0 * f1_score(targets.cpu(), logits.argmax(1).cpu(), average='weighted')
        
                        pbar_dev.update(1)
                        with torch.no_grad():
                            torch.cuda.empty_cache()
                    num_batches += batch
                    batch_meld = batch
                    for batch, (batch_X, targets) in enumerate(self.dev_resd_dataloader, 1):
                        targets = targets.to(device)
                        logits = self.model(batch_X)
                        loss = self.loss_fn(logits, targets) # Ошибка предсказаний
                        
                        total_dev_loss += loss.item() # Потеря
                        total_dev_loss_resd += loss.item()
                        # Количество правильных предсказаний
                        dev_accuracy_resd += (logits.argmax(1) == targets).type(torch.float).sum().item()
                        # Метрики
                        dev_uar_resd += 100.0 * recall_score(targets.cpu(), logits.argmax(1).cpu(), average='macro')
                        dev_war_resd += 100.0 * recall_score(targets.cpu(), logits.argmax(1).cpu(), average='weighted')
                        dev_mf1_resd += 100.0 * f1_score(targets.cpu(), logits.argmax(1).cpu(), average='macro')
                        dev_wf1_resd += 100.0 * f1_score(targets.cpu(), logits.argmax(1).cpu(), average='weighted')
        
                        pbar_dev.update(1)
                        with torch.no_grad():
                            torch.cuda.empty_cache()
                    num_batches += batch
                    # Средняя потеря
                    avg_dev_loss = round(total_dev_loss / num_batches, self.round_loss)
                    avg_dev_loss = round(0.5 * (total_dev_loss_meld / batch_meld + total_dev_loss_resd / batch), self.round_loss)
                    losses_dev_list.append(avg_dev_loss)
        
                    # Точность
                    dev_accuracy = round(0.5 * (dev_accuracy_meld / len(self.dev_meld_dataloader.dataset) * 100 + dev_accuracy_resd / len(self.dev_resd_dataloader.dataset) * 100), self.round_acc)
                
                    dev_uar_meld = round(dev_uar_meld / len(self.dev_meld_dataloader), self.round_acc)
                    dev_war_meld = round(dev_war_meld / len(self.dev_meld_dataloader), self.round_acc)
                    dev_mf1_meld = round(dev_mf1_meld / len(self.dev_meld_dataloader), self.round_acc)
                    dev_wf1_meld = round(dev_wf1_meld / len(self.dev_meld_dataloader), self.round_acc)
                    
                    dev_uar_resd = round(dev_uar_resd / len(self.dev_resd_dataloader), self.round_acc)
                    dev_war_resd = round(dev_war_resd / len(self.dev_resd_dataloader), self.round_acc)
                    dev_mf1_resd = round(dev_mf1_resd / len(self.dev_resd_dataloader), self.round_acc)
                    dev_wf1_resd = round(dev_wf1_resd / len(self.dev_resd_dataloader), self.round_acc)
                    
                    
                    dev_uar = 0.5 * (dev_uar_meld + dev_uar_resd)
                    dev_war = 0.5 * (dev_war_meld + dev_war_resd)
                    dev_mf1 = 0.5 * (dev_mf1_meld + dev_mf1_resd)
                    dev_wf1 = 0.5 * (dev_wf1_meld + dev_wf1_resd)
                    
                    dev_avg_metrics = 0.25 * (dev_uar + dev_war + dev_mf1 + dev_wf1)
                    accuracy_dev_list.append(dev_avg_metrics)
                    
                    pbar_dev.set_postfix({
                        "uar": dev_uar,
                        "war" : dev_war,
                        "mf1" : dev_mf1,
                        "wf1" : dev_wf1,
                        "avg" : dev_avg_metrics,
                        "Средняя потеря": avg_dev_loss
                    })
            
            if self._is_best_model(dev_avg_metrics):
                self._save_model(epoch, path_to_model, round(dev_avg_metrics, self.round_acc), avg_dev_loss)
                self.__best_dev_avg = dev_avg_metrics
                self.__no_improvement_count = 0
            else:
                self.__no_improvement_count += 1

            # Добавлениие данных в историю обучения
            new_row = pd.Series([train_avg_metrics, dev_avg_metrics, avg_train_loss, avg_dev_loss], index = self.__history.columns)
            self.__history = pd.concat([self.__history, new_row.to_frame().T], ignore_index = True)

            if self.__no_improvement_count >= self.patience:
                print(f"Ранняя остановка на эпохе {epoch} из-за отсутствия улучшения точности на тестовой выборке")
                break
        '''checkpoint = torch.load(os.path.join(path_to_model, self._best_model_name))
        self.model.load_state_dict(checkpoint['model_state_dict'])
        metrics_dev_meld = evaluate_metrics(self.model, dev_meld_dataloader)
        metrics_dev_resd = evaluate_metrics(self.model, dev_resd_dataloader)
        print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
        print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
        metrics_test_meld = evaluate_metrics(self.model, test_meld_dataloader)
        metrics_test_resd = evaluate_metrics(self.model, test_resd_dataloader)
        print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
        print("Метрики на тестовой выборке RESD: ", metrics_test_resd)'''
        '''# Визуализация графиков потерь и точности
        plt.figure(figsize=(12, 6))

        plt.subplot(1, 2, 1)
        plt.plot(losses_train_list, label = 'Потери на тренировочной выборке')
        plt.plot(losses_dev_list, label = 'Потери на валидационной выборке')
        plt.title('Потери во время обучения')
        plt.xlabel('Эпоха')
        plt.ylabel('Потери')
        plt.legend()

        plt.subplot(1, 2, 2)
        plt.plot(accuracy_train_list, label = 'Средние метрики на тренировочной выборке')
        plt.plot(accuracy_dev_list, label = 'Средние метрики на валидационной выборке')
        plt.title('Средние метрики во время обучения')
        plt.xlabel('Эпоха')
        plt.ylabel('Точность')
        plt.legend()

        plt.tight_layout()
        plt.show()'''

    # Получение хэш-значения
    def __hash__(self):
        return id(self)

In [4]:
EPOCHS = 50 # Количество эпох
BATCH_SIZE = 32 # Размер выборки (пакета)
LEARNING_RATE = 1e-4 # Скорость обучения
ROUND_ACC = 2 # Знаков Accuracy после запятой
ROUND_LOSS = 7 # Знаков Loss после запятой
ROOT_DIR = os.path.join(".")
PATH_TO_MODEL = os.path.join(ROOT_DIR, "Models_lstm")

In [9]:
from sklearn.utils.class_weight import compute_class_weight
y = []
for batch, (batch_X, targets) in enumerate(train_dataloader, 1):
    y.extend(list(map(int, targets)))
class_weights = torch.tensor(compute_class_weight(class_weight="balanced", classes=np.unique(y), y=y), dtype=torch.float).to(device)

### LSTM

In [10]:
class LSTMClassifier(nn.Module):
    def __init__(self, num_classes, model_name='jina', pooling=None, input_size = 1024, hidden_size = 64, num_layers = 2, dropout = 0.1, bidirectional=True):
        super(LSTMClassifier, self).__init__()
        embed = Embedding(model_name, pooling)
        self.embedding = embed.get_embeddings
        self.model_name = model_name
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(
            input_size = input_size,
            hidden_size = hidden_size,
            num_layers = num_layers,
            batch_first = True,
            dropout = dropout,
            bidirectional=bidirectional
        )
        if bidirectional:
            self.fc = nn.Linear(2 * hidden_size, num_classes)
        else:
            self.fc = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        x = torch.tensor(self.embedding(x)).to(device)
        if self.lstm.bidirectional:
            h0, c0 = torch.zeros(2 * self.num_layers, len(x), self.hidden_size).to(device), torch.zeros(2 * self.num_layers, len(x), self.hidden_size).to(device)
        else:
            h0, c0 = torch.zeros(self.num_layers, len(x), self.hidden_size).to(device), torch.zeros(self.num_layers, len(x), self.hidden_size).to(device)
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        if self.lstm.bidirectional:
            out = torch.cat((hn[-2, :, :], hn[-1, :, :]), dim=1)
        else:
            out = out[:, -1, :]
        out = self.fc(out)
        return out

#### LSTM + jina

### Model hyperparameters  

In [12]:
PATH_TO_MODEL = os.path.join(ROOT_DIR, "Models_lstm_jina")

In [12]:
%%capture --no-stdout
result = []
bidirectional = True
for num_layers in [1, 2, 3]:
    for hidden_size in [64, 128, 256, 512]:
        print(f"hidden_size={hidden_size}, num_layers={num_layers}, bidirectional={bidirectional}")
        model_lstm = LSTMClassifier(num_classes = 7, model_name='jina', pooling=None, hidden_size=hidden_size, num_layers=num_layers, bidirectional=bidirectional).to(device)
        optimizer = optim.Adam(params = model_lstm.parameters(), lr = LEARNING_RATE)
        loss_fn = nn.CrossEntropyLoss(weight=class_weights)
        trainer = ModelTrainer(model_lstm, train_dataloader, dev_meld_dataloader, dev_resd_dataloader, test_meld_dataloader, test_resd_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_ACC, optimizer, loss_fn)
        trainer.train(PATH_TO_MODEL)
        checkpoint = torch.load(os.path.join(PATH_TO_MODEL, trainer._best_model_name))
        model_lstm.load_state_dict(checkpoint['model_state_dict'])
        metrics_dev_meld = evaluate_metrics(model_lstm, dev_meld_dataloader)
        metrics_dev_resd = evaluate_metrics(model_lstm, dev_resd_dataloader)
        print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
        print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
        metrics_test_meld = evaluate_metrics(model_lstm, test_meld_dataloader)
        metrics_test_resd = evaluate_metrics(model_lstm, test_resd_dataloader)
        print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
        print("Метрики на тестовой выборке RESD: ", metrics_test_resd)
        result.append([{"hidden_size" : hidden_size, "num_layers": num_layers, "bidirectional": bidirectional}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, trainer._best_model_name])

hidden_size=64, num_layers=1, bidirectional=True
Метрики на валидационной выборке MELD:  {'uar': 38.50513367640605, 'war': 49.6844003606853, 'mf1': 37.248572070345084, 'wf1': 49.95832862164324}
Метрики на валидационной выборке RESD:  {'uar': 30.355023168687765, 'war': 30.74626865671642, 'mf1': 30.299737560055366, 'wf1': 30.725178894219518}
Метрики на тестовой выборке MELD:  {'uar': 37.07617355109617, 'war': 52.720306513409966, 'mf1': 35.71355556458995, 'wf1': 53.422970543498735}
Метрики на тестовой выборке RESD:  {'uar': 27.352842846263897, 'war': 27.857142857142858, 'mf1': 27.180824347026718, 'wf1': 27.726923366248684}
hidden_size=128, num_layers=1, bidirectional=True
Ранняя остановка на эпохе 36 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 40.63093843049616, 'war': 50.13525698827773, 'mf1': 39.0559231163594, 'wf1': 50.758278965395846}
Метрики на валидационной выборке RESD:  {'uar': 34.06452629360924, 'war': 34.02985074626866,

In [13]:
df = pd.DataFrame(result, columns=["параметры", "метрики dev meld", "метрики dev resd", "метрики test meld", "метрики test resd", "путь"])
df = pd.concat([df["параметры"].apply(pd.Series), df["метрики dev meld"].apply(pd.Series), df["метрики dev resd"].apply(pd.Series), df["метрики test meld"].apply(pd.Series), df["метрики test resd"].apply(pd.Series), df["путь"]], axis=1)
df.columns = ["hidden_size", "num_layers", "bidirectional", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df.to_csv(os.path.join(PATH_TO_MODEL, "result_bidirectional_True_num_layers_hidden_size.csv"))

In [20]:
df['average_dev_meld'] = (df['uar_dev_meld'] + df['war_dev_meld'] + df['mf1_dev_meld'] + df['wf1_dev_meld']) / 4.0
df['average_dev_resd'] = (df['uar_dev_resd'] + df['war_dev_resd'] + df['mf1_dev_resd'] + df['wf1_dev_resd']) / 4.0
df['average_test_meld'] = (df['uar_test_meld'] + df['war_test_meld'] + df['mf1_test_meld'] + df['wf1_test_meld']) / 4.0
df['average_test_resd'] = (df['uar_test_resd'] + df['war_test_resd'] + df['mf1_test_resd'] + df['wf1_test_resd']) / 4.0
df['avg_test'] = 0.5 * (df['average_test_meld'] + df['average_test_resd'])

In [21]:
df.sort_values('avg_test' , ascending=False)

Unnamed: 0,hidden_size,num_layers,bidirectional,uar_dev_meld,war_dev_meld,mf1_dev_meld,wf1_dev_meld,uar_dev_resd,war_dev_resd,mf1_dev_resd,...,uar_test_resd,war_test_resd,mf1_test_resd,wf1_test_resd,путь,average_dev_meld,average_dev_resd,average_test_meld,average_test_resd,avg_test
7,512,2,True,36.914882,52.119026,37.339609,51.54657,34.982744,35.522388,34.838927,...,34.071528,34.642857,33.920942,34.448325,LSTMClassifier_jina_14_39.44_checkpoint.pth,44.480022,35.18873,47.668029,34.270913,40.969471
3,512,1,True,41.617945,53.291253,40.703525,52.841851,32.662597,33.134328,31.784233,...,35.1901,35.714286,34.572775,34.971047,LSTMClassifier_jina_16_39.57_checkpoint.pth,47.113644,32.536312,45.434284,35.112052,40.273168
2,256,1,True,41.413131,53.651939,40.971058,53.636704,31.690755,31.940299,30.853201,...,32.172837,32.857143,31.889696,32.419358,LSTMClassifier_jina_15_39.18_checkpoint.pth,47.418208,31.403431,46.707438,32.334758,39.521098
9,128,3,True,39.164575,52.750225,38.758737,52.465113,32.730957,32.537313,32.322923,...,32.063096,32.142857,32.068321,32.109598,LSTMClassifier_jina_16_38.52_checkpoint.pth,45.784663,32.492504,46.74095,32.095968,39.418459
11,512,3,True,40.489045,49.323715,38.961378,51.451121,31.553316,32.238806,30.229659,...,34.059664,35.0,32.558391,33.288399,LSTMClassifier_jina_5_38.12_checkpoint.pth,45.056315,31.179638,45.050486,33.726613,39.38855
10,256,3,True,36.731051,51.487827,37.221675,50.795774,33.282961,33.432836,33.377888,...,31.831878,32.5,31.942702,32.622566,LSTMClassifier_jina_19_38.65_checkpoint.pth,44.059082,33.403323,45.893903,32.224287,39.059095
4,64,2,True,39.481881,49.774572,37.922167,50.74621,26.898647,27.164179,26.480303,...,29.80993,30.714286,29.335864,30.218864,LSTMClassifier_jina_14_35.37_checkpoint.pth,44.481207,26.90191,47.065238,30.019736,38.542487
1,128,1,True,40.630938,50.135257,39.055923,50.758279,34.064526,34.029851,33.52879,...,32.119724,32.857143,31.913516,32.528511,LSTMClassifier_jina_26_39.12_checkpoint.pth,45.145099,33.866668,44.094929,32.354723,38.224826
6,256,2,True,42.274102,51.577998,40.212988,52.324869,34.883698,35.223881,34.269237,...,31.669084,31.785714,30.403565,30.662568,LSTMClassifier_jina_9_40.24_checkpoint.pth,46.597489,34.784925,43.972067,31.130233,37.55115
8,64,3,True,36.964898,49.594229,36.247772,49.568267,27.083569,27.462687,26.769695,...,27.920492,28.214286,28.268023,28.483659,LSTMClassifier_jina_16_35.02_checkpoint.pth,43.093792,27.144435,45.618832,28.221615,36.920223


### Training hyperparameters

1) BATCH_SIZE=32

In [12]:
BATCH_SIZE = 32
train_dataloader = DataLoader(dataset=Dataset_MELD_RESD('train'), batch_size=BATCH_SIZE, shuffle=True)
dev_meld_dataloader = DataLoader(dataset=Dataset_MELD_RESD('dev_meld'), batch_size=BATCH_SIZE, shuffle=False)
dev_resd_dataloader = DataLoader(dataset=Dataset_MELD_RESD('dev_resd'), batch_size=BATCH_SIZE, shuffle=False)
test_meld_dataloader = DataLoader(dataset=Dataset_MELD_RESD('test_meld'), batch_size=BATCH_SIZE, shuffle=False)
test_resd_dataloader = DataLoader(dataset=Dataset_MELD_RESD('test_resd'), batch_size=BATCH_SIZE, shuffle=False)

In [13]:
# result for lr=1e-4, batch_size=32, optimizer=Adam
metrics_dev_meld = {'uar': 36.914882229220694, 'war': 52.1190261496844, 'mf1': 37.33960869854577, 'wf1': 51.546570204199604}
metrics_dev_resd = {'uar': 34.982743822166555, 'war': 35.52238805970149, 'mf1': 34.83892663653965, 'wf1': 35.410859597657826}
metrics_test_meld = {'uar': 38.45335521111753, 'war': 56.666666666666664, 'mf1': 38.438996451151816, 'wf1': 57.11309695573641}
metrics_test_resd = {'uar': 34.07152838073891, 'war': 34.64285714285714, 'mf1': 33.920941779084316, 'wf1': 34.44832494182704}
result = [[{"lr" : 1e-4, "batch_size": 32, "optimizer" : "Adam"}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, "LSTMClassifier_jina_14_39.44_checkpoint.pth"]]

In [14]:
%%capture --no-stdout
optimizer_ = "Adam"
for lr in [1e-3, 1e-5]:
    print(f"lr={lr}, batch_size={BATCH_SIZE}, optimizer={optimizer_}")
    model_lstm = LSTMClassifier(num_classes = 7, model_name='jina', pooling=None, hidden_size=512, num_layers=2, bidirectional=True).to(device)
    optimizer = optim.Adam(params = model_lstm.parameters(), lr = LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)
    trainer = ModelTrainer(model_lstm, train_dataloader, dev_meld_dataloader, dev_resd_dataloader, test_meld_dataloader, test_resd_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_ACC, optimizer, loss_fn)
    trainer.train(PATH_TO_MODEL)
    checkpoint = torch.load(os.path.join(PATH_TO_MODEL, trainer._best_model_name))
    model_lstm.load_state_dict(checkpoint['model_state_dict'])
    metrics_dev_meld = evaluate_metrics(model_lstm, dev_meld_dataloader)
    metrics_dev_resd = evaluate_metrics(model_lstm, dev_resd_dataloader)
    print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
    print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
    metrics_test_meld = evaluate_metrics(model_lstm, test_meld_dataloader)
    metrics_test_resd = evaluate_metrics(model_lstm, test_resd_dataloader)
    print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
    print("Метрики на тестовой выборке RESD: ", metrics_test_resd)
    result.append([{"lr" : lr, "batch_size": BATCH_SIZE, "optimizer" : optimizer_}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, trainer._best_model_name])

lr=0.001, batch_size=32, optimizer=Adam
Ранняя остановка на эпохе 27 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 39.03910380637809, 'war': 50.586113615870154, 'mf1': 37.372601037950915, 'wf1': 50.64337961436234}
Метрики на валидационной выборке RESD:  {'uar': 33.57645666962438, 'war': 33.731343283582085, 'mf1': 33.44561867653603, 'wf1': 33.94203533515552}
Метрики на тестовой выборке MELD:  {'uar': 39.31638368939801, 'war': 54.406130268199234, 'mf1': 38.17839486757602, 'wf1': 55.34330144441883}
Метрики на тестовой выборке RESD:  {'uar': 33.957274944117046, 'war': 34.285714285714285, 'mf1': 34.072937248135354, 'wf1': 34.521961460518966}
lr=1e-05, batch_size=32, optimizer=Adam
Метрики на валидационной выборке MELD:  {'uar': 42.289858506932, 'war': 54.283137962128045, 'mf1': 40.17412320419961, 'wf1': 53.09946957537216}
Метрики на валидационной выборке RESD:  {'uar': 32.563604185635604, 'war': 32.23880597014925, 'mf1': 32.110820830

In [15]:
df = pd.DataFrame(result, columns=["параметры", "метрики dev meld", "метрики dev resd", "метрики test meld", "метрики test resd", "путь"])
df = pd.concat([df["параметры"].apply(pd.Series), df["метрики dev meld"].apply(pd.Series), df["метрики dev resd"].apply(pd.Series), df["метрики test meld"].apply(pd.Series), df["метрики test resd"].apply(pd.Series), df["путь"]], axis=1)
df.columns = ["lr", "batch_size", "optimizer", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df.to_csv(os.path.join(PATH_TO_MODEL, "result_Adam_32_lr_.csv"))

In [13]:
columns = ["lr", "batch_size", "optimizer", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df = pd.read_csv(os.path.join(PATH_TO_MODEL, "result_Adam_32_lr_.csv"), index_col=0)
df.columns=columns

In [14]:
df['average_dev_meld'] = (df['uar_dev_meld'] + df['war_dev_meld'] + df['mf1_dev_meld'] + df['wf1_dev_meld']) / 4.0
df['average_dev_resd'] = (df['uar_dev_resd'] + df['war_dev_resd'] + df['mf1_dev_resd'] + df['wf1_dev_resd']) / 4.0
df['average_test_meld'] = (df['uar_test_meld'] + df['war_test_meld'] + df['mf1_test_meld'] + df['wf1_test_meld']) / 4.0
df['average_test_resd'] = (df['uar_test_resd'] + df['war_test_resd'] + df['mf1_test_resd'] + df['wf1_test_resd']) / 4.0
df['avg_test'] = 0.5 * (df['average_test_meld'] + df['average_test_resd'])

In [15]:
df

Unnamed: 0,lr,batch_size,optimizer,uar_dev_meld,war_dev_meld,mf1_dev_meld,wf1_dev_meld,uar_dev_resd,war_dev_resd,mf1_dev_resd,wf1_dev_resd,uar_test_meld,war_test_meld,mf1_test_meld,wf1_test_meld,uar_test_resd,war_test_resd,mf1_test_resd,wf1_test_resd,путь,average_dev_meld,average_dev_resd,average_test_meld,average_test_resd,avg_test
0,0.0001,32,Adam,36.914882,52.119026,37.339609,51.54657,34.982744,35.522388,34.838927,35.41086,38.453355,56.666667,38.438996,57.113097,34.071528,34.642857,33.920942,34.448325,LSTMClassifier_jina_14_39.44_checkpoint.pth,44.480022,35.18873,47.668029,34.270913,40.969471
1,0.001,32,Adam,39.039104,50.586114,37.372601,50.64338,33.576457,33.731343,33.445619,33.942035,39.316384,54.40613,38.178395,55.343301,33.957275,34.285714,34.072937,34.521961,LSTMClassifier_jina_17_39.06_checkpoint.pth,44.4103,33.673863,46.811053,34.209472,40.510262
2,1e-05,32,Adam,42.289859,54.283138,40.174123,53.09947,32.563604,32.238806,32.110821,32.133212,37.315104,54.712644,35.853453,54.569243,31.945962,31.785714,31.224869,31.243014,LSTMClassifier_jina_11_39.14_checkpoint.pth,47.461647,32.261611,45.612611,31.54989,38.58125


In [20]:
%%capture --no-stdout
optimizer_ = "AdamW"
result = []
for lr in [1e-3,1e-4, 1e-5]:
    print(f"lr={lr}, batch_size={BATCH_SIZE}, optimizer={optimizer_}")
    model_lstm = LSTMClassifier(num_classes = 7, model_name='jina', pooling=None, hidden_size=512, num_layers=2, bidirectional=True).to(device)
    optimizer = optim.AdamW(params = model_lstm.parameters(), lr = LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)
    trainer = ModelTrainer(model_lstm, train_dataloader, dev_meld_dataloader, dev_resd_dataloader, test_meld_dataloader, test_resd_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_ACC, optimizer, loss_fn)
    trainer.train(PATH_TO_MODEL)
    checkpoint = torch.load(os.path.join(PATH_TO_MODEL, trainer._best_model_name))
    model_lstm.load_state_dict(checkpoint['model_state_dict'])
    metrics_dev_meld = evaluate_metrics(model_lstm, dev_meld_dataloader)
    metrics_dev_resd = evaluate_metrics(model_lstm, dev_resd_dataloader)
    print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
    print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
    metrics_test_meld = evaluate_metrics(model_lstm, test_meld_dataloader)
    metrics_test_resd = evaluate_metrics(model_lstm, test_resd_dataloader)
    print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
    print("Метрики на тестовой выборке RESD: ", metrics_test_resd)
    result.append([{"lr" : lr, "batch_size": BATCH_SIZE, "optimizer" : optimizer_}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, trainer._best_model_name])

lr=0.001, batch_size=32, optimizer=AdamW
Ранняя остановка на эпохе 16 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 41.44883547580554, 'war': 54.46348061316502, 'mf1': 40.59111644765535, 'wf1': 54.35170418869099}
Метрики на валидационной выборке RESD:  {'uar': 31.445671988602204, 'war': 31.64179104477612, 'mf1': 30.50936975929744, 'wf1': 31.016860220629294}
Метрики на тестовой выборке MELD:  {'uar': 40.52660151007508, 'war': 56.16858237547893, 'mf1': 38.99274488484909, 'wf1': 57.110340844048665}
Метрики на тестовой выборке RESD:  {'uar': 29.900927071979705, 'war': 30.714285714285715, 'mf1': 29.256593406946074, 'wf1': 29.91505572458935}
lr=0.0001, batch_size=32, optimizer=AdamW
Ранняя остановка на эпохе 22 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 36.83143964437682, 'war': 52.38954012623985, 'mf1': 37.27922780650638, 'wf1': 51.44740513809924}
Метрики на валидационной в

In [21]:
df = pd.DataFrame(result, columns=["параметры", "метрики dev meld", "метрики dev resd", "метрики test meld", "метрики test resd", "путь"])
df = pd.concat([df["параметры"].apply(pd.Series), df["метрики dev meld"].apply(pd.Series), df["метрики dev resd"].apply(pd.Series), df["метрики test meld"].apply(pd.Series), df["метрики test resd"].apply(pd.Series), df["путь"]], axis=1)
df.columns = ["lr", "batch_size", "optimizer", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df.to_csv(os.path.join(PATH_TO_MODEL, "result_AdamW_32_lr.csv"))

In [12]:
%%capture --no-stdout
optimizer_ = "SGD"
result = []
for lr in [1e-3,1e-4, 1e-5]:
    print(f"lr={lr}, batch_size={BATCH_SIZE}, optimizer={optimizer_}")
    model_lstm = LSTMClassifier(num_classes = 7, model_name='jina', pooling=None, hidden_size=512, num_layers=2, bidirectional=True).to(device)
    optimizer = optim.SGD(params = model_lstm.parameters(), lr = LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)
    trainer = ModelTrainer(model_lstm, train_dataloader, dev_meld_dataloader, dev_resd_dataloader, test_meld_dataloader, test_resd_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_ACC, optimizer, loss_fn)
    trainer.train(PATH_TO_MODEL)
    checkpoint = torch.load(os.path.join(PATH_TO_MODEL, trainer._best_model_name))
    model_lstm.load_state_dict(checkpoint['model_state_dict'])
    metrics_dev_meld = evaluate_metrics(model_lstm, dev_meld_dataloader)
    metrics_dev_resd = evaluate_metrics(model_lstm, dev_resd_dataloader)
    print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
    print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
    metrics_test_meld = evaluate_metrics(model_lstm, test_meld_dataloader)
    metrics_test_resd = evaluate_metrics(model_lstm, test_resd_dataloader)
    print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
    print("Метрики на тестовой выборке RESD: ", metrics_test_resd)
    result.append([{"lr" : lr, "batch_size": BATCH_SIZE, "optimizer" : optimizer_}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, trainer._best_model_name])

lr=0.001, batch_size=32, optimizer=SGD
Метрики на валидационной выборке MELD:  {'uar': 24.125619206172352, 'war': 33.90441839495041, 'mf1': 21.14971696159699, 'wf1': 33.205229668136376}
Метрики на валидационной выборке RESD:  {'uar': 20.609930928893295, 'war': 21.19402985074627, 'mf1': 16.229831998047004, 'wf1': 16.74713496146953}
Метрики на тестовой выборке MELD:  {'uar': 24.630945804355562, 'war': 34.367816091954026, 'mf1': 21.17399098044318, 'wf1': 35.13282157618069}
Метрики на тестовой выборке RESD:  {'uar': 17.795625427204374, 'war': 17.857142857142858, 'mf1': 14.098575155113638, 'wf1': 14.110768549848757}
lr=0.0001, batch_size=32, optimizer=SGD
Метрики на валидационной выборке MELD:  {'uar': 25.985426825418536, 'war': 38.23264201983769, 'mf1': 24.00480618684994, 'wf1': 36.95529531318729}
Метрики на валидационной выборке RESD:  {'uar': 19.806070254188633, 'war': 21.19402985074627, 'mf1': 15.711368516532668, 'wf1': 16.835661695500157}
Метрики на тестовой выборке MELD:  {'uar': 26.4

In [13]:
df = pd.DataFrame(result, columns=["параметры", "метрики dev meld", "метрики dev resd", "метрики test meld", "метрики test resd", "путь"])
df = pd.concat([df["параметры"].apply(pd.Series), df["метрики dev meld"].apply(pd.Series), df["метрики dev resd"].apply(pd.Series), df["метрики test meld"].apply(pd.Series), df["метрики test resd"].apply(pd.Series), df["путь"]], axis=1)
df.columns = ["lr", "batch_size", "optimizer", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df.to_csv(os.path.join(PATH_TO_MODEL, "result_SGD_32_lr.csv"))

2) BATCH_SIZE=16

In [11]:
BATCH_SIZE = 16
train_dataloader = DataLoader(dataset=Dataset_MELD_RESD('train'), batch_size=BATCH_SIZE, shuffle=True)
dev_meld_dataloader = DataLoader(dataset=Dataset_MELD_RESD('dev_meld'), batch_size=BATCH_SIZE, shuffle=False)
dev_resd_dataloader = DataLoader(dataset=Dataset_MELD_RESD('dev_resd'), batch_size=BATCH_SIZE, shuffle=False)
test_meld_dataloader = DataLoader(dataset=Dataset_MELD_RESD('test_meld'), batch_size=BATCH_SIZE, shuffle=False)
test_resd_dataloader = DataLoader(dataset=Dataset_MELD_RESD('test_resd'), batch_size=BATCH_SIZE, shuffle=False)

In [15]:
%%capture --no-stdout
optimizer_ = "Adam"
result = []
for lr in [1e-3, 1e-4, 1e-5]:
    print(f"lr={lr}, batch_size={BATCH_SIZE}, optimizer={optimizer_}")
    model_lstm = LSTMClassifier(num_classes = 7, model_name='jina', pooling=None, hidden_size=512, num_layers=2, bidirectional=True).to(device)
    optimizer = optim.Adam(params = model_lstm.parameters(), lr = LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)
    trainer = ModelTrainer(model_lstm, train_dataloader, dev_meld_dataloader, dev_resd_dataloader, test_meld_dataloader, test_resd_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_ACC, optimizer, loss_fn)
    trainer.train(PATH_TO_MODEL)
    checkpoint = torch.load(os.path.join(PATH_TO_MODEL, trainer._best_model_name))
    model_lstm.load_state_dict(checkpoint['model_state_dict'])
    metrics_dev_meld = evaluate_metrics(model_lstm, dev_meld_dataloader)
    metrics_dev_resd = evaluate_metrics(model_lstm, dev_resd_dataloader)
    print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
    print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
    metrics_test_meld = evaluate_metrics(model_lstm, test_meld_dataloader)
    metrics_test_resd = evaluate_metrics(model_lstm, test_resd_dataloader)
    print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
    print("Метрики на тестовой выборке RESD: ", metrics_test_resd)
    result.append([{"lr" : lr, "batch_size": BATCH_SIZE, "optimizer" : optimizer_}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, trainer._best_model_name])

lr=0.001, batch_size=16, optimizer=Adam
Ранняя остановка на эпохе 31 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 37.9082321062941, 'war': 53.471596032461676, 'mf1': 38.06577809432722, 'wf1': 52.24745150047174}
Метрики на валидационной выборке RESD:  {'uar': 37.02630327493316, 'war': 37.61194029850746, 'mf1': 36.82303570258752, 'wf1': 37.664416751335104}
Метрики на тестовой выборке MELD:  {'uar': 36.10952335704359, 'war': 54.137931034482754, 'mf1': 35.52870229824608, 'wf1': 54.16875074676529}
Метрики на тестовой выборке RESD:  {'uar': 36.9586884389516, 'war': 37.142857142857146, 'mf1': 36.49902119975916, 'wf1': 36.756111807808416}
lr=0.0001, batch_size=16, optimizer=Adam
Ранняя остановка на эпохе 24 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 37.80257294142749, 'war': 53.832281334535615, 'mf1': 38.253048949289656, 'wf1': 52.43295661929831}
Метрики на валидационной выбо

In [16]:
df = pd.DataFrame(result, columns=["параметры", "метрики dev meld", "метрики dev resd", "метрики test meld", "метрики test resd", "путь"])
df = pd.concat([df["параметры"].apply(pd.Series), df["метрики dev meld"].apply(pd.Series), df["метрики dev resd"].apply(pd.Series), df["метрики test meld"].apply(pd.Series), df["метрики test resd"].apply(pd.Series), df["путь"]], axis=1)
df.columns = ["lr", "batch_size", "optimizer", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df.to_csv(os.path.join(PATH_TO_MODEL, "result_Adam_16_lr.csv"))

In [15]:
%%capture --no-stdout
optimizer_ = "AdamW"
result = []
for lr in [1e-3, 1e-4, 1e-5]:
    print(f"lr={lr}, batch_size={BATCH_SIZE}, optimizer={optimizer_}")
    model_lstm = LSTMClassifier(num_classes = 7, model_name='jina', pooling=None, hidden_size=512, num_layers=2, bidirectional=True).to(device)
    optimizer = optim.AdamW(params = model_lstm.parameters(), lr = LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)
    trainer = ModelTrainer(model_lstm, train_dataloader, dev_meld_dataloader, dev_resd_dataloader, test_meld_dataloader, test_resd_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_ACC, optimizer, loss_fn)
    trainer.train(PATH_TO_MODEL)
    checkpoint = torch.load(os.path.join(PATH_TO_MODEL, trainer._best_model_name))
    model_lstm.load_state_dict(checkpoint['model_state_dict'])
    metrics_dev_meld = evaluate_metrics(model_lstm, dev_meld_dataloader)
    metrics_dev_resd = evaluate_metrics(model_lstm, dev_resd_dataloader)
    print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
    print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
    metrics_test_meld = evaluate_metrics(model_lstm, test_meld_dataloader)
    metrics_test_resd = evaluate_metrics(model_lstm, test_resd_dataloader)
    print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
    print("Метрики на тестовой выборке RESD: ", metrics_test_resd)
    result.append([{"lr" : lr, "batch_size": BATCH_SIZE, "optimizer" : optimizer_}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, trainer._best_model_name])

lr=0.001, batch_size=16, optimizer=AdamW
Ранняя остановка на эпохе 24 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 41.22554824800346, 'war': 54.283137962128045, 'mf1': 40.52411227113837, 'wf1': 54.25882607847431}
Метрики на валидационной выборке RESD:  {'uar': 34.65050561287317, 'war': 35.223880597014926, 'mf1': 34.17343120590235, 'wf1': 34.76978141695343}
Метрики на тестовой выборке MELD:  {'uar': 37.887771400357984, 'war': 54.59770114942529, 'mf1': 37.27032634759639, 'wf1': 55.36358459181183}
Метрики на тестовой выборке RESD:  {'uar': 33.778988318462, 'war': 33.57142857142857, 'mf1': 33.98193760262726, 'wf1': 33.86912664752422}
lr=0.0001, batch_size=16, optimizer=AdamW
Ранняя остановка на эпохе 30 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 36.83705414912752, 'war': 53.11091073038774, 'mf1': 37.907146440686944, 'wf1': 51.60100403246638}
Метрики на валидационной выбор

In [16]:
df = pd.DataFrame(result, columns=["параметры", "метрики dev meld", "метрики dev resd", "метрики test meld", "метрики test resd", "путь"])
df = pd.concat([df["параметры"].apply(pd.Series), df["метрики dev meld"].apply(pd.Series), df["метрики dev resd"].apply(pd.Series), df["метрики test meld"].apply(pd.Series), df["метрики test resd"].apply(pd.Series), df["путь"]], axis=1)
df.columns = ["lr", "batch_size", "optimizer", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df.to_csv(os.path.join(PATH_TO_MODEL, "result_AdamW_16_lr.csv"))

In [13]:
%%capture --no-stdout
optimizer_ = "SGD"
result = []
for lr in [1e-3,1e-4, 1e-5]:
    print(f"lr={lr}, batch_size={BATCH_SIZE}, optimizer={optimizer_}")
    model_lstm = LSTMClassifier(num_classes = 7, model_name='jina', pooling=None, hidden_size=512, num_layers=2, bidirectional=True).to(device)
    optimizer = optim.SGD(params = model_lstm.parameters(), lr = LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)
    trainer = ModelTrainer(model_lstm, train_dataloader, dev_meld_dataloader, dev_resd_dataloader, test_meld_dataloader, test_resd_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_ACC, optimizer, loss_fn)
    trainer.train(PATH_TO_MODEL)
    checkpoint = torch.load(os.path.join(PATH_TO_MODEL, trainer._best_model_name))
    model_lstm.load_state_dict(checkpoint['model_state_dict'])
    metrics_dev_meld = evaluate_metrics(model_lstm, dev_meld_dataloader)
    metrics_dev_resd = evaluate_metrics(model_lstm, dev_resd_dataloader)
    print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
    print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
    metrics_test_meld = evaluate_metrics(model_lstm, test_meld_dataloader)
    metrics_test_resd = evaluate_metrics(model_lstm, test_resd_dataloader)
    print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
    print("Метрики на тестовой выборке RESD: ", metrics_test_resd)
    result.append([{"lr" : lr, "batch_size": BATCH_SIZE, "optimizer" : optimizer_}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, trainer._best_model_name])

lr=0.001, batch_size=16, optimizer=SGD
Метрики на валидационной выборке MELD:  {'uar': 24.566318921594306, 'war': 48.33183047790803, 'mf1': 22.469815017036293, 'wf1': 39.98114159002804}
Метрики на валидационной выборке RESD:  {'uar': 16.793491309567308, 'war': 17.01492537313433, 'mf1': 11.84672919916415, 'wf1': 12.313820248364394}
Метрики на тестовой выборке MELD:  {'uar': 23.891128630791254, 'war': 51.30268199233716, 'mf1': 22.10112001592992, 'wf1': 44.03697767771638}
Метрики на тестовой выборке RESD:  {'uar': 16.262816131237184, 'war': 16.428571428571427, 'mf1': 10.80001207505962, 'wf1': 11.269493018121777}
lr=0.0001, batch_size=16, optimizer=SGD
Метрики на валидационной выборке MELD:  {'uar': 27.28876472887775, 'war': 48.78268710550045, 'mf1': 25.36991551439336, 'wf1': 42.96383501523809}
Метрики на валидационной выборке RESD:  {'uar': 15.305061154896741, 'war': 15.522388059701491, 'mf1': 10.382286247832464, 'wf1': 10.91103585712892}
Метрики на тестовой выборке MELD:  {'uar': 27.4928

In [14]:
df = pd.DataFrame(result, columns=["параметры", "метрики dev meld", "метрики dev resd", "метрики test meld", "метрики test resd", "путь"])
df = pd.concat([df["параметры"].apply(pd.Series), df["метрики dev meld"].apply(pd.Series), df["метрики dev resd"].apply(pd.Series), df["метрики test meld"].apply(pd.Series), df["метрики test resd"].apply(pd.Series), df["путь"]], axis=1)
df.columns = ["lr", "batch_size", "optimizer", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df.to_csv(os.path.join(PATH_TO_MODEL, "result_SGD_16_lr.csv"))

In [13]:
columns = ["lr", "batch_size", "optimizer", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df = pd.concat([pd.read_csv(os.path.join(PATH_TO_MODEL, "result_Adam_16_lr.csv"), index_col=0), pd.read_csv(os.path.join(PATH_TO_MODEL, "result_AdamW_16_lr.csv"), index_col=0), pd.read_csv(os.path.join(PATH_TO_MODEL, "result_SGD_16_lr.csv"), index_col=0)])
df.columns=columns

In [14]:
df['average_dev_meld'] = (df['uar_dev_meld'] + df['war_dev_meld'] + df['mf1_dev_meld'] + df['wf1_dev_meld']) / 4.0
df['average_dev_resd'] = (df['uar_dev_resd'] + df['war_dev_resd'] + df['mf1_dev_resd'] + df['wf1_dev_resd']) / 4.0
df['average_test_meld'] = (df['uar_test_meld'] + df['war_test_meld'] + df['mf1_test_meld'] + df['wf1_test_meld']) / 4.0
df['average_test_resd'] = (df['uar_test_resd'] + df['war_test_resd'] + df['mf1_test_resd'] + df['wf1_test_resd']) / 4.0
df['avg_test'] = 0.5 * (df['average_test_meld'] + df['average_test_resd'])

3) BATCH_SIZE=64

In [18]:
BATCH_SIZE = 64
train_dataloader = DataLoader(dataset=Dataset_MELD_RESD('train'), batch_size=BATCH_SIZE, shuffle=True)
dev_meld_dataloader = DataLoader(dataset=Dataset_MELD_RESD('dev_meld'), batch_size=BATCH_SIZE, shuffle=False)
dev_resd_dataloader = DataLoader(dataset=Dataset_MELD_RESD('dev_resd'), batch_size=BATCH_SIZE, shuffle=False)
test_meld_dataloader = DataLoader(dataset=Dataset_MELD_RESD('test_meld'), batch_size=BATCH_SIZE, shuffle=False)
test_resd_dataloader = DataLoader(dataset=Dataset_MELD_RESD('test_resd'), batch_size=BATCH_SIZE, shuffle=False)

In [19]:
%%capture --no-stdout
optimizer_ = "Adam"
result = []
for lr in [1e-3, 1e-4, 1e-5]:
    print(f"lr={lr}, batch_size={BATCH_SIZE}, optimizer={optimizer_}")
    model_lstm = LSTMClassifier(num_classes = 7, model_name='jina', pooling=None, hidden_size=512, num_layers=2, bidirectional=True).to(device)
    optimizer = optim.Adam(params = model_lstm.parameters(), lr = LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)
    trainer = ModelTrainer(model_lstm, train_dataloader, dev_meld_dataloader, dev_resd_dataloader, test_meld_dataloader, test_resd_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_ACC, optimizer, loss_fn)
    trainer.train(PATH_TO_MODEL)
    checkpoint = torch.load(os.path.join(PATH_TO_MODEL, trainer._best_model_name))
    model_lstm.load_state_dict(checkpoint['model_state_dict'])
    metrics_dev_meld = evaluate_metrics(model_lstm, dev_meld_dataloader)
    metrics_dev_resd = evaluate_metrics(model_lstm, dev_resd_dataloader)
    print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
    print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
    metrics_test_meld = evaluate_metrics(model_lstm, test_meld_dataloader)
    metrics_test_resd = evaluate_metrics(model_lstm, test_resd_dataloader)
    print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
    print("Метрики на тестовой выборке RESD: ", metrics_test_resd)
    result.append([{"lr" : lr, "batch_size": BATCH_SIZE, "optimizer" : optimizer_}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, trainer._best_model_name])

lr=0.001, batch_size=64, optimizer=Adam
Ранняя остановка на эпохе 18 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 39.97693353343437, 'war': 49.77457168620379, 'mf1': 38.35349423327269, 'wf1': 49.44473013662986}
Метрики на валидационной выборке RESD:  {'uar': 32.8969578169432, 'war': 33.134328358208954, 'mf1': 32.70129734558985, 'wf1': 33.132414218710906}
Метрики на тестовой выборке MELD:  {'uar': 38.12396342310343, 'war': 51.685823754789276, 'mf1': 35.61281028321674, 'wf1': 52.75852233465097}
Метрики на тестовой выборке RESD:  {'uar': 33.280203477571895, 'war': 33.92857142857143, 'mf1': 32.801873927000685, 'wf1': 33.49787685624718}
lr=0.0001, batch_size=64, optimizer=Adam
Ранняя остановка на эпохе 20 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 41.68254342227561, 'war': 54.01262398557258, 'mf1': 40.310043585395235, 'wf1': 53.405804823130886}
Метрики на валидационной выб

In [20]:
df = pd.DataFrame(result, columns=["параметры", "метрики dev meld", "метрики dev resd", "метрики test meld", "метрики test resd", "путь"])
df = pd.concat([df["параметры"].apply(pd.Series), df["метрики dev meld"].apply(pd.Series), df["метрики dev resd"].apply(pd.Series), df["метрики test meld"].apply(pd.Series), df["метрики test resd"].apply(pd.Series), df["путь"]], axis=1)
df.columns = ["lr", "batch_size", "optimizer", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df.to_csv(os.path.join(PATH_TO_MODEL, "result_Adam_64_lr.csv"))

In [21]:
%%capture --no-stdout
optimizer_ = "AdamW"
result = []
for lr in [1e-3, 1e-4, 1e-5]:
    print(f"lr={lr}, batch_size={BATCH_SIZE}, optimizer={optimizer_}")
    model_lstm = LSTMClassifier(num_classes = 7, model_name='jina', pooling=None, hidden_size=512, num_layers=2, bidirectional=True).to(device)
    optimizer = optim.AdamW(params = model_lstm.parameters(), lr = LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)
    trainer = ModelTrainer(model_lstm, train_dataloader, dev_meld_dataloader, dev_resd_dataloader, test_meld_dataloader, test_resd_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_ACC, optimizer, loss_fn)
    trainer.train(PATH_TO_MODEL)
    checkpoint = torch.load(os.path.join(PATH_TO_MODEL, trainer._best_model_name))
    model_lstm.load_state_dict(checkpoint['model_state_dict'])
    metrics_dev_meld = evaluate_metrics(model_lstm, dev_meld_dataloader)
    metrics_dev_resd = evaluate_metrics(model_lstm, dev_resd_dataloader)
    print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
    print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
    metrics_test_meld = evaluate_metrics(model_lstm, test_meld_dataloader)
    metrics_test_resd = evaluate_metrics(model_lstm, test_resd_dataloader)
    print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
    print("Метрики на тестовой выборке RESD: ", metrics_test_resd)
    result.append([{"lr" : lr, "batch_size": BATCH_SIZE, "optimizer" : optimizer_}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, trainer._best_model_name])

lr=0.001, batch_size=64, optimizer=AdamW
Ранняя остановка на эпохе 27 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 40.12974818186855, 'war': 53.832281334535615, 'mf1': 40.02500110055115, 'wf1': 52.874713672189536}
Метрики на валидационной выборке RESD:  {'uar': 31.588922525531952, 'war': 32.23880597014925, 'mf1': 31.0371042671595, 'wf1': 31.831163945810477}
Метрики на тестовой выборке MELD:  {'uar': 37.56208451276178, 'war': 55.3639846743295, 'mf1': 37.18877488400657, 'wf1': 55.26102700688984}
Метрики на тестовой выборке RESD:  {'uar': 33.325067634278156, 'war': 33.57142857142857, 'mf1': 33.48948401622059, 'wf1': 33.932631203718564}
lr=0.0001, batch_size=64, optimizer=AdamW
Ранняя остановка на эпохе 23 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 38.61126629944631, 'war': 52.750225428313804, 'mf1': 38.88265219318832, 'wf1': 51.73976862268312}
Метрики на валидационной вы

In [22]:
df = pd.DataFrame(result, columns=["параметры", "метрики dev meld", "метрики dev resd", "метрики test meld", "метрики test resd", "путь"])
df = pd.concat([df["параметры"].apply(pd.Series), df["метрики dev meld"].apply(pd.Series), df["метрики dev resd"].apply(pd.Series), df["метрики test meld"].apply(pd.Series), df["метрики test resd"].apply(pd.Series), df["путь"]], axis=1)
df.columns = ["lr", "batch_size", "optimizer", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df.to_csv(os.path.join(PATH_TO_MODEL, "result_AdamW_64_lr.csv"))

In [29]:
columns = ["lr", "batch_size", "optimizer", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df = pd.concat([pd.read_csv(os.path.join(PATH_TO_MODEL, "result_Adam_32_lr.csv"), index_col=0), 
                pd.read_csv(os.path.join(PATH_TO_MODEL, "result_AdamW_32_lr.csv"), index_col=0), 
                pd.read_csv(os.path.join(PATH_TO_MODEL, "result_SGD_32_lr.csv"), index_col=0),
                pd.read_csv(os.path.join(PATH_TO_MODEL, "result_Adam_16_lr.csv"), index_col=0), 
                pd.read_csv(os.path.join(PATH_TO_MODEL, "result_AdamW_16_lr.csv"), index_col=0), 
                pd.read_csv(os.path.join(PATH_TO_MODEL, "result_SGD_16_lr.csv"), index_col=0),
                pd.read_csv(os.path.join(PATH_TO_MODEL, "result_Adam_64_lr.csv"), index_col=0), 
                pd.read_csv(os.path.join(PATH_TO_MODEL, "result_AdamW_64_lr.csv"), index_col=0)])
df.columns=columns

In [19]:
columns = ["lr", "batch_size", "optimizer", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df = pd.concat([pd.read_csv(os.path.join(PATH_TO_MODEL, "result_Adam_32_lr_.csv"), index_col=0), 
                pd.read_csv(os.path.join(PATH_TO_MODEL, "result_AdamW_32_lr.csv"), index_col=0), 
                pd.read_csv(os.path.join(PATH_TO_MODEL, "result_SGD_32_lr.csv"), index_col=0),
                pd.read_csv(os.path.join(PATH_TO_MODEL, "result_Adam_16_lr.csv"), index_col=0), 
                pd.read_csv(os.path.join(PATH_TO_MODEL, "result_AdamW_16_lr.csv"), index_col=0), 
                pd.read_csv(os.path.join(PATH_TO_MODEL, "result_SGD_16_lr.csv"), index_col=0),
                pd.read_csv(os.path.join(PATH_TO_MODEL, "result_Adam_64_lr.csv"), index_col=0), 
                pd.read_csv(os.path.join(PATH_TO_MODEL, "result_AdamW_64_lr.csv"), index_col=0)])
df.columns=columns

In [20]:
df['average_dev_meld'] = (df['uar_dev_meld'] + df['war_dev_meld'] + df['mf1_dev_meld'] + df['wf1_dev_meld']) / 4.0
df['average_dev_resd'] = (df['uar_dev_resd'] + df['war_dev_resd'] + df['mf1_dev_resd'] + df['wf1_dev_resd']) / 4.0
df['average_test_meld'] = (df['uar_test_meld'] + df['war_test_meld'] + df['mf1_test_meld'] + df['wf1_test_meld']) / 4.0
df['average_test_resd'] = (df['uar_test_resd'] + df['war_test_resd'] + df['mf1_test_resd'] + df['wf1_test_resd']) / 4.0
df['avg_test'] = 0.5 * (df['average_test_meld'] + df['average_test_resd'])

In [21]:
df.sort_values('avg_test' , ascending=False)

Unnamed: 0,lr,batch_size,optimizer,uar_dev_meld,war_dev_meld,mf1_dev_meld,wf1_dev_meld,uar_dev_resd,war_dev_resd,mf1_dev_resd,wf1_dev_resd,uar_test_meld,war_test_meld,mf1_test_meld,wf1_test_meld,uar_test_resd,war_test_resd,mf1_test_resd,wf1_test_resd,путь,average_dev_meld,average_dev_resd,average_test_meld,average_test_resd,avg_test
5,1e-05,32,AdamW,38.729496,53.201082,39.079711,52.827692,32.958375,34.029851,32.314244,33.119662,39.054925,56.666667,38.835626,57.067883,34.094218,35.0,34.036393,34.80689,LSTMClassifier_jina_12_39.45_checkpoint.pth,45.959495,33.105533,47.906275,34.484375,41.195325
1,0.0001,16,Adam,37.802573,53.832281,38.253049,52.432957,35.457073,35.820896,34.933152,35.283559,37.695731,57.356322,38.176643,56.678671,34.486781,35.357143,34.272154,34.82957,LSTMClassifier_jina_14_39.46_checkpoint.pth,45.580215,35.37367,47.476842,34.736412,41.106627
0,0.0001,32,Adam,36.914882,52.119026,37.339609,51.54657,34.982744,35.522388,34.838927,35.41086,38.453355,56.666667,38.438996,57.113097,34.071528,34.642857,33.920942,34.448325,LSTMClassifier_jina_14_39.44_checkpoint.pth,44.480022,35.18873,47.668029,34.270913,40.969471
0,0.001,16,Adam,37.908232,53.471596,38.065778,52.247452,37.026303,37.61194,36.823036,37.664417,36.109523,54.137931,35.528702,54.168751,36.958688,37.142857,36.499021,36.756112,LSTMClassifier_jina_21_40.09_checkpoint.pth,45.423264,37.281424,44.986227,36.83917,40.912698
1,0.0001,16,AdamW,36.837054,53.110911,37.907146,51.601004,33.10637,33.731343,32.428677,33.037918,38.275822,57.318008,38.632614,56.924175,34.095452,35.0,33.155147,33.672629,LSTMClassifier_jina_20_38.29_checkpoint.pth,44.864029,33.076077,47.787655,33.980807,40.884231
1,0.001,32,Adam,39.039104,50.586114,37.372601,50.64338,33.576457,33.731343,33.445619,33.942035,39.316384,54.40613,38.178395,55.343301,33.957275,34.285714,34.072937,34.521961,LSTMClassifier_jina_17_39.06_checkpoint.pth,44.4103,33.673863,46.811053,34.209472,40.510262
1,0.0001,64,AdamW,38.611266,52.750225,38.882652,51.739769,32.740326,33.432836,32.502848,33.093148,37.174405,55.057471,36.350077,55.206466,34.714948,35.0,35.105991,35.265614,LSTMClassifier_jina_13_40.64_checkpoint.pth,45.495978,32.942289,45.947105,35.021638,40.484372
0,0.001,16,AdamW,41.225548,54.283138,40.524112,54.258826,34.650506,35.223881,34.173431,34.769781,37.887771,54.597701,37.270326,55.363585,33.778988,33.571429,33.981938,33.869127,LSTMClassifier_jina_14_39.25_checkpoint.pth,47.572906,34.7044,46.279846,33.80037,40.040108
4,0.0001,32,AdamW,36.83144,52.38954,37.279228,51.447405,34.313625,34.626866,34.010307,34.490858,38.782937,57.471264,38.904485,56.794498,31.63451,32.5,31.559216,32.152163,LSTMClassifier_jina_12_39.32_checkpoint.pth,44.486903,34.360414,47.988296,31.961472,39.974884
0,0.001,64,AdamW,40.129748,53.832281,40.025001,52.874714,31.588923,32.238806,31.037104,31.831164,37.562085,55.363985,37.188775,55.261027,33.325068,33.571429,33.489484,33.932631,LSTMClassifier_jina_17_41.3_checkpoint.pth,46.715436,31.673999,46.343968,33.579653,39.96181


In [32]:
df.sort_values('avg_test' , ascending=False)

Unnamed: 0,lr,batch_size,optimizer,uar_dev_meld,war_dev_meld,mf1_dev_meld,wf1_dev_meld,uar_dev_resd,war_dev_resd,mf1_dev_resd,...,uar_test_resd,war_test_resd,mf1_test_resd,wf1_test_resd,путь,average_dev_meld,average_dev_resd,average_test_meld,average_test_resd,avg_test
2,1e-05,32,Adam,41.789292,53.651939,41.541501,53.521394,32.500326,33.134328,31.875439,...,35.227686,36.428571,34.913584,35.919871,LSTMClassifier_jina_9_39.74_checkpoint.pth,47.626031,32.528032,48.298286,35.622428,41.960357
5,1e-05,32,AdamW,38.729496,53.201082,39.079711,52.827692,32.958375,34.029851,32.314244,...,34.094218,35.0,34.036393,34.80689,LSTMClassifier_jina_12_39.45_checkpoint.pth,45.959495,33.105533,47.906275,34.484375,41.195325
1,0.0001,16,Adam,37.802573,53.832281,38.253049,52.432957,35.457073,35.820896,34.933152,...,34.486781,35.357143,34.272154,34.82957,LSTMClassifier_jina_14_39.46_checkpoint.pth,45.580215,35.37367,47.476842,34.736412,41.106627
0,0.0001,32,Adam,36.914882,52.119026,37.339609,51.54657,34.982744,35.522388,34.838927,...,34.071528,34.642857,33.920942,34.448325,LSTMClassifier_jina_14_39.44_checkpoint.pth,44.480022,35.18873,47.668029,34.270913,40.969471
0,0.001,16,Adam,37.908232,53.471596,38.065778,52.247452,37.026303,37.61194,36.823036,...,36.958688,37.142857,36.499021,36.756112,LSTMClassifier_jina_21_40.09_checkpoint.pth,45.423264,37.281424,44.986227,36.83917,40.912698
1,0.0001,16,AdamW,36.837054,53.110911,37.907146,51.601004,33.10637,33.731343,32.428677,...,34.095452,35.0,33.155147,33.672629,LSTMClassifier_jina_20_38.29_checkpoint.pth,44.864029,33.076077,47.787655,33.980807,40.884231
1,0.0001,64,AdamW,38.611266,52.750225,38.882652,51.739769,32.740326,33.432836,32.502848,...,34.714948,35.0,35.105991,35.265614,LSTMClassifier_jina_13_40.64_checkpoint.pth,45.495978,32.942289,45.947105,35.021638,40.484372
0,0.001,16,AdamW,41.225548,54.283138,40.524112,54.258826,34.650506,35.223881,34.173431,...,33.778988,33.571429,33.981938,33.869127,LSTMClassifier_jina_14_39.25_checkpoint.pth,47.572906,34.7044,46.279846,33.80037,40.040108
4,0.0001,32,AdamW,36.83144,52.38954,37.279228,51.447405,34.313625,34.626866,34.010307,...,31.63451,32.5,31.559216,32.152163,LSTMClassifier_jina_12_39.32_checkpoint.pth,44.486903,34.360414,47.988296,31.961472,39.974884
0,0.001,64,AdamW,40.129748,53.832281,40.025001,52.874714,31.588923,32.238806,31.037104,...,33.325068,33.571429,33.489484,33.932631,LSTMClassifier_jina_17_41.3_checkpoint.pth,46.715436,31.673999,46.343968,33.579653,39.96181


In [18]:
BATCH_SIZE = 32
train_dataloader = DataLoader(dataset=Dataset_MELD_RESD('train'), batch_size=BATCH_SIZE, shuffle=True)
dev_meld_dataloader = DataLoader(dataset=Dataset_MELD_RESD('dev_meld'), batch_size=BATCH_SIZE, shuffle=False)
dev_resd_dataloader = DataLoader(dataset=Dataset_MELD_RESD('dev_resd'), batch_size=BATCH_SIZE, shuffle=False)
test_meld_dataloader = DataLoader(dataset=Dataset_MELD_RESD('test_meld'), batch_size=BATCH_SIZE, shuffle=False)
test_resd_dataloader = DataLoader(dataset=Dataset_MELD_RESD('test_resd'), batch_size=BATCH_SIZE, shuffle=False)

In [13]:
%%capture --no-stdout
result = []
for dropout in [0, 0.2]:
    print(f"dropout = {dropout}")
    model_lstm = LSTMClassifier(num_classes = 7, model_name='jina', pooling=None, hidden_size=512, num_layers=2, dropout=dropout, bidirectional=True).to(device)
    optimizer = optim.Adam(params = model_lstm.parameters(), lr = LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)
    trainer = ModelTrainer(model_lstm, train_dataloader, dev_meld_dataloader, dev_resd_dataloader, test_meld_dataloader, test_resd_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_ACC, optimizer, loss_fn)
    trainer.train(PATH_TO_MODEL)
    checkpoint = torch.load(os.path.join(PATH_TO_MODEL, trainer._best_model_name))
    model_lstm.load_state_dict(checkpoint['model_state_dict'])
    metrics_dev_meld = evaluate_metrics(model_lstm, dev_meld_dataloader)
    metrics_dev_resd = evaluate_metrics(model_lstm, dev_resd_dataloader)
    print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
    print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
    metrics_test_meld = evaluate_metrics(model_lstm, test_meld_dataloader)
    metrics_test_resd = evaluate_metrics(model_lstm, test_resd_dataloader)
    print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
    print("Метрики на тестовой выборке RESD: ", metrics_test_resd)
    result.append([{"dropout" : dropout}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, trainer._best_model_name])

dropout = 0
Ранняя остановка на эпохе 28 из-за отсутствия улучшения точности на тестовой выборке
Ранняя остановка на эпохе 27 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 37.912184115985646, 'war': 53.38142470694319, 'mf1': 36.896080137723224, 'wf1': 51.78705739990214}
Метрики на валидационной выборке RESD:  {'uar': 33.45949007622373, 'war': 33.43283582089553, 'mf1': 33.171520336720874, 'wf1': 33.50039726070359}
Метрики на тестовой выборке MELD:  {'uar': 37.860450008683294, 'war': 56.28352490421455, 'mf1': 37.6742115578044, 'wf1': 55.95640912277109}
Метрики на тестовой выборке RESD:  {'uar': 32.37932951748741, 'war': 32.857142857142854, 'mf1': 32.658371639191266, 'wf1': 33.31895428751104}


In [14]:
df = pd.DataFrame(result, columns=["параметры", "метрики dev meld", "метрики dev resd", "метрики test meld", "метрики test resd", "путь"])
df = pd.concat([df["параметры"].apply(pd.Series), df["метрики dev meld"].apply(pd.Series), df["метрики dev resd"].apply(pd.Series), df["метрики test meld"].apply(pd.Series), df["метрики test resd"].apply(pd.Series), df["путь"]], axis=1)
df.columns = ["dropout", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df.to_csv(os.path.join(PATH_TO_MODEL, "result_dropout.csv"))

In [13]:
columns = ["dropout","uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df = pd.read_csv(os.path.join(PATH_TO_MODEL, "result_dropout.csv"), index_col=0)
df.columns=columns

In [14]:
df['average_dev_meld'] = (df['uar_dev_meld'] + df['war_dev_meld'] + df['mf1_dev_meld'] + df['wf1_dev_meld']) / 4.0
df['average_dev_resd'] = (df['uar_dev_resd'] + df['war_dev_resd'] + df['mf1_dev_resd'] + df['wf1_dev_resd']) / 4.0
df['average_test_meld'] = (df['uar_test_meld'] + df['war_test_meld'] + df['mf1_test_meld'] + df['wf1_test_meld']) / 4.0
df['average_test_resd'] = (df['uar_test_resd'] + df['war_test_resd'] + df['mf1_test_resd'] + df['wf1_test_resd']) / 4.0
df['avg_test'] = 0.5 * (df['average_test_meld'] + df['average_test_resd'])

In [15]:
df.sort_values('avg_test' , ascending=False)

Unnamed: 0,dropout,uar_dev_meld,war_dev_meld,mf1_dev_meld,wf1_dev_meld,uar_dev_resd,war_dev_resd,mf1_dev_resd,wf1_dev_resd,uar_test_meld,...,uar_test_resd,war_test_resd,mf1_test_resd,wf1_test_resd,путь,average_dev_meld,average_dev_resd,average_test_meld,average_test_resd,avg_test
0,0.0,36.173706,53.832281,36.713324,52.482559,33.759715,33.134328,33.067157,33.015469,38.070614,...,34.238051,34.285714,34.019167,34.288143,LSTMClassifier_jina_18_39.46_checkpoint.pth,44.800468,33.244167,47.61501,34.207769,40.91139
1,0.2,37.912184,53.381425,36.89608,51.787057,33.45949,33.432836,33.17152,33.500397,37.86045,...,32.37933,32.857143,32.658372,33.318954,LSTMClassifier_jina_17_39.14_checkpoint.pth,44.994187,33.391061,46.943649,32.80345,39.873549


Лучший результат у dropout=0.1

#### LSTM + xlm-roberta-base

In [17]:
PATH_TO_MODEL = os.path.join(ROOT_DIR, "Models_lstm_xlm-roberta-base")

In [None]:
%%capture --no-stdout
result = []
bidirectional = True
for num_layers in [1, 2, 3]:
    for hidden_size in [64, 128, 256, 512]:
        print(f"hidden_size={hidden_size}, num_layers={num_layers}, bidirectional={bidirectional}")
        model_lstm = LSTMClassifier(num_classes = 7, model_name='xlm-roberta-base', pooling=None, input_size=768, hidden_size=hidden_size, num_layers=num_layers, bidirectional=bidirectional).to(device)
        optimizer = optim.Adam(params = model_lstm.parameters(), lr = LEARNING_RATE)
        loss_fn = nn.CrossEntropyLoss(weight=class_weights)
        trainer = ModelTrainer(model_lstm, train_dataloader, dev_meld_dataloader, dev_resd_dataloader, test_meld_dataloader, test_resd_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_ACC, optimizer, loss_fn)
        trainer.train(PATH_TO_MODEL)
        checkpoint = torch.load(os.path.join(PATH_TO_MODEL, trainer._best_model_name))
        model_lstm.load_state_dict(checkpoint['model_state_dict'])
        metrics_dev_meld = evaluate_metrics(model_lstm, dev_meld_dataloader)
        metrics_dev_resd = evaluate_metrics(model_lstm, dev_resd_dataloader)
        print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
        print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
        metrics_test_meld = evaluate_metrics(model_lstm, test_meld_dataloader)
        metrics_test_resd = evaluate_metrics(model_lstm, test_resd_dataloader)
        print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
        print("Метрики на тестовой выборке RESD: ", metrics_test_resd)
        result.append([{"hidden_size" : hidden_size, "num_layers": num_layers, "bidirectional": bidirectional}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, trainer._best_model_name])

hidden_size=64, num_layers=1, bidirectional=True
Ранняя остановка на эпохе 45 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 38.827812106223774, 'war': 50.40577096483319, 'mf1': 36.91064434417412, 'wf1': 51.676567690318066}
Метрики на валидационной выборке RESD:  {'uar': 26.4127612397616, 'war': 26.56716417910448, 'mf1': 22.839353507268356, 'wf1': 23.048264500795675}
Метрики на тестовой выборке MELD:  {'uar': 39.51256685010097, 'war': 52.03065134099617, 'mf1': 36.787711601586956, 'wf1': 54.38951887584004}
Метрики на тестовой выборке RESD:  {'uar': 22.46590703827546, 'war': 23.57142857142857, 'mf1': 19.03233075470106, 'wf1': 19.843132200070983}
hidden_size=128, num_layers=1, bidirectional=True
Ранняя остановка на эпохе 39 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 37.32787476510458, 'war': 52.569882777276824, 'mf1': 36.64340249387924, 'wf1': 52.96083673631994}
Метрики на

In [None]:
df = pd.DataFrame(result, columns=["параметры", "метрики dev meld", "метрики dev resd", "метрики test meld", "метрики test resd", "путь"])
df = pd.concat([df["параметры"].apply(pd.Series), df["метрики dev meld"].apply(pd.Series), df["метрики dev resd"].apply(pd.Series), df["метрики test meld"].apply(pd.Series), df["метрики test resd"].apply(pd.Series), df["путь"]], axis=1)
df.columns = ["hidden_size", "num_layers", "bidirectional", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df.to_csv(os.path.join(PATH_TO_MODEL, "result_bidirectional_True_num_layers_hidden_size.csv"))

In [21]:
columns = ["hidden_size", "num_layers", "bidirectional","uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df = pd.read_csv(os.path.join(PATH_TO_MODEL, "result_bidirectional_True_num_layers_hidden_size.csv"), index_col=0)
df.columns=columns

In [22]:
df['average_dev_meld'] = (df['uar_dev_meld'] + df['war_dev_meld'] + df['mf1_dev_meld'] + df['wf1_dev_meld']) / 4.0
df['average_dev_resd'] = (df['uar_dev_resd'] + df['war_dev_resd'] + df['mf1_dev_resd'] + df['wf1_dev_resd']) / 4.0
df['average_test_meld'] = (df['uar_test_meld'] + df['war_test_meld'] + df['mf1_test_meld'] + df['wf1_test_meld']) / 4.0
df['average_test_resd'] = (df['uar_test_resd'] + df['war_test_resd'] + df['mf1_test_resd'] + df['wf1_test_resd']) / 4.0
df['avg_test'] = 0.5 * (df['average_test_meld'] + df['average_test_resd'])

In [23]:
df.sort_values('avg_test' , ascending=False)

Unnamed: 0,hidden_size,num_layers,bidirectional,uar_dev_meld,war_dev_meld,mf1_dev_meld,wf1_dev_meld,uar_dev_resd,war_dev_resd,mf1_dev_resd,...,uar_test_resd,war_test_resd,mf1_test_resd,wf1_test_resd,путь,average_dev_meld,average_dev_resd,average_test_meld,average_test_resd,avg_test
11,512,3,True,36.87817,53.561767,36.526089,52.952735,27.74424,26.865672,25.488212,...,30.445908,30.714286,29.095534,29.369825,LSTMClassifier_xlm-roberta-base_13_35.09_check...,44.97969,26.452344,47.255998,29.906388,38.581193
1,128,1,True,37.327875,52.569883,36.643402,52.960837,28.034654,28.059701,26.547774,...,28.834865,28.928571,27.246451,27.20564,LSTMClassifier_xlm-roberta-base_29_36.01_check...,44.875499,27.397019,46.889623,28.053882,37.471752
6,256,2,True,37.313689,52.119026,36.799585,52.434286,26.938045,25.970149,22.34446,...,30.388351,31.071429,26.495679,27.33878,LSTMClassifier_xlm-roberta-base_12_33.78_check...,44.666646,24.505856,46.03859,28.82356,37.431075
7,512,2,True,39.79148,50.856628,38.15859,52.313153,27.462976,27.462687,24.698529,...,29.541851,30.0,26.863666,27.137964,LSTMClassifier_xlm-roberta-base_14_34.68_check...,45.279963,26.194062,45.44193,28.38587,36.9139
2,256,1,True,37.619559,53.201082,37.167445,52.872918,26.275017,25.671642,22.835367,...,27.841583,28.214286,24.767271,25.155193,LSTMClassifier_xlm-roberta-base_14_34.05_check...,45.215251,24.425116,47.036738,26.494583,36.76566
3,512,1,True,38.022669,49.143372,37.002132,50.632134,28.221411,27.761194,27.076303,...,28.609486,28.571429,25.936648,26.097705,LSTMClassifier_xlm-roberta-base_20_34.69_check...,43.700077,27.520712,45.47525,27.303817,36.389533
10,256,3,True,39.045854,51.758341,37.627123,52.077155,26.524231,27.164179,25.854419,...,28.258643,28.571429,26.564373,26.798871,LSTMClassifier_xlm-roberta-base_22_34.61_check...,45.127118,26.546821,45.137059,27.548329,36.342694
5,128,2,True,39.107542,51.217313,38.513537,52.598431,23.907665,23.283582,22.013331,...,25.064699,25.0,23.337434,23.674557,LSTMClassifier_xlm-roberta-base_21_33.44_check...,45.359205,22.964991,45.362316,24.269172,34.815744
4,64,2,True,39.120792,47.700631,37.405101,49.768899,24.83359,25.074627,23.301998,...,26.600391,26.785714,24.380037,24.673931,LSTMClassifier_xlm-roberta-base_48_33.2_checkp...,43.498856,24.309168,43.840214,25.610018,34.725116
9,128,3,True,37.281635,50.766456,36.782749,52.6425,27.489564,25.970149,22.093764,...,24.698927,25.0,20.512225,20.960188,LSTMClassifier_xlm-roberta-base_22_33.53_check...,44.368335,24.359562,46.459616,22.792835,34.626225


#### LSTM + canie-c

In [5]:
PATH_TO_MODEL = os.path.join(ROOT_DIR, "Models_lstm_canine-c")

In [13]:
%%capture --no-stdout
result = []
bidirectional = True
num_layers = 1
for hidden_size in [64, 128, 256, 512]:
    print(f"hidden_size={hidden_size}, num_layers={num_layers}, bidirectional={bidirectional}")
    model_lstm = LSTMClassifier(num_classes = 7, model_name='canine-c', pooling=None, input_size=768, hidden_size=hidden_size, num_layers=num_layers, bidirectional=bidirectional).to(device)
    optimizer = optim.Adam(params = model_lstm.parameters(), lr = LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)
    trainer = ModelTrainer(model_lstm, train_dataloader, dev_meld_dataloader, dev_resd_dataloader, test_meld_dataloader, test_resd_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_ACC, optimizer, loss_fn)
    trainer.train(PATH_TO_MODEL)
    checkpoint = torch.load(os.path.join(PATH_TO_MODEL, trainer._best_model_name))
    model_lstm.load_state_dict(checkpoint['model_state_dict'])
    metrics_dev_meld = evaluate_metrics(model_lstm, dev_meld_dataloader)
    metrics_dev_resd = evaluate_metrics(model_lstm, dev_resd_dataloader)
    print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
    print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
    metrics_test_meld = evaluate_metrics(model_lstm, test_meld_dataloader)
    metrics_test_resd = evaluate_metrics(model_lstm, test_resd_dataloader)
    print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
    print("Метрики на тестовой выборке RESD: ", metrics_test_resd)
    result.append([{"hidden_size" : hidden_size, "num_layers": num_layers, "bidirectional": bidirectional}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, trainer._best_model_name])

hidden_size=64, num_layers=1, bidirectional=True
Ранняя остановка на эпохе 13 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 30.46230807441907, 'war': 45.716862037871955, 'mf1': 29.331022483810866, 'wf1': 45.0623105823488}
Метрики на валидационной выборке RESD:  {'uar': 15.7563025210084, 'war': 15.82089552238806, 'mf1': 7.333087027914613, 'wf1': 7.125856922285126}
Метрики на тестовой выборке MELD:  {'uar': 30.523887391433796, 'war': 47.16475095785441, 'mf1': 28.693554863982783, 'wf1': 47.81916713062483}
Метрики на тестовой выборке RESD:  {'uar': 16.072501072501073, 'war': 17.142857142857142, 'mf1': 8.442150739033154, 'wf1': 8.689907207692048}
hidden_size=128, num_layers=1, bidirectional=True
Ранняя остановка на эпохе 44 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 31.614509863279412, 'war': 44.2741208295762, 'mf1': 30.758969586664985, 'wf1': 45.1612910260657}
Метрики на в

In [14]:
df = pd.DataFrame(result, columns=["параметры", "метрики dev meld", "метрики dev resd", "метрики test meld", "метрики test resd", "путь"])
df = pd.concat([df["параметры"].apply(pd.Series), df["метрики dev meld"].apply(pd.Series), df["метрики dev resd"].apply(pd.Series), df["метрики test meld"].apply(pd.Series), df["метрики test resd"].apply(pd.Series), df["путь"]], axis=1)
df.columns = ["hidden_size", "num_layers", "bidirectional", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df.to_csv(os.path.join(PATH_TO_MODEL, "result_bidirectional_True_num_layers_1_hidden_size.csv"))

In [None]:
%%capture --no-stdout
result = []
bidirectional = True
num_layers = 2
for hidden_size in [64, 128, 256, 512]:
    print(f"hidden_size={hidden_size}, num_layers={num_layers}, bidirectional={bidirectional}")
    model_lstm = LSTMClassifier(num_classes = 7, model_name='canine-c', pooling=None, input_size=768, hidden_size=hidden_size, num_layers=num_layers, bidirectional=bidirectional).to(device)
    optimizer = optim.Adam(params = model_lstm.parameters(), lr = LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)
    trainer = ModelTrainer(model_lstm, train_dataloader, dev_meld_dataloader, dev_resd_dataloader, test_meld_dataloader, test_resd_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_ACC, optimizer, loss_fn)
    trainer.train(PATH_TO_MODEL)
    checkpoint = torch.load(os.path.join(PATH_TO_MODEL, trainer._best_model_name))
    model_lstm.load_state_dict(checkpoint['model_state_dict'])
    metrics_dev_meld = evaluate_metrics(model_lstm, dev_meld_dataloader)
    metrics_dev_resd = evaluate_metrics(model_lstm, dev_resd_dataloader)
    print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
    print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
    metrics_test_meld = evaluate_metrics(model_lstm, test_meld_dataloader)
    metrics_test_resd = evaluate_metrics(model_lstm, test_resd_dataloader)
    print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
    print("Метрики на тестовой выборке RESD: ", metrics_test_resd)
    result.append([{"hidden_size" : hidden_size, "num_layers": num_layers, "bidirectional": bidirectional}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, trainer._best_model_name])

hidden_size=64, num_layers=2, bidirectional=True
Ранняя остановка на эпохе 42 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 31.548537190294144, 'war': 45.807033363390445, 'mf1': 31.170001995649727, 'wf1': 46.058271066917364}
Метрики на валидационной выборке RESD:  {'uar': 19.034880618914233, 'war': 18.80597014925373, 'mf1': 15.834393516202466, 'wf1': 16.04896423173317}
Метрики на тестовой выборке MELD:  {'uar': 33.053646669792656, 'war': 46.89655172413793, 'mf1': 31.194879383111633, 'wf1': 48.41004187339584}
Метрики на тестовой выборке RESD:  {'uar': 15.411133536133534, 'war': 15.357142857142858, 'mf1': 13.114593910840464, 'wf1': 13.049032576493735}
hidden_size=128, num_layers=2, bidirectional=True
Ранняя остановка на эпохе 42 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 29.008440483355447, 'war': 46.528403967538324, 'mf1': 28.956367544465955, 'wf1': 45.34818491827514}
М

In [8]:
df = pd.DataFrame(result, columns=["параметры", "метрики dev meld", "метрики dev resd", "метрики test meld", "метрики test resd", "путь"])
df = pd.concat([df["параметры"].apply(pd.Series), df["метрики dev meld"].apply(pd.Series), df["метрики dev resd"].apply(pd.Series), df["метрики test meld"].apply(pd.Series), df["метрики test resd"].apply(pd.Series), df["путь"]], axis=1)
df.columns = ["hidden_size", "num_layers", "bidirectional", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df.to_csv(os.path.join(PATH_TO_MODEL, "result_bidirectional_True_num_layers_2_hidden_size.csv"))

In [17]:
%%capture --no-stdout
result = []
bidirectional = True
num_layers = 3
for hidden_size in [64, 128, 256, 512]:
    print(f"hidden_size={hidden_size}, num_layers={num_layers}, bidirectional={bidirectional}")
    model_lstm = LSTMClassifier(num_classes = 7, model_name='canine-c', pooling=None, input_size=768, hidden_size=hidden_size, num_layers=num_layers, bidirectional=bidirectional).to(device)
    optimizer = optim.Adam(params = model_lstm.parameters(), lr = LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)
    trainer = ModelTrainer(model_lstm, train_dataloader, dev_meld_dataloader, dev_resd_dataloader, test_meld_dataloader, test_resd_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_ACC, optimizer, loss_fn)
    trainer.train(PATH_TO_MODEL)
    checkpoint = torch.load(os.path.join(PATH_TO_MODEL, trainer._best_model_name))
    model_lstm.load_state_dict(checkpoint['model_state_dict'])
    metrics_dev_meld = evaluate_metrics(model_lstm, dev_meld_dataloader)
    metrics_dev_resd = evaluate_metrics(model_lstm, dev_resd_dataloader)
    print("Метрики на валидационной выборке MELD: ", metrics_dev_meld)
    print("Метрики на валидационной выборке RESD: ", metrics_dev_resd)
    metrics_test_meld = evaluate_metrics(model_lstm, test_meld_dataloader)
    metrics_test_resd = evaluate_metrics(model_lstm, test_resd_dataloader)
    print("Метрики на тестовой выборке MELD: ", metrics_test_meld)
    print("Метрики на тестовой выборке RESD: ", metrics_test_resd)
    result.append([{"hidden_size" : hidden_size, "num_layers": num_layers, "bidirectional": bidirectional}, metrics_dev_meld, metrics_dev_resd, metrics_test_meld, metrics_test_resd, trainer._best_model_name])

hidden_size=64, num_layers=3, bidirectional=True
Ранняя остановка на эпохе 13 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 31.298467256244233, 'war': 47.97114517583408, 'mf1': 31.48225517887124, 'wf1': 46.166255509248785}
Метрики на валидационной выборке RESD:  {'uar': 14.285714285714285, 'war': 16.119402985074625, 'mf1': 4.007421150278293, 'wf1': 4.521806551657297}
Метрики на тестовой выборке MELD:  {'uar': 31.08668197681685, 'war': 51.685823754789276, 'mf1': 30.248508045355234, 'wf1': 50.45404942625312}
Метрики на тестовой выборке RESD:  {'uar': 13.650793650793652, 'war': 15.357142857142858, 'mf1': 3.827325322652425, 'wf1': 4.305740987983978}
hidden_size=128, num_layers=3, bidirectional=True
Ранняя остановка на эпохе 32 из-за отсутствия улучшения точности на тестовой выборке
Метрики на валидационной выборке MELD:  {'uar': 31.291496505725185, 'war': 47.15960324616772, 'mf1': 31.080189534483626, 'wf1': 46.94760443239679}
Метрик

In [18]:
df = pd.DataFrame(result, columns=["параметры", "метрики dev meld", "метрики dev resd", "метрики test meld", "метрики test resd", "путь"])
df = pd.concat([df["параметры"].apply(pd.Series), df["метрики dev meld"].apply(pd.Series), df["метрики dev resd"].apply(pd.Series), df["метрики test meld"].apply(pd.Series), df["метрики test resd"].apply(pd.Series), df["путь"]], axis=1)
df.columns = ["hidden_size", "num_layers", "bidirectional", "uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df.to_csv(os.path.join(PATH_TO_MODEL, "result_bidirectional_True_num_layers_3_hidden_size.csv"))

In [9]:
columns = ["hidden_size", "num_layers", "bidirectional","uar_dev_meld", "war_dev_meld", "mf1_dev_meld", "wf1_dev_meld", "uar_dev_resd", "war_dev_resd", "mf1_dev_resd", "wf1_dev_resd", "uar_test_meld", "war_test_meld", "mf1_test_meld", "wf1_test_meld", "uar_test_resd", "war_test_resd", "mf1_test_resd", "wf1_test_resd", "путь"]
df = pd.concat([pd.read_csv(os.path.join(PATH_TO_MODEL, "result_bidirectional_True_num_layers_1_hidden_size.csv"), index_col=0), pd.read_csv(os.path.join(PATH_TO_MODEL, "result_bidirectional_True_num_layers_2_hidden_size.csv"), index_col=0), pd.read_csv(os.path.join(PATH_TO_MODEL, "result_bidirectional_True_num_layers_3_hidden_size.csv"), index_col=0)])
df.columns=columns

In [10]:
df['average_dev_meld'] = (df['uar_dev_meld'] + df['war_dev_meld'] + df['mf1_dev_meld'] + df['wf1_dev_meld']) / 4.0
df['average_dev_resd'] = (df['uar_dev_resd'] + df['war_dev_resd'] + df['mf1_dev_resd'] + df['wf1_dev_resd']) / 4.0
df['average_test_meld'] = (df['uar_test_meld'] + df['war_test_meld'] + df['mf1_test_meld'] + df['wf1_test_meld']) / 4.0
df['average_test_resd'] = (df['uar_test_resd'] + df['war_test_resd'] + df['mf1_test_resd'] + df['wf1_test_resd']) / 4.0
df['avg_test'] = 0.5 * (df['average_test_meld'] + df['average_test_resd'])

In [11]:
df.sort_values('avg_test' , ascending=False)

Unnamed: 0,hidden_size,num_layers,bidirectional,uar_dev_meld,war_dev_meld,mf1_dev_meld,wf1_dev_meld,uar_dev_resd,war_dev_resd,mf1_dev_resd,wf1_dev_resd,uar_test_meld,war_test_meld,mf1_test_meld,wf1_test_meld,uar_test_resd,war_test_resd,mf1_test_resd,wf1_test_resd,путь,average_dev_meld,average_dev_resd,average_test_meld,average_test_resd,avg_test
3,512,3,True,32.836567,48.872858,32.82088,48.226093,22.875292,23.283582,22.436011,22.72042,32.548708,49.616858,31.951018,49.785772,23.140542,23.571429,23.068252,23.190492,LSTMClassifier_canine-c_23_31.03_checkpoint.pth,40.6891,22.828826,40.975589,23.242679,32.109134
2,256,3,True,31.291497,47.159603,31.08019,46.947604,20.191946,20.597015,19.010481,19.12185,31.956565,48.314176,30.503783,48.824344,21.8004,22.142857,20.480827,20.618378,LSTMClassifier_canine-c_22_28.73_checkpoint.pth,39.119723,19.730323,39.899717,21.260616,30.580166
3,512,1,True,30.338131,47.880974,30.149915,46.40588,22.893636,23.283582,22.49599,22.930615,30.586997,49.54023,29.671017,48.903448,19.84248,20.357143,19.707425,20.046075,LSTMClassifier_canine-c_30_30.55_checkpoint.pth,38.693725,22.900956,39.675423,19.988281,29.831852
1,128,1,True,31.61451,44.274121,30.75897,45.161291,22.328989,22.686567,19.868019,20.394632,31.245216,44.789272,29.732197,46.838967,21.525755,21.785714,19.099977,19.485879,LSTMClassifier_canine-c_34_29.31_checkpoint.pth,37.952223,21.319552,38.151413,20.474331,29.312872
1,128,2,True,29.00844,46.528404,28.956368,45.348185,20.450783,21.492537,19.543413,20.505154,31.454466,50.0,30.922294,49.708009,17.950531,18.571429,17.24753,17.677308,LSTMClassifier_canine-c_32_28.75_checkpoint.pth,37.460349,20.497972,40.521192,17.8617,29.191446
2,256,1,True,32.327251,46.348061,31.234572,46.482165,19.982993,20.0,12.731672,12.677474,34.273613,48.582375,32.684783,50.111253,19.211419,20.357143,12.581806,13.158404,LSTMClassifier_canine-c_9_27.28_checkpoint.pth,39.098012,16.348035,41.413006,16.327193,28.8701
2,256,2,True,29.955079,43.101894,29.072894,43.432904,25.560915,25.970149,25.643111,26.168316,31.097088,44.40613,29.395283,45.685529,18.793261,18.214286,18.427433,18.059571,LSTMClassifier_canine-c_27_30.9_checkpoint.pth,36.390693,25.835623,37.646008,18.373638,28.009823
0,64,2,True,31.548537,45.807033,31.170002,46.058271,19.034881,18.80597,15.834394,16.048964,33.053647,46.896552,31.194879,48.410042,15.411134,15.357143,13.114594,13.049033,LSTMClassifier_canine-c_32_27.9_checkpoint.pth,38.645961,17.431052,39.88878,14.232976,27.060878
0,64,1,True,30.462308,45.716862,29.331022,45.062311,15.756303,15.820896,7.333087,7.125857,30.523887,47.164751,28.693555,47.819167,16.072501,17.142857,8.442151,8.689907,LSTMClassifier_canine-c_3_25.11_checkpoint.pth,37.643126,11.509035,38.55034,12.586854,25.568597
0,64,3,True,31.298467,47.971145,31.482255,46.166256,14.285714,16.119403,4.007421,4.521807,31.086682,51.685824,30.248508,50.454049,13.650794,15.357143,3.827325,4.305741,LSTMClassifier_canine-c_3_25.01_checkpoint.pth,39.229531,9.733586,40.868766,9.285251,25.077008
