In [1]:
import numpy as np
import pandas as pd
from typing import Tuple
import matplotlib.pyplot as plt
import os

#### Word2Vec
import gensim
from gensim.models import Word2Vec

####
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
import torch.nn as nn
import torchutils as tu
from sklearn.model_selection import train_test_split
from collections import Counter

from nltk.corpus import stopwords

stop_words = set(stopwords.words("russian"))

from torchmetrics.classification import (
    BinaryAccuracy,
    BinaryPrecision,
    BinaryRecall,
    BinaryF1Score,
)

# import sys
# import os

# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
# from ..src.rnn_preprocessing_dima import (
#     data_preprocessing,
#     preprocess_single_string,
#     padding,
#     get_words_by_freq,
# )

# from ..src.fit_model import fit_model, fit_with_mlflow, plot_history, binary_metrics

print(gensim.__version__)

4.3.3


In [2]:
import multiprocessing as mp
from dataclasses import dataclass
from typing import Union
from tqdm.auto import tqdm
import mlflow
from time import time
import os
from tqdm.auto import tqdm
import re
import sklearn
import string

sklearn.set_config(transform_output="pandas")

In [3]:
if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# GENERATOR = (
#     torch.Generator(device=DEVICE) if torch.cuda.is_available() else torch.Generator()
# )
GENERATOR = torch.Generator()

use_mlflow = True
mlflow.set_tracking_uri("http://localhost:5000")
CURR_DIR = os.curdir

In [4]:
import pymorphy3

morph = pymorphy3.MorphAnalyzer()

In [5]:
import torch
import torch.nn as nn
import re
import numpy as np
from tqdm.auto import tqdm
import mlflow
from time import time
import os
from torchmetrics.classification import (
    BinaryAccuracy,
    BinaryPrecision,
    BinaryRecall,
    BinaryF1Score,
)

import matplotlib.pyplot as plt


def binary_metrics(outputs, labels, device):
    acc = BinaryAccuracy().to(device)
    prec = BinaryPrecision().to(device)
    rec = BinaryRecall().to(device)
    f1 = BinaryF1Score().to(device)

    preds = outputs.squeeze().float()
    labels = labels.squeeze().float()
    return (
        acc(preds, labels).item(),
        prec(preds, labels).item(),
        rec(preds, labels).item(),
        f1(preds, labels).item(),
    )


def fit_model(
    epochs: int,
    model: nn.Module,
    model_name: str,
    optimizer: torch.optim.Optimizer,
    criterion,
    train_loader,
    valid_loader,
    device,
    use_mlflow=False,
):

    log = dict()
    log["train_loss"] = []
    log["valid_loss"] = []
    log["train_accuracy"] = []
    log["valid_accuracy"] = []
    log["train_precision"] = []
    log["valid_precision"] = []
    log["train_recall"] = []
    log["valid_recall"] = []
    log["train_f1"] = []
    log["valid_f1"] = []

    time_start = time()

    start_epoch = len(log["train_loss"])

    ### Создаем папку для записи весов
    # -----------------------------------------------------------------
    # Создаём корневую папку weights, если её нет
    folder_path = f"weights/"
    model_folder_path = os.path.join(folder_path, f"{model_name}")

    os.makedirs(model_folder_path, exist_ok=True)

    # Список номеров run_*
    run_nums = []

    # Ищем все подпапки с именем run_число
    for item_name in os.listdir(model_folder_path):
        full_path = os.path.join(model_folder_path, item_name)
        if os.path.isdir(full_path):
            match = re.search(r"run_(\d+)", item_name)
            if match:
                run_nums.append(int(match.group(1)))

    # Определяем следующий номер
    run = max(run_nums) + 1 if run_nums else 1

    # Создаём новую папку
    new_folder = os.path.join(model_folder_path, f"run_{run}")
    os.makedirs(new_folder, exist_ok=True)
    # -----------------------------------------------------------------

    ### Цикл обучения
    # -----------------------------------------------------------------
    for epoch in range(start_epoch + 1, start_epoch + epochs + 1):

        curr_run_path = os.path.join(folder_path, model_name, f"run_{run}")

        epoch_time_start = time()

        print(f'{"-"*13} Epoch {epoch} {"-"*13}')

        ### Обучение

        batch_acc = []
        batch_prec = []
        batch_recall = []
        batch_loss = []
        batch_f1 = []

        model.train()

        # Прогресс бар

        train_pbar = tqdm(
            train_loader, desc=f"Epoch {epoch}/{epochs} [Train]", leave=True
        )

        for inputs, labels in train_pbar:

            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            # Функции потерь

            outputs, _ = model(inputs)
            # outputs = model(inputs).squeeze()

            loss = criterion(outputs, labels.float())
            batch_loss.append(loss.item())

            # Метрики
            acc, prec, rec, f1 = binary_metrics(outputs, labels, device=device)

            batch_acc.append(acc)
            batch_prec.append(prec)
            batch_recall.append(rec)
            batch_f1.append(f1)

            loss.backward()
            optimizer.step()

        train_pbar.set_postfix(
            {
                "Loss": loss,
                "Accuracy": acc,
                "Precision": prec,
                "Recall": rec,
                "F1-score": f1,
            }
        )

        log["train_loss"].append(np.mean(batch_loss))
        log["train_accuracy"].append(np.mean(batch_acc))
        log["train_precision"].append(np.mean(batch_prec))
        log["train_recall"].append(np.mean(batch_recall))
        log["train_f1"].append(np.mean(batch_f1))

        ### Валидация

        batch_acc = []
        batch_prec = []
        batch_recall = []
        batch_loss = []
        batch_f1 = []

        model.eval()

        valid_pbar = tqdm(
            valid_loader, desc=f"Epoch {epoch}/{epochs} [Test]", leave=True
        )
        for inputs, labels in valid_pbar:

            inputs = inputs.to(device)
            labels = labels.to(device)

            with torch.no_grad():
                outputs, _ = model(inputs)
                # outputs = model(inputs).squeeze()

            loss = criterion(outputs, labels.float())
            batch_loss.append(loss.item())

            # Метрики
            acc, prec, rec, f1 = binary_metrics(outputs, labels, device=device)

            batch_acc.append(acc)
            batch_prec.append(prec)
            batch_recall.append(rec)
            batch_f1.append(f1)

        valid_pbar.set_postfix(
            {
                "Loss": loss,
                "Accuracy": acc,
                "Precision": prec,
                "Recall": rec,
                "F1-score": f1,
            }
        )
        ### Метрики и логирование

        log["valid_loss"].append(np.mean(batch_loss))
        log["valid_accuracy"].append(np.mean(batch_acc))
        log["valid_precision"].append(np.mean(batch_prec))
        log["valid_recall"].append(np.mean(batch_recall))
        log["valid_f1"].append(np.mean(batch_f1))

        # [MLflow] Логируем метрики
        if use_mlflow:
            # epoch – номер шага (можно указывать step=epoch)
            for c in log.keys():
                mlflow.log_metric(c, log[c][-1], step=epoch)

        epoch_time = time() - epoch_time_start

        ### Выводим результаты эпохи
        # Train stage
        print(
            f"Train stage: "
            f"loss: {log['train_loss'][-1]:>6.3f}  "
            f"Accuracy: {log['train_accuracy'][-1]:>6.3f}  "
            f"Precision: {log['train_precision'][-1]:>6.3f}  "
            f"Recall: {log['train_recall'][-1]:>6.3f}  "
            f"F1-score: {log['train_f1'][-1]:>6.3f}  "
        )

        # Valid stage
        print(
            f"Valid stage: "
            f"loss: {log['valid_loss'][-1]:>6.3f}  "
            f"Accuracy: {log['valid_accuracy'][-1]:>6.3f}  "
            f"Precision: {log['valid_precision'][-1]:>6.3f}  "
            f"Recall: {log['valid_recall'][-1]:>6.3f}  "
            f"F1-score: {log['valid_f1'][-1]:>6.3f}  "
        )
        print(f"Time: {epoch_time}")

        print(f'{"-"*35}\n')
        torch.save(
            model.state_dict(), os.path.join(curr_run_path, f"weight_epoch_{epoch}.pth")
        )

    total_training_time = time() - time_start
    print(f"Total time = {total_training_time:>5.1f} сек")
    # -----------------------------------------------------------------

    return log, total_training_time, run


def fit_with_mlflow(
    model,
    model_name,
    epochs,
    optimizer,
    criterion,
    train_loader,
    valid_loader,
    device,
    batch_size,
    lr,
):
    mlflow.set_experiment(
        f"{model_name} experiment"
    )  # установить (или создать) эксперимент
    with mlflow.start_run(run_name=f"{model_name}_BS = {batch_size}_lr_{lr}"):
        # Логируем гиперпараметры из config
        mlflow.log_param("batch_size", batch_size)
        mlflow.log_param("learning_rate", lr)
        mlflow.log_param("epochs", epochs)
        mlflow.log_param("device", device)
        mlflow.log_param("optimizer", optimizer)
        mlflow.log_param("criterion", criterion)

        # mlflow.pytorch.autolog(
        #     checkpoint=True,
        #     checkpoint_save_best_only=False,
        #     checkpoint_save_weights_only=False,
        #     checkpoint_save_freq="epoch",
        # )
        # mlflow.log_param("augmentation", ("Yes" if augmentation else "No"))
        print("начало обучения...")
        # Запускаем обучение
        logs, tot_time, run = fit_model(
            model=model,
            model_name=model_name,
            epochs=epochs,
            optimizer=optimizer,
            criterion=criterion,
            train_loader=train_loader,
            valid_loader=valid_loader,
            device=device,
            use_mlflow=True,
        )
        mlflow.log_param("Total time", tot_time)

        # Сохраняем модель в MLflow (опционально)
        # mlflow.pytorch.log_model(base_cnn, "model")

    # После выхода из `with` Run автоматически завершается
    return logs, tot_time, run


def plot_history(history, grid=True, suptitle="model 1"):
    fig, ax = plt.subplots(3, 2, figsize=(16, 20))
    fig.suptitle(suptitle, fontsize=24, fontweight="bold", y=0.85)
    ax[0][0].plot(history["train_loss"], label="train loss")
    ax[0][0].plot(history["valid_loss"], label="valid loss")
    ax[0][0].set_title(f'Loss on epoch {len(history["train_loss"])}', fontsize=16)
    ax[0][0].grid(grid)
    ax[0][0].set_ylim((0, max(history["train_loss"] + history["valid_loss"]) + 0.1))
    ax[0][0].legend(fontsize=14)
    ax[0][0].set_xlabel("Epoch", fontsize=14)
    ax[0][0].set_ylabel("Loss", fontsize=14)

    ax[0][1].plot(history["train_accuracy"], label="train accuracy")
    ax[0][1].plot(history["valid_accuracy"], label="valid accuracy")
    ax[0][1].set_title(
        f'Accuracy on epoch {len(history["train_loss"])}',
        fontsize=16,
        fontweight="bold",
    )
    ax[0][1].grid(grid)
    # ax[0][1].set_ylim((min(history["train_accuracy"]) - 0.05, 1))
    ax[0][1].set_ylim(0.5, 1)
    ax[0][1].legend(fontsize=14)
    ax[0][1].set_xlabel("Epoch", fontsize=14)
    ax[0][1].set_ylabel("Accuracy", fontsize=14)

    ax[1][0].plot(history["train_precision"], label="train precision")
    ax[1][0].plot(history["valid_precision"], label="valid precision")
    ax[1][0].set_title(
        f'Precision on epoch {len(history["train_loss"])}',
        fontsize=16,
        fontweight="bold",
    )
    ax[1][0].grid(grid)
    ax[1][0].set_ylim(0.5, 1)
    # ax[1][0].set_ylim(min(history["train_precision"]) - 0.05, 1)
    ax[1][0].legend(fontsize=14)
    ax[1][0].set_xlabel("Epoch", fontsize=14)
    ax[1][0].set_ylabel("Precision", fontsize=14)

    ax[1][1].plot(history["train_recall"], label="train recall")
    ax[1][1].plot(history["valid_recall"], label="valid recall")
    ax[1][1].set_title(
        f'Recal on epoch {len(history["train_loss"])}', fontsize=16, fontweight="bold"
    )
    ax[1][1].grid(grid)
    ax[1][1].set_ylim(0.5, 1)
    # ax[1][1].set_ylim((min(history["train_recall"]) - 0.05, 1))
    ax[1][1].legend(fontsize=14)
    ax[1][1].set_xlabel("Epoch", fontsize=14)
    ax[1][1].set_ylabel("Recal", fontsize=14)

    ax[2][0].plot(history["train_f1"], label="train f1")
    ax[2][0].plot(history["valid_f1"], label="valid f1")
    ax[2][0].set_title(
        f'F1-score on epoch {len(history["train_loss"])}',
        fontsize=16,
        fontweight="bold",
    )
    ax[2][0].grid(grid)
    ax[2][0].set_ylim(0.5, 1)
    # ax[2][0].set_ylim((min(history["train_f1"]) - 0.05, 1))
    ax[2][0].legend(fontsize=14)
    ax[2][0].set_xlabel("Epoch", fontsize=14)
    ax[2][0].set_ylabel("F1", fontsize=14)

    ax[2][1].remove()
    plt.subplots_adjust(top=0.8)
    # plt.tight_layout(rect=[0, 0, 1, 0.8])
    plt.show()
    return fig

In [6]:
stop_words.difference_update({"не", "нет", "без"})

In [7]:
normalized_stop_words = {morph.parse(word)[0].normal_form for word in stop_words}

In [8]:
def data_preprocessing(text: str) -> str:
    """preprocessing string: lowercase, removing html-tags, punctuation,
                            stopwords, digits

    Args:
        text (str): input string for preprocessing

    Returns:
        str: preprocessed string
    """

    text = text.lower()
    text = re.sub("<.*?>", "", text)  # html tags
    text = "".join(
        [c for c in text if c not in string.punctuation]
    )  # Remove punctuation
    text = " ".join([word for word in text.split() if word not in stop_words])
    text = " ".join([word for word in text.split() if not word.isdigit()])
    return text


def get_words_by_freq(sorted_words: list[tuple[str, int]], n: int = 10) -> list:
    return list(filter(lambda x: x[1] > n, sorted_words))


def padding(review_int: list, seq_len: int) -> np.array:  # type: ignore
    """Make left-sided padding for input list of tokens

    Args:
        review_int (list): input list of tokens
        seq_len (int): max length of sequence, it len(review_int[i]) > seq_len it will be trimmed, else it will be padded by zeros

    Returns:
        np.array: padded sequences
    """
    features = np.zeros((len(review_int), seq_len), dtype=int)
    for i, review in enumerate(review_int):
        if len(review) <= seq_len:
            zeros = list(np.zeros(seq_len - len(review)))
            new = zeros + review
        else:
            new = review[:seq_len]
        features[i, :] = np.array(new)

    return features


def preprocess_single_string(
    input_string: str, seq_len: int, vocab_to_int: dict, verbose: bool = False
) -> torch.Tensor:
    """Function for all preprocessing steps on a single string

    Args:
        input_string (str): input single string for preprocessing
        seq_len (int): max length of sequence, it len(review_int[i]) > seq_len it will be trimmed, else it will be padded by zeros
        vocab_to_int (dict, optional): word corpus {'word' : int index}. Defaults to vocab_to_int.

    Returns:
        list: preprocessed string
    """

    preprocessed_string = data_preprocessing(input_string)
    result_list = []
    for word in preprocessed_string.split():
        try:
            result_list.append(vocab_to_int[word])
        except KeyError as e:
            if verbose:
                print(f"{e}: not in dictionary!")
            pass
    result_padded = padding([result_list], seq_len)[0]

    return torch.tensor(result_padded)

In [9]:
df = pd.read_json(
    os.path.join(CURR_DIR, "..", "data", "healthcare_facilities_reviews.jsonl"),
    lines=True,
)
labels = df["sentiment"].copy().apply(lambda x: 1 if x == "positive" else 0)
df["labels"] = labels
data = df.loc[:, ["content", "labels"]].copy()
print(data.shape)
data.head()

(70597, 2)


Unnamed: 0,content,labels
0,Огромное спасибо за чудесное удаление двух зуб...,1
1,Хочу выразить особую благодарность замечательн...,1
2,Добрый вечер! Хотелось бы поблагодарить сотруд...,1
3,Женщины советского образца в регистратуре не и...,0
4,У меня с детства очень плохие зубы (тонкая и х...,1


In [10]:
reviews = df["content"].tolist()
preprocessed = [data_preprocessing(review) for review in reviews]

In [43]:
print(data["content"][985])

Главный врач абсолютно некомпетентна! Зря занимает свою должность и просиживает свою юбку! Её развальная политика приносит свои плоды - низкий уровень медицинского обслуживания. Нарушается основное право гражданина РФ на бесплатную медицинскую помощь!


In [11]:
corpus = [word for text in preprocessed for word in text.split()]
sorted_words = Counter(corpus).most_common()
sorted_words = get_words_by_freq(sorted_words, 200)

In [12]:
vocab_to_int = {w: i + 1 for i, (w, c) in enumerate(sorted_words)}

In [13]:
reviews_int = []
for text in preprocessed:
    r = [vocab_to_int[word] for word in text.split() if vocab_to_int.get(word)]
    reviews_int.append(r)
print([i for i in reviews_int[0]])
print(preprocessed[0])

[21, 4, 712, 511, 259, 1089, 34, 423, 245, 736, 664, 477, 866, 1613, 712, 31, 1003, 243, 15, 104, 1987, 2, 223]
огромное спасибо чудесное удаление двух зубов мудрости мгновение доктор матвеев профессионал большой буквы боялась страшно заняло реально секунд согласилась удаление сразу второго зуба без боли страха очень рекомендую


In [14]:
w2v_input = []
for review in preprocessed:
    cur_review = []
    for word in review.split():
        if vocab_to_int.get(word):
            cur_review.append(word)
    w2v_input.append(cur_review)
print(f"Total reviews: {len(w2v_input)}")
print(f"Random review for word2vec: {w2v_input[np.random.randint(0, 50000)]}")

Total reviews: 70597
Random review for word2vec: ['обращалась', 'клинику', 'острой', 'болью', 'ночи', 'несмотря', 'час', 'администратор', 'алексеевна', 'прошла', 'кабинет', 'мной', 'зашла', 'молодая', 'девушка', 'сначала', 'не', 'это', 'врач', 'поскольку', 'боль', 'очень', 'не', 'оказалось', 'повезло', 'настоящий', 'профессионал', 'точно', 'знает', 'делает', 'боль', 'рукой', 'лечение', 'идет', 'принимает', 'удобное', 'время', 'лечусь', 'другим', 'советую']


In [15]:
VOCAB_SIZE = len(vocab_to_int) + 1  # размер словаря вместе с токеном padding
EMBEDDING_DIM = 64  # embedding_dim
VOCAB_SIZE

3159

In [16]:
# Обучим Word2Vec
wv = Word2Vec(vector_size=EMBEDDING_DIM)  # размерность вектора для слова
# Сначала word2vec составляет словарь
wv.build_vocab(w2v_input)
print(f"Total reviews: {wv.corpus_count}")

Total reviews: 70597


In [17]:
wv.train(corpus_iterable=w2v_input, total_examples=wv.corpus_count, epochs=20)

(52531057, 57581740)

In [18]:
# os.mkdir(os.path.join(os.curdir, "weights/W2V_weights/"))
wv.save(os.path.join(os.curdir, "weights/W2V_weights/W2V_model.model"))

In [19]:
# Создаем слой эмбеддинга
embedding_matrix = np.zeros((VOCAB_SIZE, EMBEDDING_DIM))

# Бежим по всем словам словаря: если слово есть в word2vec,
# достаем его вектор; если слова нет, то распечатываем его и пропускаем
for word, i in vocab_to_int.items():
    try:
        embedding_vector = wv.wv[word]
        embedding_matrix[i] = embedding_vector
    except KeyError as e:
        pass
        print(f"{e}: word: {word}")

In [20]:
embedding_layer = nn.Embedding.from_pretrained(torch.FloatTensor(embedding_matrix))
print(f"Number of words x EMEDDING_DIM: {embedding_matrix.shape}")

Number of words x EMEDDING_DIM: (3159, 64)


In [21]:
padded = padding(review_int=reviews_int, seq_len=64)

In [22]:
X_train, X_valid, y_train, y_valid = train_test_split(
    np.array(padded),
    pd.get_dummies(df["sentiment"], drop_first=True).values.astype("int"),
    test_size=0.15,
    random_state=1,
)

In [23]:
BATCH_SIZE = 64
HIDDEN_SIZE = 32
SEQ_LEN = 64
train_data = TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train))
valid_data = TensorDataset(torch.from_numpy(X_valid), torch.from_numpy(y_valid))

In [32]:
train_loader = DataLoader(
    train_data,
    shuffle=True,
    batch_size=BATCH_SIZE,
    drop_last=True,
    num_workers=0,
    pin_memory=True,
    generator=GENERATOR,
)
valid_loader = DataLoader(
    valid_data,
    shuffle=True,
    batch_size=BATCH_SIZE,
    drop_last=True,
    num_workers=0,
    pin_memory=True,
    generator=GENERATOR,
)

In [33]:
class BahdanauAttention(nn.Module):
    def __init__(self, hidden_size: int = HIDDEN_SIZE) -> None:
        super().__init__()
        self.hidden_size = hidden_size
        self.linear_key = nn.Linear(hidden_size, hidden_size)
        self.linear_query = nn.Linear(hidden_size, hidden_size)
        self.cls = nn.Linear(hidden_size, 1)
        self.tanh = nn.Tanh()

    def forward(self, lstm_outputs, final_hidden):
        # print(f"LSTM output shape: {lstm_outputs.shape}")
        # print(f"Final_hidden shape: {final_hidden.shape}")
        keys = self.linear_key(lstm_outputs)  # (batch_size, seq_len, hidden_size)
        # print(f"After linear keys shape: {keys.shape}")
        query = self.linear_query(final_hidden)  # (batch_size, hidden_size)
        query = query.unsqueeze(1)  # (batch_size, 1, hidden_size)
        # print(f"After linear query shape: {query.shape}")
        x = self.tanh(keys + query)  # (batch_size, seq_len, hidden_size)
        # print(f"After + X shape: {x.shape}")
        x = self.cls(x)  # (batch_size, seq_len, 1)
        # print(f"After cls x shape: {x.shape}")
        x = x.squeeze(-1)  # (batch_size, seq_len)
        # print(f"After squeeze x shape: {x.shape}")
        attention_weights = F.softmax(x, dim=-1)  # (batch_size, seq_len)
        # print(f"Attention weights shape: {attention_weights.shape}")
        attention_weights_bmm = attention_weights.unsqueeze(
            1
        )  # (batch_size, 1, seq_len)
        # print(f"Attention weights for bmm shape: {attention_weights_bmm.shape}")

        # bmm : (batch_size, 1, seq_len) * (batch_size, seq_len, hidden_size) = (batch_size, 1, hidden_size)
        context = torch.bmm(attention_weights_bmm, keys)  # (batch_size, 1, hidden_size)
        # print(f"Context shape: {context.shape}")
        context = context.squeeze(1)
        # print(f"Context final shape: {context.shape}")

        return context, attention_weights

In [34]:
@dataclass
class Config:
    n_layers: int
    embedding_size: int
    hidden_size: int
    vocab_size: int = VOCAB_SIZE
    device: str = DEVICE
    seq_len: int = SEQ_LEN
    bidirectional: Union[bool, int] = False

In [35]:
my_config = Config(
    n_layers=4,
    embedding_size=64,
    hidden_size=32,
    vocab_size=VOCAB_SIZE,
    device=DEVICE,
    seq_len=64,
    bidirectional=False,
)

In [36]:
class LSTMBahdanauAttention(nn.Module):
    def __init__(self, config=my_config) -> None:
        super().__init__()

        # инициализируем конфиг
        self.config = config
        self.seq_len = self.config.seq_len
        self.vocab_size = self.config.vocab_size
        self.hidden_size = self.config.hidden_size
        self.emb_size = self.config.embedding_size
        self.n_layers = self.config.n_layers
        self.device = self.config.device
        self.bidirectional = bool(self.config.bidirectional)

        self.embedding = embedding_layer
        self.lstm = nn.LSTM(self.emb_size, self.hidden_size, batch_first=True)
        self.bidirect_factor = 2 if self.bidirectional == 1 else 1
        self.attn = BahdanauAttention(self.hidden_size)
        self.clf = nn.Sequential(
            nn.Linear(self.hidden_size, 128), nn.Dropout(), nn.Tanh(), nn.Linear(128, 1)
        )

    def model_description(self):
        direction = "bidirect" if self.bidirectional else "onedirect"
        return f"rnn_{direction}_{self.n_layers}"

    def forward(self, x):
        embeddings = self.embedding(x)
        outputs, (h_n, _) = self.lstm(embeddings)
        # att_hidden, att_weights = self.attn(outputs, h_n[-1].squeeze(0))
        att_hidden, att_weights = self.attn(outputs, h_n[-1])
        out = self.clf(att_hidden)
        return out, att_weights

In [37]:
model = LSTMBahdanauAttention(config=my_config)
model = model.to(my_config.device)
criterion = nn.BCEWithLogitsLoss()
LR_BAN = 1e-4
WEIGHT_DECAY_BAN = 3e-4
optim = torch.optim.AdamW(model.parameters(), lr=LR_BAN, weight_decay=WEIGHT_DECAY_BAN)

In [38]:
!nvidia-smi

Fri Sep 26 12:53:49 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.172.08             Driver Version: 570.172.08     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3080        Off |   00000000:01:00.0  On |                  N/A |
| 53%   47C    P0            113W /  340W |    1273MiB /  10240MiB |     17%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [39]:
logs, total_time, run = fit_with_mlflow(
    model=model,
    model_name="LSTM+BahdanauAtt",
    epochs=5,
    criterion=criterion,
    optimizer=optim,
    train_loader=train_loader,
    valid_loader=valid_loader,
    device=my_config.device,
    batch_size=BATCH_SIZE,
    lr=LR_BAN,
)

начало обучения...
------------- Epoch 1 -------------


Epoch 1/5 [Train]:   0%|          | 0/937 [00:00<?, ?it/s]

Epoch 1/5 [Test]:   0%|          | 0/165 [00:00<?, ?it/s]

Train stage: loss:  0.379  Accuracy:  0.822  Precision:  0.824  Recall:  0.904  F1-score:  0.854  
Valid stage: loss:  0.231  Accuracy:  0.914  Precision:  0.932  Recall:  0.920  F1-score:  0.925  
Time: 9.281614542007446
-----------------------------------

------------- Epoch 2 -------------


Epoch 2/5 [Train]:   0%|          | 0/937 [00:00<?, ?it/s]

Epoch 2/5 [Test]:   0%|          | 0/165 [00:00<?, ?it/s]

Train stage: loss:  0.214  Accuracy:  0.921  Precision:  0.940  Recall:  0.924  F1-score:  0.931  
Valid stage: loss:  0.210  Accuracy:  0.925  Precision:  0.942  Recall:  0.928  F1-score:  0.934  
Time: 9.374075889587402
-----------------------------------

------------- Epoch 3 -------------


Epoch 3/5 [Train]:   0%|          | 0/937 [00:00<?, ?it/s]

Epoch 3/5 [Test]:   0%|          | 0/165 [00:00<?, ?it/s]

Train stage: loss:  0.199  Accuracy:  0.926  Precision:  0.944  Recall:  0.929  F1-score:  0.936  
Valid stage: loss:  0.201  Accuracy:  0.927  Precision:  0.946  Recall:  0.927  F1-score:  0.935  
Time: 9.068009376525879
-----------------------------------

------------- Epoch 4 -------------


Epoch 4/5 [Train]:   0%|          | 0/937 [00:00<?, ?it/s]

Epoch 4/5 [Test]:   0%|          | 0/165 [00:00<?, ?it/s]

Train stage: loss:  0.190  Accuracy:  0.928  Precision:  0.947  Recall:  0.931  F1-score:  0.938  
Valid stage: loss:  0.194  Accuracy:  0.930  Precision:  0.945  Recall:  0.936  F1-score:  0.939  
Time: 8.940185070037842
-----------------------------------

------------- Epoch 5 -------------


Epoch 5/5 [Train]:   0%|          | 0/937 [00:00<?, ?it/s]

Epoch 5/5 [Test]:   0%|          | 0/165 [00:00<?, ?it/s]

Train stage: loss:  0.184  Accuracy:  0.931  Precision:  0.948  Recall:  0.933  F1-score:  0.940  
Valid stage: loss:  0.191  Accuracy:  0.930  Precision:  0.942  Recall:  0.938  F1-score:  0.939  
Time: 8.944113492965698
-----------------------------------

Total time =  45.6 сек
🏃 View run LSTM+BahdanauAtt_BS = 64_lr_0.0001 at: http://localhost:5000/#/experiments/590357008120533451/runs/70a33dc53c7a48d486c3a9ecfae6fbfd
🧪 View experiment at: http://localhost:5000/#/experiments/590357008120533451
