# Login


In [1]:
!pip install transformers huggingface_hub



In [2]:
repo_prefix = "Goshective/"

In [3]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Process Data

## Load Data

In [36]:
!mkdir -p Assets

In [None]:
import pandas as pd
import numpy as np
import torch
import json


data = pd.read_csv('Database/clean_data.csv', sep=';', encoding='utf-8')

with open("converter.json", "r", encoding="utf-8") as f:
    converter = json.load(f)

label2id = converter["label2id_reduced"]
id2label = converter["id2label_reduced"]

data.head()

Unnamed: 0,detailed_topic,appeal
0,"содержание л/клеток, дворовых территорий",ЗАЯВЛЕНИЕ нарушение периодичности проведения ...
1,ПРАВИЛА ПОЛЬЗОВАНИЯ ЖИЛЫМИ ПОМЕЩЕНИЯМИ (ПЕРЕПЛ...,"\nДобрый день,Прошу рассмотреть по существу жа..."
2,фасады,"\nИнформирую вас, что с 08.04.2024 в МЖД по ад..."
3,"содержание л/клеток, дворовых территорий","\nКоллективная жалоба на факт нарушения ТСЖ""Ко..."
4,подвалы,Прошу рассмотреть прилагающееся заявление о в...


## Prepare Data

In [12]:
from sklearn.model_selection import train_test_split

# topic_id -> sequential label index
data["label"] = data["detailed_topic"].map(label2id)

# Split data into training & validation sets
train_texts, val_texts, train_labels, val_labels = train_test_split(
    data["appeal"].tolist(), data["label"].tolist(), test_size=0.2, random_state=42
)

num_labels = len(label2id)

# Utilities

In [8]:
class AppealDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

In [9]:
from sklearn.metrics import accuracy_score, f1_score

def roc_auc_multiclass(labels, probabilities):
    """
    Compute the multi-class ROC AUC using the One-Versus-Rest approach.

    Parameters:
    - labels: (955,) array with true class indices (values from 0 to 108)
    - probabilities: (955, 109) array with predicted probabilities for each class

    Returns:
    - Macro-averaged ROC AUC score
    """
    num_classes = probabilities.shape[1]  # Should be 109 classes
    labels_one_hot = np.eye(num_classes)[labels]  # Convert labels to one-hot encoding (955, 109)

    aucs = []  # List to store AUC for each class

    for i in range(num_classes):
        # True labels for class i (binary: 1 if true class, 0 otherwise)
        y_true = labels_one_hot[:, i]  # Shape: (955,)

        # Predicted probabilities for class i
        y_score = probabilities[:, i]  # Shape: (955,)

        # **Check if class i is missing in the batch**
        num_positives = np.sum(y_true)
        num_negatives = len(y_true) - num_positives

        if num_positives == 0 or num_negatives == 0:
            aucs.append(0.5)  # If only one class is present, set AUC to 0.5 (random chance)
            continue  # Skip further computation

        # Sort by predicted score (descending order)
        sorted_indices = np.argsort(-y_score)
        y_true_sorted = y_true[sorted_indices]

        # Compute TPR and FPR
        cum_positive = np.cumsum(y_true_sorted)
        cum_negative = np.cumsum(1 - y_true_sorted)

        TPR = cum_positive / num_positives  # True positive rate
        FPR = cum_negative / num_negatives  # False positive rate

        auc = np.trapz(TPR, FPR)  # Compute AUC using trapezoidal rule
        aucs.append(auc)

    return np.mean(aucs)  # Macro-averaged AUC over all classes


def compute_metrics_top_k(eval_pred, k=3):
    """
    Compute accuracy, top-K accuracy, and ROC AUC for multi-class classification.

    Parameters:
    - eval_pred: Tuple (logits, labels), where:
      - logits: (N, num_classes) array of raw model outputs (before softmax)
      - labels: (N,) array of true class indices
    - k: Number of top predictions to consider for accuracy

    Returns:
    - Dictionary with accuracy, top-K accuracy, and ROC AUC
    """
    logits, labels = eval_pred  # Unpack logits (raw scores) and true labels
    probabilities = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)  # Softmax

    # Top-1 (standard accuracy)
    predictions = np.argmax(logits, axis=-1)  # Get class with highest probability
    acc = accuracy_score(labels, predictions)  # Standard accuracy

    # Top-K Accuracy Calculation
    top_k_predictions = np.argsort(-probabilities, axis=-1)[:, :k]  # Get top-K predicted classes
    top_k_correct = np.any(top_k_predictions == labels[:, None], axis=-1)  # Check if true label is in top-K
    top_k_acc = np.mean(top_k_correct)  # Compute top-K accuracy

     # Top-5 Accuracy (Calculate-Ability of Classes)
    top_5_predictions = np.argsort(-probabilities, axis=-1)[:, :5]
    top_5_correct = np.any(top_5_predictions == labels[:, None], axis=-1)
    top_5_acc = np.mean(top_5_correct)

    # ROC AUC Calculation
    auc = roc_auc_multiclass(labels, probabilities)  # Compute multi-class AUC

    f1 = f1_score(labels, predictions, average="weighted")  # Standard F1-score

    return {"accuracy": acc, f"top_{k}_accuracy": top_k_acc, f"top_5_accuracy": top_5_acc, "roc_auc": auc, "f1": f1}

# BERT-Base Model

In [10]:
# repo_dir = "lab_comm_services_detailed_1" # previous version
repo_dir_bert_base = "lab_comm_services_bert_reduced" # our main

### Set up model

In [11]:
from transformers import AutoTokenizer

model_name = "DeepPavlov/rubert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=512)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=512)

# PyTorch datasets
train_dataset = AppealDataset(train_encodings, train_labels)
val_dataset = AppealDataset(val_encodings, val_labels)

tokenizer_config.json:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [13]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

pytorch_model.bin:   0%|          | 0.00/714M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at DeepPavlov/rubert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Run Training

In [14]:
from transformers import TrainingArguments, Trainer


training_args = TrainingArguments(
    output_dir=repo_dir_bert_base,
    eval_strategy="epoch",  # Evaluate only at the end of each epoch
    num_train_epochs=7,  # Total number of training epochs
    per_device_train_batch_size=16,  # Increase if GPU memory allows
    per_device_eval_batch_size=16,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50,  # Log every 50 steps (optional)
    fp16=True,  # Mixed precision for speedup
    gradient_accumulation_steps=2,  # Simulates a larger batch size
    learning_rate=2e-5,  # Fine-tuned learning rate for stability
    warmup_ratio=0.1,  # Gradual learning rate increase at the start
    save_strategy="epoch",  # Save model at the end of each epoch
    save_total_limit=2,  # Keep only 2 most recent checkpoints
    # report_to="none"  # Disable W&B logging if not needed
)

training_args_debug = TrainingArguments(
    output_dir=repo_dir_bert_base,
    eval_strategy="epoch",  # Evaluate only after each epoch
    per_device_train_batch_size=32,  # Increase if GPU allows
    per_device_eval_batch_size=64,   # Larger batch for faster eval
    num_train_epochs=3,  # Reduce if overfitting
    weight_decay=0.001,  # Lighter weight decay
    logging_dir="./logs",
    logging_steps=500,  # Less frequent logging
    fp16=True,  # Enable mixed precision for speed
    gradient_accumulation_steps=2,  # Helps if batch size is limited
    dataloader_num_workers=4,  # Faster data loading
    push_to_hub=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics_top_k,
)

In [15]:
trainer.train()



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mden-skvortsoff[0m ([33mden-skvortsoff-itmo-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy,Top 3 Accuracy,Top 5 Accuracy,Roc Auc,F1
1,2.8033,2.130625,0.47644,0.689005,0.770681,0.814618,0.357173
2,1.8465,1.625184,0.546597,0.794764,0.872251,0.916654,0.462006
3,1.3845,1.430893,0.609424,0.835602,0.902618,0.918389,0.560876
4,1.1647,1.316225,0.657592,0.853403,0.906806,0.927136,0.627715
5,0.9693,1.29611,0.651309,0.859686,0.906806,0.924005,0.617096
6,0.9219,1.287812,0.662827,0.856545,0.906806,0.924715,0.6356
7,0.8208,1.284799,0.66911,0.855497,0.904712,0.924645,0.642333


  auc = np.trapz(TPR, FPR)  # Compute AUC using trapezoidal rule
  auc = np.trapz(TPR, FPR)  # Compute AUC using trapezoidal rule
  auc = np.trapz(TPR, FPR)  # Compute AUC using trapezoidal rule
  auc = np.trapz(TPR, FPR)  # Compute AUC using trapezoidal rule
  auc = np.trapz(TPR, FPR)  # Compute AUC using trapezoidal rule
  auc = np.trapz(TPR, FPR)  # Compute AUC using trapezoidal rule
  auc = np.trapz(TPR, FPR)  # Compute AUC using trapezoidal rule


TrainOutput(global_step=840, training_loss=1.4537833032153902, metrics={'train_runtime': 1152.0197, 'train_samples_per_second': 23.193, 'train_steps_per_second': 0.729, 'total_flos': 7032210371923968.0, 'train_loss': 1.4537833032153902, 'epoch': 7.0})

In [16]:
repo_name = repo_prefix + repo_dir_bert_base

# Save model and tokenizer locally
model.save_pretrained(repo_name)
tokenizer.save_pretrained(repo_name)

# Push to Hugging Face Hub
model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)

model.safetensors:   0%|          | 0.00/712M [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Goshective/lab_comm_services_bert_reduced/commit/2df78769d149713ef9589deab323a62a0883683c', commit_message='Upload tokenizer', commit_description='', oid='2df78769d149713ef9589deab323a62a0883683c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Goshective/lab_comm_services_bert_reduced', endpoint='https://huggingface.co', repo_type='model', repo_id='Goshective/lab_comm_services_bert_reduced'), pr_revision=None, pr_num=None)

In [18]:
dict_stats_bert_base = trainer.evaluate(eval_dataset=val_dataset, metric_key_prefix="eval")
dict_stats_bert_base['log_history'] = trainer.state.log_history
dict_stats_bert_base

  auc = np.trapz(TPR, FPR)  # Compute AUC using trapezoidal rule


{'eval_loss': 1.284799337387085,
 'eval_accuracy': 0.669109947643979,
 'eval_top_3_accuracy': 0.8554973821989529,
 'eval_top_5_accuracy': 0.9047120418848168,
 'eval_roc_auc': 0.9246446705016445,
 'eval_f1': 0.6423334232464655,
 'eval_runtime': 9.9779,
 'eval_samples_per_second': 95.712,
 'eval_steps_per_second': 6.013,
 'epoch': 7.0,
 'log_history': [{'loss': 3.4066,
   'grad_norm': 3.8627471923828125,
   'learning_rate': 1.1428571428571429e-05,
   'epoch': 0.41841004184100417,
   'step': 50},
  {'loss': 2.8033,
   'grad_norm': 6.200514316558838,
   'learning_rate': 1.962962962962963e-05,
   'epoch': 0.8368200836820083,
   'step': 100},
  {'eval_loss': 2.130624771118164,
   'eval_accuracy': 0.47643979057591623,
   'eval_top_3_accuracy': 0.6890052356020943,
   'eval_top_5_accuracy': 0.7706806282722513,
   'eval_roc_auc': 0.8146183787346792,
   'eval_f1': 0.3571728834198921,
   'eval_runtime': 14.4628,
   'eval_samples_per_second': 66.032,
   'eval_steps_per_second': 4.149,
   'epoch': 1

# RuRoberta (SBER) Model

In [19]:
repo_dir_ruroberta = "lab_comm_services_sber_reduce"

### Set up model

In [20]:
from transformers import RobertaTokenizer

model_name = "sberbank-ai/ruRoberta-large"
tokenizer = RobertaTokenizer.from_pretrained(model_name)

train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=512)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=512)

# PyTorch datasets
train_dataset = AppealDataset(train_encodings, train_labels)
val_dataset = AppealDataset(val_encodings, val_labels)

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/674 [00:00<?, ?B/s]

In [21]:
from transformers import RobertaForSequenceClassification

model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

pytorch_model.bin:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at sberbank-ai/ruRoberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Run Training

In [23]:
from transformers import Trainer, TrainingArguments


repo_name = repo_prefix + repo_dir_ruroberta

training_args = TrainingArguments(
    output_dir=repo_dir_ruroberta,
    push_to_hub=True,
    hub_model_id=repo_name,
    eval_strategy="epoch",  # Evaluate only at the end of each epoch
    num_train_epochs=4,  # Total number of training epochs
    per_device_train_batch_size=8,  # Increase if GPU memory allows
    per_device_eval_batch_size=8,
    weight_decay=0.01,
    logging_dir="./logs",
    fp16=True,  # Mixed precision for speedup
    gradient_accumulation_steps=2,  # Simulates a larger batch size
    learning_rate=2e-5,  # Fine-tuned learning rate for stability
    warmup_ratio=0.1,  # Gradual learning rate increase at the start
    save_strategy="epoch",  # Save model at the end of each epoch
    save_total_limit=2,  # Keep only 2 most recent checkpoints
    # report_to="none"  # Disable W&B logging if not needed
)


training_args_debug = TrainingArguments(
    output_dir=repo_dir_ruroberta,
    push_to_hub=True,
    hub_model_id=repo_name,
    eval_strategy="epoch",  # Evaluate only after each epoch
    per_device_train_batch_size=8,  # Increase if GPU allows
    per_device_eval_batch_size=8,   # Larger batch for faster eval
    num_train_epochs=2,  # Reduce if overfitting
    weight_decay=0.001,  # Lighter weight decay
    logging_dir="./logs",
    logging_steps=500,  # Less frequent logging
    fp16=True,  # Enable mixed precision for speed
    gradient_accumulation_steps=2,  # Helps if batch size is limited
    dataloader_num_workers=2,  # Faster data loading
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics_top_k,
)

In [24]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Top 3 Accuracy,Top 5 Accuracy,Roc Auc,F1
1,No log,1.315583,0.649215,0.86178,0.907853,0.941412,0.619669
2,No log,1.161212,0.680628,0.872251,0.925654,0.940454,0.657501
3,1.650700,1.120677,0.690052,0.887958,0.947644,0.940895,0.669729
4,1.650700,1.115244,0.699476,0.896335,0.949738,0.938849,0.684643


  auc = np.trapz(TPR, FPR)  # Compute AUC using trapezoidal rule
  auc = np.trapz(TPR, FPR)  # Compute AUC using trapezoidal rule
  auc = np.trapz(TPR, FPR)  # Compute AUC using trapezoidal rule
  auc = np.trapz(TPR, FPR)  # Compute AUC using trapezoidal rule


TrainOutput(global_step=956, training_loss=1.2161638587089763, metrics={'train_runtime': 2034.7622, 'train_samples_per_second': 7.504, 'train_steps_per_second': 0.47, 'total_flos': 1.4230362635157504e+16, 'train_loss': 1.2161638587089763, 'epoch': 4.0})

In [25]:
repo_name = repo_prefix + repo_dir_ruroberta

# Save model and tokenizer locally
model.save_pretrained(repo_name)
tokenizer.save_pretrained(repo_name)

# Push to Hugging Face Hub
model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)

README.md: 0.00B [00:00, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.
No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/Goshective/lab_comm_services_sber_reduce/commit/1c549e3a3b6d2e04b7398e37d8cf5f2650219ca7', commit_message='Upload tokenizer', commit_description='', oid='1c549e3a3b6d2e04b7398e37d8cf5f2650219ca7', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Goshective/lab_comm_services_sber_reduce', endpoint='https://huggingface.co', repo_type='model', repo_id='Goshective/lab_comm_services_sber_reduce'), pr_revision=None, pr_num=None)

In [26]:
dict_stats_ruroberta = trainer.evaluate(eval_dataset=val_dataset, metric_key_prefix="eval")
dict_stats_ruroberta['log_history'] = trainer.state.log_history
dict_stats_ruroberta

  auc = np.trapz(TPR, FPR)  # Compute AUC using trapezoidal rule


{'eval_loss': 1.115243911743164,
 'eval_accuracy': 0.6994764397905759,
 'eval_top_3_accuracy': 0.8963350785340314,
 'eval_top_5_accuracy': 0.949738219895288,
 'eval_roc_auc': 0.9388494380083403,
 'eval_f1': 0.6846433398478617,
 'eval_runtime': 28.0216,
 'eval_samples_per_second': 34.081,
 'eval_steps_per_second': 4.282,
 'epoch': 4.0,
 'log_history': [{'eval_loss': 1.3155832290649414,
   'eval_accuracy': 0.6492146596858639,
   'eval_top_3_accuracy': 0.8617801047120419,
   'eval_top_5_accuracy': 0.9078534031413612,
   'eval_roc_auc': 0.9414123118314117,
   'eval_f1': 0.6196692572233727,
   'eval_runtime': 26.9447,
   'eval_samples_per_second': 35.443,
   'eval_steps_per_second': 4.454,
   'epoch': 1.0,
   'step': 239},
  {'eval_loss': 1.161211609840393,
   'eval_accuracy': 0.680628272251309,
   'eval_top_3_accuracy': 0.8722513089005236,
   'eval_top_5_accuracy': 0.9256544502617801,
   'eval_roc_auc': 0.9404544664859374,
   'eval_f1': 0.6575008710485803,
   'eval_runtime': 26.8665,
   'e

# Get Predictions

In [27]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

repo_name_ruroberta = repo_prefix + repo_dir_ruroberta
repo_name_bert_base = repo_prefix + repo_dir_bert_base

model_ruroberta = AutoModelForSequenceClassification.from_pretrained(repo_name_ruroberta)
tokenizer_ruroberta = AutoTokenizer.from_pretrained(repo_name_ruroberta)

model_bert_base = AutoModelForSequenceClassification.from_pretrained(repo_name_bert_base)
tokenizer_bert_base = AutoTokenizer.from_pretrained(repo_name_bert_base)

In [28]:
def get_logits_batch(appeal_texts, model, tokenizer, batch_size=16):
    """
    Predicts logits for a set of texts using a single detailed classifier.

    Parameters:
    - appeal_texts: List of input texts to classify.
    - model: The model used for predictions.
    - tokenizer: The tokenizer to process the input texts.
    - batch_size: The batch size for processing texts in chunks.

    Returns:
    - logits: The raw output logits from the model in numpy format.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    all_logits = []

    for i in range(0, len(appeal_texts), batch_size):
        batch_texts = appeal_texts[i:i+batch_size]
        inputs = tokenizer(batch_texts, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)

        with torch.no_grad():
            logits = model(**inputs).logits  # Get raw logits from the model

        all_logits.append(logits.cpu().numpy())  # Convert to numpy and append

    # Return all logits as a single numpy array
    return np.concatenate(all_logits, axis=0)


def get_preds_comparison(val_texts, model, tokenizer, threshold=0.05):
    # 1. Get logits
    logits = get_logits_batch(val_texts, model, tokenizer)

    # 2. Softmax to get probabilities
    probabilities = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)

    # 3. Prepare top-k predictions with thresholds
    def get_topk_thresholded(probabilities, k, threshold):
        sorted_indices = np.argsort(-probabilities, axis=-1)
        sorted_probs = np.take_along_axis(probabilities, sorted_indices, axis=-1)

        final_preds = []
        for idx in range(len(probabilities)):
            preds = sorted_indices[idx]
            probs = sorted_probs[idx]

            kept = [(pred, prob) for pred, prob in zip(preds, probs) if prob > threshold]

            if preds[0] not in [x[0] for x in kept]:  # Ensure top prediction is included
                kept.insert(0, (preds[0], probs[0]))

            final_preds.append([x[0] for x in kept[:k]])
        return final_preds

    # 4. Generate prediction sets
    top5_preds = get_topk_thresholded(probabilities, k=5, threshold=threshold)
    return top5_preds

top5_preds_bert_base = get_preds_comparison(val_texts, model_bert_base, tokenizer_bert_base)
top5_preds_ruroberta = get_preds_comparison(val_texts, model_ruroberta, tokenizer_ruroberta)

In [32]:
print(top5_preds_bert_base[:5])
print(top5_preds_ruroberta[:5])

[[np.int64(0)], [np.int64(2), np.int64(4)], [np.int64(20)], [np.int64(6)], [np.int64(6), np.int64(2)]]
[[np.int64(0)], [np.int64(2), np.int64(4)], [np.int64(20)], [np.int64(6)], [np.int64(6)]]


# Download Outputs

In [None]:
import pickle


# Combine all into one dictionary
all_data = {
    "bert_base": {
        "predictions": top5_preds_bert_base,
        "basic_stats": dict_stats_bert_base,
        "val_labels": val_labels
    },
    "ruroberta": {
        "predictions": top5_preds_ruroberta,
        "basic_stats": dict_stats_ruroberta,
    }
}

# Save to one .pkl file
with open("Assets/transformer_outputs.pkl", "wb") as f:
    pickle.dump(all_data, f)