## Разметка датасета

In [None]:
import numpy as np
import pandas as pd


# Load the Telegram messages CSV that the next cell labels interactively.
df = pd.read_csv("tg_messages_mriya.csv", encoding="utf-8")
df

In [None]:
# Interactive labelling loop: every row that has text but no rating yet is
# printed (wrapped at roughly 100 characters) and the annotator types a class.
# Interrupting the prompt (Ctrl+C / kernel interrupt / EOF) stops the session
# while keeping the labels entered so far.
valid_labels = {0, 1, 2}  # 0 - neutral, 1 - positive, 2 - negative
wrap_width = 100

labels = df['rating'].to_list()
for pos, (idx, line) in enumerate(df.iterrows()):
    text = line['text']
    # pd.notna / pd.isna work for any dtype; float.is_integer() is missing on
    # ints before Python 3.12 and `is np.nan` relies on NaN object identity.
    if pd.notna(labels[pos]) or pd.isna(text):
        continue

    print(idx)
    if len(text) > wrap_width:
        # Manual word wrap: start a new line once the words printed on the
        # current line exceed wrap_width characters (spaces are not counted).
        curlen = 0
        for subtext in text.split(' '):
            print(subtext, end=' ')
            curlen += len(subtext)
            if curlen > wrap_width:
                curlen = 0
                print()
    else:
        print(text)

    print('\n')

    try:
        label = None
        while label is None:
            try:
                label = int(input("Категория (0 - neutral, 1 - positive, 2 - negative):\n"))
            except ValueError:
                # Not a number: ask again.
                label = None
                continue
            if label not in valid_labels:
                # A number outside the documented classes: ask again.
                label = None
    except (KeyboardInterrupt, EOFError):
        # The annotator asked to stop; unexpected errors are no longer hidden.
        break

    labels[pos] = label
    print()

df['rating'] = labels

In [None]:
# Show only the rows that already carry a rating.
df.loc[df['rating'].notna(), ['text', 'rating']]

In [None]:
# Write the frame back to the same CSV it was read from, after dropping rows
# in which every column is NaN. Note that this overwrites the input file.
df.dropna(axis=0, how='all').to_csv("tg_messages_mriya.csv", index=False)

## Предподготовка данных

In [None]:
import numpy as np
import pandas as pd


max_samples = 60000  # maximum number of rows kept per dataset

def sample_dataset(df, sample_size):
    """Draw a stratified sample of about ``sample_size`` rows from ``df``.

    Each value of the ``label`` column contributes
    ``round(sample_size * share_of_that_label)`` rows, drawn with a fixed
    ``random_state=42``. The result is ordered by label and has a fresh
    0..n-1 index.

    The groups are sampled in an explicit loop rather than with
    ``groupby(...).apply``: applying over the grouping column is deprecated in
    pandas, and newer versions exclude that column, which would drop ``label``
    from the result.

    Args:
        df: DataFrame with a ``label`` column.
        sample_size: Target total number of rows.

    Returns:
        A new DataFrame with the same columns as ``df``. A class never
        contributes more rows than it has, so the result can be smaller than
        ``sample_size`` when ``sample_size`` exceeds ``len(df)``.
    """
    proportions = df['label'].value_counts(normalize=True)
    parts = []
    for label, group in df.groupby('label'):
        wanted = round(sample_size * proportions[label])
        # Clamp so that an oversized request returns the whole class instead
        # of raising inside DataFrame.sample.
        parts.append(group.sample(min(wanted, len(group)), random_state=42))
    if not parts:
        # Nothing to sample (empty input): return an empty frame with the same columns.
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(parts).reset_index(drop=True)


# Registry of evaluation datasets: name -> DataFrame with 'text' and 'label' columns.
test_datasets = {}
df = pd.read_csv("/kaggle/input/russian-sentiment-analysis-test-dataset/tg_messages_mriya.csv")
# The hand-made 'rating' column becomes the common 'label' column.
# NOTE(review): rows without a rating would carry NaN labels here — confirm the file is fully labelled.
test_datasets['tg_messages_mriya'] = df[['text', 'rating']].rename(columns={'rating': 'label'})
test_datasets['tg_messages_mriya'].head()

### Sentiment of bank reviews

In [None]:
# The bank reviews file is tab-separated.
df = pd.read_csv("/kaggle/input/russian-sentiment-analysis-test-dataset/sentiment_of_bank_reviews.csv", sep='\t')
df.head()

Unnamed: 0,idx,Score,Text
0,0,Positive,В Альфа-Банке работает замечательная девушка -...
1,1,Negative,Оформляя рассрочку в м. Видео в меге тёплый ст...
2,2,Positive,Очень порадовала оперативность работы в банке....
3,3,Negative,Имела неосторожность оформить потреб. кредит в...
4,4,Negative,Небольшая предыстория: Нашел на сайте MDM банк...


In [None]:
df['Score'].value_counts()

Score
Positive    7000
Negative    6999
Name: count, dtype: int64

In [None]:
# Keep only text and score, rename them to the common 'text'/'label' columns and
# translate the textual scores into numeric codes (Positive -> 1, Negative -> 2).
label_repl = {'Negative': 2, 'Positive': 1}
df = df.loc[:, ['Text', 'Score']].rename(columns={'Text': 'text', 'Score': 'label'})
df['label'] = df['label'].map(lambda score: label_repl[score])

test_datasets['sentiment_of_bank_reviews'] = df
test_datasets['sentiment_of_bank_reviews'].head()

Unnamed: 0,text,label
0,В Альфа-Банке работает замечательная девушка -...,1
1,Оформляя рассрочку в м. Видео в меге тёплый ст...,2
2,Очень порадовала оперативность работы в банке....,1
3,Имела неосторожность оформить потреб. кредит в...,2
4,Небольшая предыстория: Нашел на сайте MDM банк...,2


### RuTweetCorp

In [None]:
# RuTweetCorp ships as two files: one with positive and one with negative tweets.
df_pos = pd.read_csv('/kaggle/input/russian-sentiment-analysis-test-dataset/RuTweetCorp_positive.csv')
df_neg = pd.read_csv('/kaggle/input/russian-sentiment-analysis-test-dataset/RuTweetCorp_negative.csv')
df_pos.head()

Unnamed: 0,id,tdate,tname,ttext,ttype,trep,trtw,tfav,tstcount,tfoll,tfrien,listcount
0,408906692374446080,1386325927,pleease_shut_up,"@first_timee хоть я и школота, но поверь, у на...",1,0,0,0,7569,62,61,0
1,408906692693221377,1386325927,alinakirpicheva,"Да, все-таки он немного похож на него. Но мой ...",1,0,0,0,11825,59,31,2
2,408906695083954177,1386325927,EvgeshaRe,RT @KatiaCheh: Ну ты идиотка) я испугалась за ...,1,0,1,0,1273,26,27,0
3,408906695356973056,1386325927,ikonnikova_21,"RT @digger2912: ""Кто то в углу сидит и погибае...",1,0,1,0,1549,19,17,0
4,408906761416867842,1386325943,JumpyAlex,@irina_dyshkant Вот что значит страшилка :D\nН...,1,0,0,0,597,16,23,1


In [None]:
# Attach the numeric label to each half (positive -> 1, negative -> 2).
# .assign adds the column row by row, so it does not depend on the frames
# having a default 0..n-1 index the way a positional concat does.
df_pos = df_pos[['ttext']].assign(label=1)
df_neg = df_neg[['ttext']].assign(label=2)
df = pd.concat([df_pos, df_neg], axis=0).rename(columns={'ttext': 'text'})
# Shuffle with a fixed seed so that the later stratified sample is reproducible.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

test_datasets['rutweetcorp'] = df
test_datasets['rutweetcorp'].head()

Unnamed: 0,text,label
0,день как всегда поминутно был распланирован и ...,1
1,Лунный медведь вслух читает сказки… Не такой п...,1
2,@ekantu да уж!!видно это синдром только что пр...,1
3,"Сегодня мне надо сразу кучу всего сделать, а м...",2
4,В один день поругалась почти со всеми.. А с ут...,2


In [None]:
test_datasets['rutweetcorp']['label'].value_counts()

label
1    114911
2    111923
Name: count, dtype: int64

In [None]:
# Down-sample to max_samples rows while keeping the label proportions,
# then check the resulting class counts.
test_datasets['rutweetcorp'] = sample_dataset(test_datasets['rutweetcorp'], max_samples)
test_datasets['rutweetcorp']['label'].value_counts()

  .apply(lambda x: x.sample(round(sample_size*proportions[x.name]), random_state=42))


label
1    30395
2    29605
Name: count, dtype: int64

### RuReviews

In [None]:
# The RuReviews file is tab-separated.
df = pd.read_csv('/kaggle/input/russian-sentiment-analysis-test-dataset/rureviews.csv', sep='\t')
df.head()

Unnamed: 0,review,sentiment
0,качество плохое пошив ужасный (горловина напер...,negative
1,"Товар отдали другому человеку, я не получила п...",negative
2,"Ужасная синтетика! Тонкая, ничего общего с пре...",negative
3,"товар не пришел, продавец продлил защиту без м...",negative
4,"Кофточка голая синтетика, носить не возможно.",negative


In [None]:
df['sentiment'].value_counts()

sentiment
negative    30000
neautral    30000
positive    30000
Name: count, dtype: int64

In [None]:
# Translate the textual sentiment into numeric codes
# (neutral -> 0, positive -> 1, negative -> 2).
# The key 'neautral' is spelled exactly as it appears in the dataset.
label_repl = {'negative': 2, 'neautral': 0, 'positive': 1}
df = df.rename(columns={'review': 'text', 'sentiment': 'label'})
df['label'] = df['label'].map(lambda sentiment: label_repl[sentiment])

test_datasets['rureviews'] = df
test_datasets['rureviews'].head()

Unnamed: 0,text,label
0,качество плохое пошив ужасный (горловина напер...,2
1,"Товар отдали другому человеку, я не получила п...",2
2,"Ужасная синтетика! Тонкая, ничего общего с пре...",2
3,"товар не пришел, продавец продлил защиту без м...",2
4,"Кофточка голая синтетика, носить не возможно.",2


In [None]:
# Down-sample to max_samples rows while keeping the label proportions,
# then check the resulting class counts.
test_datasets['rureviews'] = sample_dataset(test_datasets['rureviews'], max_samples)
test_datasets['rureviews']['label'].value_counts()

  .apply(lambda x: x.sample(round(sample_size*proportions[x.name]), random_state=42))


label
0    20000
1    20000
2    20000
Name: count, dtype: int64

### RuSentiment

In [None]:
# RuSentiment comes in three files; they are stacked into a single frame.
df_preselected = pd.read_csv('/kaggle/input/russian-sentiment-analysis-test-dataset/rusentiment_preselected_posts.csv')
df_random = pd.read_csv('/kaggle/input/russian-sentiment-analysis-test-dataset/rusentiment_random_posts.csv')
df_test = pd.read_csv('/kaggle/input/russian-sentiment-analysis-test-dataset/rusentiment_test.csv')
# No ignore_index is passed, so each part keeps its own row index and the
# combined frame has repeated index values.
df = pd.concat([df_preselected, df_random, df_test], axis=0)[['text', 'label']]
df.head()

Unnamed: 0,text,label
0,Прорвём информационную блокаду изнутри.,neutral
1,"Никогда у меня не будет ""одного приложения для...",negative
2,"Кури-и тебя не укусит злая собака, потому что ...",skip
3,"Есть 3 типа людей:\nУмные, которые делают все ...",neutral
4,мегафон чет накрыло,neutral


In [None]:
df['label'].value_counts()

label
neutral     12720
positive     6646
skip         4440
negative     3912
speech       3467
Name: count, dtype: int64

In [None]:
# Collapse the five RuSentiment tags into numeric codes:
# positive -> 1, negative -> 2, everything else (neutral, skip, speech) -> 0.
# NOTE(review): folding 'skip' and 'speech' into class 0 is a modelling choice — confirm it is intended.
label_repl = {'neutral': 0, 'positive': 1, 'skip': 0, 'negative': 2, 'speech': 0}
df['label'] = df['label'].map(lambda tag: label_repl[tag])

test_datasets['rusentiment'] = df
test_datasets['rusentiment'].head()

Unnamed: 0,text,label
0,Прорвём информационную блокаду изнутри.,0
1,"Никогда у меня не будет ""одного приложения для...",2
2,"Кури-и тебя не укусит злая собака, потому что ...",0
3,"Есть 3 типа людей:\nУмные, которые делают все ...",0
4,мегафон чет накрыло,0


### ru_sentiment_MonoHime

In [None]:
# The first CSV column is used as the row index.
df = pd.read_csv('/kaggle/input/russian-sentiment-analysis-test-dataset/ru_sentiment_monohime.csv', index_col=0)
df.head()

Unnamed: 0,text,sentiment
43956,Развода на деньги нет\nНаблюдаюсь в Лайфклиник...,1
17755,Отель выбрали потому что рядом со стадионом. О...,0
20269,"Вылечили\nГноился с рождения глазик, в поликли...",1
16648,Хорошее расположение.С вокзала дошли пешком.Но...,0
27879,"Отличное месторасположение,прекрасный вид,особ...",1


In [None]:
df['sentiment'].value_counts()

sentiment
1    100792
2     55310
0     54887
Name: count, dtype: int64

In [None]:
# Rename 'sentiment' to the common 'label' column and reset the row index.
# NOTE(review): the 0/1/2 codes are used as-is — confirm they follow the same
# neutral/positive/negative scheme as the other datasets.
df = df.rename(columns={'sentiment': 'label'}).reset_index(drop=True)
test_datasets['ru_sentiment_monohime'] = df
test_datasets['ru_sentiment_monohime'].head()

Unnamed: 0,text,label
0,Развода на деньги нет\nНаблюдаюсь в Лайфклиник...,1
1,Отель выбрали потому что рядом со стадионом. О...,0
2,"Вылечили\nГноился с рождения глазик, в поликли...",1
3,Хорошее расположение.С вокзала дошли пешком.Но...,0
4,"Отличное месторасположение,прекрасный вид,особ...",1


In [None]:
# Down-sample to max_samples rows while keeping the label proportions,
# then check the resulting class counts.
test_datasets['ru_sentiment_monohime'] = sample_dataset(test_datasets['ru_sentiment_monohime'], max_samples)
test_datasets['ru_sentiment_monohime']['label'].value_counts()

  .apply(lambda x: x.sample(round(sample_size*proportions[x.name]), random_state=42))


label
1    28663
2    15729
0    15608
Name: count, dtype: int64

In [None]:
test_datasets

{'tg_messages_mriya':                                                   text  label
 0                       Ну Мрия это ещё не весь Крым☺️      0
 1                                  МРИЯ - это КОСМОС❤️      1
 2    __🧑‍🚀 Поехали!__\n\nНе только в космос, но и в...      1
 3    **Мечты о звёздах начинаются с Земли!**\n\nИ е...      1
 4    **Мечты о звёздах начинаются с Земли!**\n\nИ е...      1
 ..                                                 ...    ...
 329  Мы уверены, что форум [Новые горизонты](https:...      0
 330  Мы уверены, что форум [Новые горизонты](https:...      0
 331  Фрагмент выступления генерального директора ку...      0
 332  Фрагмент выступления генерального директора ку...      0
 333  __В форуме __[__Новые горизонты__](https://t.m...      0
 
 [334 rows x 2 columns],
 'sentiment_of_bank_reviews':                                                     text  label
 0      В Альфа-Банке работает замечательная девушка -...      1
 1      Оформляя рассрочку в м. Видео

## Тестирование

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    classification_report,
    confusion_matrix
)

# Hugging Face model ids to evaluate, one per line.
# Keep the trailing comma on every entry: without it Python joins two adjacent
# string literals into a single (invalid) model id.
MODEL_NAMES = [
    "blanchefort/rubert-base-cased-sentiment",
    "seara/rubert-tiny2-russian-sentiment",
    "cointegrated/rubert-tiny-sentiment-balanced",
    "MonoHime/rubert_conversational_cased_sentiment",
    "sismetanin/rubert-ru-sentiment-rureviews",
    "blanchefort/rubert-base-cased-sentiment-rurewiews",
    "rajora/distilbert-multilingual-sentiment",
    "sismetanin/mbart_ru_sum_gazeta-ru-sentiment-rusentiment",
    "sismetanin/mbart_ru_sum_gazeta-ru-sentiment-rureviews",
    "kartashoffv/vashkontrol-sentiment-rubert",
    "tabularisai/multilingual-sentiment-analysis",
    "sismetanin/sbert-ru-sentiment-rusentiment",
]
# Run on the GPU when one is available, otherwise on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 12  # batch size passed to the evaluation DataLoader
MAX_LENGTH = 512  # max_length passed to the tokenizer (with truncation enabled)

In [22]:
import gc
from tqdm import tqdm


class CustomDataset(Dataset):
    """Text-classification dataset that is tokenised once, up front.

    All texts are passed to the tokenizer in a single call (truncation and
    padding enabled, ``max_length=MAX_LENGTH``, PyTorch tensors returned) and
    the result is kept in ``self.encodings``. Labels are stored as one tensor
    in ``self.labels``.
    """

    def __init__(self, texts, labels, tokenizer):
        """Tokenise ``texts`` with ``tokenizer`` and convert ``labels`` to a tensor."""
        tokenizer_options = dict(
            truncation=True,
            padding=True,
            max_length=MAX_LENGTH,
            return_tensors="pt",
        )
        self.encodings = tokenizer(texts, **tokenizer_options)
        self.labels = torch.tensor(labels)

    def __len__(self):
        """Return the number of samples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``input_ids``, ``attention_mask`` and ``labels`` entries for sample ``idx``."""
        sample = {key: self.encodings[key][idx] for key in ("input_ids", "attention_mask")}
        sample["labels"] = self.labels[idx]
        return sample

def _collapse_to_binary(logits, num_labels):
    """Reduce 3- or 5-column logits to two columns without modifying the input.

    * 3 labels: column 0 = max(col 0, col 1), column 1 = col 2.
    * 5 labels: column 0 = max(col 1 .. col 4), column 1 = col 0.
    * any other width: the logits are returned unchanged.

    NOTE(review): this is purely positional. It presumably assumes that column 2
    (3-label models) or column 0 (5-label models) is the negative class — verify
    against each model's ``config.id2label``, since label order differs between
    checkpoints.
    """
    if num_labels == 3:
        merged = logits[:, :2].max(dim=1).values
        return torch.stack([merged, logits[:, 2]], dim=1)
    if num_labels == 5:
        merged = logits[:, 1:].max(dim=1).values
        return torch.stack([merged, logits[:, 0]], dim=1)
    return logits


def evaluate_model(model, dataloader, model_name, label_consistance=True):
    """Run ``model`` over ``dataloader`` and collect predictions and labels.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``
            and whose ``config.num_labels`` gives the number of classes.
        dataloader: Yields dicts with ``input_ids``, ``attention_mask`` and
            ``labels`` tensors.
        model_name: Used only in the progress bar and in warnings.
        label_consistance: When False, the logits are collapsed to two columns
            with ``_collapse_to_binary`` before taking the argmax.

    Returns:
        ``(predictions, true_labels)``: two lists of ints of equal length.

    A batch that raises an ordinary exception is skipped, as before, but the
    failure is now reported. The original ``finally: continue`` discarded every
    exception silently, including KeyboardInterrupt, which made the run
    impossible to interrupt and hid partially evaluated datasets.
    """
    model.eval()
    predictions, true_labels = [], []
    skipped_batches = 0

    with torch.no_grad():
        progress = tqdm(dataloader, desc=f"Evaluating {model_name}")
        for batch_no, batch in enumerate(progress, start=1):
            try:
                inputs = {
                    "input_ids": batch["input_ids"].to(DEVICE),
                    "attention_mask": batch["attention_mask"].to(DEVICE)
                }
                outputs = model(**inputs)
                logits = outputs.logits
                if not label_consistance:
                    logits = _collapse_to_binary(logits, model.config.num_labels)

                batch_predictions = torch.argmax(logits, dim=1).cpu().tolist()
                batch_labels = batch["labels"].cpu().tolist()
            except Exception as exc:
                # Best effort: keep evaluating, but make the loss visible.
                skipped_batches += 1
                print(f"Skipping batch {batch_no} of {model_name}: {exc!r}")
                continue

            # Extend both lists only after the whole batch succeeded so that
            # predictions and labels can never get out of step.
            predictions.extend(batch_predictions)
            true_labels.extend(batch_labels)

            if batch_no % 25 == 0:
                # Periodically drop references to the last batch and collect garbage.
                del inputs
                del outputs
                gc.collect()

    if skipped_batches:
        print(f"{model_name}: {skipped_batches} batch(es) were skipped because of errors")

    return predictions, true_labels

In [None]:
import gc
import time

# Evaluate every model on every dataset. The results table is rewritten to
# disk after each (model, dataset) pair so partial results survive a crash.
results = []
for model_name in MODEL_NAMES:
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name).to(DEVICE)
    except Exception as e:
        print(f"Ошибка загрузки модели {model_name}: {str(e)}")
        # Bare raise re-raises the original error with its traceback intact.
        raise

    # Models with exactly two outputs are compared directly against the binary
    # targets; all others have their logits collapsed inside evaluate_model.
    labels_are_consistent = model.config.num_labels == 2

    # NOTE(review): the purpose of this fixed 30 s pause is not visible here — confirm it is still needed.
    time.sleep(30)
    for dataset_name, df in list(test_datasets.items()):
        try:
            texts = df["text"].tolist()
            labels = df["label"].tolist()

            # Binary targets: 0 (neutral) and 1 (positive) -> 0, 2 (negative) -> 1.
            labels = [label if label == 0 else label - 1 for label in labels]

            dataset = CustomDataset(texts, labels, tokenizer)
            dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, pin_memory=True)

            preds, truth = evaluate_model(model, dataloader, model_name, labels_are_consistent)

            metrics = {
                "model": model_name,
                "dataset": dataset_name,
                "accuracy": accuracy_score(truth, preds),
                "f1": f1_score(truth, preds, zero_division=0.),
                "f1_weighted": f1_score(truth, preds, average="weighted", zero_division=0.),
                "f1_macro": f1_score(truth, preds, average="macro", zero_division=0.),
                "classification_report": classification_report(truth, preds, zero_division=0.),
                "confusion_matrix": confusion_matrix(truth, preds).tolist()
            }
            print(metrics)
            results.append(metrics)
        finally:
            # Save whatever has been collected so far, even if this pair failed.
            results_df = pd.DataFrame(results)
            results_df.to_csv("/kaggle/working/model_evaluation_results5.csv", index=False)
            results_df.to_excel("/kaggle/working/model_evaluation_results5.xlsx", index=False)

    # Release the finished model before the next one is loaded, so two models
    # do not have to fit in (GPU) memory at the same time.
    del model, tokenizer
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


Evaluating tabularisai/multilingual-sentiment-analysis: 100%|██████████| 28/28 [00:05<00:00,  4.81it/s]


{'model': 'tabularisai/multilingual-sentiment-analysis', 'dataset': 'tg_messages_mriya', 'accuracy': 0.8532934131736527, 'f1': 0.14035087719298248, 'f1_weighted': 0.9034677651330126, 'f1_macro': 0.5300772389238235, 'classification_report': '              precision    recall  f1-score   support\n\n           0       0.99      0.86      0.92       327\n           1       0.08      0.57      0.14         7\n\n    accuracy                           0.85       334\n   macro avg       0.53      0.72      0.53       334\nweighted avg       0.97      0.85      0.90       334\n', 'confusion_matrix': [[281, 46], [3, 4]]}


Evaluating tabularisai/multilingual-sentiment-analysis: 100%|██████████| 1167/1167 [04:29<00:00,  4.32it/s]


{'model': 'tabularisai/multilingual-sentiment-analysis', 'dataset': 'sentiment_of_bank_reviews', 'accuracy': 0.709622115865419, 'f1': 0.7317008778298462, 'f1_weighted': 0.7076405783434305, 'f1_macro': 0.707642296936251, 'classification_report': '              precision    recall  f1-score   support\n\n           0       0.75      0.63      0.68      7000\n           1       0.68      0.79      0.73      6999\n\n    accuracy                           0.71     13999\n   macro avg       0.72      0.71      0.71     13999\nweighted avg       0.72      0.71      0.71     13999\n', 'confusion_matrix': [[4391, 2609], [1456, 5543]]}


Evaluating tabularisai/multilingual-sentiment-analysis: 100%|██████████| 5000/5000 [05:07<00:00, 16.23it/s]


{'model': 'tabularisai/multilingual-sentiment-analysis', 'dataset': 'rutweetcorp', 'accuracy': 0.5848333333333333, 'f1': 0.5160099479288102, 'f1_weighted': 0.5770584157794634, 'f1_macro': 0.5762650568453951, 'classification_report': '              precision    recall  f1-score   support\n\n           0       0.57      0.72      0.64     30395\n           1       0.61      0.45      0.52     29605\n\n    accuracy                           0.58     60000\n   macro avg       0.59      0.58      0.58     60000\nweighted avg       0.59      0.58      0.58     60000\n', 'confusion_matrix': [[21811, 8584], [16326, 13279]]}


Evaluating tabularisai/multilingual-sentiment-analysis: 100%|██████████| 5000/5000 [19:13<00:00,  4.33it/s]


{'model': 'tabularisai/multilingual-sentiment-analysis', 'dataset': 'rureviews', 'accuracy': 0.7442333333333333, 'f1': 0.6258168340973373, 'f1_weighted': 0.7457504941327817, 'f1_macro': 0.7157670791239206, 'classification_report': '              precision    recall  f1-score   support\n\n           0       0.82      0.80      0.81     40000\n           1       0.61      0.64      0.63     20000\n\n    accuracy                           0.74     60000\n   macro avg       0.71      0.72      0.72     60000\nweighted avg       0.75      0.74      0.75     60000\n', 'confusion_matrix': [[31821, 8179], [7167, 12833]]}


Evaluating tabularisai/multilingual-sentiment-analysis: 100%|██████████| 2599/2599 [09:57<00:00,  4.35it/s]


{'model': 'tabularisai/multilingual-sentiment-analysis', 'dataset': 'rusentiment', 'accuracy': 0.680647747314414, 'f1': 0.29603449494592493, 'f1_weighted': 0.7310782388791437, 'f1_macro': 0.5447573922171716, 'classification_report': '              precision    recall  f1-score   support\n\n           0       0.91      0.70      0.79     27273\n           1       0.20      0.54      0.30      3912\n\n    accuracy                           0.68     31185\n   macro avg       0.56      0.62      0.54     31185\nweighted avg       0.82      0.68      0.73     31185\n', 'confusion_matrix': [[19132, 8141], [1818, 2094]]}


## Результаты тестирования

In [1]:
import pandas as pd


# Load the saved evaluation results.
# NOTE(review): the evaluation cell writes to /kaggle/working/model_evaluation_results5.csv,
# while this cell reads model_evaluation_results.csv — confirm how that file is produced.
df = pd.read_csv('model_evaluation_results.csv')
df.head()

Unnamed: 0,model,dataset,accuracy,f1,f1_weighted,f1_macro,classification_report,confusion_matrix
0,blanchefort/rubert-base-cased-sentiment,tg_messages_mriya,0.428144,0.073394,0.474088,0.360002,precision recall f1-score ...,"[[229, 98], [3, 4]]"
1,blanchefort/rubert-base-cased-sentiment,sentiment_of_bank_reviews,0.188156,0.0,0.161686,0.107783,precision recall f1-score ...,"[[7000, 0], [6999, 0]]"
2,blanchefort/rubert-base-cased-sentiment,rutweetcorp,0.004435,0.0,0.004476,0.002945,precision recall f1-score ...,"[[114911, 0], [111923, 0]]"
3,blanchefort/rubert-base-cased-sentiment,rureviews,0.7646,0.758498,0.764956,0.758498,precision recall f1-score ...,"[[31552, 8448], [2380, 17620]]"
4,blanchefort/rubert-base-cased-sentiment,rusentiment,0.652173,0.577716,0.667291,0.625903,precision recall f1-score ...,"[[23009, 4264], [591, 3321]]"


In [None]:
# Best models by mean f1 across all datasets
df.groupby('model', as_index=False)[['accuracy', 'f1', 'f1_weighted', 'f1_macro']].mean().sort_values('f1', ascending=False).reset_index(drop=True)

Unnamed: 0,model,accuracy,f1,f1_weighted,f1_macro
0,sismetanin/sbert-ru-sentiment-rusentiment,0.857854,0.704924,0.85485,0.798078
1,sismetanin/mbart_ru_sum_gazeta-ru-sentiment-ru...,0.838541,0.662536,0.834842,0.769567
2,seara/rubert-tiny2-russian-sentiment,0.720111,0.649739,0.722711,0.692811
3,blanchefort/rubert-base-cased-sentiment-rurewiews,0.729443,0.562702,0.749961,0.657095
4,kartashoffv/vashkontrol-sentiment-rubert,0.761192,0.491615,0.761985,0.652517
5,tabularisai/multilingual-sentiment-analysis,0.718583,0.474826,0.734957,0.625491
6,blanchefort/rubert-base-cased-sentiment,0.441121,0.333462,0.442384,0.399416
7,MonoHime/rubert_conversational_cased_sentiment,0.441616,0.274251,0.385541,0.343955
8,rajora/distilbert-multilingual-sentiment,0.574856,0.107546,0.555197,0.408155
9,cointegrated/rubert-tiny-sentiment-balanced,0.177766,0.068005,0.177878,0.157889


In [None]:
# Best models on the MRIYA Telegram messages dataset, by f1
df[df['dataset'] == 'tg_messages_mriya'].sort_values('f1', ascending=False).drop('dataset', axis=1).reset_index(drop=True)

Unnamed: 0,model,accuracy,f1,f1_weighted,f1_macro,classification_report,confusion_matrix
0,sismetanin/sbert-ru-sentiment-rusentiment,0.979042,0.533333,0.979724,0.761307,precision recall f1-score ...,"[[323, 4], [3, 4]]"
1,sismetanin/mbart_ru_sum_gazeta-ru-sentiment-ru...,0.976048,0.428571,0.976048,0.70817,precision recall f1-score ...,"[[323, 4], [4, 3]]"
2,seara/rubert-tiny2-russian-sentiment,0.610778,0.333333,0.617717,0.51975,precision recall f1-score ...,"[[324, 3], [5, 2]]"
3,kartashoffv/vashkontrol-sentiment-rubert,0.916168,0.222222,0.940324,0.588959,precision recall f1-score ...,"[[302, 25], [3, 4]]"
4,tabularisai/multilingual-sentiment-analysis,0.853293,0.140351,0.903468,0.530077,precision recall f1-score ...,"[[281, 46], [3, 4]]"
5,blanchefort/rubert-base-cased-sentiment-rurewiews,0.796407,0.128205,0.86889,0.506475,precision recall f1-score ...,"[[261, 66], [2, 5]]"
6,blanchefort/rubert-base-cased-sentiment,0.428144,0.073394,0.474088,0.360002,precision recall f1-score ...,"[[229, 98], [3, 4]]"
7,MonoHime/rubert_conversational_cased_sentiment,0.838323,0.035714,0.893404,0.473739,precision recall f1-score ...,"[[279, 48], [6, 1]]"
8,rajora/distilbert-multilingual-sentiment,0.772455,0.025641,0.853465,0.448414,precision recall f1-score ...,"[[257, 70], [6, 1]]"
9,sismetanin/mbart_ru_sum_gazeta-ru-sentiment-ru...,0.194611,0.014652,0.312609,0.16682,precision recall f1-score ...,"[[63, 264], [5, 2]]"


In [None]:
# Best models on the RuSentiment dataset, by f1
df[df['dataset'] == 'rusentiment'].sort_values('f1', ascending=False).drop('dataset', axis=1).reset_index(drop=True)

Unnamed: 0,model,accuracy,f1,f1_weighted,f1_macro,classification_report,confusion_matrix
0,sismetanin/sbert-ru-sentiment-rusentiment,0.979157,0.918321,0.979306,0.953188,precision recall f1-score ...,"[[26881, 392], [258, 3654]]"
1,sismetanin/mbart_ru_sum_gazeta-ru-sentiment-ru...,0.970146,0.884045,0.970471,0.933456,precision recall f1-score ...,"[[26705, 568], [363, 3549]]"
2,seara/rubert-tiny2-russian-sentiment,0.691454,0.584467,0.700646,0.650767,precision recall f1-score ...,"[[25617, 1656], [1613, 2299]]"
3,blanchefort/rubert-base-cased-sentiment,0.652173,0.577716,0.667291,0.625903,precision recall f1-score ...,"[[23009, 4264], [591, 3321]]"
4,blanchefort/rubert-base-cased-sentiment-rurewiews,0.628571,0.369358,0.690678,0.553063,precision recall f1-score ...,"[[16210, 11063], [520, 3392]]"
5,kartashoffv/vashkontrol-sentiment-rubert,0.821773,0.320704,0.825084,0.609068,precision recall f1-score ...,"[[24315, 2958], [2600, 1312]]"
6,tabularisai/multilingual-sentiment-analysis,0.680648,0.296034,0.731078,0.544757,precision recall f1-score ...,"[[19132, 8141], [1818, 2094]]"
7,MonoHime/rubert_conversational_cased_sentiment,0.135514,0.223777,0.049588,0.12419,precision recall f1-score ...,"[[340, 26933], [26, 3886]]"
8,rajora/distilbert-multilingual-sentiment,0.568286,0.122874,0.639567,0.418278,precision recall f1-score ...,"[[16779, 10494], [2969, 943]]"
9,sismetanin/rubert-ru-sentiment-rureviews,0.45461,0.054691,0.54624,0.335719,precision recall f1-score ...,"[[13685, 13588], [3420, 492]]"


In [None]:
# Best model on each dataset by f1 (idxmax picks the row with the highest f1 per dataset)
df.loc[df.groupby('dataset')['f1'].idxmax(), ['dataset', 'model', 'f1']].reset_index(drop=True)

Unnamed: 0,dataset,model,f1
0,ru_sentiment_monohime,sismetanin/mbart_ru_sum_gazeta-ru-sentiment-ru...,0.633516
1,rureviews,seara/rubert-tiny2-russian-sentiment,0.762741
2,rusentiment,sismetanin/sbert-ru-sentiment-rusentiment,0.918321
3,rutweetcorp,sismetanin/sbert-ru-sentiment-rusentiment,0.837019
4,sentiment_of_bank_reviews,seara/rubert-tiny2-russian-sentiment,0.836662
5,tg_messages_mriya,sismetanin/sbert-ru-sentiment-rusentiment,0.533333


In [None]:
# Introduce a combined metric: the sum of the four scores divided by 4
df['summetric'] = df[['accuracy', 'f1', 'f1_weighted', 'f1_macro']].sum(axis=1) / 4
df

Unnamed: 0,model,dataset,accuracy,f1,f1_weighted,f1_macro,classification_report,confusion_matrix,summetric
0,blanchefort/rubert-base-cased-sentiment,tg_messages_mriya,0.428144,0.073394,0.474088,0.360002,precision recall f1-score ...,"[[229, 98], [3, 4]]",0.333907
1,blanchefort/rubert-base-cased-sentiment,sentiment_of_bank_reviews,0.188156,0.000000,0.161686,0.107783,precision recall f1-score ...,"[[7000, 0], [6999, 0]]",0.114406
2,blanchefort/rubert-base-cased-sentiment,rutweetcorp,0.004435,0.000000,0.004476,0.002945,precision recall f1-score ...,"[[114911, 0], [111923, 0]]",0.002964
3,blanchefort/rubert-base-cased-sentiment,rureviews,0.764600,0.758498,0.764956,0.758498,precision recall f1-score ...,"[[31552, 8448], [2380, 17620]]",0.761638
4,blanchefort/rubert-base-cased-sentiment,rusentiment,0.652173,0.577716,0.667291,0.625903,precision recall f1-score ...,"[[23009, 4264], [591, 3321]]",0.630771
...,...,...,...,...,...,...,...,...,...
67,sismetanin/sbert-ru-sentiment-rusentiment,sentiment_of_bank_reviews,0.762126,0.705934,0.753114,0.753111,precision recall f1-score ...,"[[6672, 328], [3002, 3997]]",0.743571
68,sismetanin/sbert-ru-sentiment-rusentiment,rutweetcorp,0.854067,0.837019,0.852656,0.852452,precision recall f1-score ...,"[[28760, 1635], [7121, 22484]]",0.849048
69,sismetanin/sbert-ru-sentiment-rusentiment,rureviews,0.763200,0.615668,0.757813,0.722276,precision recall f1-score ...,"[[34412, 5588], [8620, 11380]]",0.714739
70,sismetanin/sbert-ru-sentiment-rusentiment,rusentiment,0.979157,0.918321,0.979306,0.953188,precision recall f1-score ...,"[[26881, 392], [258, 3654]]",0.957493


In [None]:
# Best models by the combined metric averaged over all datasets
df.groupby('model', as_index=False)['summetric'].mean().sort_values('summetric', ascending=False).reset_index(drop=True)

Unnamed: 0,model,summetric
0,sismetanin/sbert-ru-sentiment-rusentiment,0.803927
1,sismetanin/mbart_ru_sum_gazeta-ru-sentiment-ru...,0.776372
2,seara/rubert-tiny2-russian-sentiment,0.696343
3,blanchefort/rubert-base-cased-sentiment-rurewiews,0.6748
4,kartashoffv/vashkontrol-sentiment-rubert,0.666827
5,tabularisai/multilingual-sentiment-analysis,0.638464
6,rajora/distilbert-multilingual-sentiment,0.411438
7,blanchefort/rubert-base-cased-sentiment,0.404096
8,MonoHime/rubert_conversational_cased_sentiment,0.361341
9,sismetanin/rubert-ru-sentiment-rureviews,0.244553


In [None]:
# Best model on each dataset by the combined metric
df.loc[df.groupby('dataset')['summetric'].idxmax(), ['dataset', 'model', 'summetric']].reset_index(drop=True)

Unnamed: 0,dataset,model,summetric
0,ru_sentiment_monohime,sismetanin/mbart_ru_sum_gazeta-ru-sentiment-ru...,0.750369
1,rureviews,seara/rubert-tiny2-russian-sentiment,0.784561
2,rusentiment,sismetanin/sbert-ru-sentiment-rusentiment,0.957493
3,rutweetcorp,sismetanin/sbert-ru-sentiment-rusentiment,0.849048
4,sentiment_of_bank_reviews,seara/rubert-tiny2-russian-sentiment,0.819138
5,tg_messages_mriya,sismetanin/sbert-ru-sentiment-rusentiment,0.813352
