In [1]:
from sklearn.metrics import f1_score, balanced_accuracy_score, precision_score, recall_score, confusion_matrix
import torch
import pickle
import pandas as pd

import langid
from tqdm.auto import tqdm
tqdm.pandas()

from data.dataset import get_dataloaders

In [2]:
data_path = "../data/"

In [3]:
with open(data_path + 'e5-preds.pkl', 'rb') as f:
    res_vals = pickle.load(f)

In [4]:
dataset = get_dataloaders(model_name=None, data_path=data_path, use_cache=False, batch_size=64, debug=1, comment_column='kommentar_original')
df_all = pd.DataFrame(dataset['test']['text'], columns=['text'])

In [5]:
df_all['labels'] = res_vals['labels']
df_all['predictions'] = res_vals['predictions']
df_all['neg_prob'] = res_vals['probabilities'][:,0]
df_all['pos_prob'] = res_vals['probabilities'][:,1]

In [6]:
langid.set_languages(['fr', 'de'])
df_all['language'] = df_all["text"].progress_apply(lambda x :langid.classify(x)[0])
df_de = df_all[df_all['language'] == 'de']
df_fr = df_all[df_all['language'] == 'fr']

  0%|          | 0/41746 [00:00<?, ?it/s]

In [7]:
def calculate_metrics(model_name, predictions, labels):
    scores = ['f1', 'precision', 'recall']
    params = ['micro', 'macro', 'weighted', 'binary']
    columns = ['model','accuracy'] + [score + '_' + param for score in scores for param in params]
    df = pd.DataFrame(0, index = [0], columns=columns + ['f1_binary_neg'])
    df.iloc[0, 0] = model_name
    df.iloc[0, 1] = balanced_accuracy_score(predictions, labels)
    i = 2
    for param in params:
        df.loc[0, f'{scores[0]}_{param}'] = f1_score(predictions, labels, average=param)
        df.loc[0, f'{scores[1]}_{param}'] = precision_score(predictions, labels, average=param)
        df.loc[0, f'{scores[2]}_{param}'] = recall_score(predictions, labels, average=param)
        i=i+3

    df.loc[0, f'{scores[0]}_binary_neg'] = f1_score(1 - predictions, 1 - labels, average='binary')

    return df

In [10]:
def get_general_metrics(df_list, name_list, confidence=0.5):

    res = []
    for df, name in zip(df_list, name_list):
        df = df[df[['neg_prob', 'pos_prob']].max(axis=1) >= confidence]
        res.append(calculate_metrics(name, df['predictions'], df['labels']))
    return pd.concat(res)

get_general_metrics([df_all, df_de, df_fr], ['all', 'de', 'fr'])

Unnamed: 0,model,accuracy,f1_micro,f1_macro,f1_weighted,f1_binary,precision_micro,precision_macro,precision_weighted,precision_binary,recall_micro,recall_macro,recall_weighted,recall_binary,f1_binary_neg
0,all,0.74929,0.839985,0.777048,0.827131,0.658591,0.839985,0.846628,0.842238,0.85701,0.839985,0.74929,0.839985,0.534778,0.895504
0,de,0.741892,0.870942,0.776792,0.859499,0.631826,0.870942,0.847557,0.865874,0.815452,0.870942,0.741892,0.870942,0.515699,0.921758
0,fr,0.751793,0.756988,0.745264,0.746724,0.690615,0.756988,0.800212,0.797342,0.907642,0.756988,0.751793,0.756988,0.557347,0.799913
