### Create a results file for each model, aggregating the metrics over all folds

In [3]:
from statistics import stdev
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from os import walk, makedirs
import pandas as pd
import numpy as np

# Root directory holding one sub-directory per fold.
# The fold directories could be listed with next(walk(predictions_dir))[1].
predictions_dir = '../predictions'

# Sub-directory name of every pretrained model that gets a results table.
pretrained_dirnames = [
  'bert-base-multilingual-cased-sentiment-multilingual',
  'dehatebert-mono-french',
  'french_xlm_xnli',
  'xlm-roberta-base-sentiment-multilingual',
]

# Column layout of a results table: two identifying columns followed by a
# (value, std_value) pair for each metric.
result_cols = [
  "model", "traindata",
  "accuracy", "std_accuracy",
  "precision", "std_precision",
  "recall", "std_recall",
  "f1", "std_f1",
]

# One independent, initially empty results table per pretrained model,
# keyed by the model's directory name.
dfs = {dirname: pd.DataFrame(columns=result_cols) for dirname in pretrained_dirnames}

# Aggregate the per-fold predictions into one row per (model, predictions file).
#
# For every pretrained model, each predictions file found in fold 0 is read
# from every fold directory (k_is_0 .. k_is_{n_folds-1}). Accuracy and the
# macro-averaged precision / recall / F1 are computed per fold from the
# "class" and "predicted" columns, then the mean and the sample standard
# deviation over the folds are appended as a new row to the model's table.

# Number of cross-validation folds; statistics.stdev needs at least two.
n_folds = 3
# Metric order must match the (value, std_value) column pairs of the tables.
metric_names = ("accuracy", "precision", "recall", "f1")

for pretrained in pretrained_dirnames:
  # Index directly so an unknown model name fails here with a KeyError
  # instead of yielding None and failing later.
  df = dfs[pretrained]
  # Fold 0 serves as the reference listing of predictions files; sorting makes
  # the row order reproducible, since os.walk returns files in arbitrary order.
  result_files = sorted(next(walk(f"{predictions_dir}/k_is_0/{pretrained}/"))[2])
  for file in result_files:  # CSV columns: 'text', 'class', 'predicted'
    fold_scores = {name: [] for name in metric_names}
    for k in range(n_folds):
      # No shuffling: every metric below is independent of the row order.
      results = pd.read_csv(f"{predictions_dir}/k_is_{k}/{pretrained}/{file}")
      y_true, y_pred = results["class"], results["predicted"]
      prec_curr, recall_curr, f1_curr, _ = precision_recall_fscore_support(
          y_true, y_pred, average='macro')
      fold_scores["accuracy"].append(accuracy_score(y_true, y_pred))
      fold_scores["precision"].append(prec_curr)
      fold_scores["recall"].append(recall_curr)
      fold_scores["f1"].append(f1_curr)

    # Append one row: identifiers, then mean and std of each metric.
    row = [pretrained, file]
    for name in metric_names:
      row.extend([np.mean(fold_scores[name]), stdev(fold_scores[name])])
    df.loc[len(df.index)] = row

In [4]:
# Write each model's aggregated results table to its own CSV-formatted file.
results_dir = "../results"
# Create the output directory first; to_csv does not create missing
# directories and would otherwise fail on a fresh checkout.
makedirs(results_dir, exist_ok=True)
for pretrained in pretrained_dirnames:
  # NOTE(review): the file is named after the model directory, without a
  # ".csv" extension - confirm that downstream readers expect this name.
  dfs[pretrained].to_csv(f"{results_dir}/{pretrained}", index=False)