In [None]:
from pathlib import Path
import numpy as np
import pandas as pd

path = 'results/'
basepath = Path(path)
# Sorted so the insertion order of `dic` (and therefore the column order of any
# table built from it) does not depend on the filesystem's listing order.
files_in_basepath = sorted(basepath.iterdir())

# Columns of every per-(dataset, preprocessing) summary table stored in `dic`.
BEST_COLUMNS = ['percent',
                'precision-mean', 'precision-std',
                'recall-mean', 'recall-std',
                'f1-score-mean', 'f1-score-std',
                'auc_roc-mean', 'auc_roc-std',
                'accuracy-mean', 'accuracy-std']

# Leading file-name tokens for which the dataset name spans two '_'-separated
# tokens (they are joined with '-', e.g. 'fnc0' + 'font' -> 'fnc0-font').
TWO_TOKEN_DATASET_PREFIXES = ('fnc0', 'fnc1', 'fnc2')

# Short labels used in place of the bag-of-words preprocessing tokens.
PREPROCESSING_LABELS = {
  'BoW-term-frequency-IDF': 'Bow-TFIDF',
  'BoW-term-frequency': 'Bow-TF',
  'BoW-binary': 'Bow-Binary',
}


def _parse_result_name(filename):
  """Split a result file name into ``(dataset, preprocessing, percent)``.

  The name is split on '_'. The dataset is the first token, or the first two
  tokens joined by '-' when the first token is in TWO_TOKEN_DATASET_PREFIXES.
  The following token is the preprocessing (mapped through
  PREPROCESSING_LABELS when listed there). The last token, with '.csv'
  replaced by '_%', is the percent label.

  Raises:
    ValueError: if the name has too few '_'-separated tokens.
  """
  tokens = filename.split('_')
  prepro_pos = 2 if tokens[0] in TWO_TOKEN_DATASET_PREFIXES else 1
  if len(tokens) <= prepro_pos:
    raise ValueError('unexpected result file name: {!r}'.format(filename))

  dataset = '-'.join(tokens[:prepro_pos])
  raw_prepro = tokens[prepro_pos]
  preprocessing = PREPROCESSING_LABELS.get(raw_prepro, raw_prepro)
  percent = tokens[-1].replace('.csv', '_%')
  return dataset, preprocessing, percent


def _best_scores(scores):
  """Return the best-run metrics of one result table as a dict.

  Precision, recall and F1 (means and stds) all come from the first row with
  the highest 'f1-score-mean'. AUC-ROC and accuracy are each taken from the
  first row with the highest value of their own '-mean' column, so the three
  groups may come from different rows.
  """
  # idxmax returns the label of the first maximum and skips NaN, unlike the
  # builtin max(), whose result depends on where a NaN sits in the column.
  f1_row = scores.loc[scores['f1-score-mean'].idxmax()]
  auc_row = scores.loc[scores['auc_roc-mean'].idxmax()]
  acc_row = scores.loc[scores['accuracy-mean'].idxmax()]
  return {
    'precision-mean': float(f1_row['precision-mean']),
    'precision-std': float(f1_row['precision-std']),
    'recall-mean': float(f1_row['recall-mean']),
    'recall-std': float(f1_row['recall-std']),
    'f1-score-mean': float(f1_row['f1-score-mean']),
    'f1-score-std': float(f1_row['f1-score-std']),
    'auc_roc-mean': float(auc_row['auc_roc-mean']),
    'auc_roc-std': float(auc_row['auc_roc-std']),
    'accuracy-mean': float(acc_row['accuracy-mean']),
    'accuracy-std': float(acc_row['accuracy-std']),
  }


def _load_best_results(result_files):
  """Build ``{dataset: {preprocessing: DataFrame}}`` from result CSV paths.

  Each DataFrame has the BEST_COLUMNS columns and one row per result file of
  that (dataset, preprocessing) pair. Entries that are not regular '.csv'
  files are skipped.
  """
  records = {}
  for item in result_files:
    if not item.is_file() or item.suffix != '.csv':
      continue

    dataset, preprocessing, percent = _parse_result_name(item.name)
    row = {'percent': percent}
    row.update(_best_scores(pd.read_csv(item, sep=';')))
    records.setdefault(dataset, {}).setdefault(preprocessing, []).append(row)

  # Build each table once from its collected rows; DataFrame.append was
  # removed in pandas 2.0 and growing a frame row by row is quadratic.
  return {
    dataset: {
      preprocessing: pd.DataFrame(rows, columns=BEST_COLUMNS)
      for preprocessing, rows in by_prepro.items()
    }
    for dataset, by_prepro in records.items()
  }


dic = _load_best_results(files_in_basepath)
    

In [None]:
def return_df(dataset, metric, percents=('3_%', '5_%', '7_%', '10_%'), results=None):
  """Build a percent-by-preprocessing table of one metric for one dataset.

  Args:
    dataset: key of the dataset in the results mapping.
    metric: column to read from each per-preprocessing table
      (e.g. 'f1-score-mean').
    percents: percent labels to include, in output row order.
    results: mapping ``{dataset: {preprocessing: DataFrame}}`` where each
      DataFrame has a 'percent' column and the metric columns. Defaults to
      the notebook-level ``dic``.

  Returns:
    A DataFrame with a 'percent' column followed by one column per
    preprocessing, in the mapping's iteration order. Each cell holds the
    metric of the first row whose 'percent' equals that label.

  Raises:
    KeyError: if `dataset` or `metric` is unknown.
    IndexError: if a preprocessing has no row for one of the percent labels.
  """
  if results is None:
    results = dic  # resolved at call time so the notebook global is picked up

  percents = list(percents)
  columns = {'percent': percents}

  for prepro, df_aux in results[dataset].items():
    values_list = []
    for percent in percents:
      matches = df_aux.loc[df_aux['percent'] == percent, metric]
      if matches.empty:
        # Same exception type as the bare .iloc[0] would raise, but the
        # message names the missing run.
        raise IndexError(
          "no row with percent {!r} for dataset {!r}, preprocessing {!r}".format(
            percent, dataset, prepro))
      values_list.append(matches.iloc[0])
    columns[prepro] = values_list

  return pd.DataFrame(columns)

In [None]:
# Metric columns exported to CSV for every dataset.
metrics = [
  'f1-score-mean',
  'f1-score-std',
  'auc_roc-mean',
  'auc_roc-std',
]
# Dataset keys to export.
datasets = [
  'fakebr',
  'fcn',
  'fnn',
  'fnc0-font',
  'fnc1-font',
  'fnc2-font',
]

In [None]:
# Write one ';'-separated CSV per (dataset, metric) pair into the working
# directory, named '<dataset>_<metric>.csv'. The table is transposed before
# writing.
for dataset, metric in ((ds, mt) for ds in datasets for mt in metrics):
  return_df(dataset, metric).transpose().to_csv('_'.join((dataset, metric)) + '.csv', sep=';')

In [None]:
# Metric shown by the display cells that follow.
metric = "f1-score-mean"

In [None]:
# Transposed table of the selected `metric` for the 'fcn' dataset.
return_df('fcn', metric).transpose()

Unnamed: 0,0,1,2,3
percent,3_%,5_%,7_%,10_%
Bow-Binary,0.590994,0.605292,0.609554,0.614134
Bow-TF,0.556182,0.582053,0.590989,0.595872
DBERTML,0.426008,0.567771,0.639536,0.706079
LIWC,0.556652,0.587139,0.599918,0.613213
Bow-TFIDF,0.252036,0.263504,0.292758,0.306639
MAE-FK,0.791086,0.762311,0.758954,0.755764
Concat-VAE-Density,0.834927,0.848999,0.856835,0.86614
MVAE-FK,0.863159,0.872674,0.876779,0.875571
MVAE-LIWC,0.829789,0.810797,0.812261,0.824411


In [None]:
# Transposed table of the selected `metric` for the 'fnn' dataset.
return_df('fnn', metric).transpose()

Unnamed: 0,0,1,2,3
percent,3_%,5_%,7_%,10_%
Bow-TF,0.327464,0.344203,0.349203,0.3534
Bow-Binary,0.355206,0.359653,0.360895,0.362079
LIWC,0.379059,0.382344,0.384551,0.386056
DBERTML,0.320759,0.345414,0.353765,0.36288
Bow-TFIDF,0.138188,0.147678,0.173259,0.226588
MAE-Triple,0.389493,0.388108,0.390113,0.394862
Concat-AE-Density,0.373424,0.391562,0.399573,0.401715
VAE,0.388422,0.387965,0.391128,0.39499
Density,0.374862,0.384254,0.383687,0.378119


In [None]:
# Transposed table of the selected `metric` for the 'fnn' dataset.
# NOTE(review): this cell repeats the preceding 'fnn' display cell verbatim;
# it looks like a candidate for removal.
return_df('fnn', metric).transpose()

Unnamed: 0,0,1,2,3
percent,3_%,5_%,7_%,10_%
Bow-TF,0.327464,0.344203,0.349203,0.3534
Bow-Binary,0.355206,0.359653,0.360895,0.362079
LIWC,0.379059,0.382344,0.384551,0.386056
DBERTML,0.320759,0.345414,0.353765,0.36288
Bow-TFIDF,0.138188,0.147678,0.173259,0.226588
MAE-Triple,0.389493,0.388108,0.390113,0.394862
Concat-AE-Density,0.373424,0.391562,0.399573,0.401715
VAE,0.388422,0.387965,0.391128,0.39499
Density,0.374862,0.384254,0.383687,0.378119


In [None]:
# Transposed table of the selected `metric` for the 'fakebr' dataset.
return_df('fakebr', metric).transpose()

Unnamed: 0,0,1,2,3
percent,3_%,5_%,7_%,10_%
DBERTML,0.573674,0.601598,0.617782,0.628042
Bow-Binary,0.617413,0.618455,0.619609,0.621113
Bow-TFIDF,0.201775,0.264095,0.215712,0.271078
Bow-TF,0.599661,0.60342,0.606575,0.609963
LIWC,0.597282,0.607415,0.614343,0.619874
Concat-DBBERT-Density,0.640366,0.647586,0.652834,0.653496
MVAE-FK,0.648719,0.648096,0.653225,0.651895
MVAE-LIWC,0.645236,0.646974,0.647395,0.649342
MVAE-Triple,0.645946,0.646177,0.64887,0.649905


In [None]:
# Transposed table of the selected `metric` for the 'fnc0-font' dataset.
return_df('fnc0-font', metric).transpose()

Unnamed: 0,0,1,2,3
percent,3_%,5_%,7_%,10_%
Bow-TF,0.632734,0.634931,0.636704,0.638223
LIWC,0.63274,0.636508,0.638102,0.640178
Bow-TFIDF,0.634641,0.628951,0.572334,0.6239
DBERTML,0.896606,0.920533,0.928676,0.934176
Bow-Binary,0.716309,0.714904,0.715015,0.712655
AE,0.889514,0.918285,0.928292,0.933076
MAE-LIWC,0.857018,0.805957,0.766985,0.742166
MVAE-FK,0.952287,0.945088,0.94467,0.945446
Density,0.66096,0.656161,0.662123,0.673194


In [None]:
# Transposed table of the selected `metric` for the 'fnc1-font' dataset.
return_df('fnc1-font', metric).transpose()

Unnamed: 0,0,1,2,3
percent,3_%,5_%,7_%,10_%
LIWC,0.635305,0.639707,0.641368,0.642767
Bow-Binary,0.647138,0.646511,0.646133,0.645737
Bow-TF,0.638165,0.639191,0.639689,0.640044
DBERTML,0.701687,0.709473,0.706771,0.709479
Bow-TFIDF,0.634318,0.607593,0.614448,0.61555
MVAE-FK,0.725245,0.736864,0.729825,0.721645
MAE-LIWC,0.665963,0.646235,0.643982,0.64576
Concat-VAE-Density,0.748369,0.751585,0.75108,0.753196
MAE-Triple,0.700729,0.702381,0.684456,0.68485


In [None]:
# Transposed table of the selected `metric` for the 'fnc2-font' dataset.
return_df('fnc2-font', metric).transpose()

Unnamed: 0,0,1,2,3
percent,3_%,5_%,7_%,10_%
DBERTML,0.669882,0.678936,0.680665,0.679277
Bow-Binary,0.645522,0.644267,0.643634,0.643163
Bow-TFIDF,0.622822,0.611702,0.592016,0.580921
LIWC,0.632807,0.639582,0.64044,0.641047
Bow-TF,0.632047,0.63732,0.637737,0.638733
Density,0.645396,0.666036,0.68092,0.655832
AE,0.671385,0.671214,0.668139,0.670463
MAE-LIWC,0.657654,0.655682,0.648995,0.646498
MAE-FK,0.700302,0.697565,0.698256,0.694206
