In [7]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import numpy as np
from sklearn.metrics import f1_score

In [25]:
def load_predictions(dataset_name, partition, fold_number=1):
    path = './Saved_Predict_and_Proba/Folds/' + dataset_name.upper() + '/F' + str(fold_number)  + '/pred_' + partition +'.csv'
    table_pred = pd.read_csv(path)
    if dataset_name == 'zw':
        label = table_pred['norm']
        preds = table_pred.drop('norm', axis=1)
    else: 
        label = table_pred['class']
        preds = table_pred.drop('class', axis=1)
    preds = preds.drop('Unnamed: 0', axis=1)
    return label, preds

def load_datasets(dataset_name, fold_number):
    label_train, preds_train = load_predictions(dataset_name, 'train', fold_number)
    label_val, preds_val = load_predictions(dataset_name, 'val', fold_number)
    label_test, preds_test = load_predictions(dataset_name, 'test', fold_number)
    return label_train, preds_train, label_test, preds_test, label_val, preds_val

def compile_results(dataset_name):
    algorithms_list = ['SVM', 'MLP', 'KNN', 'RF', 'EXTRA', 'CNN', 'LR', 'NB']
    fe_list = ['CV', 'TFIDF', 'W2V', 'GLOVE', 'FAST']
    results_f1_val = np.zeros((5, len(algorithms_list), len(fe_list)))
    results_f1_test = np.zeros((5, len(algorithms_list), len(fe_list)))

    for fold in range(1, 6):
        _, _, label_test, preds_test, label_val, preds_val = load_datasets(dataset_name, fold)

        for idx_alg, alg in enumerate(algorithms_list):
            for idx_fe, fe in enumerate(fe_list):
                #VAL
                y_pred_val = preds_val.filter(regex=alg+'-'+fe)
                results_f1_val[fold-1, idx_alg, idx_fe] = f1_score(label_val, y_pred_val,average='macro')           
                #TESTE
                y_pred_test = preds_test.filter(regex=alg+'-'+fe)
                results_f1_test[fold-1,idx_alg, idx_fe] = f1_score(label_test, y_pred_test,average='macro')
    return results_f1_val, results_f1_test

# Getting TD results

In [26]:
dataset_name = 'TD'
results_f1_val, results_f1_test = compile_results('TD')
results_df_test = pd.DataFrame(results_f1_test.mean(axis=0), columns=fe_list, index=algorithms_list)
results_df_val = pd.DataFrame(results_f1_val.mean(axis=0), columns=fe_list, index=algorithms_list)

# Getting ZW results

In [27]:
dataset_name = 'ZW'
results_f1_val, results_f1_test = compile_results('TD')
results_df_test = pd.DataFrame(results_f1_test.mean(axis=0), columns=fe_list, index=algorithms_list)
results_df_val = pd.DataFrame(results_f1_val.mean(axis=0), columns=fe_list, index=algorithms_list)

Unnamed: 0,CV,TFIDF,W2V,GLOVE,FAST
SVM,0.714897,0.697909,0.554967,0.508617,0.548171
MLP,0.705022,0.699068,0.595587,0.522664,0.614495
KNN,0.637151,0.406986,0.519574,0.502654,0.478859
RF,0.713962,0.678404,0.495706,0.500067,0.493595
EXTRA,0.704401,0.663261,0.47142,0.495077,0.463997
CNN,0.708612,0.596139,0.688834,0.651194,0.694465
LR,0.71369,0.708053,0.550345,0.483981,0.551479
NB,0.704162,0.65803,0.534599,0.48284,0.582332


# Getting union results (TD+ZW)

# Getting ZW Results