In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import numpy as np
from sklearn.metrics import f1_score

In [2]:
def load_predictions(dataset_name, partition, fold_number=1):
    """Load the saved predictions of one partition of one fold.

    The CSV is read from
    ``./Saved_Predict_and_Proba/Folds/<DATASET_NAME>/F<fold_number>/pred_<partition>.csv``
    (the dataset name is upper-cased when building the path).

    Parameters
    ----------
    dataset_name : str
        Dataset identifier. ``'zw'`` (case-insensitive) stores its labels in
        the ``norm`` column; every other dataset uses the ``class`` column.
    partition : str
        Partition suffix of the file name (the notebook uses ``'train'``,
        ``'val'`` and ``'test'``).
    fold_number : int, default 1
        Fold whose ``F<fold_number>`` directory is read.

    Returns
    -------
    label : pandas.Series
        The ground-truth column.
    preds : pandas.DataFrame
        All remaining columns, without the label column and without the
        ``Unnamed: 0`` column.

    Raises
    ------
    KeyError
        If the expected label column is missing from the file.
    """
    path = ('./Saved_Predict_and_Proba/Folds/' + dataset_name.upper()
            + '/F' + str(fold_number) + '/pred_' + partition + '.csv')
    table_pred = pd.read_csv(path)

    label_column = 'norm' if dataset_name.lower() == 'zw' else 'class'
    label = table_pred[label_column]
    preds = table_pred.drop(columns=label_column)

    # 'Unnamed: 0' is presumably the index written by an earlier to_csv call.
    # Files saved without an index do not have it, so dropping must not fail.
    preds = preds.drop(columns='Unnamed: 0', errors='ignore')
    return label, preds

def load_datasets(dataset_name, fold_number):
    """Load the train, validation and test predictions of one fold.

    Each partition is read with ``load_predictions``. Note the order of the
    returned values: the test pair comes before the validation pair.

    Returns
    -------
    tuple
        ``(label_train, preds_train, label_test, preds_test, label_val, preds_val)``
    """
    train_pair = load_predictions(dataset_name, 'train', fold_number)
    val_pair = load_predictions(dataset_name, 'val', fold_number)
    test_pair = load_predictions(dataset_name, 'test', fold_number)
    return (*train_pair, *test_pair, *val_pair)

def _select_prediction_column(preds, pattern, partition, fold):
    """Return the single column of ``preds`` whose name matches ``pattern``."""
    matched = preds.filter(regex=pattern)
    if matched.shape[1] != 1:
        raise ValueError(
            "Expected exactly one column matching %r in the %s predictions of fold %s, "
            "found %d: %s" % (pattern, partition, fold, matched.shape[1], list(matched.columns))
        )
    # Hand f1_score a 1-D column instead of an (n, 1) frame.
    return matched.iloc[:, 0]


def compile_results(dataset_name, algorithms=None, feature_extractors=None, n_folds=5):
    """Compute the per-fold macro-F1 of every classifier / feature-extractor pair.

    For each fold ``1..n_folds`` the validation and test predictions are loaded
    with ``load_datasets``. The prediction column of each pair is selected with
    the pattern ``'<algorithm>-<feature extractor>'`` and scored against the
    labels of that fold with ``f1_score(..., average='macro')``.

    Parameters
    ----------
    dataset_name : str
        Forwarded to ``load_datasets``.
    algorithms : sequence of str, optional
        Classifier names. Defaults to
        ``['SVM', 'MLP', 'KNN', 'RF', 'EXTRA', 'CNN', 'LR', 'NB']``.
    feature_extractors : sequence of str, optional
        Feature-extractor names. Defaults to
        ``['CV', 'TFIDF', 'W2V', 'GLOVE', 'FAST']``.
    n_folds : int, default 5
        Number of folds to read (folds are numbered from 1).

    Returns
    -------
    results_f1_val, results_f1_test : numpy.ndarray
        Arrays of shape ``(n_folds, len(algorithms), len(feature_extractors))``
        holding the validation and test macro-F1 scores.

    Raises
    ------
    ValueError
        If a pattern does not select exactly one prediction column.
    """
    if algorithms is None:
        algorithms_list = ['SVM', 'MLP', 'KNN', 'RF', 'EXTRA', 'CNN', 'LR', 'NB']
    else:
        algorithms_list = list(algorithms)
    if feature_extractors is None:
        fe_list = ['CV', 'TFIDF', 'W2V', 'GLOVE', 'FAST']
    else:
        fe_list = list(feature_extractors)

    shape = (n_folds, len(algorithms_list), len(fe_list))
    results_f1_val = np.zeros(shape)
    results_f1_test = np.zeros(shape)

    for fold in range(1, n_folds + 1):
        _, _, label_test, preds_test, label_val, preds_val = load_datasets(dataset_name, fold)

        for idx_alg, alg in enumerate(algorithms_list):
            for idx_fe, fe in enumerate(fe_list):
                pattern = alg + '-' + fe
                # VAL
                y_pred_val = _select_prediction_column(preds_val, pattern, 'val', fold)
                results_f1_val[fold - 1, idx_alg, idx_fe] = f1_score(
                    label_val, y_pred_val, average='macro')
                # TEST
                y_pred_test = _select_prediction_column(preds_test, pattern, 'test', fold)
                results_f1_test[fold - 1, idx_alg, idx_fe] = f1_score(
                    label_test, y_pred_test, average='macro')
    return results_f1_val, results_f1_test

# Row labels (classifiers) of the result tables built in the cells below.
algorithms_list = [
    'SVM',
    'MLP',
    'KNN',
    'RF',
    'EXTRA',
    'CNN',
    'LR',
    'NB',
]
# Column labels (feature extractors) of the result tables built in the cells below.
fe_list = [
    'CV',
    'TFIDF',
    'W2V',
    'GLOVE',
    'FAST',
]

# Getting TD results

In [21]:
dataset_name = 'TD'
results_f1_val, results_f1_test = compile_results(dataset_name)

# Macro-F1 averaged over the fold axis (axis 0), rounded to three decimals.
results_df_test_TD = pd.DataFrame(
    data=results_f1_test.mean(axis=0), index=algorithms_list, columns=fe_list
).round(3)
results_df_val_TD = pd.DataFrame(
    data=results_f1_val.mean(axis=0), index=algorithms_list, columns=fe_list
).round(3)

# Standard deviation over the same fold axis (NumPy default, ddof=0).
std_df_test_TD = pd.DataFrame(
    data=results_f1_test.std(axis=0), index=algorithms_list, columns=fe_list
).round(3)
std_df_val_TD = pd.DataFrame(
    data=results_f1_val.std(axis=0), index=algorithms_list, columns=fe_list
).round(3)

### Save CSV

In [23]:
# Write one CSV per fold with the TD test macro-F1 table
# (rows: classifiers, columns: feature extractors).
for index in range(5):
    df = pd.DataFrame(
        data=results_f1_test[index],
        index=algorithms_list,
        columns=fe_list,
    ).round(3)
    df.to_csv(f'Results TD Fold {index+1}.csv')

# Getting ZW results

In [24]:
dataset_name = 'ZW'
results_f1_val_ZW, results_f1_test_ZW = compile_results(dataset_name)

# Macro-F1 averaged over the fold axis (axis 0), rounded to three decimals.
results_df_test_ZW = pd.DataFrame(
    data=results_f1_test_ZW.mean(axis=0), index=algorithms_list, columns=fe_list
).round(3)
results_df_val_ZW = pd.DataFrame(
    data=results_f1_val_ZW.mean(axis=0), index=algorithms_list, columns=fe_list
).round(3)

# Standard deviation over the same fold axis (NumPy default, ddof=0).
std_df_test_ZW = pd.DataFrame(
    data=results_f1_test_ZW.std(axis=0), index=algorithms_list, columns=fe_list
).round(3)
std_df_val_ZW = pd.DataFrame(
    data=results_f1_val_ZW.std(axis=0), index=algorithms_list, columns=fe_list
).round(3)

### Save CSV

In [25]:
# Write one CSV per fold with the ZW test macro-F1 table
# (rows: classifiers, columns: feature extractors).
for index in range(5):
    df = pd.DataFrame(
        data=results_f1_test_ZW[index],
        index=algorithms_list,
        columns=fe_list,
    ).round(3)
    df.to_csv(f'Results ZW Fold {index+1}.csv')

# Getting union results (TD+ZW)

In [26]:
dataset_name = 'Union'
results_f1_val_Union, results_f1_test_Union = compile_results(dataset_name)

# Macro-F1 averaged over the fold axis (axis 0), rounded to three decimals.
results_df_test_Union = pd.DataFrame(
    data=results_f1_test_Union.mean(axis=0), index=algorithms_list, columns=fe_list
).round(3)
results_df_val_Union = pd.DataFrame(
    data=results_f1_val_Union.mean(axis=0), index=algorithms_list, columns=fe_list
).round(3)

# Standard deviation over the same fold axis (NumPy default, ddof=0).
std_df_test_Union = pd.DataFrame(
    data=results_f1_test_Union.std(axis=0), index=algorithms_list, columns=fe_list
).round(3)
std_df_val_Union = pd.DataFrame(
    data=results_f1_val_Union.std(axis=0), index=algorithms_list, columns=fe_list
).round(3)

### Save CSV

In [27]:
# Write one CSV per fold with the Union (TD+ZW) test macro-F1 table
# (rows: classifiers, columns: feature extractors).
for index in range(5):
    df = pd.DataFrame(
        data=results_f1_test_Union[index],
        index=algorithms_list,
        columns=fe_list,
    ).round(3)
    df.to_csv(f'Results TD+ZW Fold {index+1}.csv')

In [34]:
# NOTE(review): this cell originally read `results_df_test`, a name that no cell in
# this notebook defines, so it raised NameError after Restart & Run All. The Union
# table is used here as the closest preceding result - confirm which dataset is
# intended (the alternatives are results_df_test_TD and results_df_test_ZW).
results_df_test_Union.to_latex()

'\\begin{tabular}{lrrrrr}\n\\toprule\n{} &     CV &  TFIDF &    W2V &  GLOVE &   FAST \\\\\n\\midrule\nSVM   &  0.872 &  0.881 &  0.758 &  0.670 &  0.753 \\\\\nMLP   &  0.871 &  0.852 &  0.757 &  0.673 &  0.770 \\\\\nKNN   &  0.691 &  0.358 &  0.666 &  0.610 &  0.608 \\\\\nRF    &  0.866 &  0.864 &  0.645 &  0.644 &  0.647 \\\\\nEXTRA &  0.860 &  0.855 &  0.623 &  0.631 &  0.608 \\\\\nCNN   &  0.842 &  0.815 &  0.848 &  0.836 &  0.855 \\\\\nLR    &  0.882 &  0.875 &  0.724 &  0.621 &  0.735 \\\\\nNB    &  0.850 &  0.820 &  0.635 &  0.604 &  0.666 \\\\\n\\bottomrule\n\\end{tabular}\n'

In [35]:
# NOTE(review): this cell originally read `std_df_test`, a name that no cell in this
# notebook defines, so it raised NameError after Restart & Run All. The Union table
# is used here as the closest preceding result - confirm which dataset is intended
# (the alternatives are std_df_test_TD and std_df_test_ZW).
std_df_test_Union.to_latex()

'\\begin{tabular}{lrrrrr}\n\\toprule\n{} &     CV &  TFIDF &    W2V &  GLOVE &   FAST \\\\\n\\midrule\nSVM   &  0.004 &  0.004 &  0.003 &  0.006 &  0.007 \\\\\nMLP   &  0.004 &  0.004 &  0.005 &  0.009 &  0.003 \\\\\nKNN   &  0.018 &  0.009 &  0.009 &  0.007 &  0.008 \\\\\nRF    &  0.006 &  0.003 &  0.009 &  0.007 &  0.007 \\\\\nEXTRA &  0.004 &  0.003 &  0.010 &  0.007 &  0.008 \\\\\nCNN   &  0.014 &  0.011 &  0.011 &  0.006 &  0.009 \\\\\nLR    &  0.003 &  0.003 &  0.003 &  0.007 &  0.006 \\\\\nNB    &  0.006 &  0.005 &  0.006 &  0.006 &  0.010 \\\\\n\\bottomrule\n\\end{tabular}\n'