In [1]:
import pandas as pd 

from utils.preprocessing import preprocess_df
from utils.df_loader import load_adult_df, load_compas_df, load_german_df, load_diabetes_df, load_breast_cancer_df
from utils.evaluation import get_evaluations, EvaluationMatrix

In [2]:
#### Select dataset ####
dataset_name = 'diabetes' # [adult, german, compas]

if dataset_name == 'adult':
    dataset_loading_fn = load_adult_df
elif dataset_name == 'german':
    dataset_loading_fn = load_german_df
elif dataset_name == 'compas':
    dataset_loading_fn = load_compas_df
elif dataset_name == 'diabetes':
    dataset_loading_fn = load_diabetes_df
elif dataset_name == 'breast_cancer':
    dataset_loading_fn = load_breast_cancer_df
else:
    raise Exception("Unsupported dataset")

df_info = preprocess_df(dataset_loading_fn)

In [3]:
folder_name = 'proto_german'
model_name = 'rfc'
file_name = f'{folder_name}_{model_name}_result.csv'
result_path = f'./results/{folder_name}/{file_name}'
result_df = pd.read_csv(result_path)

In [4]:
result_df.head(5)

Unnamed: 0.1,Unnamed: 0,scaled_input_duration_in_month,scaled_input_credit_amount,scaled_input_installment_as_income_perc,scaled_input_present_res_since,scaled_input_age,scaled_input_credits_this_bank,scaled_input_people_under_maintenance,scaled_input_account_check_status,scaled_input_credit_history,...,origin_cf_other_installment_plans,origin_cf_housing,origin_cf_job,origin_cf_telephone,origin_cf_foreign_worker,origin_cf_default,running_time,Found,ground_truth,prediction
0,0,0.470588,0.365192,1.0,0.666667,0.178571,0.0,0.0,< 0 DM,delay in paying off in the past,...,none,own,unskilled - resident,none,yes,Y,154.583607,Y,Y,Y
1,0,0.470588,0.365192,1.0,0.666667,0.178571,0.0,0.0,< 0 DM,delay in paying off in the past,...,none,own,unskilled - resident,none,yes,Y,154.696503,Y,Y,Y
2,0,0.470588,0.365192,1.0,0.666667,0.178571,0.0,0.0,< 0 DM,delay in paying off in the past,...,none,own,unskilled - resident,none,yes,Y,154.213599,Y,Y,Y
3,0,0.470588,0.365192,1.0,0.666667,0.178571,0.0,0.0,< 0 DM,delay in paying off in the past,...,none,own,unskilled - resident,none,yes,Y,153.861547,Y,Y,Y
4,0,0.470588,0.365192,1.0,0.666667,0.178571,0.0,0.0,< 0 DM,delay in paying off in the past,...,none,own,unskilled - resident,none,yes,Y,154.619455,Y,Y,Y


In [5]:
evaluation_df = get_evaluations(result_df[80:81], df_info, matrix = [EvaluationMatrix.L1, EvaluationMatrix.L2, EvaluationMatrix.Sparsity, EvaluationMatrix.Realistic, EvaluationMatrix.MAD, EvaluationMatrix.Mahalanobis])

In [6]:
evaluation_df[evaluation_df['Found']=="Y"]

Unnamed: 0.1,Unnamed: 0,scaled_input_duration_in_month,scaled_input_credit_amount,scaled_input_installment_as_income_perc,scaled_input_present_res_since,scaled_input_age,scaled_input_credits_this_bank,scaled_input_people_under_maintenance,scaled_input_account_check_status,scaled_input_credit_history,...,running_time,Found,ground_truth,prediction,L1,L2,Sparsity,Realistic,MAD,Mahalanobis
80,0,0.205882,0.103114,1.0,1.0,0.089286,0.333333,0.0,< 0 DM,critical account/ other credits existing (not ...,...,154.43507,Y,Y,Y,20.731616,4.491336,24.0,True,1.068046,2.297903


In [7]:
csv_save_result_path = f'results/{folder_name}/eval_{file_name}'
evaluation_df.to_csv(csv_save_result_path)