In [1]:
import pandas as pd 

from utils.preprocessing import preprocess_df
from utils.df_loader import load_adult_df, load_compas_df, load_german_df, load_diabetes_df, load_breast_cancer_df
from utils.evaluation import get_evaluations, EvaluationMatrix

In [2]:
#### Select dataset ####
# Experiment configuration. These three values are combined further down to
# build the path of the results CSV that this notebook evaluates.
dataset_name = 'diabetes' # [adult, german, compas, diabetes, breast_cancer]
cf_algorithm = 'dice'
model_name = 'rfc'

# Dispatch table: supported dataset name -> loader function.
dataset_loaders = {
    'adult': load_adult_df,
    'german': load_german_df,
    'compas': load_compas_df,
    'diabetes': load_diabetes_df,
    'breast_cancer': load_breast_cancer_df,
}

if dataset_name not in dataset_loaders:
    # ValueError is a subclass of Exception, so any existing `except Exception`
    # handling keeps working; the message now names the bad value and the options.
    raise ValueError(
        f"Unsupported dataset {dataset_name!r}; "
        f"expected one of {sorted(dataset_loaders)}"
    )
dataset_loading_fn = dataset_loaders[dataset_name]

# preprocess_df is handed the loader function itself, not an already-loaded frame.
df_info = preprocess_df(dataset_loading_fn)

In [3]:
# Results live at ./results/<algorithm>_<dataset>/<algorithm>_<dataset>_<model>_result.csv
folder_name = cf_algorithm + '_' + dataset_name
file_name = '_'.join((folder_name, model_name, 'result.csv'))
result_path = './results/' + folder_name + '/' + file_name

# Load the counterfactual results produced for this algorithm/dataset/model combination.
result_df = pd.read_csv(result_path)

In [4]:
# Preview the first five rows of the loaded results (head() defaults to n=5).
result_df.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,scaled_input_Pregnancies,scaled_input_Glucose,scaled_input_BloodPressure,scaled_input_SkinThickness,scaled_input_Insulin,scaled_input_BMI,scaled_input_DiabetesPedigreeFunction,scaled_input_Age,...,origin_cf_SkinThickness,origin_cf_Insulin,origin_cf_BMI,origin_cf_DiabetesPedigreeFunction,origin_cf_Age,origin_cf_Outcome,running_time,Found,ground_truth,prediction
0,0,0,0.411765,0.909548,0.688525,0.212121,0.22695,0.535022,0.216909,0.5,...,21.000001,191.999994,30.697496,0.586,51.0,N,0.071768,Y,Y,Y
1,0,0,0.411765,0.909548,0.688525,0.212121,0.22695,0.535022,0.216909,0.5,...,21.000001,191.999994,35.900002,0.586,51.0,N,0.078791,Y,Y,Y
2,0,0,0.411765,0.909548,0.688525,0.212121,0.22695,0.535022,0.216909,0.5,...,21.000001,191.999994,15.924904,0.586,51.0,N,0.079228,Y,Y,Y
3,0,0,0.411765,0.909548,0.688525,0.212121,0.22695,0.535022,0.216909,0.5,...,21.000001,191.999994,35.900002,0.586,51.0,N,0.075799,Y,Y,Y
4,0,0,0.411765,0.909548,0.688525,0.212121,0.22695,0.535022,0.216909,0.5,...,21.000001,191.999994,35.900002,0.586,51.0,N,0.07383,Y,Y,Y


In [5]:
# Metrics requested from get_evaluations, listed one per line for readability.
evaluation_metrics = [
    EvaluationMatrix.L1,
    EvaluationMatrix.L2,
    EvaluationMatrix.Sparsity,
    EvaluationMatrix.Realistic,
    EvaluationMatrix.MAD,
    EvaluationMatrix.Mahalanobis,
]

evaluation_df = get_evaluations(result_df, df_info, matrix=evaluation_metrics)

In [6]:
# Display only the rows whose 'Found' flag is "Y".
found_mask = evaluation_df['Found'] == "Y"
evaluation_df.loc[found_mask]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,scaled_input_Pregnancies,scaled_input_Glucose,scaled_input_BloodPressure,scaled_input_SkinThickness,scaled_input_Insulin,scaled_input_BMI,scaled_input_DiabetesPedigreeFunction,scaled_input_Age,...,running_time,Found,ground_truth,prediction,L1,L2,Sparsity,Realistic,MAD,Mahalanobis
0,0,0,0.411765,0.909548,0.688525,0.212121,0.226950,0.535022,0.216909,0.500000,...,0.071768,Y,Y,Y,0.539915,0.468837,2,True,0.568060,0.076819
1,0,0,0.411765,0.909548,0.688525,0.212121,0.226950,0.535022,0.216909,0.500000,...,0.078791,Y,Y,Y,0.708988,0.595142,2,True,0.728284,0.098540
2,0,0,0.411765,0.909548,0.688525,0.212121,0.226950,0.535022,0.216909,0.500000,...,0.079228,Y,Y,Y,0.956567,0.553143,3,True,1.147663,0.097230
3,0,0,0.411765,0.909548,0.688525,0.212121,0.226950,0.535022,0.216909,0.500000,...,0.075799,Y,Y,Y,0.584293,0.487267,2,True,0.601122,0.080781
4,0,0,0.411765,0.909548,0.688525,0.212121,0.226950,0.535022,0.216909,0.500000,...,0.073830,Y,Y,Y,0.658100,0.621622,2,True,0.658310,0.098956
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,19,0,0.000000,0.502513,0.721311,0.606061,0.130024,0.697466,0.377455,0.166667,...,0.073774,Y,N,N,1.276057,0.966085,2,True,1.033210,0.181072
96,19,0,0.000000,0.502513,0.721311,0.606061,0.130024,0.697466,0.377455,0.166667,...,0.072753,Y,N,N,1.003458,0.717622,2,True,0.778396,0.174200
97,19,0,0.000000,0.502513,0.721311,0.606061,0.130024,0.697466,0.377455,0.166667,...,0.071777,Y,N,N,0.601762,0.536370,2,True,0.503615,0.104266
98,19,0,0.000000,0.502513,0.721311,0.606061,0.130024,0.697466,0.377455,0.166667,...,0.073492,Y,N,N,1.383921,1.010444,2,True,1.697070,0.150069


In [7]:
# Save the evaluated results in the same results folder, with an "eval_" file-name prefix.
csv_save_result_path = f'results/{folder_name}/eval_{file_name}'

# index=False: writing the row index makes pandas surface it as an extra
# "Unnamed: 0"-style column when the CSV is read back without index_col.
# The loaded results already carry three such columns from earlier round trips
# ("Unnamed: 0.2", "Unnamed: 0", "Unnamed: 0.1"); do not add another.
# NOTE(review): assumes the row index is purely positional and that no downstream
# reader relies on it being present in the file - confirm.
evaluation_df.to_csv(csv_save_result_path, index=False)