In [1]:
import pandas as pd 

from utils.preprocessing import preprocess_df
from utils.df_loader import load_adult_df, load_compas_df, load_german_df, load_diabetes_df
from utils.evaluation import get_evaluations, EvaluationMatrix

In [2]:
#### Select dataset ####
# Pick which dataset to evaluate; valid names are the keys of `dataset_loaders`.
dataset_name = 'adult' # [adult, german, compas, diabetes]

# Dispatch table: dataset name -> loader function imported from utils.df_loader.
dataset_loaders = {
    'adult': load_adult_df,
    'german': load_german_df,
    'compas': load_compas_df,
    'diabetes': load_diabetes_df,
}

# Fail early, naming the offending value and the accepted choices.
# ValueError is a subclass of Exception, so existing `except Exception` handlers still work.
if dataset_name not in dataset_loaders:
    raise ValueError(
        f"Unsupported dataset: {dataset_name!r} "
        f"(expected one of {sorted(dataset_loaders)})"
    )
dataset_loading_fn = dataset_loaders[dataset_name]

# preprocess_df is handed the loader function itself, not an already-loaded frame.
df_info = preprocess_df(dataset_loading_fn)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col][df[col] == '?'] = df[col].value_counts().index[0]


In [3]:
# Build the path of the saved results file:
#   results/<folder_name>/<folder_name>_<model_name>_result.csv
folder_name, model_name = 'dice_adult', 'rfc'
file_name = f'{folder_name}_{model_name}_result.csv'
result_path = f'results/{folder_name}/{file_name}'

# Read the results table from disk.
result_df = pd.read_csv(filepath_or_buffer=result_path)

In [4]:
# Show only the first rows as a preview instead of dumping the whole results frame.
result_df.head()

Unnamed: 0.1,Unnamed: 0,scaled_input_age,scaled_input_workclass,scaled_input_education,scaled_input_marital-status,scaled_input_occupation,scaled_input_relationship,scaled_input_race,scaled_input_sex,scaled_input_capital-gain,...,origin_cf_sex,origin_cf_capital-gain,origin_cf_capital-loss,origin_cf_hours-per-week,origin_cf_native-country,origin_cf_class,running_time,Found,ground_truth,prediction
0,0,0.520548,State-gov,Bachelors,Married-civ-spouse,Prof-specialty,Husband,White,Male,0.0,...,Male,0.0,0.0,71.511943,United-States,>50K,0.19913,Y,<=50K,<=50K
1,0,0.657534,Private,Bachelors,Married-civ-spouse,Transport-moving,Husband,White,Male,0.0,...,Male,0.0,4791.6,24.529313,United-States,<=50K,0.192298,Y,<=50K,<=50K
2,0,0.164384,Private,HS-grad,Never-married,Craft-repair,Not-in-family,White,Male,0.0,...,Male,79999.2,0.0,39.999999,Guatemala,>50K,0.189915,Y,<=50K,<=50K
3,0,0.493151,State-gov,Doctorate,Married-civ-spouse,Prof-specialty,Husband,White,Male,0.0,...,Male,0.0,0.0,39.999999,United-States,<=50K,0.188636,Y,>50K,>50K
4,0,0.205479,Private,Bachelors,Never-married,Adm-clerical,Not-in-family,Asian-Pac-Islander,Male,0.0,...,Male,79999.2,0.0,39.999999,United-States,>50K,0.188295,Y,<=50K,<=50K


In [5]:
# Call get_evaluations on the loaded results and the preprocessed dataset info,
# requesting the six EvaluationMatrix entries listed below.
evaluation_df = get_evaluations(
    result_df,
    df_info,
    matrix=[
        EvaluationMatrix.L1,
        EvaluationMatrix.L2,
        EvaluationMatrix.Sparsity,
        EvaluationMatrix.Realistic,
        EvaluationMatrix.MAD,
        EvaluationMatrix.Mahalanobis,
    ],
)

In [7]:
# Show only the first rows as a preview instead of dumping the whole evaluation frame.
evaluation_df.head()

Unnamed: 0.1,Unnamed: 0,scaled_input_age,scaled_input_workclass,scaled_input_education,scaled_input_marital-status,scaled_input_occupation,scaled_input_relationship,scaled_input_race,scaled_input_sex,scaled_input_capital-gain,...,running_time,Found,ground_truth,prediction,L1,L2,Sparsity,Realistic,MAD,Mahalanobis
0,0,0.520548,State-gov,Bachelors,Married-civ-spouse,Prof-specialty,Husband,White,Male,0.0,...,0.19913,Y,<=50K,<=50K,0.576652,0.576652,1,True,1.863057,0.072655
1,0,0.657534,Private,Bachelors,Married-civ-spouse,Transport-moving,Husband,White,Male,0.0,...,0.192298,Y,<=50K,<=50K,1.146217,1.100971,2,False,7.345553,0.102239
2,0,0.164384,Private,HS-grad,Never-married,Craft-repair,Not-in-family,White,Male,0.0,...,0.189915,Y,<=50K,<=50K,0.8,0.8,1,True,10.114321,0.059083
3,0,0.493151,State-gov,Doctorate,Married-civ-spouse,Prof-specialty,Husband,White,Male,0.0,...,0.188636,Y,>50K,>50K,2.253354,1.436728,3,True,0.433432,0.21498
4,0,0.205479,Private,Bachelors,Never-married,Adm-clerical,Not-in-family,Asian-Pac-Islander,Male,0.0,...,0.188295,Y,<=50K,<=50K,1.308151,0.947743,2,True,10.943135,0.115663


In [6]:
# Save the evaluated results beside the input CSV, using the same file name with an `eval_` prefix.
csv_save_result_path = f'results/{folder_name}/eval_{file_name}'
# index=False keeps the row index out of the file. Writing it is what produces the
# extra "Unnamed: 0" / "Unnamed: 0.1" columns when a saved CSV is read back with pd.read_csv.
evaluation_df.to_csv(csv_save_result_path, index=False)