In [1]:
import json
from typing import List, Dict

import pandas as pd
import numpy as np
from pathlib import Path
# import plotnine as pn
from typeguard import typechecked

In [2]:
def read_data_baseline_new(path):
    """
    Generates a dict object representing the data in the baseline_splits.json.
            Parameters:
                    path : path of the file baseline_splits.json
            Returns:
                    baseline_results (Dict): a preprocessed dict representing the data
    """
    baseline_results = []
    with open(path, 'r') as f:
        results = json.load(f)
        
    for dataset_key, dataset_value in results["results"].items():
        for preprocessing_key, preprocessing_value in dataset_value.items():
            if preprocessing_key == 'baseline':
                for classifier_key, performance_metrics in preprocessing_value.items():
                    baseline_results.append({'experiment':'baseline', 'bootstrap_type':"No bootstrap", 'dataset':dataset_key, 'preprocessing':['No Preprocessing'], 'classifier':classifier_key, 'metrics':performance_metrics})
            elif preprocessing_key != "DisparateImpactRemover":
                for classifier_key, performance_metrics in preprocessing_value[0]["results"].items():
                    key = dataset_key + "-" + preprocessing_key + "-" + classifier_key
                    baseline_results.append({'experiment':'preprocessing', 'bootstrap_type':"No bootstrap", 'dataset':dataset_key, 'preprocessing':[preprocessing_key], 'classifier':classifier_key, 'metrics':performance_metrics})
    return baseline_results

In [3]:
def get_fairboost_run_results(dataset_name:str, raw_run_results:Dict, with_preprocessing:bool) -> List[Dict]:
    run_results=[]
    
    if with_preprocessing:
        # this is the fairbosot results (with preprocessing)
        experiment = 'fairboost'
        
        # get the preprocessing methods used in the run (e.g., [LFR, OptimPreproc] or [Reweighing])     
        preprocessing_methods = list(raw_run_results['hyperparameters']["preprocessing"].keys())  
        
        # get the bootstrap method for fairboost (None, Defaul, or Custom)         
        bootstrap_method = raw_run_results['hyperparameters']["init"]['bootstrap_type'].lower()
        
    else:
        # since this is a fairboost baseline/normal ensemble we don't have preprocessing
        experiment = 'ensemble'
        preprocessing_methods = ['No Preprocessing'] 
        
        # get the bootstrap method for the ensemble (None, Defaul, or Custom)         
        bootstrap_method = raw_run_results['hyperparameters']['bootstrap_type'].lower()
            
    # iterate over classifiers to get their performance metrics         
    for classifier, performance_metrics in raw_run_results["results"].items():
        run_results.append({'experiment':experiment, 'bootstrap_type':bootstrap_method, 'dataset':dataset_name, 'preprocessing':preprocessing_methods, 'classifier':classifier, 'metrics':performance_metrics})
    
    return run_results

In [4]:
def read_data_fairboost_new(path):
    """
    Generates a dict object representing the data in the fairboost_splits.json.
            Parameters:
                    path : path of the file fairboost_splits.json
            Returns:
                    dict (Dict): a preprocessed dict representing the data
    """
    all_results = []
    with open(path, 'r') as f:
        results = json.load(f)
        
    for dataset, dataset_results in results["results"].items():
        
        # results when ensemble doesn't apply prerpcessing techniques         
        ensemble_only_results = dataset_results['baseline']
        
        for run in ensemble_only_results:
            ensemble_run_results = get_fairboost_run_results(dataset_name=dataset, raw_run_results=run, with_preprocessing=False)
            all_results.extend(ensemble_run_results)
            
        
        # results for fairboost (ensemble + prerpcessing)         
        fairboost_results = dataset_results['fairboost']
        
        for run in fairboost_results:
            fairboost_run_results = get_fairboost_run_results(dataset_name=dataset, raw_run_results=run, with_preprocessing=True)
            all_results.extend(fairboost_run_results)
            
    return all_results

In [5]:
@typechecked
def read_data() -> List[Dict]:
    """
    Read data from files and return its content as a list of dictionnaries.
            Returns:
                    data: the data contained in both files
    """
    data_path = Path("raw_data")
    fairboost_results_path = Path(data_path, 'fairboost_splits.json')
    baseline_results_path = Path(data_path, 'baseline_splits.json')
    data_baseline = read_data_baseline_new(baseline_results_path)
    data_fairboost = read_data_fairboost_new(fairboost_results_path)
    return data_baseline + data_fairboost

In [6]:
data = read_data()
df = pd.DataFrame(data)
df

Unnamed: 0,experiment,bootstrap_type,dataset,preprocessing,classifier,metrics
0,baseline,No bootstrap,german,[No Preprocessing],Logistic Regression,"{'accuracy': [0.7333333333333333, 0.68, 0.6666..."
1,baseline,No bootstrap,german,[No Preprocessing],Random Forest,"{'accuracy': [0.69, 0.66, 0.69, 0.683333333333..."
2,preprocessing,No bootstrap,german,[OptimPreproc],Logistic Regression,"{'accuracy': [0.7066666666666667, 0.6766666666..."
3,preprocessing,No bootstrap,german,[OptimPreproc],Random Forest,"{'accuracy': [0.7133333333333334, 0.66, 0.6866..."
4,preprocessing,No bootstrap,german,[LFR],Logistic Regression,"{'accuracy': [0.7133333333333334, 0.6833333333..."
...,...,...,...,...,...,...
145,fairboost,custom,compas,"[OptimPreproc, Reweighing]",Random Forest,"{'accuracy': [0.6287878787878788, 0.6470959595..."
146,fairboost,custom,compas,"[LFR, Reweighing]",Logistic Regression,"{'accuracy': [0.6515151515151515, 0.6597222222..."
147,fairboost,custom,compas,"[LFR, Reweighing]",Random Forest,"{'accuracy': [0.6534090909090909, 0.6559343434..."
148,fairboost,custom,compas,"[LFR, OptimPreproc, Reweighing]",Logistic Regression,"{'accuracy': [0.6540404040404041, 0.6679292929..."


In [7]:
import statistics

def caclculate_accuracy_fairness_h_mean(f1_scores, normalized_di_scores):
    
    # TODO: find method for normalization that doesn't return negative values     
#     normalized_DI = [(1-di_score) if di_score <=1 else (1-di_score**-1) for di_score in DI]
    
    try:
        harmonic_means = list(map(lambda x, y:statistics.harmonic_mean([x,y]), f1_scores, normalized_di_scores))
    except Exception as e:
        print('faced error in harmonic mean calculation')
        print(f'f1_scores: {f1_scores}')
        print(f'DI: {DI}')
        print(f'normalized_DI: {normalized_DI}')
        print(e)
    return sum(harmonic_means)/len(harmonic_means)
#     print()

In [11]:
h_mean_scores = []
f1_scores = []
normalized_di_scores = []
for performance_metric in df.loc[:,"metrics"]:
    
    f1_score = performance_metric["f1-score"]
    f1_scores.append(f1_score)
    
    di_score = performance_metric["disparate_impact"]
    normalized_di_score = [score if score <=1 else (score**-1) for score in di_score]
    normalized_di_scores.append(normalized_di_score)
    
    h_mean_score = caclculate_accuracy_fairness_h_mean(f1_score, normalized_di_score)
    h_mean_scores.append(h_mean_score)
    

# df['f1_scores'] = f1_scores
# df['normalized_di_scores'] = normalized_di_scores
df['h_mean'] = h_mean_scores
df.drop(['metrics'], axis = 1, inplace=True)


In [12]:
def try_join(l):
    try:
        return ','.join(map(str, l))
    except TypeError:
        return 'No Preprocessing'

In [13]:
df['preprocessing'] = [try_join(l) for l in df['preprocessing']]
# df2.drop(['preprocessing'], axis = 1, inplace=True)
df2.head(60)

NameError: name 'df2' is not defined

In [None]:
groups = df2.groupby(['experiment','bootstrap_type','dataset', 'preprocessing'])

In [None]:
df3 = groups['h_mean'].agg([np.mean]).reset_index()
df3.sort_values(by=['mean'], ascending=False)
df3['setting'] = df['experiment'] + ', ' + df['bootstrap_type'] + ', ' + df['preprocessing']
df3


In [None]:
# result_tuples = []
# for config, config_info in data.items():
#     classifier_results = config_info['classifier_value']
#     h_mean = caclculate_accuracy_fairness_h_mean(classifier_results['f1-score'],  classifier_results['disparate_impact'])
#     result_tuples.append((config_info['dataset'], config_info['preprocessing'], config_info['classifier'], h_mean))
    
# df = pd.DataFrame(result_tuples, columns=['dataset', 'preprocessing', 'classifier', 'h_mean'])

In [None]:
# result_tuples = []
# for config, config_info in data.items():
#     classifier_results = config_info['classifier_value']
#     h_mean = caclculate_accuracy_fairness_h_mean(classifier_results['f1-score'],  classifier_results['disparate_impact'])
#     result_tuples.append((config_info['dataset'], config_info['preprocessing'], config_info['classifier'], h_mean))
    
# df = pd.DataFrame(result_tuples, columns=['dataset', 'preprocessing', 'classifier', 'h_mean'])

In [None]:
# df.to_csv('best_method.csv', index=False)

In [None]:
# group_1 = df.groupby(['experiment','bootstrap_type','dataset','preprocessing',]).agg(['mean']).reset_index()
# group_1
# group_1.to_csv('best_method_combined_classifiers.csv', index=False)

In [None]:
group_2 = df.groupby(['preprocessing',]).agg(['mean']).reset_index()
group_2.to_csv('best_method_overall.csv', index=False)

In [None]:
group_2 = df.groupby(['preprocessing',]).agg(['mean']).reset_index()
group_2.to_csv('best_method_overall.csv', index=False)