In [1]:
#load relevant libraries
import os
import sys
import pathlib
import shutil
import warnings
warnings.simplefilter('ignore')
from helper import extract_info

# Move the working directory one level up before importing `fp`.
# NOTE(review): presumably the parent folder is the repository root, so that the
# `fp` package imported below and the relative 'examples/' output path used when
# saving figures resolve correctly -- confirm.
# Caveat: this call is not idempotent. Re-running this cell climbs one more
# directory level each time, so restart the kernel before re-executing it.
os.chdir('..')
# parent_dir = os.path.dirname(os.getcwd())
# sys.path.append(parent_dir)

from fp.traindata_samplers import CompleteData
from fp.missingvalue_handlers import CompleteCaseAnalysis
from fp.dataset_experiments import GermanCreditDatasetSexExperiment
from fp.scalers import NamedStandardScaler, NamedMinMaxScaler
from fp.learners import NonTunedLogisticRegression, LogisticRegression, DecisionTree, NonTunedLogisticRegression, NonTunedDecisionTree          
from fp.post_processors import NoPostProcessing, RejectOptionPostProcessing, EqualOddsPostProcessing, CalibratedEqualOddsPostProcessing
from fp.pre_processors import NoPreProcessing, DIRemover, Reweighing

import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Parameters in this cell can be adjusted for experimentation.

# Random seeds: run_exp executes every (processor pair, learner) combination
# once per seed.
seeds = [
    0xbeef, 0xcafe, 0xdead, 0xdeadcafe, 0xdeadbeef, 0xbeefcafe, 0xcafebeef,
    50, 583, 5278, 100000,
    0xefac, 0xfeeb, 0xdaed, 0xefacdaed, 0xfeebdead,
]

# Learners are plotted two at a time (first of each pair in blue, second in
# red), so keep an even number of entries here.
learners = [NonTunedLogisticRegression(), LogisticRegression()]
# Tuned and non-tuned DecisionTree() can also be used.

# (pre-processor, post-processor) pairs; each pair becomes one row of panels.
processors = [
    (NoPreProcessing(), NoPostProcessing()),
    (DIRemover(1.0), NoPostProcessing()),
    (DIRemover(0.5), NoPostProcessing()),
    (Reweighing(), NoPostProcessing()),
    (NoPreProcessing(), RejectOptionPostProcessing()),
    (NoPreProcessing(), CalibratedEqualOddsPostProcessing()),
]

# Panel titles used for plotting: one per entry of `processors`, in the same
# order. Update these to match your experiment.
title_list = [
    'NoPreProcessing',
    'DIRemover(1.0)',
    'DIRemover(0.5)',
    'Reweighing',
    'Reject Option',
    'Calibrated Equal Odds',  # spelling fixed (was 'Caliberated')
]
# Legend labels: one (first learner, second learner) tuple per pair of learners.
label_list = [('NonTunedLogistic', 'TunedLogistic')]

In [3]:
def calculate_metrics(seed, learner, pre_processor, post_processor):
    '''
    Configure one GermanCreditDatasetSexExperiment and run it.

    The experiment is built with a single learner, a single pre-processor and
    a single post-processor, using CompleteData sampling, CompleteCaseAnalysis
    for missing values and NamedStandardScaler for numeric attributes.

    Nothing is returned: whatever ``run()`` produces is only available through
    its side effects (presumably result files later read by
    ``helper.extract_info`` -- verify against the fp package).
    '''
    settings = dict(
        fixed_random_seed=seed,
        train_data_sampler=CompleteData(),
        missing_value_handler=CompleteCaseAnalysis(),
        numeric_attribute_scaler=NamedStandardScaler(),
        learners=[learner],
        pre_processors=[pre_processor],
        post_processors=[post_processor],
        optimal_validation_strategy=[],
    )
    experiment = GermanCreditDatasetSexExperiment(**settings)
    experiment.run()

In [4]:
def run_exp(seeds, learners, processors):
    '''
    Main driver: run calculate_metrics for every combination of processor
    pair, learner and seed, and collect the metrics gathered by extract_info.

    Iteration order is processors (outer), learners, seeds (inner). For each
    (processor pair, learner) combination four fresh lists are handed to
    extract_info after every seed, and then appended to the matching result.

    Returns a tuple ``(accuracy, disp_imp, fnr, fpr)``; each element is a list
    with one inner list per (processor pair, learner) combination.
    '''
    # Result buckets, in order: accuracy, disparate impact, FNR, FPR.
    totals = ([], [], [], [])
    for processor in processors:
        pre_processor, post_processor = processor[0], processor[1]
        for learner in learners:
            per_learner = ([], [], [], [])
            for seed in seeds:
                calculate_metrics(seed, learner, pre_processor=pre_processor, post_processor=post_processor)
                # extract_info receives the four per-learner lists to fill in.
                extract_info(*per_learner)
            for bucket, values in zip(totals, per_learner):
                bucket.append(values)

    accuracy, disp_imp, fnr, fpr = totals
    return accuracy, disp_imp, fnr, fpr

# Run the full sweep configured in the parameter cell; each result holds one
# inner list per (processor pair, learner) combination.
accuracy, disp_imp, fnr, fpr = run_exp(seeds=seeds, learners=learners, processors=processors)

complete_case removed 0 instances from training data
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    1.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.3s finished


complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  53 out of  60 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.3s finished


complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.3s finished


complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Done  53 out of  60 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.3s finished


complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  53 out of  60 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.3s finished


complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.3s finished


complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  53 out of  60 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.3s finished


complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from training data
Fitting 5 folds for each of 12 candidates, totalling 60 fits
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()
complete_case removed 0 instances from validation data
Injecting zero columns for features not present set()


[Parallel(n_jobs=-1)]: Done  53 out of  60 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.3s finished


In [5]:
def plotter(title_list, label_list, x, y, x_ticks, x_label, main_title):
    '''
    Function to plot various comparison plots.

    Draws a grid of scatter panels with ``len(title_list)`` rows and
    ``len(label_list)`` columns, saves it as ``examples/<main_title>.png`` and
    shows it. The series in ``x`` and ``y`` are consumed two at a time:
    entries ``i`` (blue) and ``i + 1`` (red) share one panel, and panels are
    filled in row-major order.

    Parameters
    ----------
    title_list : list of str
        Panel titles, one per grid row.
    label_list : list of tuple
        Legend labels, one tuple per grid column; the tuple is passed to
        ``Axes.legend`` as-is.
    x, y : sequence of sequences
        Parallel collections of series. Every value is converted with
        ``float`` before plotting; ``y`` is always labelled 'Accuracy'.
    x_ticks : sequence
        Tick positions applied to the x axis of every panel.
    x_label : str
        x-axis label of every panel.
    main_title : str
        Figure super-title, also used as the output file name.
    '''
    # Update label list and title list as per the experiment being performed.
    n_rows, n_cols = len(title_list), len(label_list)
    # squeeze=False keeps `axs` a 2-D array for every grid size (including a
    # single panel), so flatten() below always works.
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(10, 20), squeeze=False)
    axs = axs.flatten()
    for i in range(0, len(y), 2):
        loc = i // 2
        # Row-major layout: the row selects the title and the column selects
        # the legend. The previous hard-coded `i // 4` title index was only
        # correct for exactly two label tuples.
        row, col = divmod(loc, n_cols)
        ax = axs[loc]
        ax.scatter([float(v) for v in x[i]], [float(v) for v in y[i]], c='b', marker='o')
        ax.scatter([float(v) for v in x[i + 1]], [float(v) for v in y[i + 1]], c='r', marker='o')
        ax.set_xticks(x_ticks)
        ax.set_yticks(np.arange(0.5, 1, 0.1))
        ax.set_title(title_list[row])
        ax.grid(True)
        ax.set_xlabel(x_label)
        ax.set_ylabel('Accuracy')
        ax.legend(label_list[col])
    fig.suptitle(main_title)
    plt.subplots_adjust(wspace=0.3, hspace=0.43)
    fig.savefig('examples/' + main_title + '.png')
    plt.show()

# One figure per fairness metric, each plotted against accuracy:
# (x series, x-axis ticks, x-axis label, figure title / file name)
for metric_series, metric_ticks, metric_label, figure_name in (
    (disp_imp, [0.5, 1, 1.5], 'DI', 'accuracy_vs_di'),
    (fnr, [-0.4, 0, 0.4], 'FNR', 'accuracy_vs_fnr'),
    (fpr, [-0.4, 0, 0.4], 'FPR', 'accuracy_vs_fpr'),
):
    plotter(title_list, label_list, x=metric_series, y=accuracy, x_ticks=metric_ticks, x_label=metric_label, main_title=figure_name)