Selection Impact (T-tests)

In [1]:
import os
import pandas as pd
from scipy.stats import ttest_rel
import numpy as np
def t_test(original, selection):
    """Comparing method"""
    def two_tailed_t_test(original, selection):
        #n_d = len(selection)
        #n_c = len(original)
        #n = min(n_d, n_c)
        if len(selection) == len(original):
            n = len(original)
        else:
            print("problema")
            print (len(original))
            print (len(selection))
        t, p = ttest_rel(original[:n], selection[:n])
        if np.isnan(t):
            t, p = 0, 1
        return {"t-stats":t, "p-value":p}

    def one_tailed_t_test(original, selection, direction):
        two_tail = two_tailed_t_test(original, selection)
        t, p_two = two_tail['t-stats'], two_tail['p-value']
        if direction == 'positive':
            if t > 0 :
                p = p_two * 0.5
            else:
                p = 1 - p_two * 0.5
        else:
            if t < 0:
                p = p_two * 0.5
            else:
                p = 1 - p_two * 0.5
        return {"t-stats":t, "p-value":p}

    result = {}
    result['two_tail'] = two_tailed_t_test(original, selection)
    result['one_tail_pos'] = one_tailed_t_test(original, selection, 'positive')
    result['one_tail_neg'] = one_tailed_t_test(original, selection, 'negative')
    return result


def evaluate_score(original, selection):
    alpha =  0.05
    results = t_test(original, selection)
    difference = 'insignificant'

    if results['two_tail']['p-value'] < alpha:
        if results['one_tail_neg']['p-value'] < alpha:
            difference = 'positive'
        if results['one_tail_pos']['p-value'] < alpha:
            difference = 'negative'

    return difference


T-tests 

ARS dataset

In [2]:
import os
from os.path import join 
import pandas as pd
import numpy as np
# Define paths and initialize variables
root = "../../results/"
systems = ["CRAIGPB", "GLISTERPB", "GradMatchPB", "Random"]
dataset = "ars" 
ratios = ["0.05", "0.1", "0.2", "0.3"]
models = ['Logreg', 'MLP', 'SVM']

systems_path = [join(root, f"{dataset}-selection", sys) for sys in systems]
ratio_path = [f"{dataset}_{ratio}" for ratio in ratios]

directory_full = join(root, f"{dataset}-selection", "Full", f"{dataset}_1")

# Metrics tables
index_labels = ['negative-f', 'insignificant-f','positive-f','negative-u', 'insignificant-u','positive-u','negative-c', 'insignificant-c','positive-c', 'insignificant-f-u and positive-c' ]
# Initialize influence table
cols = ['ratio', 'system', 'time', 'acc', 'F1_score', 'Precision', 'Recall', 'SPD_gender', 'EOD_gender', 'AOD_gender', 'DI_gender', 'DcI_gender']
compute_influence = pd.DataFrame(columns=cols)


# Initialize full result and cost DataFrames
df_full_result = pd.DataFrame()
df_full_cost = pd.DataFrame()
df_full_cost_time = pd.DataFrame()

i = 0
files = []

df_average_results = pd.DataFrame()

# Load full system results for comparisons
for model in models: 

    df_full_result = pd.DataFrame()
    df_full_cost = pd.DataFrame()
    df_full_cost_time = pd.DataFrame()

    for filename in os.listdir(directory_full):
        if "fair_metrics_"+ model in filename :
           file_path = os.path.join(directory_full, filename)
           if model == 'SVM':
               df_r = pd.read_csv(file_path).iloc[60:, 1:]
           else:
               df_r = pd.read_csv(file_path).iloc[130:, 1:]
           df_full_result = pd.concat([df_full_result, df_r.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename and filename.endswith("_0.csv"):
            file_path = os.path.join(directory_full, filename)
            if model == 'SVM':
                df_ct = pd.read_csv(file_path).iloc[60:, 1:]  
            else:
                df_ct = pd.read_csv(file_path).iloc[130:, 1:]  
            df_full_cost_time = pd.concat([df_full_cost_time, df_ct.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename :
            file_path = os.path.join(directory_full, filename)
            if model == 'SVM':
                df_c = pd.read_csv(file_path).iloc[60:, 1:]
            else:
                df_c = pd.read_csv(file_path).iloc[130:, 1:]
            df_full_cost = pd.concat([df_full_cost, df_c.abs()], ignore_index=True)

    print('**********************')
    print(model)


    for col in ['DI_gender']:
        df_full_result[col] = df_full_result[col].apply(lambda x: abs((1 - x)/(1 + x)))
    

    ### Calcul de valeurs moyenne pour le full 
    result_avg = df_full_result.mean()
    cost_avg = df_full_cost.mean()
    cost_time_avg = df_full_cost_time.mean()

    # Ligne de valeurs moyennes de full 
    avg_data = pd.DataFrame({
        **{col + '_avg': [result_avg[col]] for col in result_avg.index},
        **{col + '_avg': [cost_avg[col]] for col in cost_avg.index},
        **{col + '_avg': [cost_time_avg[col]] for col in cost_time_avg.index},
        'dataset': [dataset],
        'model': [model],
        'system': ['Full'],
        'ratio': [1],
    })

    df_average_results = pd.concat([df_average_results, avg_data], ignore_index=True)

    # Lire les fichiers de système de selection par système et par ratio

    for directory_path_1 in systems_path:
       for directory_path_2 in ratio_path:
           directory = join(directory_path_1, directory_path_2) 
           system = directory_path_1.split('/')[-1]           
        
           df_full_result_selection = pd.DataFrame()
           df_full_cost_selection = pd.DataFrame()
           df_full_cost_time_selection = pd.DataFrame()
        
           # Cost file processing
           for filename in os.listdir(directory):
                if "fair_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'SVM':
                        df_to_add_f = pd.read_csv(file_path).iloc[60:, 1:].abs()
                    else:
                        df_to_add_f = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    df_full_result_selection = pd.concat([df_full_result_selection, df_to_add_f], ignore_index=True)
                    
                if (("cost_metrics_" + model in filename) and (filename.endswith("_0.csv"))):
                    file_path = os.path.join(directory, filename)
                    if model == 'SVM':
                        df_to_add_t = pd.read_csv(file_path).iloc[60:, 1:].abs()
                    else:
                        df_to_add_t = pd.read_csv(file_path).iloc[130:, 1:].abs()

                    df_full_cost_time_selection = pd.concat([df_full_cost_time_selection, df_to_add_t], ignore_index=True)
        
                if "cost_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'SVM':
                        df_to_add_c = pd.read_csv(file_path).iloc[60:, 1:].abs()
                    else:
                        df_to_add_c = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    df_full_cost_selection = pd.concat([df_full_cost_selection, df_to_add_c], ignore_index=True)
           
                
           for col in ['DI_gender']:
             df_full_result_selection[col] = df_full_result_selection[col].apply(lambda x: abs((1 - x)/(1 + x)))
           
           ### Calcul de valeurs moyenne pour le full 
           result_sel_avg = df_full_result_selection.mean()
           cost_sel_avg = df_full_cost_selection.mean()
           cost_time_sel_avg = df_full_cost_time_selection.mean()

           ## Faire les t-test
           # Cost
           # time
           test_time = evaluate_score(df_full_cost_time_selection['Full_training_time'].to_numpy(), df_full_cost_time['Full_training_time'].to_numpy()) 
           
           # Model quality
           # accuracy
           test_acc = evaluate_score(df_full_result['Accuracy'].to_numpy(), df_full_result_selection['Accuracy'].to_numpy()) 
           # F1-score
           test_f1 = evaluate_score(df_full_result['F1_score'].to_numpy(), df_full_result_selection['F1_score'].to_numpy()) 
           #precision
           test_precision = evaluate_score(df_full_result['Precision'].to_numpy(), df_full_result_selection['Precision'].to_numpy()) 
           # recall 
           test_recall = evaluate_score(df_full_result['Recall'].to_numpy(), df_full_result_selection['Recall'].to_numpy()) 

           # Fairness
           # SPD gender
           test_spd_g = evaluate_score(df_full_result_selection['SPD_gender'].to_numpy(), df_full_result['SPD_gender'].to_numpy())
           # EOD gender
           test_eod_g = evaluate_score(df_full_result_selection['EOD_gender'].to_numpy(), df_full_result['EOD_gender'].to_numpy())
           # AOD gender
           test_aod_g = evaluate_score(df_full_result_selection['AOD_gender'].to_numpy(), df_full_result['AOD_gender'].to_numpy())
           # DI gender
           test_di_g = evaluate_score(df_full_result_selection['DI_gender'].to_numpy(), df_full_result['DI_gender'].to_numpy())
           # DcI gender
           test_dci_g = evaluate_score(df_full_result_selection['DcI_gender'].to_numpy(), df_full_result['DcI_gender'].to_numpy())



           avg_sel_data = pd.DataFrame({
            **{col + '_avg': [result_sel_avg[col]] for col in result_sel_avg.index},
            **{col + '_avg': [cost_sel_avg[col]] for col in cost_sel_avg.index},
            **{col + '_avg': [cost_time_sel_avg[col]] for col in cost_time_sel_avg.index},
           'dataset': [dataset],
           'model': [model],
           'system': [system],
           'ratio': [directory_path_2.split('_')[-1]],
           'test_time' : [test_time], 
           'test_acc' : [test_acc], 
           'test_f1' : [test_f1], 
           'test_precision' : [test_precision], 
           'test_recall' : [test_recall],
           'test_SPD_gender' : [test_spd_g], 
           'test_EOD_gender' : [test_eod_g], 
           'test_AOD_gender' : [test_aod_g], 
           'test_DI_gender' : [test_di_g], 
           'test_DcI_gender' : [test_dci_g], 

           'test_SPD_age' : ' ',
           'test_EOD_age' : ' ',
           'test_AOD_age' : ' ', 
           'test_DI_age' : ' ', 
           'test_DcI_age' : ' ', 

           'test_SPD_race' : ' ',
           'test_EOD_race' : ' ',
           'test_AOD_race' : ' ', 
           'test_DI_race' : ' ', 
           'test_DcI_race' : ' ', 
            })
           
           df_average_results = pd.concat([df_average_results, avg_sel_data], ignore_index=True)




 
result_path = join(root, "test", f"{dataset}_ttest_results_epochs_intervals.csv")
df_average_results.to_csv(result_path, index=False)



**********************
Logreg


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


**********************
MLP


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


**********************
SVM


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


Adult dataset

In [3]:
import os
import pandas as pd
import numpy as np
# Define paths and initialize variables
root = "../../results/"
systems = ["CRAIGPB", "GLISTERPB", "GradMatchPB", "Random"]
dataset = "adult" 
ratios = ["0.05", "0.1", "0.2", "0.3"]
models = ['Logreg', 'SVM', 'MLP']

systems_path = [join(root, f"{dataset}-selection", sys) for sys in systems]
ratio_path = [f"{dataset}_{ratio}" for ratio in ratios]

directory_full = join(root, f"{dataset}-selection", "Full", f"{dataset}_1")

# Initialize full result and cost DataFrames
df_full_result = pd.DataFrame()
df_full_cost = pd.DataFrame()
df_full_cost_time = pd.DataFrame()

i = 0
files = []

df_average_results = pd.DataFrame()

# Load full system results for comparisons
for model in models: 

    df_full_result = pd.DataFrame()
    df_full_cost = pd.DataFrame()
    df_full_cost_time = pd.DataFrame()

    for filename in os.listdir(directory_full):
        if "fair_metrics_"+ model in filename :
            file_path = os.path.join(directory_full, filename)
            if model == 'MLP':
                df_r = pd.read_csv(file_path).iloc[380:, 1:]
            elif model == 'Logreg' :
                df_r = pd.read_csv(file_path).iloc[130:, 1:]
            else : 
                df_r = pd.read_csv(file_path).iloc[130:,1:]

            df_full_result = pd.concat([df_full_result, df_r.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename and filename.endswith("_0.csv"):
            file_path = os.path.join(directory_full, filename)
            if model == 'MLP':
             df_ct = pd.read_csv(file_path).iloc[380:, 1:]
            elif model == 'Logreg' :
             df_ct = pd.read_csv(file_path).iloc[130:, 1:]
            else :
             df_ct = pd.read_csv(file_path).iloc[130:,1:]
            df_full_cost_time = pd.concat([df_full_cost_time, df_ct.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename :
            file_path = os.path.join(directory_full, filename)
            if model == 'MLP':
             df_c = pd.read_csv(file_path).iloc[380:, 1:]
            elif model == 'Logreg' :
             df_c = pd.read_csv(file_path).iloc[130:, 1:]
            else : 
             df_c = pd.read_csv(file_path).iloc[130:,1:]
            df_full_cost = pd.concat([df_full_cost, df_c.abs()], ignore_index=True)

    print('**********************')
    print(model)

    for col in ['DI_gender', 'DI_age', 'DI_race']:
        df_full_result[col] = df_full_result[col].apply(lambda x: abs((1 - x)/(1 + x)))
    

    ### Calcul de valeurs moyenne pour le full 
    result_avg = df_full_result.mean()
    cost_avg = df_full_cost.mean()
    cost_time_avg = df_full_cost_time.mean()

    # Ligne de valeurs moyennes de full 
    avg_data = pd.DataFrame({
        **{col + '_avg': [result_avg[col]] for col in result_avg.index},
        **{col + '_avg': [cost_avg[col]] for col in cost_avg.index},
        **{col + '_avg': [cost_time_avg[col]] for col in cost_time_avg.index},
        'dataset': [dataset],
        'model': [model],
        'system': ['Full'],
        'ratio': [1],
    })

    df_average_results = pd.concat([df_average_results, avg_data], ignore_index=True)

    # Lire les fichiers de système de selection par système et par ratio

    for directory_path_1 in systems_path:
       for directory_path_2 in ratio_path:
           directory = join(directory_path_1, directory_path_2)
           system = directory_path_1.split('/')[-1]           
        
           df_full_result_selection = pd.DataFrame()
           df_full_cost_selection = pd.DataFrame()
           df_full_cost_time_selection = pd.DataFrame()

           
           
           # Cost file processing
           for filename in os.listdir(directory):
                
                if "fair_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'MLP':
                       df_to_add_f = pd.read_csv(file_path).iloc[380:, 1:].abs()
                       
                    else:
                       df_to_add_f = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    df_full_result_selection = pd.concat([df_full_result_selection, df_to_add_f], ignore_index=True)
                    

                if (("cost_metrics_" + model in filename) and (filename.endswith("_0.csv"))):
                    file_path = os.path.join(directory, filename)
                    if model == 'MLP':
                       df_to_add_t = pd.read_csv(file_path).iloc[380:, 1:].abs()
                    else:
                       df_to_add_t = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    df_full_cost_time_selection = pd.concat([df_full_cost_time_selection, df_to_add_t], ignore_index=True)
    

        
                if "cost_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'MLP':
                       df_to_add_c = pd.read_csv(file_path).iloc[380:, 1:].abs()
                    else:
                       df_to_add_c = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    df_full_cost_selection = pd.concat([df_full_cost_selection, df_to_add_c], ignore_index=True)
           
                
           for col in ['DI_gender', 'DI_age', 'DI_race']:
             df_full_result_selection[col] = df_full_result_selection[col].apply(lambda x: abs((1 - x)/(1 + x)))
           
           ### Calcul de valeurs moyenne pour le full 
           result_sel_avg = df_full_result_selection.mean()
           cost_sel_avg = df_full_cost_selection.mean()
           cost_time_sel_avg = df_full_cost_time_selection.mean()

           ## Faire les t-test
           # Cost
           # time
           test_time = evaluate_score(df_full_cost_time_selection['Full_training_time'].to_numpy(), df_full_cost_time['Full_training_time'].to_numpy()) 
           
           # Model quality
           # accuracy
           test_acc = evaluate_score(df_full_result['Accuracy'].to_numpy(), df_full_result_selection['Accuracy'].to_numpy()) 
           # F1-score
           test_f1 = evaluate_score(df_full_result['F1_score'].to_numpy(), df_full_result_selection['F1_score'].to_numpy()) 
           #precision
           test_precision = evaluate_score(df_full_result['Precision'].to_numpy(), df_full_result_selection['Precision'].to_numpy()) 
           # recall 
           test_recall = evaluate_score(df_full_result['Recall'].to_numpy(), df_full_result_selection['Recall'].to_numpy()) 

           # Fairness
           # SPD gender
           test_spd_g = evaluate_score(df_full_result_selection['SPD_gender'].to_numpy(), df_full_result['SPD_gender'].to_numpy())
           # EOD gender
           test_eod_g = evaluate_score(df_full_result_selection['EOD_gender'].to_numpy(), df_full_result['EOD_gender'].to_numpy())
           # AOD gender
           test_aod_g = evaluate_score(df_full_result_selection['AOD_gender'].to_numpy(), df_full_result['AOD_gender'].to_numpy())
           # DI gender
           test_di_g = evaluate_score(df_full_result_selection['DI_gender'].to_numpy(), df_full_result['DI_gender'].to_numpy())
           # DcI gender
           test_dci_g = evaluate_score(df_full_result_selection['DcI_gender'].to_numpy(), df_full_result['DcI_gender'].to_numpy())

           # SPD age
           test_spd_a = evaluate_score(df_full_result_selection['SPD_age'].to_numpy(), df_full_result['SPD_age'].to_numpy())
           # EOD gender
           test_eod_a = evaluate_score(df_full_result_selection['EOD_age'].to_numpy(), df_full_result['EOD_age'].to_numpy())
           # AOD gender
           test_aod_a = evaluate_score(df_full_result_selection['AOD_age'].to_numpy(), df_full_result['AOD_age'].to_numpy())
           # DI gender
           test_di_a = evaluate_score(df_full_result_selection['DI_age'].to_numpy(), df_full_result['DI_age'].to_numpy())
           # DcI gender
           test_dci_a = evaluate_score(df_full_result_selection['DcI_age'].to_numpy(), df_full_result['DcI_age'].to_numpy())

           # SPD race
           test_spd_r = evaluate_score(df_full_result_selection['SPD_race'].to_numpy(), df_full_result['SPD_race'].to_numpy())
           # EOD race
           test_eod_r = evaluate_score(df_full_result_selection['EOD_race'].to_numpy(), df_full_result['EOD_race'].to_numpy())
           # AOD race
           test_aod_r = evaluate_score(df_full_result_selection['AOD_race'].to_numpy(), df_full_result['AOD_race'].to_numpy())
           # DI race
           test_di_r = evaluate_score(df_full_result_selection['DI_race'].to_numpy(), df_full_result['DI_race'].to_numpy())
           # DcI race
           test_dci_r = evaluate_score(df_full_result_selection['DcI_race'].to_numpy(), df_full_result['DcI_race'].to_numpy())



           avg_sel_data = pd.DataFrame({
            **{col + '_avg': [result_sel_avg[col]] for col in result_sel_avg.index},
            **{col + '_avg': [cost_sel_avg[col]] for col in cost_sel_avg.index},
            **{col + '_avg': [cost_time_sel_avg[col]] for col in cost_time_sel_avg.index},
           'dataset': [dataset],
           'model': [model],
           'system': [system],
           'ratio': [directory_path_2.split('_')[-1]],
           'test_time' : [test_time], 
           'test_acc' : [test_acc], 
           'test_f1' : [test_f1], 
           'test_precision' : [test_precision], 
           'test_recall' : [test_recall],
           'test_SPD_gender' : [test_spd_g], 
           'test_EOD_gender' : [test_eod_g], 
           'test_AOD_gender' : [test_aod_g], 
           'test_DI_gender' : [test_di_g], 
           'test_DcI_gender' : [test_dci_g], 

           'test_SPD_age' : [test_spd_a],
           'test_EOD_age' : [test_eod_a],
           'test_AOD_age' : [test_aod_a], 
           'test_DI_age' : [test_di_a], 
           'test_DcI_age' : [test_dci_a], 

           'test_SPD_race' : [test_spd_r],
           'test_EOD_race' : [test_eod_r],
           'test_AOD_race' : [test_aod_r], 
           'test_DI_race' : [test_di_r],
           'test_DcI_race' : [test_dci_r],
            })
           
           df_average_results = pd.concat([df_average_results, avg_sel_data], ignore_index=True)




 
result_path = join(root, "test", f"{dataset}_ttest_results_epochs_intervals.csv")
df_average_results.to_csv(result_path, index=False)



**********************
Logreg


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


**********************
SVM


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


**********************
MLP


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


KDD dataset

In [4]:
import os
import pandas as pd
import numpy as np
# Define paths and initialize variables
root = "../../results/"
systems = ["CRAIGPB", "GLISTERPB", "GradMatchPB", "Random"]
dataset = "kdd" 
ratios = ["0.05", "0.1", "0.2", "0.3"]
models = ['Logreg', 'MLP', 'SVM']

systems_path = [join(root, f"{dataset}-selection", sys) for sys in systems]
ratio_path = [f"{dataset}_{ratio}" for ratio in ratios]

directory_full = join(root, f"{dataset}-selection", "Full", f"{dataset}_1")


# Initialize full result and cost DataFrames
df_full_result = pd.DataFrame()
df_full_cost = pd.DataFrame()
df_full_cost_time = pd.DataFrame()

i = 0
files = []

df_average_results = pd.DataFrame()

# Load full system results for comparisons
for model in models: 

    df_full_result = pd.DataFrame()
    df_full_cost = pd.DataFrame()
    df_full_cost_time = pd.DataFrame()

    for filename in os.listdir(directory_full):
        if "fair_metrics_"+ model in filename :
           file_path = os.path.join(directory_full, filename)
           if model == 'SVM':
              df_r = pd.read_csv(file_path).iloc[60:, 1:]
           else:
              df_r = pd.read_csv(file_path).iloc[60:, 1:]
           df_full_result = pd.concat([df_full_result, df_r.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename and filename.endswith("_0.csv"):
            file_path = os.path.join(directory_full, filename)
            if model == 'SVM':
               df_ct = pd.read_csv(file_path).iloc[60:, 1:]
            else:
               df_ct = pd.read_csv(file_path).iloc[60:, 1:]
            df_full_cost_time = pd.concat([df_full_cost_time, df_ct.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename :
            file_path = os.path.join(directory_full, filename)
            if model == 'SVM':
               df_c = pd.read_csv(file_path).iloc[60:, 1:]
            else:
               df_c = pd.read_csv(file_path).iloc[60:, 1:]
            df_full_cost = pd.concat([df_full_cost, df_c.abs()], ignore_index=True)

    print('**********************')
    print(model)

    for col in ['DI_gender', 'DI_age', 'DI_race']:
      df_full_result[col] = df_full_result[col].apply(lambda x: abs((1 - x)/(1 + x)))
    

    ### Calcul de valeurs moyenne pour le full 
    result_avg = df_full_result.mean()
    cost_avg = df_full_cost.mean()
    cost_time_avg = df_full_cost_time.mean()

    # Ligne de valeurs moyennes de full 
    avg_data = pd.DataFrame({
        **{col + '_avg': [result_avg[col]] for col in result_avg.index},
        **{col + '_avg': [cost_avg[col]] for col in cost_avg.index},
        **{col + '_avg': [cost_time_avg[col]] for col in cost_time_avg.index},
        'dataset': [dataset],
        'model': [model],
        'system': ['Full'],
        'ratio': [1],
    })

    df_average_results = pd.concat([df_average_results, avg_data], ignore_index=True)

    # Lire les fichiers de système de selection par système et par ratio

    for directory_path_1 in systems_path:
       for directory_path_2 in ratio_path:
           directory = join(directory_path_1, directory_path_2)
           system = directory_path_1.split('/')[-1]           
        
           df_full_result_selection = pd.DataFrame()
           df_full_cost_selection = pd.DataFrame()
           df_full_cost_time_selection = pd.DataFrame()
        
           # Cost file processing
           for filename in os.listdir(directory):
                if "fair_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'MLP':
                       df_to_add_f = pd.read_csv(file_path).iloc[60:, 1:].abs()
                    else:
                       df_to_add_f = pd.read_csv(file_path).iloc[60:, 1:].abs()
                    df_full_result_selection = pd.concat([df_full_result_selection, df_to_add_f], ignore_index=True)
                    

                if (("cost_metrics_" + model in filename) and (filename.endswith("_0.csv"))):
                    file_path = os.path.join(directory, filename)
                    if model == 'MLP':
                       df_to_add_t = pd.read_csv(file_path).iloc[60:, 1:].abs()
                    else:
                       df_to_add_t = pd.read_csv(file_path).iloc[60:, 1:].abs()
                    df_full_cost_time_selection = pd.concat([df_full_cost_time_selection, df_to_add_t], ignore_index=True)
    

        
                if "cost_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'MLP':
                       df_to_add_c = pd.read_csv(file_path).iloc[60:, 1:].abs()
                    else:
                       df_to_add_c = pd.read_csv(file_path).iloc[60:, 1:].abs()
                    df_full_cost_selection = pd.concat([df_full_cost_selection, df_to_add_c], ignore_index=True)
           
                
           for col in ['DI_gender', 'DI_age', 'DI_race']:
             df_full_result_selection[col] = df_full_result_selection[col].apply(lambda x: abs((1 - x)/(1 + x)))

           ### Calcul de valeurs moyenne pour le full 
           result_sel_avg = df_full_result_selection.mean()
           cost_sel_avg = df_full_cost_selection.mean()
           cost_time_sel_avg = df_full_cost_time_selection.mean()

           ## Faire les t-test
           # Cost
           # time
           test_time = evaluate_score(df_full_cost_time_selection['Full_training_time'].to_numpy(), df_full_cost_time['Full_training_time'].to_numpy()) 
           
           # Model quality
           # accuracy
           test_acc = evaluate_score(df_full_result['Accuracy'].to_numpy(), df_full_result_selection['Accuracy'].to_numpy()) 
           # F1-score
           test_f1 = evaluate_score(df_full_result['F1_score'].to_numpy(), df_full_result_selection['F1_score'].to_numpy()) 
           #precision
           test_precision = evaluate_score(df_full_result['Precision'].to_numpy(), df_full_result_selection['Precision'].to_numpy()) 
           # recall 
           test_recall = evaluate_score(df_full_result['Recall'].to_numpy(), df_full_result_selection['Recall'].to_numpy()) 

           # Fairness
           # SPD gender
           test_spd_g = evaluate_score(df_full_result_selection['SPD_gender'].to_numpy(), df_full_result['SPD_gender'].to_numpy())
           # EOD gender
           test_eod_g = evaluate_score(df_full_result_selection['EOD_gender'].to_numpy(), df_full_result['EOD_gender'].to_numpy())
           # AOD gender
           test_aod_g = evaluate_score(df_full_result_selection['AOD_gender'].to_numpy(), df_full_result['AOD_gender'].to_numpy())
           # DI gender
           test_di_g = evaluate_score(df_full_result_selection['DI_gender'].to_numpy(), df_full_result['DI_gender'].to_numpy())
           # DcI gender
           test_dci_g = evaluate_score(df_full_result_selection['DcI_gender'].to_numpy(), df_full_result['DcI_gender'].to_numpy())

           # SPD age
           test_spd_a = evaluate_score(df_full_result_selection['SPD_age'].to_numpy(), df_full_result['SPD_age'].to_numpy())
           # EOD gender
           test_eod_a = evaluate_score(df_full_result_selection['EOD_age'].to_numpy(), df_full_result['EOD_age'].to_numpy())
           # AOD gender
           test_aod_a = evaluate_score(df_full_result_selection['AOD_age'].to_numpy(), df_full_result['AOD_age'].to_numpy())
           # DI gender
           test_di_a = evaluate_score(df_full_result_selection['DI_age'].to_numpy(), df_full_result['DI_age'].to_numpy())
           # DcI gender
           test_dci_a = evaluate_score(df_full_result_selection['DcI_age'].to_numpy(), df_full_result['DcI_age'].to_numpy())

           # SPD race
           test_spd_r = evaluate_score(df_full_result_selection['SPD_race'].to_numpy(), df_full_result['SPD_race'].to_numpy())
           # EOD race
           test_eod_r = evaluate_score(df_full_result_selection['EOD_race'].to_numpy(), df_full_result['EOD_race'].to_numpy())
           # AOD race
           test_aod_r = evaluate_score(df_full_result_selection['AOD_race'].to_numpy(), df_full_result['AOD_race'].to_numpy())
           # DI race
           test_di_r = evaluate_score(df_full_result_selection['DI_race'].to_numpy(), df_full_result['DI_race'].to_numpy())
           # DcI race
           test_dci_r = evaluate_score(df_full_result_selection['DcI_race'].to_numpy(), df_full_result['DcI_race'].to_numpy())



           avg_sel_data = pd.DataFrame({
            **{col + '_avg': [result_sel_avg[col]] for col in result_sel_avg.index},
            **{col + '_avg': [cost_sel_avg[col]] for col in cost_sel_avg.index},
            **{col + '_avg': [cost_time_sel_avg[col]] for col in cost_time_sel_avg.index},
           'dataset': [dataset],
           'model': [model],
           'system': [system],
           'ratio': [directory_path_2.split('_')[-1]],
           'test_time' : [test_time], 
           'test_acc' : [test_acc], 
           'test_f1' : [test_f1], 
           'test_precision' : [test_precision], 
           'test_recall' : [test_recall],
           'test_SPD_gender' : [test_spd_g], 
           'test_EOD_gender' : [test_eod_g], 
           'test_AOD_gender' : [test_aod_g], 
           'test_DI_gender' : [test_di_g], 
           'test_DcI_gender' : [test_dci_g], 

           'test_SPD_age' : [test_spd_a],
           'test_EOD_age' : [test_eod_a],
           'test_AOD_age' : [test_aod_a], 
           'test_DI_age' : [test_di_a], 
           'test_DcI_age' : [test_dci_a], 

           'test_SPD_race' : [test_spd_r],
           'test_EOD_race' : [test_eod_r],
           'test_AOD_race' : [test_aod_r], 
           'test_DI_race' : [test_di_r],
           'test_DcI_race' : [test_dci_r],
            })
           
           df_average_results = pd.concat([df_average_results, avg_sel_data], ignore_index=True)




 
result_path = join(root, "test", f"{dataset}_ttest_results_epochs_intervals.csv")
df_average_results.to_csv(result_path, index=False)



**********************
Logreg


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


**********************
MLP


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


**********************
SVM


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


DC dataset

In [5]:
import os
import pandas as pd
import numpy as np
# Define paths and initialize variables
root = "../../results/"
systems = ["CRAIGPB", "GLISTERPB", "GradMatchPB", "Random"]
dataset = "dc" 
ratios = ["0.05", "0.1", "0.2", "0.3"]
models = ['Logreg', 'MLP', 'SVM']

systems_path = [join(root, f"{dataset}-selection", sys) for sys in systems]
ratio_path = [f"{dataset}_{ratio}" for ratio in ratios]

directory_full = join(root, f"{dataset}-selection", "Full", f"{dataset}_1")


# Initialize full result and cost DataFrames
df_full_result = pd.DataFrame()
df_full_cost = pd.DataFrame()
df_full_cost_time = pd.DataFrame()

i = 0
files = []

df_average_results = pd.DataFrame()

# Load full system results for comparisons
for model in models: 

    df_full_result = pd.DataFrame()
    df_full_cost = pd.DataFrame()
    df_full_cost_time = pd.DataFrame()

    for filename in os.listdir(directory_full):
        if "fair_metrics_"+ model in filename :
           file_path = os.path.join(directory_full, filename)
           if model == 'MLP':
               df_r = pd.read_csv(file_path).iloc[100:, 1:]
           elif model == 'SVM':
               df_r = pd.read_csv(file_path).iloc[100:, 1:]
           else:
               df_r = pd.read_csv(file_path).iloc[260:, 1:]
           df_full_result = pd.concat([df_full_result, df_r.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename and filename.endswith("_0.csv"):
            file_path = os.path.join(directory_full, filename)
            if model == 'MLP':
               df_ct = pd.read_csv(file_path).iloc[100:, 1:]
            elif model == 'SVM':
               df_ct = pd.read_csv(file_path).iloc[100:, 1:]
            else:
               df_ct = pd.read_csv(file_path).iloc[260:, 1:]
            df_full_cost_time = pd.concat([df_full_cost_time, df_ct.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename :
            file_path = os.path.join(directory_full, filename)
            if model == 'MLP':
               df_c = pd.read_csv(file_path).iloc[100:, 1:]
            elif model == 'SVM':
               df_c = pd.read_csv(file_path).iloc[100:, 1:]
            else:
               df_c = pd.read_csv(file_path).iloc[260:, 1:]
            df_full_cost = pd.concat([df_full_cost, df_c.abs()], ignore_index=True)

    print('**********************')
    print(model)

    for col in ['DI_gender', 'DI_age']:
      df_full_result[col] = df_full_result[col].apply(lambda x: abs((1 - x)/(1 + x)))
    

    ### Calcul de valeurs moyenne pour le full 
    result_avg = df_full_result.mean()
    cost_avg = df_full_cost.mean()
    cost_time_avg = df_full_cost_time.mean()

    # Ligne de valeurs moyennes de full 
    avg_data = pd.DataFrame({
        **{col + '_avg': [result_avg[col]] for col in result_avg.index},
        **{col + '_avg': [cost_avg[col]] for col in cost_avg.index},
        **{col + '_avg': [cost_time_avg[col]] for col in cost_time_avg.index},
        'dataset': [dataset],
        'model': [model],
        'system': ['Full'],
        'ratio': [1],
    })

    df_average_results = pd.concat([df_average_results, avg_data], ignore_index=True)

    # Lire les fichiers de système de selection par système et par ratio

    for directory_path_1 in systems_path:
       for directory_path_2 in ratio_path:
           directory = join(directory_path_1, directory_path_2)
           system = directory_path_1.split('/')[-1]           
        
           df_full_result_selection = pd.DataFrame()
           df_full_cost_selection = pd.DataFrame()
           df_full_cost_time_selection = pd.DataFrame()
        
           
           # Cost file processing
           for filename in os.listdir(directory):
                if "fair_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'MLP': 
                        df_to_add_f = pd.read_csv(file_path).iloc[100:, 1:].abs()
                    elif model == 'SVM':
                        df_to_add_f = pd.read_csv(file_path).iloc[100:, 1:].abs()
                    else:
                        df_to_add_f = pd.read_csv(file_path).iloc[260:, 1:].abs()
                    df_full_result_selection = pd.concat([df_full_result_selection, df_to_add_f], ignore_index=True)
                    

                if (("cost_metrics_" + model in filename) and (filename.endswith("_0.csv"))):
                    file_path = os.path.join(directory, filename)
                    if model == 'MLP': 
                        df_to_add_t = pd.read_csv(file_path).iloc[100:, 1:].abs()
                    elif model == 'SVM':
                        df_to_add_t = pd.read_csv(file_path).iloc[100:, 1:].abs()
                    else: 
                        df_to_add_t = pd.read_csv(file_path).iloc[260:, 1:].abs()
                    df_full_cost_time_selection = pd.concat([df_full_cost_time_selection, df_to_add_t], ignore_index=True)
    

                

                if "cost_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'MLP': 
                        df_to_add_c = pd.read_csv(file_path).iloc[100:, 1:].abs()
                    elif model == 'SVM':
                        df_to_add_c = pd.read_csv(file_path).iloc[100:, 1:].abs()
                    else:
                        df_to_add_c = pd.read_csv(file_path).iloc[260:, 1:].abs()
                    df_full_cost_selection = pd.concat([df_full_cost_selection, df_to_add_c], ignore_index=True)
           
                
           for col in ['DI_gender', 'DI_age']:
             df_full_result_selection[col] = df_full_result_selection[col].apply(lambda x: abs((1 - x)/(1 + x)))
           
           ### Calcul de valeurs moyenne pour le full 
           result_sel_avg = df_full_result_selection.mean()
           cost_sel_avg = df_full_cost_selection.mean()
           cost_time_sel_avg = df_full_cost_time_selection.mean()

           ## Faire les t-test
           # Cost
           # time
           test_time = evaluate_score(df_full_cost_time_selection['Full_training_time'].to_numpy(), df_full_cost_time['Full_training_time'].to_numpy()) 
           
           # Model quality
           # accuracy
           test_acc = evaluate_score(df_full_result['Accuracy'].to_numpy(), df_full_result_selection['Accuracy'].to_numpy()) 
           # F1-score
           test_f1 = evaluate_score(df_full_result['F1_score'].to_numpy(), df_full_result_selection['F1_score'].to_numpy()) 
           #precision
           test_precision = evaluate_score(df_full_result['Precision'].to_numpy(), df_full_result_selection['Precision'].to_numpy()) 
           # recall 
           test_recall = evaluate_score(df_full_result['Recall'].to_numpy(), df_full_result_selection['Recall'].to_numpy()) 

           # Fairness
           # SPD gender
           test_spd_g = evaluate_score(df_full_result_selection['SPD_gender'].to_numpy(), df_full_result['SPD_gender'].to_numpy())
           # EOD gender
           test_eod_g = evaluate_score(df_full_result_selection['EOD_gender'].to_numpy(), df_full_result['EOD_gender'].to_numpy())
           # AOD gender
           test_aod_g = evaluate_score(df_full_result_selection['AOD_gender'].to_numpy(), df_full_result['AOD_gender'].to_numpy())
           # DI gender
           test_di_g = evaluate_score(df_full_result_selection['DI_gender'].to_numpy(), df_full_result['DI_gender'].to_numpy())
           # DcI gender
           test_dci_g = evaluate_score(df_full_result_selection['DcI_gender'].to_numpy(), df_full_result['DcI_gender'].to_numpy())

           # SPD age
           test_spd_a = evaluate_score(df_full_result_selection['SPD_age'].to_numpy(), df_full_result['SPD_age'].to_numpy())
           # EOD age
           test_eod_a = evaluate_score(df_full_result_selection['EOD_age'].to_numpy(), df_full_result['EOD_age'].to_numpy())
           # AOD age
           test_aod_a = evaluate_score(df_full_result_selection['AOD_age'].to_numpy(), df_full_result['AOD_age'].to_numpy())
           # DI age
           test_di_a = evaluate_score(df_full_result_selection['DI_age'].to_numpy(), df_full_result['DI_age'].to_numpy())
           # DcI age
           test_dci_a = evaluate_score(df_full_result_selection['DcI_age'].to_numpy(), df_full_result['DcI_age'].to_numpy())

        



           avg_sel_data = pd.DataFrame({
            **{col + '_avg': [result_sel_avg[col]] for col in result_sel_avg.index},
            **{col + '_avg': [cost_sel_avg[col]] for col in cost_sel_avg.index},
            **{col + '_avg': [cost_time_sel_avg[col]] for col in cost_time_sel_avg.index},
           'dataset': [dataset],
           'model': [model],
           'system': [system],
           'ratio': [directory_path_2.split('_')[-1]],
           'test_time' : [test_time], 
           'test_acc' : [test_acc], 
           'test_f1' : [test_f1], 
           'test_precision' : [test_precision], 
           'test_recall' : [test_recall],
           'test_SPD_gender' : [test_spd_g], 
           'test_EOD_gender' : [test_eod_g], 
           'test_AOD_gender' : [test_aod_g], 
           'test_DI_gender' : [test_di_g], 
           'test_DcI_gender' : [test_dci_g], 

           'test_SPD_age' : [test_spd_a],
           'test_EOD_age' : [test_eod_a],
           'test_AOD_age' : [test_aod_a], 
           'test_DI_age' : [test_di_a], 
           'test_DcI_age' : [test_dci_a], 

           'test_SPD_race' : ' ',
           'test_EOD_race' : ' ',
           'test_AOD_race' : ' ', 
           'test_DI_race' : ' ',
           'test_DcI_race' : ' ',
            })
           
           df_average_results = pd.concat([df_average_results, avg_sel_data], ignore_index=True)




 

result_path = join(root, "test", f"{dataset}_ttest_results_epochs_intervals.csv")
df_average_results.to_csv(result_path, index=False)



**********************
Logreg


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


**********************
MLP


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


**********************
SVM


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


MobiAct dataset

In [6]:
import os
import pandas as pd
import numpy as np
# Define paths and initialize variables
root = "../../results/"
systems = ["CRAIGPB", "GLISTERPB", "GradMatchPB", "Random"]
dataset = "mobiact" 
ratios = ["0.05", "0.1", "0.2", "0.3"]
models = ['MLP']

systems_path = [join(root, f"{dataset}-selection", sys) for sys in systems]
ratio_path = [f"{dataset}_{ratio}" for ratio in ratios]

directory_full = join(root, f"{dataset}-selection", "Full", f"{dataset}_1")

# Initialize full result and cost DataFrames
df_full_result = pd.DataFrame()
df_full_cost = pd.DataFrame()
df_full_cost_time = pd.DataFrame()

i = 0
files = []

df_average_results = pd.DataFrame()

# Load full system results for comparisons
for model in models: 

    df_full_result = pd.DataFrame()
    df_full_cost = pd.DataFrame()
    df_full_cost_time = pd.DataFrame()

    for filename in os.listdir(directory_full):
        if "fair_metrics_" in filename :
           file_path = os.path.join(directory_full, filename)
           print(file_path)
           df_r = pd.read_csv(file_path).iloc[280:, 1:]
           df_full_result = pd.concat([df_full_result, df_r.abs()], ignore_index=True)

        if "cost_metrics_"  in filename and filename.endswith("_0.csv"):
            file_path = os.path.join(directory_full, filename)
            df_ct = pd.read_csv(file_path).iloc[280:, 1:]
            df_full_cost_time = pd.concat([df_full_cost_time, df_ct.abs()], ignore_index=True)

        if "cost_metrics_"  in filename :
            file_path = os.path.join(directory_full, filename)
            df_c = pd.read_csv(file_path).iloc[280:, 1:]
            df_full_cost = pd.concat([df_full_cost, df_c.abs()], ignore_index=True)

    print('**********************')
    print(model)

    for col in ['DI_gender', 'DI_age']:
      df_full_result[col] = df_full_result[col].apply(lambda x: abs((1 - x)/(1 + x)))

    ### Calcul de valeurs moyenne pour le full 
    result_avg = df_full_result.mean()
    cost_avg = df_full_cost.mean()
    cost_time_avg = df_full_cost_time.mean()

    # Ligne de valeurs moyennes de full 
    avg_data = pd.DataFrame({
        **{col + '_avg': [result_avg[col]] for col in result_avg.index},
        **{col + '_avg': [cost_avg[col]] for col in cost_avg.index},
        **{col + '_avg': [cost_time_avg[col]] for col in cost_time_avg.index},
        'dataset': [dataset],
        'model': [model],
        'system': ['Full'],
        'ratio': [1],
    })

    df_average_results = pd.concat([df_average_results, avg_data], ignore_index=True)

    # Lire les fichiers de système de selection par système et par ratio

    for directory_path_1 in systems_path:
       for directory_path_2 in ratio_path:
           directory = join(directory_path_1, directory_path_2)
           system = directory_path_1.split('/')[-1]           
        
           df_full_result_selection = pd.DataFrame()
           df_full_cost_selection = pd.DataFrame()
           df_full_cost_time_selection = pd.DataFrame()
        
           
           # Cost file processing
           for filename in os.listdir(directory):
                if "fair_metrics_"  in filename:
                    file_path = os.path.join(directory, filename)
                    df_to_add_f = pd.read_csv(file_path).iloc[280:, 1:].abs()
                    df_full_result_selection = pd.concat([df_full_result_selection, df_to_add_f], ignore_index=True)
                    

                if (("cost_metrics_"  in filename) and (filename.endswith("_0.csv"))):
                    file_path = os.path.join(directory, filename)
                    df_to_add_t = pd.read_csv(file_path).iloc[280:, 1:].abs()
                    df_full_cost_time_selection = pd.concat([df_full_cost_time_selection, df_to_add_t], ignore_index=True)
    

                

                if "cost_metrics_"  in filename:
                    file_path = os.path.join(directory, filename)
                    df_to_add_c = pd.read_csv(file_path).iloc[280:, 1:].abs()
                    df_full_cost_selection = pd.concat([df_full_cost_selection, df_to_add_c], ignore_index=True)
           
                
           for col in ['DI_gender', 'DI_age']:
             df_full_result_selection[col] = df_full_result_selection[col].apply(lambda x: abs((1 - x)/(1 + x)))
    
           ### Calcul de valeurs moyenne pour le full 
           result_sel_avg = df_full_result_selection.mean()
           cost_sel_avg = df_full_cost_selection.mean()
           cost_time_sel_avg = df_full_cost_time_selection.mean()

           ## Faire les t-test
           # Cost
           # time
           test_time = evaluate_score(df_full_cost_time_selection['Full_training_time'].to_numpy(), df_full_cost_time['Full_training_time'].to_numpy()) 
           
           # Model quality
           # accuracy
           test_acc = evaluate_score(df_full_result['Accuracy'].to_numpy(), df_full_result_selection['Accuracy'].to_numpy()) 
           # F1-score
           test_f1 = evaluate_score(df_full_result['F1_score'].to_numpy(), df_full_result_selection['F1_score'].to_numpy()) 
           #precision
           test_precision = evaluate_score(df_full_result['Precision'].to_numpy(), df_full_result_selection['Precision'].to_numpy()) 
           # recall 
           test_recall = evaluate_score(df_full_result['Recall'].to_numpy(), df_full_result_selection['Recall'].to_numpy()) 

           # Fairness
           # SPD gender
           test_spd_g = evaluate_score(df_full_result_selection['SPD_gender'].to_numpy(), df_full_result['SPD_gender'].to_numpy())
           # EOD gender
           test_eod_g = evaluate_score(df_full_result_selection['EOD_gender'].to_numpy(), df_full_result['EOD_gender'].to_numpy())
           # AOD gender
           test_aod_g = evaluate_score(df_full_result_selection['AOD_gender'].to_numpy(), df_full_result['AOD_gender'].to_numpy())
           # DI gender
           test_di_g = evaluate_score(df_full_result_selection['DI_gender'].to_numpy(), df_full_result['DI_gender'].to_numpy())
           # DcI gender
           test_dci_g = evaluate_score(df_full_result_selection['DcI_gender'].to_numpy(), df_full_result['DcI_gender'].to_numpy())

           # SPD age
           test_spd_a = evaluate_score(df_full_result_selection['SPD_age'].to_numpy(), df_full_result['SPD_age'].to_numpy())
           # EOD age
           test_eod_a = evaluate_score(df_full_result_selection['EOD_age'].to_numpy(), df_full_result['EOD_age'].to_numpy())
           # AOD age
           test_aod_a = evaluate_score(df_full_result_selection['AOD_age'].to_numpy(), df_full_result['AOD_age'].to_numpy())
           # DI age
           test_di_a = evaluate_score(df_full_result_selection['DI_age'].to_numpy(), df_full_result['DI_age'].to_numpy())
           # DcI age
           test_dci_a = evaluate_score(df_full_result_selection['DcI_age'].to_numpy(), df_full_result['DcI_age'].to_numpy())

        



           avg_sel_data = pd.DataFrame({
            **{col + '_avg': [result_sel_avg[col]] for col in result_sel_avg.index},
            **{col + '_avg': [cost_sel_avg[col]] for col in cost_sel_avg.index},
            **{col + '_avg': [cost_time_sel_avg[col]] for col in cost_time_sel_avg.index},
           'dataset': [dataset],
           'model': [model],
           'system': [system],
           'ratio': [directory_path_2.split('_')[-1]],
           'test_time' : [test_time], 
           'test_acc' : [test_acc], 
           'test_f1' : [test_f1], 
           'test_precision' : [test_precision], 
           'test_recall' : [test_recall],
           'test_SPD_gender' : [test_spd_g], 
           'test_EOD_gender' : [test_eod_g], 
           'test_AOD_gender' : [test_aod_g], 
           'test_DI_gender' : [test_di_g], 
           'test_DcI_gender' : [test_dci_g], 

           'test_SPD_age' : [test_spd_a],
           'test_EOD_age' : [test_eod_a],
           'test_AOD_age' : [test_aod_a], 
           'test_DI_age' : [test_di_a], 
           'test_DcI_age' : [test_dci_a], 

           'test_SPD_race' : ' ',
           'test_EOD_race' : ' ',
           'test_AOD_race' : ' ', 
           'test_DI_race' : ' ',
           'test_DcI_race' : ' ',
            })
           
           df_average_results = pd.concat([df_average_results, avg_sel_data], ignore_index=True)




result_path = join(root, "test", f"{dataset}_ttest_results_epochs_intervals.csv")
df_average_results.to_csv(result_path, index=False)

../../results/mobiact-selection/Full/mobiact_1/fair_metrics_MLP_Full_2.csv
../../results/mobiact-selection/Full/mobiact_1/fair_metrics_MLP_Full_3.csv
../../results/mobiact-selection/Full/mobiact_1/fair_metrics_MLP_Full_1.csv
../../results/mobiact-selection/Full/mobiact_1/fair_metrics_MLP_Full_0.csv
../../results/mobiact-selection/Full/mobiact_1/fair_metrics_MLP_Full_4.csv
**********************
MLP


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


CelebA 

In [7]:
import os
import pandas as pd
import numpy as np
# Define paths and initialize variables
root = "../../results/"
systems = ["CRAIGPB", "GLISTERPB", "GradMatchPB", "Random"]
dataset = "celeba" 
ratios = ["0.05", "0.1", "0.2", "0.3"]
models = ["ResNet18", "VGG"]

systems_path = [join(root, f"{dataset}-selection", sys) for sys in systems]
ratio_path = [f"{dataset}_{ratio}" for ratio in ratios]

directory_full = join(root, f"{dataset}-selection", "Full", f"{dataset}_1")
# Metrics tables
index_labels = ['negative-f', 'insignificant-f','positive-f','negative-u', 'insignificant-u','positive-u',
                'negative-c', 'insignificant-c','positive-c', 'insignificant-f-u and positive-c' ]
# Initialize influence table
cols = ['ratio', 'system', 'time', 'acc', 'F1_score', 'Precision', 'Recall', 'SPD_gender', 
        'EOD_gender', 'AOD_gender', 'DI_gender', 'DcI_gender', 'SPD_age', 'EOD_age', 
        'AOD_age', 'DI_age', 'DcI_age']
compute_influence = pd.DataFrame(columns=cols)


# Initialize full result and cost DataFrames
df_full_result = pd.DataFrame()
df_full_cost = pd.DataFrame()
df_full_cost_time = pd.DataFrame()

i = 0
files = []

df_average_results = pd.DataFrame()

# Load full system results for comparisons
for model in models: 

    df_full_result = pd.DataFrame()
    df_full_cost = pd.DataFrame()
    df_full_cost_time = pd.DataFrame()

    for filename in os.listdir(directory_full):
        if "fair_metrics_"+ model in filename :
           file_path = os.path.join(directory_full, filename)
           if model == 'ResNet18':
               df_r = pd.read_csv(file_path).iloc[130:, 1:]
           else:
               df_r = pd.read_csv(file_path).iloc[130:, 1:]
           df_full_result = pd.concat([df_full_result, df_r.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename and filename.endswith("_0.csv"):
            file_path = os.path.join(directory_full, filename)
            if model == 'ResNet18':
                df_ct = pd.read_csv(file_path).iloc[130:, 1:]  
            else:
                df_ct = pd.read_csv(file_path).iloc[130:, 1:]  
            df_full_cost_time = pd.concat([df_full_cost_time, df_ct.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename :
            file_path = os.path.join(directory_full, filename)
            if model == 'ResNet18':
                df_c = pd.read_csv(file_path).iloc[130:, 1:]
            else:
                df_c = pd.read_csv(file_path).iloc[130:, 1:]
            df_full_cost = pd.concat([df_full_cost, df_c.abs()], ignore_index=True)

    print('**********************')
    print(model)


    for col in ['DI_gender', 'DI_age']:
        df_full_result[col] = df_full_result[col].apply(lambda x: abs((1 - x)/(1 + x)))
    

    ### Calcul de valeurs moyenne pour le full 
    result_avg = df_full_result.mean()
    cost_avg = df_full_cost.mean()
    cost_time_avg = df_full_cost_time.mean()

    # Ligne de valeurs moyennes de full 
    avg_data = pd.DataFrame({
        **{col + '_avg': [result_avg[col]] for col in result_avg.index},
        **{col + '_avg': [cost_avg[col]] for col in cost_avg.index},
        **{col + '_avg': [cost_time_avg[col]] for col in cost_time_avg.index},
        'dataset': [dataset],
        'model': [model],
        'system': ['Full'],
        'ratio': [1],
    })

    df_average_results = pd.concat([df_average_results, avg_data], ignore_index=True)

    # Lire les fichiers de système de selection par système et par ratio

    for directory_path_1 in systems_path:
       for directory_path_2 in ratio_path:
           directory = join(directory_path_1, directory_path_2) 
           system = directory_path_1.split('/')[-1]           
        
           df_full_result_selection = pd.DataFrame()
           df_full_cost_selection = pd.DataFrame()
           df_full_cost_time_selection = pd.DataFrame()
        
           # Cost file processing
           for filename in os.listdir(directory):
                if "fair_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'SVM':
                        df_to_add_f = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    else:
                        df_to_add_f = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    df_full_result_selection = pd.concat([df_full_result_selection, df_to_add_f], ignore_index=True)
                    
                if (("cost_metrics_" + model in filename) and (filename.endswith("_0.csv"))):
                    file_path = os.path.join(directory, filename)
                    if model == 'SVM':
                        df_to_add_t = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    else:
                        df_to_add_t = pd.read_csv(file_path).iloc[130:, 1:].abs()

                    df_full_cost_time_selection = pd.concat([df_full_cost_time_selection, df_to_add_t], ignore_index=True)
        
                if "cost_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'SVM':
                        df_to_add_c = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    else:
                        df_to_add_c = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    df_full_cost_selection = pd.concat([df_full_cost_selection, df_to_add_c], ignore_index=True)
           
                
           for col in ['DI_gender', 'DI_age']:
             df_full_result_selection[col] = df_full_result_selection[col].apply(lambda x: abs((1 - x)/(1 + x)))
           
           ### Calcul de valeurs moyenne pour le full 
           result_sel_avg = df_full_result_selection.mean()
           cost_sel_avg = df_full_cost_selection.mean()
           cost_time_sel_avg = df_full_cost_time_selection.mean()

           ## Faire les t-test
           # Cost
           # time
           test_time = evaluate_score(df_full_cost_time_selection['Full_training_time'].to_numpy(), df_full_cost_time['Full_training_time'].to_numpy()) 
           
           # Model quality
           # accuracy
           test_acc = evaluate_score(df_full_result['Accuracy'].to_numpy(), df_full_result_selection['Accuracy'].to_numpy()) 
           # F1-score
           test_f1 = evaluate_score(df_full_result['F1_score'].to_numpy(), df_full_result_selection['F1_score'].to_numpy()) 
           #precision
           test_precision = evaluate_score(df_full_result['Precision'].to_numpy(), df_full_result_selection['Precision'].to_numpy()) 
           # recall 
           test_recall = evaluate_score(df_full_result['Recall'].to_numpy(), df_full_result_selection['Recall'].to_numpy()) 

           # Fairness
           # SPD gender
           test_spd_g = evaluate_score(df_full_result_selection['SPD_gender'].to_numpy(), df_full_result['SPD_gender'].to_numpy())
           # EOD gender
           test_eod_g = evaluate_score(df_full_result_selection['EOD_gender'].to_numpy(), df_full_result['EOD_gender'].to_numpy())
           # AOD gender
           test_aod_g = evaluate_score(df_full_result_selection['AOD_gender'].to_numpy(), df_full_result['AOD_gender'].to_numpy())
           # DI gender
           test_di_g = evaluate_score(df_full_result_selection['DI_gender'].to_numpy(), df_full_result['DI_gender'].to_numpy())
           # DcI gender
           test_dci_g = evaluate_score(df_full_result_selection['DiscIndex_gender'].to_numpy(), df_full_result['DiscIndex_gender'].to_numpy())

           #SPD age
           test_spd_a = evaluate_score(df_full_result_selection['SPD_age'].to_numpy(), df_full_result['SPD_age'].to_numpy())
           # EOD age
           test_eod_a = evaluate_score(df_full_result_selection['EOD_age'].to_numpy(), df_full_result['EOD_age'].to_numpy())
           # AOD age
           test_aod_a = evaluate_score(df_full_result_selection['AOD_age'].to_numpy(), df_full_result['AOD_age'].to_numpy())
           # DI age
           test_di_a = evaluate_score(df_full_result_selection['DI_age'].to_numpy(), df_full_result['DI_age'].to_numpy())
           # DcI age
           test_dci_a = evaluate_score(df_full_result_selection['DiscIndex_age'].to_numpy(), df_full_result['DiscIndex_age'].to_numpy())



           avg_sel_data = pd.DataFrame({
            **{col + '_avg': [result_sel_avg[col]] for col in result_sel_avg.index},
            **{col + '_avg': [cost_sel_avg[col]] for col in cost_sel_avg.index},
            **{col + '_avg': [cost_time_sel_avg[col]] for col in cost_time_sel_avg.index},
           'dataset': [dataset],
           'model': [model],
           'system': [system],
           'ratio': [directory_path_2.split('_')[-1]],
           'test_time' : [test_time], 
           'test_acc' : [test_acc], 
           'test_f1' : [test_f1], 
           'test_precision' : [test_precision], 
           'test_recall' : [test_recall],

           'test_SPD_gender' : [test_spd_g], 
           'test_EOD_gender' : [test_eod_g], 
           'test_AOD_gender' : [test_aod_g], 
           'test_DI_gender' : [test_di_g], 
           'test_DcI_gender' : [test_dci_g], 

           'test_SPD_age' : [test_spd_a],
           'test_EOD_age' : [test_eod_a],
           'test_AOD_age' : [test_aod_a], 
           'test_DI_age' : [test_di_a], 
           'test_DcI_age' : [test_dci_a], 

           'test_SPD_race' : ' ',
           'test_EOD_race' : ' ',
           'test_AOD_race' : ' ', 
           'test_DI_race' : ' ', 
           'test_DcI_race' : ' ', 
            })
           
           df_average_results = pd.concat([df_average_results, avg_sel_data], ignore_index=True)




result_path = join(root, "test", f"{dataset}_ttest_results_epochs_intervals.csv")
df_average_results.to_csv(result_path, index=False)

**********************
ResNet18


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


**********************
VGG


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


FairFace

In [8]:
import os
import pandas as pd
import numpy as np
# Define paths and initialize variables
root = "../../results/"
systems = ["CRAIGPB", "GLISTERPB", "GradMatchPB", "Random"]
dataset = "fairface" 
ratios = ["0.05", "0.1", "0.2", "0.3"]
models = ["ResNet18", "VGG"]

systems_path = [join(root, f"{dataset}-selection", sys) for sys in systems]
ratio_path = [f"{dataset}_{ratio}" for ratio in ratios]

directory_full = join(root, f"{dataset}-selection", "Full", f"{dataset}_1")

# Metrics tables
index_labels = ['negative-f', 'insignificant-f','positive-f','negative-u', 'insignificant-u','positive-u',
                'negative-c', 'insignificant-c','positive-c', 'insignificant-f-u and positive-c' ]
# Initialize influence table
cols = ['ratio', 'system', 'time', 'acc', 'F1_score', 'Precision', 'Recall', 'SPD_gender', 
        'EOD_gender', 'AOD_gender', 'DI_gender', 'DcI_gender', 'SPD_age', 'EOD_age', 
        'AOD_age', 'DI_age', 'DcI_age']
compute_influence = pd.DataFrame(columns=cols)


# Initialize full result and cost DataFrames
df_full_result = pd.DataFrame()
df_full_cost = pd.DataFrame()
df_full_cost_time = pd.DataFrame()

i = 0
files = []

df_average_results = pd.DataFrame()

# Load full system results for comparisons
for model in models: 

    df_full_result = pd.DataFrame()
    df_full_cost = pd.DataFrame()
    df_full_cost_time = pd.DataFrame()

    for filename in os.listdir(directory_full):
        if "fair_metrics_"+ model in filename :
           file_path = os.path.join(directory_full, filename)
           if model == 'ResNet18':
               df_r = pd.read_csv(file_path).iloc[130:, 1:]
           else:
               df_r = pd.read_csv(file_path).iloc[130:, 1:]
           df_full_result = pd.concat([df_full_result, df_r.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename and filename.endswith("_0.csv"):
            file_path = os.path.join(directory_full, filename)
            if model == 'ResNet18':
                df_ct = pd.read_csv(file_path).iloc[130:, 1:]  
            else:
                df_ct = pd.read_csv(file_path).iloc[130:, 1:]  
            df_full_cost_time = pd.concat([df_full_cost_time, df_ct.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename :
            file_path = os.path.join(directory_full, filename)
            if model == 'ResNet18':
                df_c = pd.read_csv(file_path).iloc[130:, 1:]
            else:
                df_c = pd.read_csv(file_path).iloc[130:, 1:]
            df_full_cost = pd.concat([df_full_cost, df_c.abs()], ignore_index=True)
            print(df_full_cost.head())

    print('**********************')
    print(model)
    

    for col in ['DI_age', 'DI_race']:
        df_full_result[col] = df_full_result[col].apply(lambda x: abs((1 - x)/(1 + x)))

    ### Calcul de valeurs moyenne pour le full 
    result_avg = df_full_result.mean()
    cost_avg = df_full_cost.mean()
    cost_time_avg = df_full_cost_time.mean()

    # Ligne de valeurs moyennes de full 
    avg_data = pd.DataFrame({
        **{col + '_avg': [result_avg[col]] for col in result_avg.index},
        **{col + '_avg': [cost_avg[col]] for col in cost_avg.index},
        **{col + '_avg': [cost_time_avg[col]] for col in cost_time_avg.index},
        'dataset': [dataset],
        'model': [model],
        'system': ['Full'],
        'ratio': [1],
    })

    df_average_results = pd.concat([df_average_results, avg_data], ignore_index=True)

    # Lire les fichiers de système de selection par système et par ratio

    for directory_path_1 in systems_path:
       for directory_path_2 in ratio_path:
           directory = join(directory_path_1,directory_path_2)
           system = directory_path_1.split('/')[-1]           
        
           df_full_result_selection = pd.DataFrame()
           df_full_cost_selection = pd.DataFrame()
           df_full_cost_time_selection = pd.DataFrame()
        
           # Cost file processing
           for filename in os.listdir(directory):
                if "fair_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'ResNet18':
                        df_to_add_f = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    else:
                        df_to_add_f = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    df_full_result_selection = pd.concat([df_full_result_selection, df_to_add_f], ignore_index=True)
                    
                if (("cost_metrics_" + model in filename) and (filename.endswith("_0.csv"))):
                    file_path = os.path.join(directory, filename)
                    print(file_path)
                    if model == 'ResNet18':
                        df_to_add_t = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    else:
                        df_to_add_t = pd.read_csv(file_path).iloc[130:, 1:].abs()

                    df_full_cost_time_selection = pd.concat([df_full_cost_time_selection, df_to_add_t], ignore_index=True)
        
                if "cost_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'ResNet18':
                        df_to_add_c = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    else:
                        df_to_add_c = pd.read_csv(file_path).iloc[130:, 1:].abs()
                    df_full_cost_selection = pd.concat([df_full_cost_selection, df_to_add_c], ignore_index=True)
           
           
           for col in ['DI_age', 'DI_race']:
             df_full_result_selection[col] = df_full_result_selection[col].apply(lambda x: abs((1 - x)/(1 + x)))


           ### Calcul de valeurs moyenne pour le full 
           result_sel_avg = df_full_result_selection.mean()
           cost_sel_avg = df_full_cost_selection.mean()
           cost_time_sel_avg = df_full_cost_time_selection.mean()

           print(df_full_cost_time_selection.head())

           ## Faire les t-test
           # Cost
           # time
           test_time = evaluate_score(df_full_cost_time_selection['Full_training_time'].to_numpy(), df_full_cost_time['Full_training_time'].to_numpy()) 
           
           # Model quality
           # accuracy
           test_acc = evaluate_score(df_full_result['Accuracy'].to_numpy(), df_full_result_selection['Accuracy'].to_numpy()) 
           # F1-score
           test_f1 = evaluate_score(df_full_result['F1_score'].to_numpy(), df_full_result_selection['F1_score'].to_numpy()) 
           #precision
           test_precision = evaluate_score(df_full_result['Precision'].to_numpy(), df_full_result_selection['Precision'].to_numpy()) 
           # recall 
           test_recall = evaluate_score(df_full_result['Recall'].to_numpy(), df_full_result_selection['Recall'].to_numpy()) 

           # Fairness
           test_spd_r = evaluate_score(df_full_result_selection['SPD_race'].to_numpy(), df_full_result['SPD_race'].to_numpy())
           # EOD race
           test_eod_r = evaluate_score(df_full_result_selection['EOD_race'].to_numpy(), df_full_result['EOD_race'].to_numpy())
           # AOD race
           test_aod_r = evaluate_score(df_full_result_selection['AOD_race'].to_numpy(), df_full_result['AOD_race'].to_numpy())

           test_di_r = evaluate_score(df_full_result_selection['DI_race'].to_numpy(), df_full_result['DI_race'].to_numpy())
           # DcI age
           test_dci_r = evaluate_score(df_full_result_selection['DcI_race'].to_numpy(), df_full_result['DcI_race'].to_numpy())

           #SPD age
           test_spd_a = evaluate_score(df_full_result_selection['SPD_age'].to_numpy(), df_full_result['SPD_age'].to_numpy())
           # EOD age
           test_eod_a = evaluate_score(df_full_result_selection['EOD_age'].to_numpy(), df_full_result['EOD_age'].to_numpy())
           # AOD age
           test_aod_a = evaluate_score(df_full_result_selection['AOD_age'].to_numpy(), df_full_result['AOD_age'].to_numpy())
           # DI age
           test_di_a = evaluate_score(df_full_result_selection['DI_age'].to_numpy(), df_full_result['DI_age'].to_numpy())
           # DcI age
           test_dci_a = evaluate_score(df_full_result_selection['DcI_age'].to_numpy(), df_full_result['DcI_age'].to_numpy())



           avg_sel_data = pd.DataFrame({
            **{col + '_avg': [result_sel_avg[col]] for col in result_sel_avg.index},
            **{col + '_avg': [cost_sel_avg[col]] for col in cost_sel_avg.index},
            **{col + '_avg': [cost_time_sel_avg[col]] for col in cost_time_sel_avg.index},
           'dataset': [dataset],
           'model': [model],
           'system': [system],
           'ratio': [directory_path_2.split('_')[-1]],
           'test_time' : [test_time], 
           'test_acc' : [test_acc], 
           'test_f1' : [test_f1], 
           'test_precision' : [test_precision], 
           'test_recall' : [test_recall],

           'test_SPD_gender' : ' ', 
           'test_EOD_gender' : ' ', 
           'test_AOD_gender' : ' ', 
           'test_DI_gender' : ' ', 
           'test_DcI_gender' : ' ', 

           'test_SPD_age' : [test_spd_a],
           'test_EOD_age' : [test_eod_a],
           'test_AOD_age' : [test_aod_a], 
           'test_DI_age' : [test_di_a], 
           'test_DcI_age' : [test_dci_a], 

           'test_SPD_race' : [test_spd_r],
           'test_EOD_race' : [test_eod_r],
           'test_AOD_race' : [test_aod_r], 
           'test_DI_race' : [test_di_r],
           'test_DcI_race' : [test_dci_r],
            })
           
           df_average_results = pd.concat([df_average_results, avg_sel_data], ignore_index=True)




result_path = join(root, "test", f"{dataset}_ttest_results_epochs_intervals.csv")
df_average_results.to_csv(result_path, index=False)

   Model_training_time  Full_training_time      Loss
0            13.713502         2076.501714  0.000086
1            13.336160         2076.501714  0.000458
2            13.332054         2076.501714  0.000092
3            13.332910         2076.501714  0.000355
4            13.327102         2076.501714  0.000041
   Model_training_time  Full_training_time      Loss
0            13.713502         2076.501714  0.000086
1            13.336160         2076.501714  0.000458
2            13.332054         2076.501714  0.000092
3            13.332910         2076.501714  0.000355
4            13.327102         2076.501714  0.000041
   Model_training_time  Full_training_time      Loss
0            13.713502         2076.501714  0.000086
1            13.336160         2076.501714  0.000458
2            13.332054         2076.501714  0.000092
3            13.332910         2076.501714  0.000355
4            13.327102         2076.501714  0.000041
   Model_training_time  Full_training_time    

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


../../results/fairface-selection/GradMatchPB/fairface_0.1/cost_metrics_ResNet18_GradMatchPB_0.csv
   Model_training_time  Full_training_time      Loss
0             1.331688          303.441554  0.018343
1             1.328068          303.441554  0.020095
2             1.331537          303.441554  0.011384
3             1.337294          303.441554  0.008112
4             1.327590          303.441554  0.007747
../../results/fairface-selection/GradMatchPB/fairface_0.2/cost_metrics_ResNet18_GradMatchPB_0.csv
   Model_training_time  Full_training_time      Loss
0             2.648473          503.384566  0.016904
1             2.648428          503.384566  0.008046
2             2.676161          503.384566  0.012298
3             2.676300          503.384566  0.006334
4             2.718449          503.384566  0.005857
../../results/fairface-selection/GradMatchPB/fairface_0.3/cost_metrics_ResNet18_GradMatchPB_0.csv
   Model_training_time  Full_training_time      Loss
0             3.9

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


../../results/fairface-selection/CRAIGPB/fairface_0.1/cost_metrics_VGG_CRAIGPB_0.csv
   Model_training_time  Full_training_time      Loss
0             1.342347          319.685089  0.002246
1             1.356629          319.685089  0.002490
2             1.320164          319.685089  0.002241
3             1.334708          319.685089  0.001500
4             1.318306          319.685089  0.001428
../../results/fairface-selection/CRAIGPB/fairface_0.2/cost_metrics_VGG_CRAIGPB_0.csv
   Model_training_time  Full_training_time      Loss
0             2.634768           520.21787  0.001246
1             2.656782           520.21787  0.001051
2             2.631490           520.21787  0.001044
3             2.656668           520.21787  0.001094
4             2.637522           520.21787  0.001009
../../results/fairface-selection/CRAIGPB/fairface_0.3/cost_metrics_VGG_CRAIGPB_0.csv
   Model_training_time  Full_training_time      Loss
0             3.967357           724.36695  0.001011
1  

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


../../results/fairface-selection/GradMatchPB/fairface_0.3/cost_metrics_VGG_GradMatchPB_0.csv
   Model_training_time  Full_training_time      Loss
0             4.007452          703.103579  0.000394
1             4.030547          703.103579  0.001234
2             4.021805          703.103579  0.000596
3             4.013532          703.103579  0.000291
4             4.027077          703.103579  0.000364
../../results/fairface-selection/Random/fairface_0.05/cost_metrics_VGG_Random_0.csv
   Model_training_time  Full_training_time      Loss
0             0.670773          160.504532  0.003536
1             0.674973          160.504532  0.000429
2             0.672160          160.504532  0.000290
3             0.673770          160.504532  0.006743
4             0.667425          160.504532  0.000339
../../results/fairface-selection/Random/fairface_0.1/cost_metrics_VGG_Random_0.csv
   Model_training_time  Full_training_time      Loss
0             1.344417          265.225704  0.00013

  return hypotest_fun_in(*args, **kwds)


AudioMNIST


In [9]:
import os
import pandas as pd
import numpy as np
# Define paths and initialize variables
root = "../../results/"
systems = ["CRAIGPB", "GLISTERPB", "GradMatchPB", "Random"]
dataset = "audiomnist" 
ratios = ["0.05", "0.1", "0.2", "0.3"]
models = ["AudioLSTM", "AudioCNN"]

systems_path = [join(root, f"{dataset}-selection", sys) for sys in systems]
ratio_path = [f"{dataset}_{ratio}" for ratio in ratios]

directory_full = join(root, f"{dataset}-selection", "Full", f"{dataset}_1")
# Metrics tables
index_labels = ['negative-f', 'insignificant-f','positive-f','negative-u', 'insignificant-u','positive-u',
                'negative-c', 'insignificant-c','positive-c', 'insignificant-f-u and positive-c' ]
# Initialize influence table
cols = ['ratio', 'system', 'time', 'acc', 'F1_score', 'Precision', 'Recall', 'SPD_gender', 
        'EOD_gender', 'AOD_gender', 'DI_gender', 'DcI_gender', 'SPD_age', 'EOD_age', 
        'AOD_age', 'DI_age', 'DcI_age']
compute_influence = pd.DataFrame(columns=cols)


# Initialize full result and cost DataFrames
df_full_result = pd.DataFrame()
df_full_cost = pd.DataFrame()
df_full_cost_time = pd.DataFrame()

i = 0
files = []

df_average_results = pd.DataFrame()

# Load full system results for comparisons
for model in models: 

    df_full_result = pd.DataFrame()
    df_full_cost = pd.DataFrame()
    df_full_cost_time = pd.DataFrame()

    for filename in os.listdir(directory_full):
        if "fair_metrics_"+ model in filename :
           file_path = os.path.join(directory_full, filename)
           if model == 'AudioLSTM':
               df_r = pd.read_csv(file_path).iloc[130:150, 1:]
           else:
               df_r = pd.read_csv(file_path).iloc[130:150, 1:]
           df_full_result = pd.concat([df_full_result, df_r.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename and filename.endswith("_0.csv"):
            file_path = os.path.join(directory_full, filename)
            if model == 'AudioLSTM':
                df_ct = pd.read_csv(file_path).iloc[130:150, 1:]  
            else:
                df_ct = pd.read_csv(file_path).iloc[130:150, 1:]  
            df_full_cost_time = pd.concat([df_full_cost_time, df_ct.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename :
            file_path = os.path.join(directory_full, filename)
            if model == 'AudioLSTM':
                df_c = pd.read_csv(file_path).iloc[130:150, 1:]
            else:
                df_c = pd.read_csv(file_path).iloc[130:150, 1:]
            df_full_cost = pd.concat([df_full_cost, df_c.abs()], ignore_index=True)

    print('**********************')
    print(model)


    for col in ['DI_gender', 'DI_age']:
        df_full_result[col] = df_full_result[col].apply(lambda x: abs((1 - x)/(1 + x)))
    

    ### Calcul de valeurs moyenne pour le full 
    result_avg = df_full_result.mean()
    cost_avg = df_full_cost.mean()
    cost_time_avg = df_full_cost_time.mean()

    # Ligne de valeurs moyennes de full 
    avg_data = pd.DataFrame({
        **{col + '_avg': [result_avg[col]] for col in result_avg.index},
        **{col + '_avg': [cost_avg[col]] for col in cost_avg.index},
        **{col + '_avg': [cost_time_avg[col]] for col in cost_time_avg.index},
        'dataset': [dataset],
        'model': [model],
        'system': ['Full'],
        'ratio': [1],
    })

    df_average_results = pd.concat([df_average_results, avg_data], ignore_index=True)

    # Lire les fichiers de système de selection par système et par ratio

    for directory_path_1 in systems_path:
       for directory_path_2 in ratio_path:
           directory = join(directory_path_1, directory_path_2) 
           system = directory_path_1.split('/')[-1]           
        
           df_full_result_selection = pd.DataFrame()
           df_full_cost_selection = pd.DataFrame()
           df_full_cost_time_selection = pd.DataFrame()
        
           # Cost file processing
           for filename in os.listdir(directory):
                if "fair_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'AudioLSTM':
                        df_to_add_f = pd.read_csv(file_path).iloc[130:150, 1:].abs()
                    else:
                        df_to_add_f = pd.read_csv(file_path).iloc[130:150, 1:].abs()
                    df_full_result_selection = pd.concat([df_full_result_selection, df_to_add_f], ignore_index=True)
                    
                if (("cost_metrics_" + model in filename) and (filename.endswith("_0.csv"))):
                    file_path = os.path.join(directory, filename)
                    if model == 'AudioLSTM':
                        df_to_add_t = pd.read_csv(file_path).iloc[130:150, 1:].abs()
                    else:
                        df_to_add_t = pd.read_csv(file_path).iloc[130:150, 1:].abs()

                    df_full_cost_time_selection = pd.concat([df_full_cost_time_selection, df_to_add_t], ignore_index=True)
        
                if "cost_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'AudioLSTM':
                        df_to_add_c = pd.read_csv(file_path).iloc[130:150, 1:].abs()
                    else:
                        df_to_add_c = pd.read_csv(file_path).iloc[130:150, 1:].abs()
                    df_full_cost_selection = pd.concat([df_full_cost_selection, df_to_add_c], ignore_index=True)
           
                
           for col in ['DI_gender', 'DI_age']:
             df_full_result_selection[col] = df_full_result_selection[col].apply(lambda x: abs((1 - x)/(1 + x)))
           
           ### Calcul de valeurs moyenne pour le full 
           result_sel_avg = df_full_result_selection.mean()
           cost_sel_avg = df_full_cost_selection.mean()
           cost_time_sel_avg = df_full_cost_time_selection.mean()

           ## Faire les t-test
           # Cost
           # time
           test_time = evaluate_score(df_full_cost_time_selection['Full_training_time'].to_numpy(), df_full_cost_time['Full_training_time'].to_numpy()) 
           
           # Model quality
           # accuracy
           test_acc = evaluate_score(df_full_result['Accuracy'].to_numpy(), df_full_result_selection['Accuracy'].to_numpy()) 
           # F1-score
           test_f1 = evaluate_score(df_full_result['F1_score'].to_numpy(), df_full_result_selection['F1_score'].to_numpy()) 
           #precision
           test_precision = evaluate_score(df_full_result['Precision'].to_numpy(), df_full_result_selection['Precision'].to_numpy()) 
           # recall 
           test_recall = evaluate_score(df_full_result['Recall'].to_numpy(), df_full_result_selection['Recall'].to_numpy()) 

           # Fairness
           # SPD gender
           test_spd_g = evaluate_score(df_full_result_selection['SPD_gender'].to_numpy(), df_full_result['SPD_gender'].to_numpy())
           # EOD gender
           test_eod_g = evaluate_score(df_full_result_selection['EOD_gender'].to_numpy(), df_full_result['EOD_gender'].to_numpy())
           # AOD gender
           test_aod_g = evaluate_score(df_full_result_selection['AOD_gender'].to_numpy(), df_full_result['AOD_gender'].to_numpy())
           # DI gender
           test_di_g = evaluate_score(df_full_result_selection['DI_gender'].to_numpy(), df_full_result['DI_gender'].to_numpy())
           # DcI gender
           test_dci_g = evaluate_score(df_full_result_selection['DcI_gender'].to_numpy(), df_full_result['DcI_gender'].to_numpy())

           #SPD age
           test_spd_a = evaluate_score(df_full_result_selection['SPD_age'].to_numpy(), df_full_result['SPD_age'].to_numpy())
           # EOD age
           test_eod_a = evaluate_score(df_full_result_selection['EOD_age'].to_numpy(), df_full_result['EOD_age'].to_numpy())
           # AOD age
           test_aod_a = evaluate_score(df_full_result_selection['AOD_age'].to_numpy(), df_full_result['AOD_age'].to_numpy())
           # DI age
           test_di_a = evaluate_score(df_full_result_selection['DI_age'].to_numpy(), df_full_result['DI_age'].to_numpy())
           # DcI age
           test_dci_a = evaluate_score(df_full_result_selection['DcI_age'].to_numpy(), df_full_result['DcI_age'].to_numpy())



           avg_sel_data = pd.DataFrame({
            **{col + '_avg': [result_sel_avg[col]] for col in result_sel_avg.index},
            **{col + '_avg': [cost_sel_avg[col]] for col in cost_sel_avg.index},
            **{col + '_avg': [cost_time_sel_avg[col]] for col in cost_time_sel_avg.index},
           'dataset': [dataset],
           'model': [model],
           'system': [system],
           'ratio': [directory_path_2.split('_')[-1]],
           'test_time' : [test_time], 
           'test_acc' : [test_acc], 
           'test_f1' : [test_f1], 
           'test_precision' : [test_precision], 
           'test_recall' : [test_recall],

           'test_SPD_gender' : [test_spd_g], 
           'test_EOD_gender' : [test_eod_g], 
           'test_AOD_gender' : [test_aod_g], 
           'test_DI_gender' : [test_di_g], 
           'test_DcI_gender' : [test_dci_g], 

           'test_SPD_age' : [test_spd_a],
           'test_EOD_age' : [test_eod_a],
           'test_AOD_age' : [test_aod_a], 
           'test_DI_age' : [test_di_a], 
           'test_DcI_age' : [test_dci_a], 

           'test_SPD_race' : ' ',
           'test_EOD_race' : ' ',
           'test_AOD_race' : ' ', 
           'test_DI_race' : ' ', 
           'test_DcI_race' : ' ', 
            })
           
           df_average_results = pd.concat([df_average_results, avg_sel_data], ignore_index=True)




result_path = join(root, "test", f"{dataset}_ttest_results_epochs_intervals.csv")
df_average_results.to_csv(result_path, index=False)

**********************
AudioLSTM


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


**********************
AudioCNN


  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


In [10]:
import os
import pandas as pd
import numpy as np
# Define paths and initialize variables
root = "../../results/"
systems = ["CRAIGPB", "GLISTERPB", "GradMatchPB", "Random"]
dataset = "voxceleb" 
ratios = ["0.05", "0.1", "0.2", "0.3"]
models = ["AudioLSTM", "AudioCNN"]

systems_path = [join(root, f"{dataset}-selection", sys) for sys in systems]
ratio_path = [f"{dataset}_{ratio}" for ratio in ratios]

directory_full = join(root, f"{dataset}-selection", "Full", f"{dataset}_1")

# Metrics tables
index_labels = ['negative-f', 'insignificant-f','positive-f','negative-u', 'insignificant-u','positive-u',
                'negative-c', 'insignificant-c','positive-c', 'insignificant-f-u and positive-c' ]
# Initialize influence table
cols = ['ratio', 'system', 'time', 'acc', 'F1_score', 'Precision', 'Recall', 'SPD_gender', 
        'EOD_gender', 'AOD_gender', 'DI_gender', 'DcI_gender', 'SPD_age', 'EOD_age', 
        'AOD_age', 'DI_age', 'DcI_age']
compute_influence = pd.DataFrame(columns=cols)


# Initialize full result and cost DataFrames
df_full_result = pd.DataFrame()
df_full_cost = pd.DataFrame()
df_full_cost_time = pd.DataFrame()

i = 0
files = []

df_average_results = pd.DataFrame()

# Load full system results for comparisons
for model in models: 

    df_full_result = pd.DataFrame()
    df_full_cost = pd.DataFrame()
    df_full_cost_time = pd.DataFrame()

    for filename in os.listdir(directory_full):
        if "fair_metrics_"+ model in filename :
           file_path = os.path.join(directory_full, filename)
           if model == 'AudioLSTM':
               df_r = pd.read_csv(file_path).iloc[130:150, 1:]
           else:
               df_r = pd.read_csv(file_path).iloc[130:150, 1:]
           df_full_result = pd.concat([df_full_result, df_r.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename and filename.endswith("_0.csv"):
            file_path = os.path.join(directory_full, filename)
            if model == 'AudioLSTM':
                df_ct = pd.read_csv(file_path).iloc[130:150, 1:]  
            else:
                df_ct = pd.read_csv(file_path).iloc[130:150, 1:]  
            df_full_cost_time = pd.concat([df_full_cost_time, df_ct.abs()], ignore_index=True)

        if "cost_metrics_" + model in filename :
            file_path = os.path.join(directory_full, filename)
            if model == 'AudioLSTM':
                df_c = pd.read_csv(file_path).iloc[130:150, 1:]
            else:
                df_c = pd.read_csv(file_path).iloc[130:150, 1:]
            df_full_cost = pd.concat([df_full_cost, df_c.abs()], ignore_index=True)
            print(df_full_cost.head())

    print('**********************')
    print(model)
    
    for col in ['DI_race']:
        df_full_result[col] = df_full_result[col].apply(lambda x: abs((1 - x)/(1 + x)))

    ### Calcul de valeurs moyenne pour le full 
    result_avg = df_full_result.mean()
    cost_avg = df_full_cost.mean()
    cost_time_avg = df_full_cost_time.mean()

    # Ligne de valeurs moyennes de full 
    avg_data = pd.DataFrame({
        **{col + '_avg': [result_avg[col]] for col in result_avg.index},
        **{col + '_avg': [cost_avg[col]] for col in cost_avg.index},
        **{col + '_avg': [cost_time_avg[col]] for col in cost_time_avg.index},
        'dataset': [dataset],
        'model': [model],
        'system': ['Full'],
        'ratio': [1],
    })

    df_average_results = pd.concat([df_average_results, avg_data], ignore_index=True)

    # Lire les fichiers de système de selection par système et par ratio

    for directory_path_1 in systems_path:
       for directory_path_2 in ratio_path:
           directory = join(directory_path_1,directory_path_2)
           system = directory_path_1.split('/')[-1]           
        
           df_full_result_selection = pd.DataFrame()
           df_full_cost_selection = pd.DataFrame()
           df_full_cost_time_selection = pd.DataFrame()
        
           # Cost file processing
           for filename in os.listdir(directory):
                if "fair_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'AudioLSTM':
                        df_to_add_f = pd.read_csv(file_path).iloc[130:150, 1:].abs()
                    else:
                        df_to_add_f = pd.read_csv(file_path).iloc[130:150, 1:].abs()
                    df_full_result_selection = pd.concat([df_full_result_selection, df_to_add_f], ignore_index=True)
                    
                if (("cost_metrics_" + model in filename) and (filename.endswith("_0.csv"))):
                    file_path = os.path.join(directory, filename)
                    print(file_path)
                    if model == 'AudioLSTM':
                        df_to_add_t = pd.read_csv(file_path).iloc[130:150, 1:].abs()
                    else:
                        df_to_add_t = pd.read_csv(file_path).iloc[130:150, 1:].abs()

                    df_full_cost_time_selection = pd.concat([df_full_cost_time_selection, df_to_add_t], ignore_index=True)
        
                if "cost_metrics_" + model in filename:
                    file_path = os.path.join(directory, filename)
                    if model == 'AudioLSTM':
                        df_to_add_c = pd.read_csv(file_path).iloc[130:150, 1:].abs()
                    else:
                        df_to_add_c = pd.read_csv(file_path).iloc[130:150, 1:].abs()
                    df_full_cost_selection = pd.concat([df_full_cost_selection, df_to_add_c], ignore_index=True)
            

           for col in ['DI_race']:
             df_full_result_selection[col] = df_full_result_selection[col].apply(lambda x: abs((1 - x)/(1 + x)))
           
           ### Calcul de valeurs moyenne pour le full 
           result_sel_avg = df_full_result_selection.mean()
           cost_sel_avg = df_full_cost_selection.mean()
           cost_time_sel_avg = df_full_cost_time_selection.mean()

           print(df_full_cost_time_selection.head())

           ## Faire les t-test
           # Cost
           # time
           test_time = evaluate_score(df_full_cost_time_selection['Full_training_time'].to_numpy(), df_full_cost_time['Full_training_time'].to_numpy()) 
           
           # Model quality
           # accuracy
           test_acc = evaluate_score(df_full_result['Accuracy'].to_numpy(), df_full_result_selection['Accuracy'].to_numpy()) 
           # F1-score
           test_f1 = evaluate_score(df_full_result['F1_score'].to_numpy(), df_full_result_selection['F1_score'].to_numpy()) 
           #precision
           test_precision = evaluate_score(df_full_result['Precision'].to_numpy(), df_full_result_selection['Precision'].to_numpy()) 
           # recall 
           test_recall = evaluate_score(df_full_result['Recall'].to_numpy(), df_full_result_selection['Recall'].to_numpy()) 

           # Fairness
           test_spd_r = evaluate_score(df_full_result_selection['SPD_race'].to_numpy(), df_full_result['SPD_race'].to_numpy())
           # EOD race
           test_eod_r = evaluate_score(df_full_result_selection['EOD_race'].to_numpy(), df_full_result['EOD_race'].to_numpy())
           # AOD race
           test_aod_r = evaluate_score(df_full_result_selection['AOD_race'].to_numpy(), df_full_result['AOD_race'].to_numpy())

           test_di_r = evaluate_score(df_full_result_selection['DI_race'].to_numpy(), df_full_result['DI_race'].to_numpy())
           # DcI age
           test_dci_r = evaluate_score(df_full_result_selection['DcI_race'].to_numpy(), df_full_result['DcI_race'].to_numpy())




           avg_sel_data = pd.DataFrame({
            **{col + '_avg': [result_sel_avg[col]] for col in result_sel_avg.index},
            **{col + '_avg': [cost_sel_avg[col]] for col in cost_sel_avg.index},
            **{col + '_avg': [cost_time_sel_avg[col]] for col in cost_time_sel_avg.index},
           'dataset': [dataset],
           'model': [model],
           'system': [system],
           'ratio': [directory_path_2.split('_')[-1]],
           'test_time' : [test_time], 
           'test_acc' : [test_acc], 
           'test_f1' : [test_f1], 
           'test_precision' : [test_precision], 
           'test_recall' : [test_recall],

           'test_SPD_gender' : ' ', 
           'test_EOD_gender' : ' ', 
           'test_AOD_gender' : ' ', 
           'test_DI_gender' : ' ', 
           'test_DcI_gender' : ' ', 

           'test_SPD_age' : ' ',
           'test_EOD_age' : ' ',
           'test_AOD_age' : ' ', 
           'test_DI_age' : ' ', 
           'test_DcI_age' : ' ', 

           'test_SPD_race' : [test_spd_r],
           'test_EOD_race' : [test_eod_r],
           'test_AOD_race' : [test_aod_r], 
           'test_DI_race' : [test_di_r],
           'test_DcI_race' : [test_dci_r],
            })
           
           df_average_results = pd.concat([df_average_results, avg_sel_data], ignore_index=True)




result_path = join(root, "test", f"{dataset}_ttest_results_epochs_intervals.csv")
df_average_results.to_csv(result_path, index=False)

   Model_training_time  Full_training_time      Loss
0            31.414213         4821.864268  0.000769
1            31.154732         4821.864268  0.000215
2            31.335187         4821.864268  0.000058
3            31.090790         4821.864268  0.000267
4            31.114908         4821.864268  0.000465
   Model_training_time  Full_training_time      Loss
0            31.414213         4821.864268  0.000769
1            31.154732         4821.864268  0.000215
2            31.335187         4821.864268  0.000058
3            31.090790         4821.864268  0.000267
4            31.114908         4821.864268  0.000465
   Model_training_time  Full_training_time      Loss
0            31.414213         4821.864268  0.000769
1            31.154732         4821.864268  0.000215
2            31.335187         4821.864268  0.000058
3            31.090790         4821.864268  0.000267
4            31.114908         4821.864268  0.000465
   Model_training_time  Full_training_time    

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


../../results/voxceleb-selection/GradMatchPB/voxceleb_0.2/cost_metrics_AudioLSTM_GradMatchPB_0.csv
   Model_training_time  Full_training_time      Loss
0             6.449037         1208.215578  0.100037
1             6.427994         1208.215578  0.057706
2             6.420883         1208.215578  0.011524
3             6.447299         1208.215578  0.068133
4             6.442755         1208.215578  0.016838
../../results/voxceleb-selection/GradMatchPB/voxceleb_0.3/cost_metrics_AudioLSTM_GradMatchPB_0.csv
   Model_training_time  Full_training_time      Loss
0             9.294458         1635.173651  0.003544
1             9.370997         1635.173651  0.009210
2             9.386792         1635.173651  0.010678
3             9.422617         1635.173651  0.005467
4             9.390996         1635.173651  0.022780
../../results/voxceleb-selection/Random/voxceleb_0.05/cost_metrics_AudioLSTM_Random_0.csv
   Model_training_time  Full_training_time      Loss
0             1.541895 

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


../../results/voxceleb-selection/GLISTERPB/voxceleb_0.1/cost_metrics_AudioCNN_GLISTERPB_0.csv
   Model_training_time  Full_training_time      Loss
0             3.661091          795.111755  0.212507
1             3.697782          795.111755  0.245906
2             3.663695          795.111755  0.247357
3             3.662822          795.111755  0.245560
4             3.682633          795.111755  0.245729
../../results/voxceleb-selection/GLISTERPB/voxceleb_0.2/cost_metrics_AudioCNN_GLISTERPB_0.csv
   Model_training_time  Full_training_time      Loss
0             6.827720         1290.970048  0.157785
1             6.918035         1290.970048  0.178243
2             6.797033         1290.970048  0.169895
3             6.881335         1290.970048  0.168110
4             7.154806         1290.970048  0.173150
../../results/voxceleb-selection/GLISTERPB/voxceleb_0.3/cost_metrics_AudioCNN_GLISTERPB_0.csv
   Model_training_time  Full_training_time      Loss
0            10.530774       

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


Post-processing T-test results 

Filtering T-test results based on model utility and fairness constraints

Focus on cases where model accuracy is severely degraded, in such cases, fairness impact is considered insignificant.

In [11]:
datasets = ["ars", "dc", "kdd", "adult", "mobiact", "celeba", "fairface", 'audiomnist', "voxceleb"]
root = "../../results/"

In [14]:
import pandas as pd
from os.path import basename 

### No modification 

def false_positives(path= '../../results/results/test/ars_ttest_results_epochs_intervals.csv', x = 10):
    # Initialize the reference accuracy
    ref_accuracy = None
    # DataFrame to hold rows that are 'false better'
    false_better_df = pd.DataFrame()

    df = pd.read_csv(path) 

    # Columns to check for the value 'positive'
    columns_to_modify = ['test_SPD_gender', 'test_EOD_gender', 'test_AOD_gender', 'test_DI_gender', 'test_DcI_gender', 
                        'test_SPD_age', 'test_EOD_age', 'test_AOD_age', 'test_DI_age', 'test_DcI_age', 
                        'test_SPD_race', 'test_EOD_race', 'test_AOD_race', 'test_DI_race', 'test_DcI_race']

    for index, row in df.iterrows():
        if row['system'] == 'Full':
            ref_accuracy = row['Accuracy_avg']  # Set the reference accuracy
        elif ref_accuracy is not None:  # Ensure there is a reference to compare against
            current_accuracy = row['Accuracy_avg']
            ### Not applied  (x*100)
            if (current_accuracy <= ref_accuracy - (x*100) ) and row['ratio'] != 0.5:
                # Add row to DataFrame using pd.concat
                false_better_df = pd.concat([false_better_df, pd.DataFrame([row])], ignore_index=True)
                # Modify specific columns if their values are 'positive'
                for col in columns_to_modify:
                    if row[col] == 'positive':
                        df.at[index, col] = 'insignificant-p'


    # Save 'false better' rows to a new CSV file
    path_o= '../../results/test/false_better_' + str(x) + '_' + basename(path) 
    false_better_df.to_csv(path_o, index=False)


    # Save the modified DataFrame to a new CSV file
    path_m= '../../results/test/positive_insig_'+ str(x) + '_' + basename(path) 
    df.to_csv(path_m, index=False)

    return len(false_better_df)


In [15]:
k = 5

for d in datasets : 
    path = join(root, "test", f"{d}_ttest_results_epochs_intervals.csv")
    false_positives(path, k)

Fairness constraint: the impact is considered significant (positive or negative) if the difference between the full metric and the selection metric exceeds 1%, or 0.01 for DI.

In [16]:
import pandas as pd

def false_positives_fairness(path=None, thr_di=1, thr_other=0.01, output_false_positives=None, output=None):
    # Load the CSV file
    df = pd.read_csv(path)

    # Initialize DataFrames
    false_better_df = pd.DataFrame()

    # Columns to check for the value 'positive'
    columns_to_modify = [
        'test_SPD_gender', 'test_EOD_gender', 'test_AOD_gender', 'test_DI_gender', 'test_DcI_gender', 
        'test_SPD_age', 'test_EOD_age', 'test_AOD_age', 'test_DI_age', 'test_DcI_age', 
        'test_SPD_race', 'test_EOD_race', 'test_AOD_race', 'test_DI_race', 'test_DcI_race'
    ]

    # Columns with the average metrics
    columns_to_check = [
        'SPD_gender_avg', 'EOD_gender_avg', 'AOD_gender_avg', 'DI_gender_avg', 'DcI_gender_avg',
        'SPD_age_avg', 'EOD_age_avg', 'AOD_age_avg', 'DI_age_avg', 'DcI_age_avg',
        'SPD_race_avg', 'EOD_race_avg', 'AOD_race_avg', 'DI_race_avg', 'DcI_race_avg'
    ]

    # Initialize references dictionary
    references = {col: None for col in columns_to_check if col in df.columns}

    # Iterate through DataFrame
    for index, row in df.iterrows():
        if row['system'] == 'Full':
            references = {col: None for col in columns_to_check if col in df.columns}
            # Set the reference values for metrics when system is 'Full'
            for col in columns_to_check:
                if col in df.columns:
                    references[col] = row[col]
        else:
            # Check other rows where system is not 'Full'
            for test_col, avg_col in zip(columns_to_modify, columns_to_check):
                if test_col in df.columns and avg_col in df.columns and row[test_col] == 'positive':
                    threshold = thr_di if (('DI_gender_avg' in avg_col) or ('DI_age_avg' in avg_col) or ('DI_race_avg' in avg_col)) else thr_other  # Set threshold based on DI metrics
                    if references.get(avg_col) is not None and (references[avg_col] - row[avg_col] < threshold):
                        df.at[index, test_col] = 'insignificant-p2'
                        #if row['system'] != 'Random' and row['ratio'] != 0.5:
                        if row['ratio'] != 0.5:
                            # Add row to 'false better' DataFrame if certain conditions are met
                            false_better_df = pd.concat([false_better_df, pd.DataFrame([row])], ignore_index=True)

    # Save 'false better' rows to a new CSV file
    false_better_df.to_csv(output_false_positives, index=False)

    # Save the modified DataFrame to a new CSV file
    df.to_csv(output, index=False)

    return len(false_better_df)



In [17]:


for d in datasets : 
    file_path = join(root, "test", f"positive_insig_5_{d}_ttest_results_epochs_intervals.csv")
    result_path = join(root, "test", f"positive_insig_2_5_{d}_ttest_results_epochs_intervals.csv")
    output_path = join(root, "test", f"{d}_false_positives_with_regard_to_fairness.csv")

    size = false_positives_fairness(path=file_path, thr_di=0.01, thr_other=0.01, 
                               output_false_positives=output_path,
                               output=result_path)

    print(d, size) 

ars 79
dc 26
kdd 117
adult 57
mobiact 15
celeba 55
fairface 49
audiomnist 26
voxceleb 8


In [18]:
import pandas as pd

def concat_filter_csv(file_paths, output):
    # Définition des colonnes communes à extraire de chaque fichier
    common_columns = ['dataset', 'model', 'system', 'ratio', 'test_time', 'test_acc', 
                      'test_f1', 'test_precision', 'test_recall', 'test_SPD_gender', 
                      'test_EOD_gender', 'test_AOD_gender', 'test_DI_gender', 'test_DcI_gender', 
                      'test_SPD_age', 'test_EOD_age', 'test_AOD_age', 'test_DI_age', 'test_DcI_age', 
                      'test_SPD_race', 'test_EOD_race', 'test_AOD_race', 'test_DI_race', 'test_DcI_race']

    # Liste pour stocker les DataFrames
    df_list = []

    # Boucle sur chaque chemin de fichier dans la liste
    for path in file_paths:
        # Lecture du fichier CSV
        df = pd.read_csv(path)
        # Extraction des colonnes communes
        df_common = df[common_columns]
        # Ajout du DataFrame extrait à la liste
        df_list.append(df_common)

    # Concaténation des DataFrames
    result_df = pd.concat(df_list, ignore_index=True)

    # Remplacement de 'significant-p' par 'significant'
    result_df.replace('significant-p2', 'significant', inplace=True)
    result_df.to_csv(output + '-w-significant-p2.csv', index=False)

    # Filtrage des lignes où 'ratio' est '0.5' ou 'system' est 'Random'
    result_df = result_df[(result_df['ratio'] != 0.5) & (result_df['system'] != 'Full')]
    result_df.to_csv(output+'-2c-w-random.csv', index=False)

    result_df = result_df[(result_df['system'] != 'Random')]
    result_df.to_csv(output+'-2C-wo-random.csv', index=False)

    return result_df


In [19]:

file_paths = []
for d in datasets : 
    file_paths.append(join(root, "test", f"positive_insig_2_5_{d}_ttest_results_epochs_intervals.csv")) 

output = join(root, "test", 'ttest_5')
concat_filter_csv(file_paths, output)

Unnamed: 0,dataset,model,system,ratio,test_time,test_acc,test_f1,test_precision,test_recall,test_SPD_gender,...,test_SPD_age,test_EOD_age,test_AOD_age,test_DI_age,test_DcI_age,test_SPD_race,test_EOD_race,test_AOD_race,test_DI_race,test_DcI_race
1,ars,Logreg,CRAIGPB,0.05,positive,negative,negative,negative,negative,positive,...,,,,,,,,,,
2,ars,Logreg,CRAIGPB,0.10,positive,negative,negative,negative,negative,insignificant-p2,...,,,,,,,,,,
3,ars,Logreg,CRAIGPB,0.20,positive,negative,negative,negative,negative,insignificant,...,,,,,,,,,,
4,ars,Logreg,CRAIGPB,0.30,positive,negative,negative,negative,negative,insignificant-p2,...,,,,,,,,,,
5,ars,Logreg,GLISTERPB,0.05,positive,negative,negative,negative,negative,positive,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
348,voxceleb,AudioCNN,GLISTERPB,0.30,positive,negative,negative,negative,negative,,...,,,,,,insignificant,positive,positive,insignificant,positive
349,voxceleb,AudioCNN,GradMatchPB,0.05,positive,negative,negative,negative,negative,,...,,,,,,negative,positive,positive,negative,positive
350,voxceleb,AudioCNN,GradMatchPB,0.10,positive,negative,negative,negative,negative,,...,,,,,,negative,positive,positive,negative,positive
351,voxceleb,AudioCNN,GradMatchPB,0.20,positive,negative,negative,negative,negative,,...,,,,,,negative,positive,positive,negative,positive


Utility constraint: the impact is considered significant (positive or negative) if the difference between the full metric and the selection metric exceeds 1%.

In [20]:
import pandas as pd

def false_positives_utility(path=None,thr_acc=0.01, thr=0.01, output_false_positives=None, output=None):
    # Load the CSV file
    df = pd.read_csv(path)

    # Initialize DataFrames
    false_better_df = pd.DataFrame()

    # Columns to check for the value 'positive'
    columns_to_modify = [
        'test_acc', 'test_f1', 'test_precision', 'test_recall'
    ]

    # Columns with the average metrics
    columns_to_check = ['Accuracy_avg', 'F1_score_avg', 'Precision_avg', 'Recall_avg'
        
    ]

    
    # Initialize references dictionary
    references = {col: None for col in columns_to_check if col in df.columns}

    # Iterate through DataFrame
    for index, row in df.iterrows():
        if row['system'] == 'Full':
            references = {col: None for col in columns_to_check if col in df.columns}
            # Set the reference values for metrics when system is 'Full'
            for col in columns_to_check:
                if col in df.columns:
                    references[col] = row[col]
        else:
            # Check other rows where system is not 'Full'
            for test_col, avg_col in zip(columns_to_modify, columns_to_check):
                if test_col in df.columns and avg_col in df.columns and row[test_col] == 'positive':
                    threshold = thr_acc if (('Accuracy_avg' in avg_col)) else thr  # Set threshold based on DI metrics

                    if references.get(avg_col) is not None and (abs(references[avg_col] - row[avg_col]) < threshold):
                        df.at[index, test_col] = 'insignificant-pu'
                        #if row['system'] != 'Random' and row['ratio'] != 0.5:
                        if row['ratio'] != 0.5:
                            # Add row to 'false better' DataFrame if certain conditions are met
                            false_better_df = pd.concat([false_better_df, pd.DataFrame([row])], ignore_index=True)

    # Save 'false better' rows to a new CSV file
    false_better_df.to_csv(output_false_positives, index=False)

    # Save the modified DataFrame to a new CSV file
    df.to_csv(output, index=False)

    return len(false_better_df)



In [21]:
for d in datasets : 
    file_path = join(root, "test", f"positive_insig_2_5_{d}_ttest_results_epochs_intervals.csv")
    result_path = join(root, "test", f"positive_insig_3_5_{d}_ttest_results_epochs_intervals.csv")
    output_path = join(root, "test", f"{d}_false_positives_utility_with_regard_to_utility.csv")

    size = false_positives_utility(path=file_path, thr_acc=0.01, thr=0.01, 
                                   output_false_positives=output_path,
                                   output=result_path)

    print(d, size)

ars 7
dc 8
kdd 1
adult 5
mobiact 2
celeba 0
fairface 0
audiomnist 11
voxceleb 0


Conditions on both positive and negative impact 

In [22]:
import pandas as pd

def false_positives_fairness(path=None, thr_di=1, thr_other=0.01, output_false_positives=None, output=None):
    # Load the CSV file
    df = pd.read_csv(path)

    # Initialize DataFrames
    false_better_df = pd.DataFrame()

    # Columns to check for the value 'positive'
    columns_to_modify = [
        'test_SPD_gender', 'test_EOD_gender', 'test_AOD_gender', 'test_DI_gender', 'test_DcI_gender', 
        'test_SPD_age', 'test_EOD_age', 'test_AOD_age', 'test_DI_age', 'test_DcI_age', 
        'test_SPD_race', 'test_EOD_race', 'test_AOD_race', 'test_DI_race', 'test_DcI_race'
    ]

    # Columns with the average metrics
    columns_to_check = [
        'SPD_gender_avg', 'EOD_gender_avg', 'AOD_gender_avg', 'DI_gender_avg', 'DcI_gender_avg',
        'SPD_age_avg', 'EOD_age_avg', 'AOD_age_avg', 'DI_age_avg', 'DcI_age_avg',
        'SPD_race_avg', 'EOD_race_avg', 'AOD_race_avg', 'DI_race_avg', 'DcI_race_avg'
    ]

    # Initialize references dictionary
    references = {col: None for col in columns_to_check if col in df.columns}

    # Iterate through DataFrame
    for index, row in df.iterrows():
        if row['system'] == 'Full':
            references = {col: None for col in columns_to_check if col in df.columns}
            # Set the reference values for metrics when system is 'Full'
            for col in columns_to_check:
                if col in df.columns:
                    references[col] = row[col]
        else:
            # Check other rows where system is not 'Full'
            for test_col, avg_col in zip(columns_to_modify, columns_to_check):
                if test_col in df.columns and avg_col in df.columns and row[test_col] == 'negative':
                    threshold = thr_di if (('DI_gender_avg' in avg_col) or ('DI_age_avg' in avg_col) or ('DI_race_avg' in avg_col)) else thr_other  # Set threshold based on DI metrics
                    if references.get(avg_col) is not None and (abs(references[avg_col] - row[avg_col]) < threshold):
                        df.at[index, test_col] = 'insignificant-p3'
                        #if row['system'] != 'Random' and row['ratio'] != 0.5:
                        if row['ratio'] != 0.5:
                            # Add row to 'false better' DataFrame if certain conditions are met
                            false_better_df = pd.concat([false_better_df, pd.DataFrame([row])], ignore_index=True)

    # Save 'false better' rows to a new CSV file
    false_better_df.to_csv(output_false_positives, index=False)

    # Save the modified DataFrame to a new CSV file
    df.to_csv(output, index=False)

    return len(false_better_df)



In [23]:
for d in datasets : 
    file_path = join(root, "test", f"positive_insig_3_5_{d}_ttest_results_epochs_intervals.csv")
    result_path = join(root, "test", f"positive_insig_4_5_{d}_ttest_results_epochs_intervals.csv")
    output_path = join(root, "test", f"{d}_false_positives_with_regard_to_negative_fairness.csv")

    size = false_positives_fairness(path=file_path, thr_di=0.01, thr_other=0.01, 
                                output_false_positives=output_path,
                                output=result_path)
    
    print(d, size)


ars 94
dc 15
kdd 31
adult 34
mobiact 45
celeba 103
fairface 47
audiomnist 84
voxceleb 0


In [24]:
import pandas as pd

def false_positives_utility(path=None,thr_acc=0.01, thr=0.01, output_false_positives=None, output=None):
    # Load the CSV file
    df = pd.read_csv(path)

    # Initialize DataFrames
    false_better_df = pd.DataFrame()

    # Columns to check for the value 'positive'
    columns_to_modify = [
        'test_acc', 'test_f1', 'test_precision', 'test_recall'
    ]

    # Columns with the average metrics
    columns_to_check = ['Accuracy_avg', 'F1_score_avg', 'Precision_avg', 'Recall_avg'
        
    ]


    # Initialize references dictionary
    references = {col: None for col in columns_to_check if col in df.columns}

    # Iterate through DataFrame
    for index, row in df.iterrows():
        if row['system'] == 'Full':
            references = {col: None for col in columns_to_check if col in df.columns}
            # Set the reference values for metrics when system is 'Full'
            for col in columns_to_check:
                if col in df.columns:
                    references[col] = row[col]
        else:
            # Check other rows where system is not 'Full'
            for test_col, avg_col in zip(columns_to_modify, columns_to_check):
                if test_col in df.columns and avg_col in df.columns and row[test_col] == 'negative':
                    threshold = thr_acc if (('Accuracy_avg' in avg_col)) else thr  # Set threshold based on DI metrics

                    if references.get(avg_col) is not None and (abs(references[avg_col] - row[avg_col]) < threshold):
                        df.at[index, test_col] = 'insignificant-pu2'
                        #if row['system'] != 'Random' and row['ratio'] != 0.5:
                        if row['ratio'] != 0.5:
                            # Add row to 'false better' DataFrame if certain conditions are met
                            false_better_df = pd.concat([false_better_df, pd.DataFrame([row])], ignore_index=True)

    # Save 'false better' rows to a new CSV file
    false_better_df.to_csv(output_false_positives, index=False)

    # Save the modified DataFrame to a new CSV file
    df.to_csv(output, index=False)

    return len(false_better_df)



In [25]:
for d in datasets : 
    file_path = join(root, "test", f"positive_insig_4_5_{d}_ttest_results_epochs_intervals.csv")
    result_path = join(root, "test", f"positive_insig_5_5_{d}_ttest_results_epochs_intervals.csv")
    output_path = join(root, "test", f"{d}_false_negatives_utility_with_regard_to_utility.csv")

    size = false_positives_utility(path=file_path, thr_acc=0.01, thr=0.01, 
                                    output_false_positives=output_path,
                                    output=result_path)
    
    print(d, size)


ars 144
dc 40
kdd 59
adult 62
mobiact 11
celeba 57
fairface 1
audiomnist 67
voxceleb 18


Concat outputs 

In [26]:
import pandas as pd

def concat_filter_csv(file_paths, output):
    # Définition des colonnes communes à extraire de chaque fichier
    common_columns = ['dataset', 'model', 'system', 'ratio', 'test_time', 'test_acc', 
                      'test_f1', 'test_precision', 'test_recall', 'test_SPD_gender', 
                      'test_EOD_gender', 'test_AOD_gender', 'test_DI_gender', 'test_DcI_gender', 
                      'test_SPD_age', 'test_EOD_age', 'test_AOD_age', 'test_DI_age', 'test_DcI_age', 
                      'test_SPD_race', 'test_EOD_race', 'test_AOD_race', 'test_DI_race', 'test_DcI_race']

    # Liste pour stocker les DataFrames
    df_list = []

    # Boucle sur chaque chemin de fichier dans la liste
    for path in file_paths:
        # Lecture du fichier CSV
        df = pd.read_csv(path)
        # Extraction des colonnes communes
        df_common = df[common_columns]
        # Ajout du DataFrame extrait à la liste
        df_list.append(df_common)

    # Concaténation des DataFrames
    result_df = pd.concat(df_list, ignore_index=True)

    # Remplacement de 'significant-p' par 'significant'
    result_df.to_csv(output + '-w-significant-p-5C.csv', index=False)

    result_df.replace('insignificant-p', 'insignificant', inplace=True)

    result_df.replace('insignificant-p2', 'insignificant', inplace=True)

    result_df.replace('insignificant-p3', 'insignificant', inplace=True)

    result_df.replace('insignificant-pu', 'insignificant', inplace=True)

    result_df.replace('insignificant-pu2', 'insignificant', inplace=True)

    result_df.to_csv(output + '-wo-significant-p-5C.csv', index=False)

    # Filtrage des lignes où 'ratio' est '0.5' ou 'system' est 'Random'
    result_df = result_df[(result_df['ratio'] != 0.5) & (result_df['system'] != 'Full')]
    result_df.to_csv(output+'-5c-w-random.csv', index=False)

    result_df = result_df[(result_df['system'] != 'Random')]
    result_df.to_csv(output+'-5C-wo-random.csv', index=False)

    return result_df


In [27]:
file_paths = []
for d in datasets : 
    file_paths.append(join(root, "test", f"positive_insig_5_5_{d}_ttest_results_epochs_intervals.csv"))

output = join(root, "test", "ttest_5")
concat_filter_csv(file_paths, output)

Unnamed: 0,dataset,model,system,ratio,test_time,test_acc,test_f1,test_precision,test_recall,test_SPD_gender,...,test_SPD_age,test_EOD_age,test_AOD_age,test_DI_age,test_DcI_age,test_SPD_race,test_EOD_race,test_AOD_race,test_DI_race,test_DcI_race
1,ars,Logreg,CRAIGPB,0.05,positive,negative,negative,insignificant,negative,positive,...,,,,,,,,,,
2,ars,Logreg,CRAIGPB,0.10,positive,insignificant,insignificant,insignificant,insignificant,insignificant,...,,,,,,,,,,
3,ars,Logreg,CRAIGPB,0.20,positive,insignificant,insignificant,insignificant,insignificant,insignificant,...,,,,,,,,,,
4,ars,Logreg,CRAIGPB,0.30,positive,insignificant,insignificant,insignificant,insignificant,insignificant,...,,,,,,,,,,
5,ars,Logreg,GLISTERPB,0.05,positive,negative,negative,negative,negative,positive,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
348,voxceleb,AudioCNN,GLISTERPB,0.30,positive,negative,negative,negative,negative,,...,,,,,,insignificant,positive,positive,insignificant,positive
349,voxceleb,AudioCNN,GradMatchPB,0.05,positive,negative,negative,negative,negative,,...,,,,,,negative,positive,positive,negative,positive
350,voxceleb,AudioCNN,GradMatchPB,0.10,positive,negative,negative,negative,negative,,...,,,,,,negative,positive,positive,negative,positive
351,voxceleb,AudioCNN,GradMatchPB,0.20,positive,negative,negative,negative,negative,,...,,,,,,negative,positive,positive,negative,positive
