In [1]:
import numpy as np
import pandas as pd

In [2]:
from evaluate import calibrate_results, generate_everything_all_epochs

In [8]:
############################################
# select best model/epoch based on whole group performance
############################################

def select_model(exp_folder_name, epochs):
    """
    Select the best epoch of one run from its whole-group validation metrics.

    For every epoch e in 1..epochs this reads
    ``<exp_folder_name>/epo <e>/valid_whole_group_threshold_summary.csv`` and
    keeps its 'Whole' row. The three epochs with the highest Bal_Acc are
    shortlisted and, among those, the one with the highest PR_C1 is selected.

    exp_folder_name: the output directory for the experiment (one run);
        it must contain one ``epo <e>`` sub-folder per epoch
    epochs: number of epochs; epoch folders are numbered 1..epochs

    Files written into exp_folder_name:
        all_epoch_metrics.csv  - the 'Whole' row of every epoch, indexed by epoch
        best_epoch_number.txt  - the selected epoch number (no trailing newline)

    Raises ValueError if epochs < 1 or if a summary file has no 'Whole' row.
    """
    if epochs < 1:
        raise ValueError('epochs must be >= 1, got ' + str(epochs))

    all_epochs = []

    for e in range(1, epochs + 1):
        # select based on validation set, whole group threshold
        summary_path = exp_folder_name + '/epo ' + str(e) + '/valid_whole_group_threshold_summary.csv'
        df = pd.read_csv(summary_path)
        # base on whole group for now; the group label lives in the unnamed first CSV column
        whole = df.loc[df['Unnamed: 0'] == 'Whole']
        if whole.empty:
            raise ValueError("no 'Whole' row found in " + summary_path)
        all_epochs.append(whole.values[0])

    cols = ['Group', 'Rec_C1', 'Prec_C1', 'PR_C1', 'F1_C1', 'Rec_C0', 'Prec_C0', 'PR_C0', 'F1_C0', 'Acc', 'Bal_Acc', 'ROC', 'threshold']
    df_all_epochs = pd.DataFrame(data=all_epochs, index=range(1, epochs + 1), columns=cols)
    df_all_epochs.to_csv(exp_folder_name + '/all_epoch_metrics.csv')

    # Two-stage pick: shortlist by balanced accuracy, then break the tie on PR_C1.
    df_top_3 = df_all_epochs.nlargest(3, 'Bal_Acc')
    df_best = df_top_3.nlargest(1, 'PR_C1')
    best_index = df_best.index.tolist()[0]

    # Context manager guarantees the handle is closed even if the write fails.
    with open(exp_folder_name + '/best_epoch_number.txt', 'w') as output:
        output.write(str(best_index))

In [None]:
def average_5_runs(folder_name):
    """
    Average the whole-group-threshold metrics of the 5 runs of an experiment
    and calculate their standard deviation.

    For each run folder ``<folder_name>/<basename>-<i>`` (i = 1..5) the epoch
    stored in ``best_epoch_number.txt`` is read, and that epoch's
    ``valid_whole_group_threshold_summary.csv`` and
    ``test_whole_group_threshold_summary.csv`` are loaded. The element-wise
    mean and standard deviation (np.std default, i.e. population std) across
    the runs are written to folder_name as
    ``{valid,test}_whole_group_threshold_summary_{mean,std}.csv``.

    folder_name: the folder containing the files for each of the 5 runs

    Each summary CSV must hold 15 rows x 12 metric columns once the label
    column 'Unnamed: 0' is dropped. The output files are labelled with the
    hard-coded `rows` / `cols` below, not with labels read from the inputs.
    """
    cols = ['Rec_C1', 'Prec_C1', 'PR_C1', 'F1_C1', 'Rec_C0', 'Prec_C0', 'PR_C0', 'F1_C0', 'Acc', 'Bal_Acc', 'ROC', 'threshold']
    rows = ['Whole', 'Gender_Male', 'Gender_Female', 'Ethnicity_White', 'Ethnicity_Black',
            'Ethnicity_Hispanic', 'Ethnicity_Asian',
            'Age<30', '30<=Age<40', '40<=Age<50',
            '50<=Age<60', '60<=Age<70',
            '70<=Age<80', '80<=Age<90', 'Age>=90']
    shape = (len(rows), len(cols))

    # Tolerate a trailing '/' on folder_name, which would otherwise give an empty prefix.
    run_prefix = folder_name.rstrip('/').split('/')[-1]

    valid_arr = []
    test_arr = []

    for i in range(1, 6):
        current_folder = folder_name + '/' + run_prefix + '-' + str(i)

        # Close the handle deterministically; strip so a trailing newline in the
        # file does not end up inside the epoch folder path.
        with open(current_folder + '/best_epoch_number.txt', 'r') as file:
            best_epo = file.read().strip()

        epoch_folder = current_folder + '/epo ' + best_epo
        df = pd.read_csv(epoch_folder + '/valid_whole_group_threshold_summary.csv')
        df_test = pd.read_csv(epoch_folder + '/test_whole_group_threshold_summary.csv')

        # The unnamed first column carries the group labels, not metrics.
        df = df.drop(columns=['Unnamed: 0'])
        df_test = df_test.drop(columns=['Unnamed: 0'])

        valid_arr.append(df.values.flatten())
        test_arr.append(df_test.values.flatten())

    # Statistics are taken across runs (axis 0), then restored to the table shape.
    valid_mean = np.mean(valid_arr, axis=0).reshape(shape)
    valid_std = np.std(valid_arr, axis=0).reshape(shape)
    test_mean = np.mean(test_arr, axis=0).reshape(shape)
    test_std = np.std(test_arr, axis=0).reshape(shape)

    df_valid_mean = pd.DataFrame(data=valid_mean, index=rows, columns=cols)
    df_valid_std = pd.DataFrame(data=valid_std, index=rows, columns=cols)
    df_test_mean = pd.DataFrame(data=test_mean, index=rows, columns=cols)
    df_test_std = pd.DataFrame(data=test_std, index=rows, columns=cols)

    df_valid_mean.to_csv(folder_name + '/valid_whole_group_threshold_summary_mean.csv')
    df_valid_std.to_csv(folder_name + '/valid_whole_group_threshold_summary_std.csv')
    df_test_mean.to_csv(folder_name + '/test_whole_group_threshold_summary_mean.csv')
    df_test_std.to_csv(folder_name + '/test_whole_group_threshold_summary_std.csv')

In [None]:
def average_5_runs_subgroup(folder_name):
    """
    Average the subgroup-threshold metrics of the 5 runs of an experiment
    and calculate their standard deviation.

    For each run folder ``<folder_name>/<basename>-<i>`` (i = 1..5) the epoch
    stored in ``best_epoch_number.txt`` is read, and that epoch's
    ``valid_subgroup_threshold_summary.csv`` and
    ``test_subgroup_threshold_summary.csv`` are loaded. The element-wise mean
    and standard deviation (np.std default, i.e. population std) across the
    runs are written to folder_name as
    ``{valid,test}_subgroup_threshold_summary_{mean,std}.csv``.

    folder_name: the folder containing the files for each of the 5 runs

    Each summary CSV must hold 15 rows x 12 metric columns once the label
    column 'Unnamed: 0' is dropped. The output files are labelled with the
    hard-coded `rows` / `cols` below, not with labels read from the inputs.
    """
    cols = ['Rec_C1', 'Prec_C1', 'PR_C1', 'F1_C1', 'Rec_C0', 'Prec_C0', 'PR_C0', 'F1_C0', 'Acc', 'Bal_Acc', 'ROC', 'threshold']
    rows = ['Whole', 'Gender_Male', 'Gender_Female', 'Ethnicity_White', 'Ethnicity_Black',
            'Ethnicity_Hispanic', 'Ethnicity_Asian',
            'Age<30', '30<=Age<40', '40<=Age<50',
            '50<=Age<60', '60<=Age<70',
            '70<=Age<80', '80<=Age<90', 'Age>=90']
    shape = (len(rows), len(cols))

    # Tolerate a trailing '/' on folder_name, which would otherwise give an empty prefix.
    run_prefix = folder_name.rstrip('/').split('/')[-1]

    valid_arr = []
    test_arr = []

    for i in range(1, 6):
        current_folder = folder_name + '/' + run_prefix + '-' + str(i)

        # Close the handle deterministically; strip so a trailing newline in the
        # file does not end up inside the epoch folder path.
        with open(current_folder + '/best_epoch_number.txt', 'r') as file:
            best_epo = file.read().strip()

        epoch_folder = current_folder + '/epo ' + best_epo
        df = pd.read_csv(epoch_folder + '/valid_subgroup_threshold_summary.csv')
        df_test = pd.read_csv(epoch_folder + '/test_subgroup_threshold_summary.csv')

        # The unnamed first column carries the group labels, not metrics.
        df = df.drop(columns=['Unnamed: 0'])
        df_test = df_test.drop(columns=['Unnamed: 0'])

        valid_arr.append(df.values.flatten())
        test_arr.append(df_test.values.flatten())

    # Statistics are taken across runs (axis 0), then restored to the table shape.
    valid_mean = np.mean(valid_arr, axis=0).reshape(shape)
    valid_std = np.std(valid_arr, axis=0).reshape(shape)
    test_mean = np.mean(test_arr, axis=0).reshape(shape)
    test_std = np.std(test_arr, axis=0).reshape(shape)

    df_valid_mean = pd.DataFrame(data=valid_mean, index=rows, columns=cols)
    df_valid_std = pd.DataFrame(data=valid_std, index=rows, columns=cols)
    df_test_mean = pd.DataFrame(data=test_mean, index=rows, columns=cols)
    df_test_std = pd.DataFrame(data=test_std, index=rows, columns=cols)

    df_valid_mean.to_csv(folder_name + '/valid_subgroup_threshold_summary_mean.csv')
    df_valid_std.to_csv(folder_name + '/valid_subgroup_threshold_summary_std.csv')
    df_test_mean.to_csv(folder_name + '/test_subgroup_threshold_summary_mean.csv')
    df_test_std.to_csv(folder_name + '/test_subgroup_threshold_summary_std.csv')

In [6]:
def output_5_runs(folder_name):
    """
    Collect the whole-group-threshold summaries of all 5 runs into one file
    per split.

    For each run folder ``<folder_name>/<basename>-<i>`` (i = 1..5), the epoch
    named in ``best_epoch_number.txt`` is looked up and the raw text of that
    epoch's validation and test summary CSVs is gathered. The texts are joined
    with a blank line between runs and written to
    ``valid_5_run_results.csv`` and ``test_5_run_results.csv`` in folder_name.

    folder_name: the folder containing the files for each of the 5 runs
    """
    def _read_text(path):
        # Return the full contents of a text file.
        with open(path, 'r') as handle:
            return handle.read()

    run_stem = folder_name + '/' + folder_name.split('/')[-1] + '-'
    valid_chunks, test_chunks = [], []

    for run_no in range(1, 6):
        run_dir = run_stem + str(run_no)
        epoch_dir = run_dir + '/epo ' + _read_text(run_dir + '/best_epoch_number.txt')
        valid_chunks.append(_read_text(epoch_dir + '/valid_whole_group_threshold_summary.csv'))
        test_chunks.append(_read_text(epoch_dir + '/test_whole_group_threshold_summary.csv'))

    # Validation file is written first, then the test file.
    targets = (
        ('/valid_5_run_results.csv', valid_chunks),
        ('/test_5_run_results.csv', test_chunks),
    )
    for out_name, chunks in targets:
        with open(folder_name + out_name, 'w') as sink:
            sink.write('\n\n'.join(chunks))

In [9]:
def output_5_runs_subgroup(folder_name):
    """
    Collect the subgroup-threshold summaries of all 5 runs into one file per
    split.

    For each run folder ``<folder_name>/<basename>-<i>`` (i = 1..5), the epoch
    named in ``best_epoch_number.txt`` is looked up and the raw text of that
    epoch's validation and test subgroup summary CSVs is gathered. The texts
    are joined with a blank line between runs and written to
    ``valid_5_run_results_subgroup.csv`` and
    ``test_5_run_results_subgroup.csv`` in folder_name.

    folder_name: the folder containing the files for each of the 5 runs
    """
    def _read_text(path):
        # Return the full contents of a text file.
        with open(path, 'r') as handle:
            return handle.read()

    run_stem = folder_name + '/' + folder_name.split('/')[-1] + '-'
    valid_chunks, test_chunks = [], []

    for run_no in range(1, 6):
        run_dir = run_stem + str(run_no)
        epoch_dir = run_dir + '/epo ' + _read_text(run_dir + '/best_epoch_number.txt')
        valid_chunks.append(_read_text(epoch_dir + '/valid_subgroup_threshold_summary.csv'))
        test_chunks.append(_read_text(epoch_dir + '/test_subgroup_threshold_summary.csv'))

    # Validation file is written first, then the test file.
    targets = (
        ('/valid_5_run_results_subgroup.csv', valid_chunks),
        ('/test_5_run_results_subgroup.csv', test_chunks),
    )
    for out_name, chunks in targets:
        with open(folder_name + out_name, 'w') as sink:
            sink.write('\n\n'.join(chunks))

In [None]:
# Disabled draft: the function below is wrapped in a bare triple-quoted string, so it is
# never defined or executed by this cell.
# NOTE(review) before re-enabling:
#   - "Ethnicitiy_Hispanic" is spelled differently from the 'Ethnicity_Hispanic' label used
#     elsewhere in this notebook; if the CSV uses the latter, df.loc[...] would fail - confirm.
#   - race_ratio_std / age_ratio_std are computed with np.mean, not np.std.
#   - the best_epoch_number.txt handle is opened but never closed.
"""

def average_disparity_whole_group(folder_name):
    race_arr = []
    age_arr = []
    
    for i in range(1, 6):
        current_folder = folder_name + '/' + folder_name.split('/')[-1] + '-' + str(i)
        
        file = open(current_folder + '/best_epoch_number.txt', 'r')
        best_epo = file.read()
        df = pd.read_csv(current_folder + '/epo ' + best_epo + '/test_whole_group_threshold_summary.csv')
        
        df.set_index("Unnamed: 0", inplace=True)
        
        df_race = df.loc[["Ethnicity_White", "Ethnicity_Black", "Ethnicitiy_Hispanic", "Ethnicity_Asian"]]
        df_age = df.loc[['Age<30', '30<=Age<40', '40<=Age<50', '50<=Age<60', '60<=Age<70', '70<=Age<80', '80<=Age<90', 'Age>=90']]
        
        race_ratio = []
        age_ratio = []
        for m in ['Rec_C1', 'Prec_C1', 'PR_C1', 'F1_C1', 'Rec_C0', 'Prec_C0', 'PR_C0', 'F1_C0', 'Acc', 'Bal_Acc', 'ROC']:
            race_ratio.append(df_race[m].max() / df_race[m].min())
            age_ratio.append(df_age[m].max() / df_age[m].min())
            
        race_arr.append(race_ratio)
        age_arr.append(age_ratio)
        
    race_ratio_mean = np.mean(race_arr, axis=0)
    race_ratio_std = np.mean(race_arr, axis=0)
    age_ratio_mean = np.mean(age_arr, axis=0)
    age_ratio_std = np.mean(age_arr, axis=0)
        
    cols = ['Rec_C1', 'Prec_C1', 'PR_C1', 'F1_C1', 'Rec_C0', 'Prec_C0', 'PR_C0', 'F1_C0', 'Acc', 'Bal_Acc', 'ROC']
    rows = ['Race', 'Age']

    df_mean = pd.DataFrame(data=[race_ratio_mean, age_ratio_mean], index=rows, columns=cols)
    df_std = pd.DataFrame(data=[race_ratio_std, age_ratio_std], index=rows, columns=cols)
    
    df_mean.to_csv(folder_name + '/disparity_mean.csv')
    df_std.to_csv(folder_name + '/disparity_std.csv')
"""

In [7]:
# Concatenate the 5 runs' best-epoch whole-group summaries for bcs-original
# (requires best_epoch_number.txt in every run folder).
output_5_runs('../experiments/bcs-original')

In [10]:
# Same as above but for the subgroup-threshold summaries of bcs-original.
output_5_runs_subgroup('../experiments/bcs-original')

In [8]:
# Concatenate the 5 runs' whole-group summaries for bcs-smote and bcs-under.
output_5_runs('../experiments/bcs-smote')
output_5_runs('../experiments/bcs-under')

In [11]:
# Concatenate the 5 runs' whole-group summaries for the remaining bcs experiments.
output_5_runs('../experiments/bcs-adasyn')
output_5_runs('../experiments/bcs-gamma')
output_5_runs('../experiments/bcs-over')
output_5_runs('../experiments/bcs-nearmiss1')
output_5_runs('../experiments/bcs-nearmiss3')
output_5_runs('../experiments/bcs-distant')

In [13]:
# Concatenate the 5 runs' whole-group summaries for every lcs experiment;
# the subgroup-threshold variant is produced for lcs-original only.
output_5_runs('../experiments/lcs-original')
output_5_runs_subgroup('../experiments/lcs-original')
output_5_runs('../experiments/lcs-smote')
output_5_runs('../experiments/lcs-under')
output_5_runs('../experiments/lcs-adasyn')
output_5_runs('../experiments/lcs-gamma')
output_5_runs('../experiments/lcs-over')
output_5_runs('../experiments/lcs-nearmiss1')
output_5_runs('../experiments/lcs-nearmiss3')
output_5_runs('../experiments/lcs-distant')

In [14]:
# Concatenate the 5 runs' whole-group summaries for lcs-reweight.
output_5_runs('../experiments/lcs-reweight')

In [12]:
# Concatenate the 5 runs' whole-group summaries for bcs-reweight.
output_5_runs('../experiments/bcs-reweight')

In [None]:
# debugging and running

In [None]:
# Single-folder evaluation of bcs-smote-for-loss over 25 epochs: call the two helpers
# imported from evaluate, then select_model (writes best_epoch_number.txt).
# Note: `folder` is a notebook-global that other cells may reuse.
folder = '../experiments/bcs-smote-for-loss'
calibrate_results(folder, 25)
generate_everything_all_epochs(folder, 25, 'breast')
select_model(folder, 25)

In [7]:
# Relies on the notebook-global `folder` set by whichever cell ran before this one.
# NOTE(review): select_model's print calls are commented out, so the tables saved
# below this cell presumably come from an earlier version of the function.
select_model(folder, 25)

   Group    Rec_C1   Prec_C1     PR_C1     F1_C1    Rec_C0   Prec_C0  \
2  Whole  0.642449  0.549227  0.655363  0.592192  0.923491  0.946810   
7  Whole  0.668464  0.484591  0.623770  0.561867  0.896837  0.949091   
1  Whole  0.611358  0.586606  0.663839  0.598726  0.937486  0.943261   

      PR_C0     F1_C0       Acc   Bal_Acc       ROC  threshold  
2  0.978219  0.935005  0.887879  0.782970  0.887582       0.25  
7  0.974571  0.922225  0.867899  0.782651  0.872706       0.20  
1  0.980231  0.940364  0.896161  0.774422  0.894143       0.25  
   Group    Rec_C1   Prec_C1     PR_C1     F1_C1    Rec_C0   Prec_C0  \
1  Whole  0.611358  0.586606  0.663839  0.598726  0.937486  0.943261   

      PR_C0     F1_C0       Acc   Bal_Acc       ROC  threshold  
1  0.980231  0.940364  0.896161  0.774422  0.894143       0.25  


In [None]:
# Single-folder evaluation of lcs-smote-for-loss over 25 epochs: call the two helpers
# imported from evaluate, then select_model (writes best_epoch_number.txt).
folder = '../experiments/lcs-smote-for-loss'
calibrate_results(folder, 25)
generate_everything_all_epochs(folder, 25, 'lung')
select_model(folder, 25)

In [None]:
# Post-process the 5 bcs-adasyn runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/bcs-adasyn/bcs-adasyn-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'breast')
    select_model(folder, 25)

In [None]:
# Post-process the 5 bcs-over runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/bcs-over/bcs-over-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'breast')
    select_model(folder, 25)

In [None]:
# Post-process the 5 bcs-smote runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/bcs-smote/bcs-smote-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'breast')
    select_model(folder, 25)

In [None]:
# Post-process the 5 bcs-under runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/bcs-under/bcs-under-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'breast')
    select_model(folder, 25)

In [None]:
# Post-process the 5 bcs-nearmiss1 runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/bcs-nearmiss1/bcs-nearmiss1-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'breast')
    select_model(folder, 25)

In [None]:
# Post-process the 5 bcs-nearmiss3 runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/bcs-nearmiss3/bcs-nearmiss3-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'breast')
    select_model(folder, 25)

In [None]:
# Post-process the 5 bcs-distant runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/bcs-distant/bcs-distant-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'breast')
    select_model(folder, 25)

In [None]:
# Partial re-run: post-process only runs 4 and 5 of bcs-nearmiss3 (range(4, 6)).
# NOTE(review): duplicates the full bcs-nearmiss3 loop for those two runs.
for i in range(4, 6):
    folder = '../experiments/bcs-nearmiss3/bcs-nearmiss3-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'breast')
    select_model(folder, 25)

In [None]:
# Write mean/std summary CSVs across the 5 bcs-distant runs
# (requires best_epoch_number.txt in every run folder).
average_5_runs('../experiments/bcs-distant')

In [None]:
# Write mean/std summary CSVs across the 5 runs of each bcs resampling experiment.
average_5_runs('../experiments/bcs-under')
average_5_runs('../experiments/bcs-over')
average_5_runs('../experiments/bcs-smote')
average_5_runs('../experiments/bcs-adasyn')
average_5_runs('../experiments/bcs-nearmiss1')
average_5_runs('../experiments/bcs-nearmiss3')

In [None]:
# Recompute the bcs-nearmiss3 averages on its own (same call as in the batch cell).
average_5_runs('../experiments/bcs-nearmiss3')

In [None]:
# Post-process run 1 of bcs-original only, over 100 epochs (other cells use 25).
# NOTE(review): runs 2-5 of bcs-original are not processed anywhere in this notebook.
folder = '../experiments/bcs-original/bcs-original-1'
calibrate_results(folder, 100)
generate_everything_all_epochs(folder, 100, 'breast')
select_model(folder, 100)

In [None]:
# Write mean/std summary CSVs across the 5 bcs-original runs; every run folder must
# already contain best_epoch_number.txt and the chosen epoch's summary CSVs.
average_5_runs('../experiments/bcs-original')

In [None]:
# Post-process the 5 lcs-under runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/lcs-under/lcs-under-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'lung')
    select_model(folder, 25)

In [None]:
# Post-process the 5 lcs-over runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/lcs-over/lcs-over-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'lung')
    select_model(folder, 25)

In [None]:
# Post-process the 5 lcs-nearmiss1 runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/lcs-nearmiss1/lcs-nearmiss1-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'lung')
    select_model(folder, 25)

In [None]:
# Post-process the 5 lcs-nearmiss3 runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/lcs-nearmiss3/lcs-nearmiss3-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'lung')
    select_model(folder, 25)

In [None]:
# Write mean/std summary CSVs across the 5 runs of lcs-under and lcs-over.
average_5_runs('../experiments/lcs-under')
average_5_runs('../experiments/lcs-over')

In [None]:
# Post-process the 5 lcs-smote runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/lcs-smote/lcs-smote-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'lung')
    select_model(folder, 25)

In [None]:
# Post-process the 5 lcs-adasyn runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/lcs-adasyn/lcs-adasyn-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'lung')
    select_model(folder, 25)

In [None]:
# Write mean/std summary CSVs across the 5 runs of four lcs resampling experiments.
average_5_runs('../experiments/lcs-nearmiss1')
average_5_runs('../experiments/lcs-nearmiss3')
average_5_runs('../experiments/lcs-smote')
average_5_runs('../experiments/lcs-adasyn')

In [None]:
# Post-process the 5 lcs-original runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/lcs-original/lcs-original-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'lung')
    select_model(folder, 25)

In [None]:
# Post-process the 5 lcs-distant runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/lcs-distant/lcs-distant-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'lung')
    select_model(folder, 25)

In [None]:
# Write mean/std summary CSVs across the 5 runs of lcs-original and lcs-distant.
average_5_runs('../experiments/lcs-original')
average_5_runs('../experiments/lcs-distant')

In [None]:
# Post-process the 5 lcs-gamma runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/lcs-gamma/lcs-gamma-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'lung')
    select_model(folder, 25)

In [None]:
# Post-process the 5 bcs-gamma runs over 25 epochs: call the two helpers imported from
# evaluate, then select_model, which writes best_epoch_number.txt into each run folder.
for i in range(1, 6):
    folder = '../experiments/bcs-gamma/bcs-gamma-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'breast')
    select_model(folder, 25)

In [None]:
# Write mean/std summary CSVs across the 5 runs of lcs-gamma and bcs-gamma.
average_5_runs('../experiments/lcs-gamma')
average_5_runs('../experiments/bcs-gamma')

In [None]:
# Write mean/std CSVs of the subgroup-threshold summaries across the 5 bcs-original runs.
average_5_runs_subgroup('../experiments/bcs-original')

In [None]:
# Write mean/std CSVs of the subgroup-threshold summaries across the 5 lcs-original runs.
average_5_runs_subgroup('../experiments/lcs-original')

In [None]:
# lcs different param eval
# Folders are named lcs-original-<l>-<w>-<x>; this cell covers l='2', x='0.1' for
# w in {'30', '50'}: post-process the 5 runs of each setting, then average them.
for w in ['30', '50']:
    for i in range(1, 6):
        folder = '../experiments/lcs-original-different-parameters/lcs-original-2-' + w + '-0.1/lcs-original-2-' + w + '-0.1-' + str(i)
        calibrate_results(folder, 25)
        generate_everything_all_epochs(folder, 25, 'lung')
        select_model(folder, 25)
    average_5_runs('../experiments/lcs-original-different-parameters/lcs-original-2-' + w + '-0.1')

In [None]:
# lcs parameter sweep with the last name component fixed at '0.1':
# l in {'5', '10'} x w in {'20', '30', '50'}, 5 runs each.
# Only per-run post-processing happens here; average_5_runs is not called in this cell.
for l in ['5', '10']:
    for w in ['20', '30', '50']:
        for i in range(1, 6):
            folder = '../experiments/lcs-original-different-parameters/lcs-original-' + l + '-' + w + '-0.1/lcs-original-' + l + '-' + w + '-0.1-' + str(i)
            calibrate_results(folder, 25)
            generate_everything_all_epochs(folder, 25, 'lung')
            select_model(folder, 25)

In [None]:
# lcs parameter sweep with the last name component fixed at '0.2':
# l in {'2', '5', '10'} x w in {'20', '30', '50'}, 5 runs each.
# Only per-run post-processing happens here; average_5_runs is not called in this cell.
for l in ['2', '5', '10']:
    for w in ['20', '30', '50']:
        for i in range(1, 6):
            folder = '../experiments/lcs-original-different-parameters/lcs-original-' + l + '-' + w + '-0.2/lcs-original-' + l + '-' + w + '-0.2-' + str(i)
            calibrate_results(folder, 25)
            generate_everything_all_epochs(folder, 25, 'lung')
            select_model(folder, 25)

In [None]:
# lcs parameter sweep with the last name component fixed at '0.3':
# l in {'2', '5', '10'} x w in {'20', '30', '50'}, 5 runs each.
# Only per-run post-processing happens here; average_5_runs is not called in this cell.
for l in ['2', '5', '10']:
    for w in ['20', '30', '50']:
        for i in range(1, 6):
            folder = '../experiments/lcs-original-different-parameters/lcs-original-' + l + '-' + w + '-0.3/lcs-original-' + l + '-' + w + '-0.3-' + str(i)
            calibrate_results(folder, 25)
            generate_everything_all_epochs(folder, 25, 'lung')
            select_model(folder, 25)

In [None]:
# bcs different param eval: settings bcs-original-10-<w>-0.3 for w in {'30', '50'}.
# Post-process the 5 runs of each setting, then average them.
for w in ['30', '50']:
    for i in range(1, 6):
        folder = '../experiments/bcs-original-different-parameters/bcs-original-10-' + w + '-0.3/bcs-original-10-' + w + '-0.3-' + str(i)
        calibrate_results(folder, 25)
        generate_everything_all_epochs(folder, 25, 'breast')
        select_model(folder, 25)
    average_5_runs('../experiments/bcs-original-different-parameters/bcs-original-10-' + w + '-0.3')

In [None]:
# Recompute the averages for bcs-original-10-30-0.3 on its own
# (the w='30' iteration of the sweep cell makes the same call).
average_5_runs('../experiments/bcs-original-different-parameters/bcs-original-10-30-0.3')

In [None]:
# Stand-alone post-processing + averaging for bcs-original-10-50-0.3
# (same work as the w='50' iteration of the bcs sweep cell).
for i in range(1, 6):
    folder = '../experiments/bcs-original-different-parameters/bcs-original-10-50-0.3/bcs-original-10-50-0.3-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'breast')
    select_model(folder, 25)
average_5_runs('../experiments/bcs-original-different-parameters/bcs-original-10-50-0.3')

In [None]:
# Post-process the 5 bcs-reweight runs over 25 epochs (evaluate helpers + select_model),
# then write the mean/std summary CSVs across those runs.
for i in range(1, 6):
    folder = '../experiments/bcs-reweight/bcs-reweight-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'breast')
    select_model(folder, 25)
average_5_runs('../experiments/bcs-reweight')

In [None]:
# Post-process the 5 lcs-reweight runs over 25 epochs (evaluate helpers + select_model),
# then write the mean/std summary CSVs across those runs.
for i in range(1, 6):
    folder = '../experiments/lcs-reweight/lcs-reweight-' + str(i)
    calibrate_results(folder, 25)
    generate_everything_all_epochs(folder, 25, 'lung')
    select_model(folder, 25)
average_5_runs('../experiments/lcs-reweight')

In [None]:
# Single-folder evaluation of bcs-dpreweight-asian over 25 epochs: call the two helpers
# imported from evaluate, then select_model (writes best_epoch_number.txt).
folder = '../experiments/bcs-dpreweight-asian'
calibrate_results(folder, 25)
generate_everything_all_epochs(folder, 25, 'breast')
select_model(folder, 25)