## Fairness ML

This script runs SMD and FMD for different n values to compare the solving time (Figure 2).

In [None]:
import sys

# Make the project-local FairnessML and RBTree source folders importable.
# The entries are relative paths, so they are resolved against the current
# working directory. Forward slashes are used because Python accepts them on
# Windows as well as on POSIX systems, whereas backslash-separated paths only
# resolve on Windows.
sys.path.append('../../src/FairnessML')
sys.path.append('../../src/RBTree')

In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from FML_test_functions import n_num_test

In [None]:
# Locate the project root, taken to be two directory levels above the current
# working directory. The working directory itself is not changed.
current_path = os.getcwd()
parent_path = os.path.dirname(current_path)
parent_path2 = os.path.dirname(parent_path)

# The trailing '' makes os.path.join end the folder path with a separator, so a
# file name can still be appended to data_path directly.
data_path = os.path.join(parent_path2, 'data', 'adult', '')

# Read the processed Adult data; the file name encodes the polynomial degree.
poly_degree = 3  # values listed by the original author: 3, 4
df = pd.read_csv(
    os.path.join(data_path, 'adult_processed_poly={}.csv'.format(poly_degree)),
    index_col=0,
)

In [None]:
# Convert the loaded DataFrame into a plain NumPy array.
df_np = df.to_numpy()

# Every row is kept here. The trailing column holds the label and all columns
# before it are the features.
X, y = df_np[:, :-1], df_np[:, -1]
n_samples = len(X)  # number of samples (rows)
# Experiment configuration. Adjust the values below to set up a run.
#   n_list_nt : list of n values to run the experiment for. This is the main
#               setting to change; use a smaller list for testing, e.g.
#               n_list_nt = np.linspace(10000, 12000, 3)
#   seed_nt   : seed for NumPy's global random number generator
#   c         : right-hand side of the second and third constraints
#   repeats_nt: number of repeats for each n
#   K_nt      : per-iteration sample size K
#   print_opt : print the output of the algorithm
#   warm_start: warm start option for the algorithm
#   feas_opt  : if 1 solve only a single feasibility problem, if 0 keep solving
#               feasibility problems until the objective value satisfies the
#               objective tolerance
n_list_nt = [5000, 10000]
seed_nt = 12345
c = 0.05
repeats_nt, K_nt = 2, 200
print_opt, warm_start, feas_opt = 1, 0, 1

# Seed the global generator for reproducibility.
np.random.seed(seed_nt)

# Result buffers with one slot per repeat: an independent SMD/FMD pair of
# arrays for every recorded quantity.
SMD_run_time_arr, FMD_run_time_arr = np.zeros(repeats_nt), np.zeros(repeats_nt)
SMD_real_T_arr, FMD_real_T_arr = np.zeros(repeats_nt), np.zeros(repeats_nt)
SMD_T_arr, FMD_T_arr = np.zeros(repeats_nt), np.zeros(repeats_nt)
SMD_solved, FMD_solved = np.zeros(repeats_nt), np.zeros(repeats_nt)
SMD_dualgap, FMD_dualgap = np.zeros(repeats_nt), np.zeros(repeats_nt)
SMD_dualgap_time, FMD_dualgap_time = np.zeros(repeats_nt), np.zeros(repeats_nt)

# Run SMD and FMD repeats_nt times for every n in n_list_nt and write one CSV
# file of per-repeat statistics for each n.
for n_num in n_list_nt:
    # Number of training rows to draw. It is handed to train_test_split as an
    # integer so exactly that many rows are selected; a fractional train_size
    # (n_num / n_samples) is floored after a floating-point multiplication and
    # can come out one row short of the n passed to n_num_test.
    n_train = int(n_num)

    for rep_idx in range(repeats_nt):
        # Draw a shuffled subsample of n_train rows; the held-out part is unused.
        # NOTE(review): random_state is fixed, so every repeat of a given n
        # receives the same subsample -- confirm this is intended.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=n_train, shuffle=True, random_state=77)

        # Run the algorithms for this n and record the statistics of the repeat.
        stat_SMD, stat_FMD, _ = n_num_test(
            n_train, K_nt, X_train, y_train, c,
            print_opt=print_opt, feas_opt=feas_opt, warm_start=warm_start)

        SMD_run_time_arr[rep_idx] = stat_SMD.total_solved_time
        FMD_run_time_arr[rep_idx] = stat_FMD.total_solved_time
        SMD_real_T_arr[rep_idx] = stat_SMD.real_T_list[0]
        FMD_real_T_arr[rep_idx] = stat_FMD.real_T_list[0]
        SMD_T_arr[rep_idx] = stat_SMD.max_iter
        FMD_T_arr[rep_idx] = stat_FMD.max_iter
        SMD_solved[rep_idx] = stat_SMD.solved
        FMD_solved[rep_idx] = stat_FMD.solved
        SMD_dualgap[rep_idx] = stat_SMD.dual_gap_list[0][0]
        FMD_dualgap[rep_idx] = stat_FMD.dual_gap_list[0][0]
        SMD_dualgap_time[rep_idx] = stat_SMD.dual_gap_time
        FMD_dualgap_time[rep_idx] = stat_FMD.dual_gap_time

    # One row per repeat, one column per recorded quantity. The buffers are
    # reused for the next n, which overwrites every slot.
    data_arr = np.vstack((
        SMD_run_time_arr, SMD_real_T_arr, SMD_T_arr,
        SMD_solved, SMD_dualgap, SMD_dualgap_time,
        FMD_run_time_arr, FMD_real_T_arr, FMD_T_arr,
        FMD_solved, FMD_dualgap, FMD_dualgap_time,
    )).T

    # Columns of the result table (the FMD_* columns mirror the SMD_* ones):
    #   *_Solve_time: solve time
    #   *_Iter      : number of iterations
    #   *_Max_Iter  : maximum number of iterations
    #   *_solved    : 1 if the problem was solved, 0 otherwise
    #   *_gap       : last-iteration duality gap
    #   *_dual_time : total time spent computing the duality gap
    # The table gets its own name so that the data set held in `df` is not
    # overwritten and this cell can be re-run safely.
    result_df = pd.DataFrame(data_arr, columns=[
        'SMD_Solve_time', 'SMD_Iter', 'SMD_Max_Iter',
        'SMD_solved', 'SMD_gap', 'SMD_dual_time',
        'FMD_Solve_time', 'FMD_Iter', 'FMD_Max_Iter',
        'FMD_solved', 'FMD_gap', 'FMD_dual_time',
    ])

    custom_path = '/results/FairnessML/n_num_result/n={}_K={}_poly={}.csv'.format(n_num, K_nt, poly_degree)
    save_path = parent_path2 + custom_path

    # Create the output folder if needed so that a finished run is not lost to
    # a missing directory.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    result_df.to_csv(save_path, index=False)


