## Fairness ML

This script runs SMD with several different K values, together with FMD, to compare the saddle point gap of these methods (Figure 1).

In [None]:
import os
import sys

# Make the project's source packages importable from this notebook.
# The paths are relative to the current working directory and are built with
# os.path.join so they use the correct separator on every platform (a
# hard-coded '..\..\src\...' string only resolves on Windows).
for _package_dir in ('FairnessML', 'RBTree'):
    _package_path = os.path.join('..', '..', 'src', _package_dir)
    # Skip entries that are already present so re-running the cell does not
    # keep growing sys.path.
    if _package_path not in sys.path:
        sys.path.append(_package_path)

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split
from FML_test_functions import K_test_time

In [None]:
# Which preprocessed file to load: the value is substituted into the file
# name 'adult_processed_poly=<poly_degree>.csv' (3 or 4).
poly_degree = 4

# Build the data directory path: <two levels above the current working
# directory>/data/adult/. The working directory itself is not changed.
current_path = os.getcwd()
parent_path = os.path.dirname(current_path)
parent_path2 = os.path.dirname(parent_path)
data_path = parent_path2 + '/data/adult/'

# Read the Adult data; the first CSV column is used as the row index.
adult_csv_name = 'adult_processed_poly={}.csv'.format(poly_degree)
df = pd.read_csv(data_path + adult_csv_name, index_col=0)

In [None]:
# Convert the loaded table to a NumPy array and separate it column-wise:
# X holds every column except the last one, y holds the last column.
df_np = df.to_numpy()
X, y = df_np[:, :-1], df_np[:, -1]

# Total number of rows available in the data set.
n_samples = len(X)

# Number of samples used in the experiment (out of n_samples).
n_sst = 45000

"""
Parameter settings for the experiment. Modify the parameters listed below to configure a run.
    K_list_sst: list of K values to run the experiment with. This is the main parameter to change. Always end the list with n_sst, which denotes running the FMD method.
    dual_gap_freq_list: absolute frequency of checking the dual gap, one entry per entry of K_list_sst (250 for SMD, 50 for FMD). Change it together with K_list_sst.
    repeats_sst: number of times the sweep over K_list_sst is repeated
    warm_start: warm start option for the algorithm
    feas_opt: if 1, solve only a single feasibility problem; if 0, keep solving feasibility problems until the objective value satisfies the objective tolerance
    time_cap_sst: time cap for each run
    time_freq_sst: frequency of calculating duality gap
 
"""

# ---------------------------------------------------------------------------
# Experiment parameters
# ---------------------------------------------------------------------------
seed_sst = 1234
repeat_number = 3  # offsets the random seed and tags the output file name
np.random.seed(seed_sst + repeat_number)
c = 0.05  # Second and Third Constraint RHS

repeats_sst = 2  # number of times the sweep over K_list_sst is repeated
# K values to test; always end with n_sst, which denotes running the FMD method.
K_list_sst = [100, 200, n_sst]
# Absolute frequency of checking the dual gap, one entry per K value
# (250 for SMD, 50 for FMD).
dual_gap_freq_list = [250, 250, 50]
print_opt = 1
# NOTE(review): warm_start and feas_opt are defined here but are not passed
# to K_test_time in this cell -- confirm whether they are still needed.
warm_start = 0
feas_opt = 1
time_cap_sst = 120  # time cap for each run
time_freq_sst = 5  # frequency of checking time

# Each K value is paired with its own dual-gap frequency; fail early with a
# clear message instead of an IndexError (or silently dropped runs) later.
if len(K_list_sst) != len(dual_gap_freq_list):
    raise ValueError(
        'K_list_sst and dual_gap_freq_list must have the same length, got '
        '{} and {}'.format(len(K_list_sst), len(dual_gap_freq_list))
    )

# ---------------------------------------------------------------------------
# Run K_test_time for every (repetition, K) combination
# ---------------------------------------------------------------------------
data_list = []
for rep_idx in range(repeats_sst):
    # Pass n_sst as an integer so exactly n_sst training rows are selected.
    # A float fraction (n_sst / n_samples) is floored after multiplication
    # and can come out one sample short because of floating-point rounding.
    # NOTE(review): random_state is identical for every repetition, so each
    # repetition sees the same split; repetitions only differ if K_test_time
    # is itself randomized -- confirm this is intended.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=n_sst, shuffle=True,
        random_state=seed_sst + repeat_number)
    for K_sst, dual_gap_freq in zip(K_list_sst, dual_gap_freq_list):
        stat = K_test_time(n_sst, X_train, y_train, c, time_cap_sst,
                           time_freq_sst, dual_gap_freq, K_sst, print_opt)
        # One result row per run. The last field is the first entry of
        # stat.dual_gap_list.
        # NOTE(review): it is stored under the 'Avg_Dual_Gap' column --
        # confirm against K_test_time that this entry is the average gap.
        temp_list = [n_sst, poly_degree, K_sst, time_cap_sst, time_freq_sst,
                     stat.dual_gap_list[0]]
        data_list.append(temp_list)

# ---------------------------------------------------------------------------
# Save the results
# ---------------------------------------------------------------------------
df = pd.DataFrame(data_list, columns=['n', 'deg', 'K', 'time cap', 'time freq', 'Avg_Dual_Gap'])
custom_path = '/results/FairnessML/K_time_result/K_test_time_n={}_poly={}_rdix={}.csv'.format(
    n_sst, poly_degree, repeat_number)
save_path = parent_path2 + custom_path
# Create the results directory if needed so that a long experiment is not
# lost to a missing-folder error at the very end.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
df.to_csv(save_path, index=False)






