In [11]:
#general imports

import os
import random
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import scipy.optimize
import sklearn
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

#FMCLP imports

from final_fmclp_26_09_2022 import fmclp, synthetic_dataset, cuae

In [13]:
%%time
# Run the fairness experiment N_TRIALS times on synthetic data.
#
# Each trial:
#   1. fits an LGBMClassifier on part of a small synthetic sample,
#   2. hands the prefit classifier to fmclp together with a separately
#      generated, larger synthetic dataset,
#   3. records the diff / ratio / variation / accuracy values that fmclp returns
#      for the initial classifier and for the fair classifier,
#   4. writes a per-trial text report plus the two 'df' tables to OUTPUT_DIR.
# After the loop the per-trial values are converted to NumPy arrays (the
# following cells call .mean() on them) and dumped to a summary text file.
#
# NOTE(review): nothing is seeded here (train_test_split gets no random_state
# and fmclp is called with random_state=None), so the numbers are expected to
# differ from run to run.

# Experiment configuration: the single source of truth for both the calls
# below and the text written into the report files.
N_TRIALS = 100
N_TRAIN_SAMPLES = 200
N_EVAL_SAMPLES = 50000
NUMBER_ITERATIONS = 10
MULTIPLIER = 20
INTERIOR_CLASSIFIER = 'rf'
OUTPUT_DIR = 'synthetic_independent_trials'

# Create the output directory up front so that the first open(..., 'w') does
# not fail after a trial has already been computed.
os.makedirs(OUTPUT_DIR, exist_ok=True)

fair_diff = []
fair_ratio = []
fair_variation = []
fair_accuracy = []
unfair_diff = []
unfair_ratio = []
unfair_variation = []
unfair_accuracy = []

for i in range(N_TRIALS):
    trial_no = i + 1

    d = synthetic_dataset(N_TRAIN_SAMPLES, influence = False)
    cl = LGBMClassifier()
    # NOTE: the names are inverted relative to the usual convention:
    # `y` holds the feature columns and `x` holds the 'target' column.
    y = d.drop('target', axis=1)
    x = d['target']
    # Only the train part is used; the held-out part is never read.
    y_train, y_test, x_train, x_test = train_test_split(y, x)
    cl.fit(y_train, x_train)

    res = fmclp(dataset = synthetic_dataset(N_EVAL_SAMPLES, influence = False),
                estimator = cl,
                number_iterations = NUMBER_ITERATIONS,
                prefit = True,
                interior_classifier = INTERIOR_CLASSIFIER,
                verbose = False,
                multiplier = MULTIPLIER,
                random_state = None)

    initial_fairness = res['fairness_of_initial_classifier']
    fair_fairness = res['fairness_of_fair_classifier']

    unfair_diff.append(initial_fairness['diff'])
    unfair_ratio.append(initial_fairness['ratio'])
    unfair_variation.append(initial_fairness['variation'])
    unfair_accuracy.append(res['accuracy_of_initial_classifier'])

    fair_diff.append(fair_fairness['diff'])
    fair_ratio.append(fair_fairness['ratio'])
    fair_variation.append(fair_fairness['variation'])
    fair_accuracy.append(res['accuracy_of_fair_classifier'])

    trial_prefix = f"{OUTPUT_DIR}/synthetic_independent_trial_№{trial_no}"

    # Per-trial text report. The layout (including the trailing spaces and the
    # final whitespace-only line) matches what earlier runs produced.
    trial_report_lines = [
        f"unfair_total_diff: {initial_fairness['diff']}",
        f"unfair_ratio: {initial_fairness['ratio']}",
        f"unfair_variation: {initial_fairness['variation']}",
        f"unfair_accuracy: {res['accuracy_of_initial_classifier']}",
        f"fair_diff: {fair_fairness['diff']}",
        f"fair_ratio: {fair_fairness['ratio']}",
        f"fair_variation: {fair_fairness['variation']}",
        f"fair_accuracy: {res['accuracy_of_fair_classifier']}",
        f"interior_classifier: {INTERIOR_CLASSIFIER} ",
        f"multiplier: {MULTIPLIER} ",
        "    ",
    ]
    name = f"{trial_prefix}.txt"
    # `with` guarantees the handle is closed even if the write raises.
    with open(name, 'w') as file:
        file.write("\n".join(trial_report_lines))

    fair_fairness['df'].to_csv(f"{trial_prefix} cuae-metric-fair.csv")
    initial_fairness['df'].to_csv(f"{trial_prefix} cuae-metric-unfair.csv")
    print(trial_no)

# Convert to arrays so later cells can aggregate them (e.g. .mean()).
fair_diff = np.array(fair_diff)
fair_ratio = np.array(fair_ratio)
fair_variation = np.array(fair_variation)
fair_accuracy = np.array(fair_accuracy)
unfair_diff = np.array(unfair_diff)
unfair_ratio = np.array(unfair_ratio)
unfair_variation = np.array(unfair_variation)
unfair_accuracy = np.array(unfair_accuracy)

# Summary report: the configuration followed by the raw per-trial arrays.
summary_lines = [
    f"dataset for initial classifier training: {N_TRAIN_SAMPLES} ",
    "classifier: LGBMClassifier()",
    f"number_iterations: {NUMBER_ITERATIONS}",
    f"multiplier:{MULTIPLIER}",
    f"interior_classifier: {INTERIOR_CLASSIFIER}",
    f"fair_diff: {fair_diff}",
    f"fair_ratio: {fair_ratio}",
    f"fair_variation: {fair_variation}",
    f"fair_accuracy: {fair_accuracy}",
    f"unfair_diff: {unfair_diff}",
    f"unfair_ratio: {unfair_ratio}",
    f"unfair_variation: {unfair_variation}",
    f"unfair_accuracy: {unfair_accuracy}",
]
with open(f"{OUTPUT_DIR}/synthetic_independent.txt", 'w') as file:
    file.write("\n".join(summary_lines))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
CPU times: user 42min 12s, sys: 5min 58s, total: 48min 10s
Wall time: 29min 35s


In [18]:
# Average of the per-trial 'diff' values, shown as (fair, unfair).
(np.mean(fair_diff), np.mean(unfair_diff))

(0.04779203302700849, 0.04545846759141258)

In [19]:
# Average of the per-trial 'ratio' values, shown as (fair, unfair).
(np.mean(fair_ratio), np.mean(unfair_ratio))

(1.0653070204760475, 1.0609648006046322)

In [20]:
# Average of the per-trial 'variation' values, shown as (fair, unfair).
(np.mean(fair_variation), np.mean(unfair_variation))

(0.17606039995957645, 0.17108630958678128)

In [21]:
# Average of the per-trial accuracy values, shown as (fair, unfair).
(np.mean(fair_accuracy), np.mean(unfair_accuracy))

(0.8353136000000001, 0.8361776000000002)