In [29]:
import pandas as pd
import sklearn
import random, os 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from fairness_evals import positive_rates, true_postive_rates, true_negative_rate, false_postive_rates

# Data

In [30]:
data_old = pd.read_csv(os.path.join('Processed_data', 'adult_occp.csv'))

In [31]:
data_old.keys()

Index(['age', 'workclass', 'education', 'marital.status', 'occupation', 'race',
       'sex', 'hours.per.week', 'income'],
      dtype='object')

In [32]:
data_old

Unnamed: 0,age,workclass,education,marital.status,occupation,race,sex,hours.per.week,income
0,82,Private,HS-grad,Widowed,Exec-managerial,White,Female,18,0
1,54,Private,7th-8th,Divorced,Machine-op-inspct,White,Female,40,0
2,41,Private,Some-college,Separated,Prof-specialty,White,Female,40,0
3,34,Private,HS-grad,Divorced,Other-service,White,Female,45,0
4,38,Private,10th,Separated,Adm-clerical,White,Male,40,0
...,...,...,...,...,...,...,...,...,...
30157,22,Private,Some-college,Never-married,Protective-serv,White,Male,40,0
30158,27,Private,Assoc,Married-civ-spouse,Tech-support,White,Female,38,0
30159,40,Private,HS-grad,Married,Machine-op-inspct,White,Male,40,1
30160,58,Private,HS-grad,Widowed,Adm-clerical,White,Female,40,0


In [33]:
DATA = data_old 
# DATA.drop(columns=['occupation'])
DATA_NAME = 'old' #! don't forget to change this accordingly 
FEAT_OF_INT = ['education', 'marital.status', 'sex', 'income'] #The features we are interested in
DATA.head()

Unnamed: 0,age,workclass,education,marital.status,occupation,race,sex,hours.per.week,income
0,82,Private,HS-grad,Widowed,Exec-managerial,White,Female,18,0
1,54,Private,7th-8th,Divorced,Machine-op-inspct,White,Female,40,0
2,41,Private,Some-college,Separated,Prof-specialty,White,Female,40,0
3,34,Private,HS-grad,Divorced,Other-service,White,Female,45,0
4,38,Private,10th,Separated,Adm-clerical,White,Male,40,0


## Encoder for categorical values

In [34]:
from sklearn.preprocessing import OneHotEncoder
cat_feats = list(DATA.select_dtypes("object").keys())  #names of all categorical features
# cat_feats.remove('income') #keep these values as is 
print(f"{len(cat_feats)} categorical features found: {cat_feats}")

cat_feat_encoder = OneHotEncoder(sparse_output=False, drop='if_binary').set_output(transform="pandas")
cat_feat_encoder = cat_feat_encoder.fit(DATA[cat_feats])

6 categorical features found: ['workclass', 'education', 'marital.status', 'occupation', 'race', 'sex']


## Split train, dev and test

In [37]:
train, dev = train_test_split(DATA, random_state= 1989)
# test = data_2018
print(f"Train size: {len(train)}, dev size: {len(dev)}")

Train size: 22621, dev size: 7541


In [38]:
x_train = train.drop(columns=['income'])
y_train = train['income']

x_dev  = dev.drop(columns=['income'])
y_dev  = dev['income']

# x_test  = test.drop(columns=['income'])
# y_test  = test['income']

In [39]:
from Dataverwerking.data_transform import cat_to_one_hot
x_train_encoded = cat_to_one_hot(x_train, cat_feats, cat_feat_encoder)
x_dev_encoded = cat_to_one_hot(x_dev, cat_feats, cat_feat_encoder)
# x_test_encoded = cat_to_one_hot(x_test, cat_feats, cat_feat_encoder)

In [40]:
x_train_encoded

Unnamed: 0,age,hours.per.week,workclass_Federal-gov,workclass_Local-gov,workclass_Private,workclass_Self-emp-inc,workclass_Self-emp-not-inc,workclass_State-gov,workclass_Without-pay,education_10th,...,occupation_Protective-serv,occupation_Sales,occupation_Tech-support,occupation_Transport-moving,race_Amer-Indian-Eskimo,race_Asian-Pac-Islander,race_Black,race_Other,race_White,sex_Male
15483,48,40,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
16653,53,60,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
7635,28,40,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
16496,22,40,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
7664,32,60,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,56,50,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
15397,49,40,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
4924,45,80,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
18180,40,40,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0


# Logistic regression

**params logreg**
* Penalty (=L2)
* C (=1): regularization strenght -> set with dev set 
* solver: ?

In [41]:
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(solver = 'sag', max_iter= 1000).fit(x_train_encoded, y_train)



## Dev evaluation

In [42]:
y_pred_dev= list(logreg.predict(x_dev_encoded))

logreg_results_dev = dev.copy() #get all the data of the dev set

logreg_results_dev = logreg_results_dev.filter(items=FEAT_OF_INT) #keep only relevant info
logreg_results_dev['y_pred'] = y_pred_dev
logreg_results_dev

Unnamed: 0,education,marital.status,sex,income,y_pred
15688,Bachelors,Divorced,Female,0,0
4719,HS-grad,Married,Male,1,1
12320,HS-grad,Married,Male,0,0
9038,HS-grad,Divorced,Male,0,0
22761,HS-grad,Married,Male,0,0
...,...,...,...,...,...
1489,Masters,Divorced,Male,1,0
13261,Masters,Married,Male,1,1
29103,11th,Married,Male,0,0
26107,Bachelors,Married,Male,0,1


In [43]:
logreg_acc_dev = accuracy_score(y_dev, y_pred_dev)
logreg_rec_dev = recall_score(y_dev, y_pred_dev)
logreg_pre_dev = precision_score(y_dev, y_pred_dev)
logreg_f1_dev = f1_score(y_dev, y_pred_dev)
logreg_eval_dev = pd.DataFrame({'accuracy': [logreg_acc_dev], 'recall': [logreg_rec_dev], 'precision': [logreg_pre_dev], 'f1': [logreg_f1_dev]}, index=['dev'])
# logreg_eval_dev.to_csv(os.path.join('Results', f'dev_{DATA_NAME}_eval.csv'),sep='\t')
logreg_eval_dev

Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547812,0.704656,0.616413


In [44]:
logreg_fair_eval_dev = pd.DataFrame()
logreg_fair_eval_dev['positive rates'] = positive_rates(logreg_results_dev, 'sex')
logreg_fair_eval_dev['true positive rates'] = true_postive_rates(logreg_results_dev, 'sex')
logreg_fair_eval_dev['true negative rates'] = true_negative_rate(logreg_results_dev, 'sex')
logreg_fair_eval_dev['false positive rates'] = false_postive_rates(logreg_results_dev, 'sex')
# logreg_fair_eval_dev.to_csv(os.path.join('Results', f'dev_{DATA_NAME}_fair_eval.csv'),sep='\t')
logreg_fair_eval_dev


Unnamed: 0,positive rates,true positive rates,true negative rates,false positive rates
Female,0.050021,0.298932,0.982694,0.017306
Male,0.257321,0.592357,0.890766,0.109234


# DO NOT RUN FROM HERE ON

In [28]:
DATA = pd.read_csv(os.path.join('Processed_data', 'adult_occp.csv'))
# DATA.drop(columns=['occupation'])
DATA_NAME = 'old_occp' #! don't forget to change this accordingly 
FEAT_OF_INT = ['education', 'marital.status', 'sex', 'income'] #The features we are interested in
DATA.head()
cat_feats = list(DATA.select_dtypes("object").keys())  #names of all categorical features
# cat_feats.remove('income') #keep these values as is 
print(f"{len(cat_feats)} categorical features found: {cat_feats}")
best_f1 = 0 
best_acc = 0

for drop in ['if_binary', 'first']:
    cat_feat_encoder = OneHotEncoder(sparse_output=False, drop=drop).set_output(transform="pandas")
    cat_feat_encoder = cat_feat_encoder.fit(DATA[cat_feats])
    for solver in ['sag', 'saga']:
        for max_iter in range(0, 2001, 50):
            print(f"\nencoding: {drop}, solver: {solver}, max_iter: {max_iter}")
            train, dev = train_test_split(DATA, random_state= 1989)

            x_train = train.drop(columns=['income'])
            y_train = train['income']

            x_dev  = dev.drop(columns=['income'])
            y_dev  = dev['income']

            x_train_encoded = cat_to_one_hot(x_train, cat_feats, cat_feat_encoder)
            x_dev_encoded = cat_to_one_hot(x_dev, cat_feats, cat_feat_encoder)

            logreg = LogisticRegression(solver = solver, max_iter= max_iter).fit(x_train_encoded, y_train)
            
            y_pred_dev= list(logreg.predict(x_dev_encoded))

            logreg_results_dev = dev.copy() #get all the data of the dev set

            logreg_results_dev = logreg_results_dev.filter(items=FEAT_OF_INT) #keep only relevant info
            logreg_results_dev['y_pred'] = y_pred_dev

            logreg_acc_dev = accuracy_score(y_dev, y_pred_dev)
            if logreg_acc_dev>best_acc: best_acc = logreg_acc_dev
            logreg_rec_dev = recall_score(y_dev, y_pred_dev)
            logreg_pre_dev = precision_score(y_dev, y_pred_dev)
            logreg_f1_dev = f1_score(y_dev, y_pred_dev)
            if logreg_f1_dev>best_f1: best_f1 = logreg_f1_dev
            logreg_eval_dev = pd.DataFrame({'accuracy': [logreg_acc_dev], 'recall': [logreg_rec_dev], 'precision': [logreg_pre_dev], 'f1': [logreg_f1_dev]}, index=['dev'])
            # logreg_eval_dev.to_csv(os.path.join('Results', f'dev_{DATA_NAME}_eval.csv'),sep='\t')
            display(logreg_eval_dev)
print(best_f1, best_acc)
# BEST: encoding: if_binary, solver: sag, max_iter: 1000-1150 -> 0.6164133738601824 || 0.832648
#       encoding: if_binary, solver: saga, max_iter: 1950-2000

6 categorical features found: ['workclass', 'education', 'marital.status', 'occupation', 'race', 'sex']

encoding: if_binary, solver: sag, max_iter: 0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,accuracy,recall,precision,f1
dev,0.754542,0.0,0.0,0.0



encoding: if_binary, solver: sag, max_iter: 50




Unnamed: 0,accuracy,recall,precision,f1
dev,0.828405,0.525122,0.700793,0.600371



encoding: if_binary, solver: sag, max_iter: 100




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831853,0.540789,0.705426,0.612232



encoding: if_binary, solver: sag, max_iter: 150




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831985,0.541329,0.705634,0.612657



encoding: if_binary, solver: sag, max_iter: 200




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832781,0.54295,0.707746,0.614491



encoding: if_binary, solver: sag, max_iter: 250




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.54295,0.707248,0.614303



encoding: if_binary, solver: sag, max_iter: 300




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832913,0.544571,0.707368,0.615385



encoding: if_binary, solver: sag, max_iter: 350




Unnamed: 0,accuracy,recall,precision,f1
dev,0.833179,0.545651,0.707779,0.616229



encoding: if_binary, solver: sag, max_iter: 400




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832913,0.546191,0.706499,0.616088



encoding: if_binary, solver: sag, max_iter: 450




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.546731,0.704735,0.615759



encoding: if_binary, solver: sag, max_iter: 500




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.546731,0.704735,0.615759



encoding: if_binary, solver: sag, max_iter: 550




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.546731,0.705226,0.615946



encoding: if_binary, solver: sag, max_iter: 600




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: sag, max_iter: 650




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: sag, max_iter: 700




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: sag, max_iter: 750




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832781,0.547272,0.705432,0.616368



encoding: if_binary, solver: sag, max_iter: 800




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: sag, max_iter: 850




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: sag, max_iter: 900




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: sag, max_iter: 950




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547272,0.704451,0.615993



encoding: if_binary, solver: sag, max_iter: 1000




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547812,0.704656,0.616413



encoding: if_binary, solver: sag, max_iter: 1050




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547812,0.704656,0.616413



encoding: if_binary, solver: sag, max_iter: 1100




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547812,0.704656,0.616413



encoding: if_binary, solver: sag, max_iter: 1150




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547812,0.704656,0.616413



encoding: if_binary, solver: sag, max_iter: 1200




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1250




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1300




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1350




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1400




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1450




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1500




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1550




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1600




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1650




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1700




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1750




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1800




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1850




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1900


Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 1950


Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: sag, max_iter: 2000


Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547812,0.704167,0.616226



encoding: if_binary, solver: saga, max_iter: 0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,accuracy,recall,precision,f1
dev,0.754542,0.0,0.0,0.0



encoding: if_binary, solver: saga, max_iter: 50




Unnamed: 0,accuracy,recall,precision,f1
dev,0.82615,0.504052,0.70362,0.587347



encoding: if_binary, solver: saga, max_iter: 100




Unnamed: 0,accuracy,recall,precision,f1
dev,0.828007,0.52242,0.700725,0.598576



encoding: if_binary, solver: saga, max_iter: 150




Unnamed: 0,accuracy,recall,precision,f1
dev,0.829731,0.534306,0.700921,0.606376



encoding: if_binary, solver: saga, max_iter: 200




Unnamed: 0,accuracy,recall,precision,f1
dev,0.83172,0.539708,0.705508,0.61157



encoding: if_binary, solver: saga, max_iter: 250




Unnamed: 0,accuracy,recall,precision,f1
dev,0.83172,0.541329,0.704641,0.612282



encoding: if_binary, solver: saga, max_iter: 300




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831985,0.541329,0.705634,0.612657



encoding: if_binary, solver: saga, max_iter: 350




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.54295,0.706751,0.614115



encoding: if_binary, solver: saga, max_iter: 400




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832781,0.54295,0.707746,0.614491



encoding: if_binary, solver: saga, max_iter: 450




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.54349,0.706957,0.614539



encoding: if_binary, solver: saga, max_iter: 500




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.54295,0.707248,0.614303



encoding: if_binary, solver: saga, max_iter: 550




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.54403,0.706667,0.614774



encoding: if_binary, solver: saga, max_iter: 600




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832913,0.544571,0.707368,0.615385



encoding: if_binary, solver: saga, max_iter: 650




Unnamed: 0,accuracy,recall,precision,f1
dev,0.833046,0.545111,0.707574,0.615807



encoding: if_binary, solver: saga, max_iter: 700




Unnamed: 0,accuracy,recall,precision,f1
dev,0.833179,0.545651,0.707779,0.616229



encoding: if_binary, solver: saga, max_iter: 750




Unnamed: 0,accuracy,recall,precision,f1
dev,0.833046,0.546191,0.706993,0.616276



encoding: if_binary, solver: saga, max_iter: 800




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832913,0.546191,0.706499,0.616088



encoding: if_binary, solver: saga, max_iter: 850




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832913,0.546731,0.706211,0.616322



encoding: if_binary, solver: saga, max_iter: 900




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.546731,0.704735,0.615759



encoding: if_binary, solver: saga, max_iter: 950




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.546731,0.704735,0.615759



encoding: if_binary, solver: saga, max_iter: 1000




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.546731,0.704735,0.615759



encoding: if_binary, solver: saga, max_iter: 1050




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.546731,0.704735,0.615759



encoding: if_binary, solver: saga, max_iter: 1100




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.546731,0.705226,0.615946



encoding: if_binary, solver: saga, max_iter: 1150




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.546731,0.705226,0.615946



encoding: if_binary, solver: saga, max_iter: 1200




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: saga, max_iter: 1250




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: saga, max_iter: 1300




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: saga, max_iter: 1350




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: saga, max_iter: 1400




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: saga, max_iter: 1450




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: saga, max_iter: 1500




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832781,0.547272,0.705432,0.616368



encoding: if_binary, solver: saga, max_iter: 1550




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: saga, max_iter: 1600




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: saga, max_iter: 1650




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: saga, max_iter: 1700




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: saga, max_iter: 1750




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: saga, max_iter: 1800




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: saga, max_iter: 1850




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547272,0.704941,0.61618



encoding: if_binary, solver: saga, max_iter: 1900




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.547272,0.704451,0.615993



encoding: if_binary, solver: saga, max_iter: 1950




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547812,0.704656,0.616413



encoding: if_binary, solver: saga, max_iter: 2000




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.547812,0.704656,0.616413



encoding: first, solver: sag, max_iter: 0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,accuracy,recall,precision,f1
dev,0.754542,0.0,0.0,0.0



encoding: first, solver: sag, max_iter: 50




Unnamed: 0,accuracy,recall,precision,f1
dev,0.825885,0.517558,0.69521,0.593373



encoding: first, solver: sag, max_iter: 100




Unnamed: 0,accuracy,recall,precision,f1
dev,0.829068,0.532685,0.699291,0.604722



encoding: first, solver: sag, max_iter: 150




Unnamed: 0,accuracy,recall,precision,f1
dev,0.829731,0.537007,0.699507,0.607579



encoding: first, solver: sag, max_iter: 200




Unnamed: 0,accuracy,recall,precision,f1
dev,0.830261,0.539708,0.70007,0.609518



encoding: first, solver: sag, max_iter: 250




Unnamed: 0,accuracy,recall,precision,f1
dev,0.830394,0.540789,0.7,0.61018



encoding: first, solver: sag, max_iter: 300




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831322,0.54295,0.702306,0.612431



encoding: first, solver: sag, max_iter: 350




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831985,0.54403,0.704196,0.613837



encoding: first, solver: sag, max_iter: 400




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831985,0.545651,0.703343,0.614542



encoding: first, solver: sag, max_iter: 450




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831985,0.545651,0.703343,0.614542



encoding: first, solver: sag, max_iter: 500




Unnamed: 0,accuracy,recall,precision,f1
dev,0.83225,0.546191,0.704039,0.615151



encoding: first, solver: sag, max_iter: 550




Unnamed: 0,accuracy,recall,precision,f1
dev,0.83225,0.546191,0.704039,0.615151



encoding: first, solver: sag, max_iter: 600




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832118,0.546191,0.703549,0.614964



encoding: first, solver: sag, max_iter: 650




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832118,0.545651,0.703833,0.614729



encoding: first, solver: sag, max_iter: 700




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832118,0.545111,0.704117,0.614495



encoding: first, solver: sag, max_iter: 750




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832383,0.545111,0.705101,0.614869



encoding: first, solver: sag, max_iter: 800




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.546191,0.705513,0.615713



encoding: first, solver: sag, max_iter: 850




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.546191,0.705513,0.615713



encoding: first, solver: sag, max_iter: 900




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.546191,0.705021,0.615525



encoding: first, solver: sag, max_iter: 950




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.546731,0.704735,0.615759



encoding: first, solver: sag, max_iter: 1000




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832383,0.546191,0.70453,0.615338



encoding: first, solver: sag, max_iter: 1050




Unnamed: 0,accuracy,recall,precision,f1
dev,0.83225,0.546191,0.704039,0.615151



encoding: first, solver: sag, max_iter: 1100




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832118,0.546191,0.703549,0.614964



encoding: first, solver: sag, max_iter: 1150




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831985,0.546191,0.70306,0.614777



encoding: first, solver: sag, max_iter: 1200




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832118,0.546191,0.703549,0.614964



encoding: first, solver: sag, max_iter: 1250




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832118,0.546191,0.703549,0.614964



encoding: first, solver: sag, max_iter: 1300




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832118,0.545651,0.703833,0.614729



encoding: first, solver: sag, max_iter: 1350




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831853,0.545651,0.702853,0.614355



encoding: first, solver: sag, max_iter: 1400




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831853,0.545651,0.702853,0.614355



encoding: first, solver: sag, max_iter: 1450




Unnamed: 0,accuracy,recall,precision,f1
dev,0.83172,0.545651,0.702364,0.614168



encoding: first, solver: sag, max_iter: 1500




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831853,0.545651,0.702853,0.614355



encoding: first, solver: sag, max_iter: 1550




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831853,0.545651,0.702853,0.614355



encoding: first, solver: sag, max_iter: 1600




Unnamed: 0,accuracy,recall,precision,f1
dev,0.83172,0.545651,0.702364,0.614168



encoding: first, solver: sag, max_iter: 1650


Unnamed: 0,accuracy,recall,precision,f1
dev,0.83172,0.545651,0.702364,0.614168



encoding: first, solver: sag, max_iter: 1700


Unnamed: 0,accuracy,recall,precision,f1
dev,0.83172,0.545651,0.702364,0.614168



encoding: first, solver: sag, max_iter: 1750


Unnamed: 0,accuracy,recall,precision,f1
dev,0.83172,0.545651,0.702364,0.614168



encoding: first, solver: sag, max_iter: 1800


Unnamed: 0,accuracy,recall,precision,f1
dev,0.83172,0.545651,0.702364,0.614168



encoding: first, solver: sag, max_iter: 1850


Unnamed: 0,accuracy,recall,precision,f1
dev,0.83172,0.545651,0.702364,0.614168



encoding: first, solver: sag, max_iter: 1900


Unnamed: 0,accuracy,recall,precision,f1
dev,0.83172,0.545651,0.702364,0.614168



encoding: first, solver: sag, max_iter: 1950


Unnamed: 0,accuracy,recall,precision,f1
dev,0.83172,0.545651,0.702364,0.614168



encoding: first, solver: sag, max_iter: 2000


Unnamed: 0,accuracy,recall,precision,f1
dev,0.83172,0.545651,0.702364,0.614168



encoding: first, solver: saga, max_iter: 0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,accuracy,recall,precision,f1
dev,0.754542,0.0,0.0,0.0



encoding: first, solver: saga, max_iter: 50




Unnamed: 0,accuracy,recall,precision,f1
dev,0.824029,0.496488,0.699391,0.580727



encoding: first, solver: saga, max_iter: 100




Unnamed: 0,accuracy,recall,precision,f1
dev,0.825753,0.516478,0.695273,0.592684



encoding: first, solver: saga, max_iter: 150




Unnamed: 0,accuracy,recall,precision,f1
dev,0.828007,0.527283,0.69814,0.6008



encoding: first, solver: saga, max_iter: 200




Unnamed: 0,accuracy,recall,precision,f1
dev,0.828935,0.532145,0.699077,0.604294



encoding: first, solver: saga, max_iter: 250




Unnamed: 0,accuracy,recall,precision,f1
dev,0.829996,0.536467,0.700776,0.607711



encoding: first, solver: saga, max_iter: 300




Unnamed: 0,accuracy,recall,precision,f1
dev,0.829731,0.537007,0.699507,0.607579



encoding: first, solver: saga, max_iter: 350




Unnamed: 0,accuracy,recall,precision,f1
dev,0.830394,0.539708,0.700561,0.609704



encoding: first, solver: saga, max_iter: 400




Unnamed: 0,accuracy,recall,precision,f1
dev,0.830261,0.539708,0.70007,0.609518



encoding: first, solver: saga, max_iter: 450




Unnamed: 0,accuracy,recall,precision,f1
dev,0.830394,0.540249,0.70028,0.609942



encoding: first, solver: saga, max_iter: 500




Unnamed: 0,accuracy,recall,precision,f1
dev,0.830394,0.540789,0.7,0.61018



encoding: first, solver: saga, max_iter: 550




Unnamed: 0,accuracy,recall,precision,f1
dev,0.830924,0.54241,0.701117,0.611636



encoding: first, solver: saga, max_iter: 600




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831322,0.54241,0.702589,0.612195



encoding: first, solver: saga, max_iter: 650




Unnamed: 0,accuracy,recall,precision,f1
dev,0.83172,0.54403,0.703212,0.613463



encoding: first, solver: saga, max_iter: 700




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831985,0.54403,0.704196,0.613837



encoding: first, solver: saga, max_iter: 750




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832118,0.544571,0.704403,0.61426



encoding: first, solver: saga, max_iter: 800




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831985,0.545651,0.703343,0.614542



encoding: first, solver: saga, max_iter: 850




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831985,0.545651,0.703343,0.614542



encoding: first, solver: saga, max_iter: 900




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831985,0.545651,0.703343,0.614542



encoding: first, solver: saga, max_iter: 950




Unnamed: 0,accuracy,recall,precision,f1
dev,0.831985,0.545651,0.703343,0.614542



encoding: first, solver: saga, max_iter: 1000




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832118,0.546191,0.703549,0.614964



encoding: first, solver: saga, max_iter: 1050




Unnamed: 0,accuracy,recall,precision,f1
dev,0.83225,0.546191,0.704039,0.615151



encoding: first, solver: saga, max_iter: 1100




Unnamed: 0,accuracy,recall,precision,f1
dev,0.83225,0.546191,0.704039,0.615151



encoding: first, solver: saga, max_iter: 1150




Unnamed: 0,accuracy,recall,precision,f1
dev,0.83225,0.546191,0.704039,0.615151



encoding: first, solver: saga, max_iter: 1200




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832118,0.546191,0.703549,0.614964



encoding: first, solver: saga, max_iter: 1250




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832118,0.546191,0.703549,0.614964



encoding: first, solver: saga, max_iter: 1300




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832118,0.545651,0.703833,0.614729



encoding: first, solver: saga, max_iter: 1350




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832118,0.545111,0.704117,0.614495



encoding: first, solver: saga, max_iter: 1400




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832118,0.545111,0.704117,0.614495



encoding: first, solver: saga, max_iter: 1450




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832383,0.545111,0.705101,0.614869



encoding: first, solver: saga, max_iter: 1500




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832383,0.545111,0.705101,0.614869



encoding: first, solver: saga, max_iter: 1550




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832383,0.545111,0.705101,0.614869



encoding: first, solver: saga, max_iter: 1600




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.546191,0.705513,0.615713



encoding: first, solver: saga, max_iter: 1650




Unnamed: 0,accuracy,recall,precision,f1
dev,0.832648,0.546191,0.705513,0.615713



encoding: first, solver: saga, max_iter: 1700


Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.545651,0.705307,0.615291



encoding: first, solver: saga, max_iter: 1750


Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.545651,0.705307,0.615291



encoding: first, solver: saga, max_iter: 1800


Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.545651,0.705307,0.615291



encoding: first, solver: saga, max_iter: 1850


Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.545651,0.705307,0.615291



encoding: first, solver: saga, max_iter: 1900


Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.545651,0.705307,0.615291



encoding: first, solver: saga, max_iter: 1950


Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.545651,0.705307,0.615291



encoding: first, solver: saga, max_iter: 2000


Unnamed: 0,accuracy,recall,precision,f1
dev,0.832516,0.545651,0.705307,0.615291


0.6164133738601824 0.8331786235247315
