In [1]:
from helpers import *

import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn import model_selection
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression
from CustomLogisticRegression import CustomLogisticRegression as CLR
import sklearn.discriminant_analysis as DA
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB, BernoulliNB, ComplementNB, MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC

# Compute discrimination

In [15]:
def computeDiscrimination(X_test, prediction, sensitiveAttr):
    X_test = X_test.assign(prediction = prediction)
    protectedGroup = X_test[X_test[sensitiveAttr] == 1]
    unprotectedGroup = X_test[X_test[sensitiveAttr] == 0]
    proportionOfProtected = protectedGroup['prediction'].sum() / protectedGroup[sensitiveAttr].count()
    proportionOfUnprotected = unprotectedGroup['prediction'].sum() / protectedGroup[sensitiveAttr].count()
    discrim = abs(proportionOfProtected - proportionOfUnprotected)
    return discrim
    

# Run Helper

In [None]:
def run(X, y, X_test, y_test, SA):
#     kfold = model_selection.KFold(n_splits=5, random_state=7)
    kfold = model_selection.StratifiedKFold(n_splits=5)
    for i, model in enumerate(models):
        cv_result = model_selection.cross_val_score(model, X, y, cv=kfold, scoring='accuracy')

        model.fit(X, y)
        prediction_test = model.predict(X_test)

        acc_score_val = np.mean(cv_result)
        acc_score_test = accuracy_score(y_test, prediction_test)
        discr_score = computeDiscrimination(X_test, prediction_test, SA)
        print ('-'*40)
        print ('val: {0}: {1}'.format(names[i], acc_score_val))
        print ('test: {0}: {1}'.format(names[i], acc_score_test))
        print('{0} discrimination: {1}'.format(names[i], discr_score))
        

# Runner with the data splitting according to the report

In [3]:
def runAdultWithSplitting(X, y, X_test, y_test, SA):
    X_splits = np.array_split(X, 5)
    y_splits = np.array_split(y, 5)
    for x_split, y_split in zip(X_splits, y_splits):
        val_amount = math.floor((1/3) * x_split.shape[0]) # 1/3 of 1 split set
        X_val = x_split[0:val_amount]
        y_val = y_split[0:val_amount]
        X_train = x_split[val_amount:]
        y_train = y_split[val_amount:]
#         splitSize = x_split.shape[0]
#         X_train = x_split[0:splitSize-val_amount]
#         y_train = y_split[0:splitSize-val_amount]
#         X_val = x_split[splitSize-val_amount:]
#         y_val = y_split[splitSize-val_amount:]
        for i, model in enumerate(models):
            model.fit(X_train, y_train)
            prediction_val = model.predict(X_val)
            acc_score_val = np.mean(y_val == prediction_val)
            prediction_test = model.predict(X_test)
            acc_score_test = accuracy_score(y_test, prediction_test)
            discr_score = computeDiscrimination(X_test, prediction_test, SA)
            print ('-'*40)
            print ('val: {0}: {1}'.format(names[i], acc_score_val))
            print ('test: {0}: {1}'.format(names[i], acc_score_test))
            print('{0} discrimination: {1}'.format(names[i], discr_score))
        print ('-'*80)
        print ('-'*80)

In [4]:
def runGermanWithSplitting(X, y, SA):
    X_splits = np.array_split(X, 5)
    y_splits = np.array_split(y, 5)
    for x_split, y_split in zip(X_splits, y_splits):
        train_amount = math.floor(0.5 * x_split.shape[0]) # 50% of 1 split set (round)
        X_train = x_split[0:train_amount]
        y_train = y_split[0:train_amount]
        val_amount = math.floor(0.2 * x_split.shape[0]) # 20% of 1 split set
        X_val = x_split[train_amount:train_amount+val_amount]
        y_val = y_split[train_amount:train_amount+val_amount]
        X_test = x_split[train_amount+val_amount:] # 30% of 1 split set
        y_test = y_split[train_amount+val_amount:]
        for i, model in enumerate(models):
            model.fit(X_train, y_train)
            prediction_val = model.predict(X_val)
            acc_score_val = np.mean(y_val == prediction_val)
            prediction_test = model.predict(X_test)
            acc_score_test = accuracy_score(y_test, prediction_test)
            discr_score = computeDiscrimination(X_test, prediction_test, SA)
            print ('-'*40)
            print ('val: {0}: {1}'.format(names[i], acc_score_val))
            print ('test: {0}: {1}'.format(names[i], acc_score_test))
            print('{0} discrimination: {1}'.format(names[i], discr_score))
        print ('-'*80)
        print ('-'*80)

# Models to Run

In [5]:
models = []
names = ['LR(solver=lbfgs, fit_intercept=False, max_iter=700, penalty=none, C=1e10)',
         'LR(solver=lbfgs, fit_intercept=False, max_iter=1400, penalty=none, C=1e10)',
         'LR(solver=lbfgs, fit_intercept=False, max_iter=2100, penalty=none, C=1e10)',
         'LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10)',
         'LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10)',
         'LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10)',
         'LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10)',
         'LR(solver=liblinear, fit_intercept=False, max_iter=2000, C=1e10)',
         'LR(solver=liblinear, fit_intercept=False, max_iter=3000, C=1e10)',
         'LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10)',
         'LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10)',
         'LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10)',
         'LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10)',
         'LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10)',
         'LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10)',
         'Custom Logistic Regression(lr=0.1, max_itr=100)',
         'Custom Logistic Regression(lr=0.1, max_itr=500)',
         'Custom Logistic Regression(lr=0.1, max_itr=1500)',
         'Custom Logistic Regression(lr=0.01, max_itr=100)',
         'Custom Logistic Regression(lr=0.01, max_itr=500)',
         'Custom Logistic Regression(lr=0.01, max_itr=1500)',
         'Custom Logistic Regression(lr=0.001, max_itr=100)',
         'Custom Logistic Regression(lr=0.001, max_itr=500)',
         'Custom Logistic Regression(lr=0.001, max_itr=1500)',
         'Custom Logistic Regression(lr=0.0001, max_itr=100)',
         'Custom Logistic Regression(lr=0.0001, max_itr=500)',
         'Custom Logistic Regression(lr=0.0001, max_itr=1500)',
#          'Linear Discriminant Analysis',
#          'Quadratic Discriminant Analysis',
#          'Random Forest',
#          'Neural Network',
#          'Gaussian NB',
#          'Bernoulli NB',
#          'Complement NB',
#          'Multinomial NB',
#          'Random Forest',
#          'K Neighbors Classifier',
#          'SVM',
#          'LinearSVC'
]

models.append(LogisticRegression(solver='lbfgs', fit_intercept=False, max_iter=700, penalty='none'))
models.append(LogisticRegression(solver='lbfgs', fit_intercept=False, max_iter=1400, penalty='none'))
models.append(LogisticRegression(solver='lbfgs', fit_intercept=False, max_iter=2100, penalty='none'))
models.append(LogisticRegression(solver='newton-cg', fit_intercept=False, max_iter=100, penalty='none'))
models.append(LogisticRegression(solver='newton-cg', fit_intercept=False, max_iter=500, penalty='none'))
models.append(LogisticRegression(solver='newton-cg', fit_intercept=False, max_iter=1500, penalty='none'))
models.append(LogisticRegression(solver='liblinear', fit_intercept=False, max_iter=1000, C=1e10))
models.append(LogisticRegression(solver='liblinear', fit_intercept=False, max_iter=2000, C=1e10))
models.append(LogisticRegression(solver='liblinear', fit_intercept=False, max_iter=3000, C=1e10))
models.append(LogisticRegression(solver='sag', fit_intercept=False, max_iter=1000, penalty='none'))
models.append(LogisticRegression(solver='sag', fit_intercept=False, max_iter=3000, penalty='none'))
models.append(LogisticRegression(solver='sag', fit_intercept=False, max_iter=5000, penalty='none'))
models.append(LogisticRegression(solver='saga', fit_intercept=False, max_iter=700, penalty='none'))
models.append(LogisticRegression(solver='saga', fit_intercept=False, max_iter=1400, penalty='none'))
models.append(LogisticRegression(solver='saga', fit_intercept=False, max_iter=2100, penalty='none'))
models.append(CLR(0.1, 100))
models.append(CLR(0.1, 500))
models.append(CLR(0.1, 1500))
models.append(CLR(0.01, 100))
models.append(CLR(0.01, 500))
models.append(CLR(0.01, 1500))
models.append(CLR(0.001, 100))
models.append(CLR(0.001, 500))
models.append(CLR(0.001, 1500))
models.append(CLR(0.0001, 100))
models.append(CLR(0.0001, 500))
models.append(CLR(0.0001, 1500))
# models.append(DA.LinearDiscriminantAnalysis())
# models.append(DA.QuadraticDiscriminantAnalysis())
# models.append(RandomForestClassifier(n_estimators=100))
# models.append(MLPClassifier())
# models.append(GaussianNB())
# models.append(BernoulliNB())
# models.append(ComplementNB())
# models.append(MultinomialNB())
# models.append(DecisionTreeClassifier())
# models.append(KNeighborsClassifier(n_neighbors=50))
# models.append(SVC())
# models.append(LinearSVC())

# Adult Dataset

In [6]:
df_adult, pct = load_adult('datasets/adult/adult.data')
X_adult = df_adult.iloc[:, :-1]
y_adult = df_adult.iloc[:, -1]
print('percentage of corrupt rows: {0:.1f}%'.format((1-pct)*100))

df_adult_test, pct = load_adult('datasets/adult/adult.test')
X_adult_test = df_adult_test.iloc[:, :-1]
y_adult_test = df_adult_test.iloc[:, -1]
print('percentage of corrupt rows in testing: {0:.1f}%'.format((1-pct)*100))

percentage of corrupt rows: 7.4%
percentage of corrupt rows in testing: 7.5%


# expanding

In [7]:
X_adult_all = X_adult.append(X_adult_test)
X_adult_all_expand = pd.get_dummies(X_adult_all)
X_expand = X_adult_all_expand[0:X_adult.shape[0]]
X_expand_test = X_adult_all_expand[X_adult.shape[0]:]

# LabelEncoder

In [None]:
# encoders = {"workclass": preprocessing.LabelEncoder(), 
#             "education": preprocessing.LabelEncoder(), 
#             "marital-status": preprocessing.LabelEncoder(), 
#             "occupation": preprocessing.LabelEncoder(), 
#             "relationship": preprocessing.LabelEncoder(), 
#             "race": preprocessing.LabelEncoder(), 
#             "sex": preprocessing.LabelEncoder(), 
#             "native-country": preprocessing.LabelEncoder()}

# X_encoded = encode(X_adult, encoders)
# X_encoded_test = encode(X_adult_test, encoders)

In [8]:
# run(X_expand, y_adult, X_expand_test, y_adult_test, 'sex')
runAdultWithSplitting(X_expand, y_adult, X_expand_test, y_adult_test, 'sex')

284
4913
1097
4913
----------------------------------------
val: LR(solver=lbfgs, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.7876678269517653
test: LR(solver=lbfgs, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.7944887118193891
LR(solver=lbfgs, fit_intercept=False, max_iter=700, penalty=none, C=1e10) discrimination: 0.1654793405251374
284
4913
1097
4913
----------------------------------------
val: LR(solver=lbfgs, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7876678269517653
test: LR(solver=lbfgs, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7944887118193891
LR(solver=lbfgs, fit_intercept=False, max_iter=1400, penalty=none, C=1e10) discrimination: 0.1654793405251374
284
4913
1097
4913
----------------------------------------
val: LR(solver=lbfgs, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7876678269517653
test: LR(solver=lbfgs, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7944887118193891




421
4913
2645
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10): 0.8398806563898558
test: LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10): 0.8443559096945551
LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10) discrimination: 0.4526765723590474




446
4913
2708
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10): 0.8368970661362506
test: LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10): 0.8423638778220451
LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10) discrimination: 0.46041115408100963




446
4913
2708
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10): 0.8368970661362506
test: LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10): 0.8423638778220451
LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10) discrimination: 0.46041115408100963
285
4913
1095
4913
----------------------------------------
val: LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10): 0.7866732968672302
test: LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10): 0.7944223107569721
LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10) discrimination: 0.1648687156523509
285
4913
1095
4913
----------------------------------------
val: LR(solver=liblinear, fit_intercept=False, max_iter=2000, C=1e10): 0.7866732968672302
test: LR(solver=liblinear, fit_intercept=False, max_iter=2000, C=1e10): 0.7944223107569721
LR(solver=liblinear, fit_intercept



203
4913
782
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10): 0.7787170561909498
test: LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10): 0.7873173970783532
LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10) discrimination: 0.11785060044779157




259
4913
952
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.7797115862754849
test: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.7894422310756972
LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10) discrimination: 0.141054345613678
273
4913
1014
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.7812033814022874
test: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.7905046480743692
LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10) discrimination: 0.15082434357826174




186
4913
728
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.7812033814022874
test: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.7872509960159363
LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10) discrimination: 0.1103195603500916




198
4913
768
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7802088513177524
test: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7872509960159363
LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10) discrimination: 0.11601872582943212


  return 1.0 / (1 + np.exp(-a))


206
4913
788
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7792143212332173
test: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7872509960159363
LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10) discrimination: 0.11846122532057805
0.0
4913
10.0
4913
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=100): 0.7419194430631526
test: Custom Logistic Regression(lr=0.1, max_itr=100): 0.7549800796812749
Custom Logistic Regression(lr=0.1, max_itr=100) discrimination: 0.0020354162426216163
7.0
4913
40.0
4913
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=500): 0.7439085032322228
test: Custom Logistic Regression(lr=0.1, max_itr=500): 0.7574369189907039
Custom Logistic Regression(lr=0.1, max_itr=500) discrimination: 0.006716873600651334
60.0
4913
250.0
4913
--------------------------



367
4913
2659
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10): 0.852312282446544
test: LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10): 0.8436918990703851
LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10) discrimination: 0.46651740280887444




371
4913
2669
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10): 0.8513177523620089
test: LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10): 0.8419654714475432
LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10) discrimination: 0.46773865255444735




371
4913
2669
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10): 0.8513177523620089
test: LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10): 0.8419654714475432
LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10) discrimination: 0.46773865255444735
296
4913
1115
4913
----------------------------------------
val: LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10): 0.7966185977125808
test: LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10): 0.7939575033200531
LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10) discrimination: 0.16670059027071035
296
4913
1115
4913
----------------------------------------
val: LR(solver=liblinear, fit_intercept=False, max_iter=2000, C=1e10): 0.7966185977125808
test: LR(solver=liblinear, fit_intercept=False, max_iter=2000, C=1e10): 0.7939575033200531
LR(solver=liblinear, fit_intercep



216
4913
830
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10): 0.7916459472899056
test: LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10): 0.7865869853917663
LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10) discrimination: 0.12497455729696721




278
4913
1029
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.7941322725012432
test: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.7898406374501992
LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10) discrimination: 0.15285975982088337
284
4913
1057
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.7961213326703133
test: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.7903718459495352
LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10) discrimination: 0.15733767555465095




197
4913
765
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.7941322725012432
test: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.7869853917662682
LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10) discrimination: 0.1156116425809078




208
4913
802
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7916459472899056
test: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7868525896414342
LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10) discrimination: 0.12090372481172398


  return 1.0 / (1 + np.exp(-a))


219
4913
837
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7916459472899056
test: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7867197875166002
LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10) discrimination: 0.12578872379401587
4913.0
4913
10147.0
4913
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=100): 0.2347090999502735
test: Custom Logistic Regression(lr=0.1, max_itr=100): 0.2456839309428951
Custom Logistic Regression(lr=0.1, max_itr=100) discrimination: 1.065336861388154
40.0
4913
183.0
4913
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=500): 0.7747389358528095
test: Custom Logistic Regression(lr=0.1, max_itr=500): 0.7668658698539177
Custom Logistic Regression(lr=0.1, max_itr=500) discrimination: 0.029106452269489113
14.0
4913
93.0
4913
-----------------------



369
4913
2700
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10): 0.8462686567164179
test: LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10): 0.8430942895086322
LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10) discrimination: 0.4744555261550987




365
4913
2725
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10): 0.8422885572139304
test: LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10): 0.8420982735723772
LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10) discrimination: 0.4803582332587014




365
4913
2725
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10): 0.8422885572139304
test: LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10): 0.8420982735723772
LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10) discrimination: 0.4803582332587014
273
4913
1060
4913
----------------------------------------
val: LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10): 0.7970149253731343
test: LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10): 0.7926294820717131
LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10) discrimination: 0.16018725829432118
273
4913
1060
4913
----------------------------------------
val: LR(solver=liblinear, fit_intercept=False, max_iter=2000, C=1e10): 0.7970149253731343
test: LR(solver=liblinear, fit_intercept=False, max_iter=2000, C=1e10): 0.7926294820717131
LR(solver=liblinear, fit_intercept



209
4913
806
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10): 0.7870646766169154
test: LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10): 0.7861221779548473
LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10) discrimination: 0.1215143496845105




235
4913
892
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.7875621890547264
test: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.7865205843293492
LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10) discrimination: 0.1337268471402402
261
4913
958
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.7895522388059701
test: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.7871845949535192
LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10) discrimination: 0.14186851211072665




184
4913
721
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.790547263681592
test: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.7874501992031873
LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10) discrimination: 0.10930185222878078




202
4913
782
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7880597014925373
test: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7864541832669323
LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10) discrimination: 0.11805414207205372


  return 1.0 / (1 + np.exp(-a))


210
4913
807
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7870646766169154
test: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7861221779548473
LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10) discrimination: 0.12151434968451047
4913.0
4913
10147.0
4913
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=100): 0.24577114427860697
test: Custom Logistic Regression(lr=0.1, max_itr=100): 0.2456839309428951
Custom Logistic Regression(lr=0.1, max_itr=100) discrimination: 1.065336861388154
19.0
4913
108.0
4913
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=500): 0.7601990049751244
test: Custom Logistic Regression(lr=0.1, max_itr=500): 0.7627490039840638
Custom Logistic Regression(lr=0.1, max_itr=500) discrimination: 0.018115204559332385
32.0
4913
148.0
4913
---------------------



464
4913
2756
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10): 0.8482587064676617
test: LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10): 0.8428950863213811
LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10) discrimination: 0.4665174028088744




465
4913
2756
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10): 0.8432835820895522
test: LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10): 0.8412350597609561
LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10) discrimination: 0.4663138611846122




465
4913
2756
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10): 0.8432835820895522
test: LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10): 0.8412350597609561
LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10) discrimination: 0.4663138611846122
303
4913
1134
4913
----------------------------------------
val: LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10): 0.7865671641791044
test: LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10): 0.7930278884462152
LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10) discrimination: 0.1691430897618563
303
4913
1134
4913
----------------------------------------
val: LR(solver=liblinear, fit_intercept=False, max_iter=2000, C=1e10): 0.7865671641791044
test: LR(solver=liblinear, fit_intercept=False, max_iter=2000, C=1e10): 0.7930278884462152
LR(solver=liblinear, fit_intercept=



215
4913
825
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10): 0.781592039800995
test: LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10): 0.7861885790172642
LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10) discrimination: 0.12416039079991859




270
4913
983
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.7820895522388059
test: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.7881142098273572
LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10) discrimination: 0.14512517809892123
284
4913
1039
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.7835820895522388
test: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.7886454183266932
LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10) discrimination: 0.15367392631793203




201
4913
769
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.7845771144278607
test: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.7864541832669323
LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10) discrimination: 0.11561164258090781




211
4913
814
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7840796019900498
test: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7861221779548473
LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10) discrimination: 0.12273559943008344


  return 1.0 / (1 + np.exp(-a))


216
4913
827
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7820895522388059
test: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7862549800796813
LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10) discrimination: 0.12436393242418073
2.0
4913
18.0
4913
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=100): 0.7512437810945274
test: Custom Logistic Regression(lr=0.1, max_itr=100): 0.7556440903054449
Custom Logistic Regression(lr=0.1, max_itr=100) discrimination: 0.003256665988194586
26.0
4913
134.0
4913
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=500): 0.7606965174129353
test: Custom Logistic Regression(lr=0.1, max_itr=500): 0.7642762284196547
Custom Logistic Regression(lr=0.1, max_itr=500) discrimination: 0.021982495420313452
60.0
4913
253.0
4913
-------------------------



348
4913
2698
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10): 0.8482587064676617
test: LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10): 0.846347941567065
LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10) discrimination: 0.4783228170160798




352
4913
2714
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10): 0.8472636815920398
test: LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10): 0.8443559096945551
LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10) discrimination: 0.4807653165072257




352
4913
2714
4913
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10): 0.8472636815920398
test: LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10): 0.8443559096945551
LR(solver=newton-cg, fit_intercept=False, max_iter=1500, penalty=none, C=1e10) discrimination: 0.4807653165072257
326
4913
1230
4913
----------------------------------------
val: LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10): 0.7910447761194029
test: LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10): 0.7911022576361222
LR(solver=liblinear, fit_intercept=False, max_iter=1000, C=1e10) discrimination: 0.1840016283329941
326
4913
1230
4913
----------------------------------------
val: LR(solver=liblinear, fit_intercept=False, max_iter=2000, C=1e10): 0.7910447761194029
test: LR(solver=liblinear, fit_intercept=False, max_iter=2000, C=1e10): 0.7911022576361222
LR(solver=liblinear, fit_intercept=



235
4913
884
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10): 0.7860696517412935
test: LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10): 0.7853253652058433
LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10) discrimination: 0.1320985141461429




303
4913
1102
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.7865671641791044
test: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.7889110225763613
LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10) discrimination: 0.16262975778546712
311
4913
1148
4913
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.7860696517412935
test: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.7899734395750332
LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10) discrimination: 0.17036433950742927




210
4913
812
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.7880597014925373
test: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.7859229747675963
LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10) discrimination: 0.1225320578058213




225
4913
852
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7875621890547264
test: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7854581673306773
LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10) discrimination: 0.12762059841237533


  return 1.0 / (1 + np.exp(-a))


238
4913
891
4913
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7855721393034826
test: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7854581673306773
LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10) discrimination: 0.13291268064319153
2.0
4913
22.0
4913
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=100): 0.755223880597015
test: Custom Logistic Regression(lr=0.1, max_itr=100): 0.7559096945551129
Custom Logistic Regression(lr=0.1, max_itr=100) discrimination: 0.004070832485243232
12.0
4913
79.0
4913
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=500): 0.7597014925373134
test: Custom Logistic Regression(lr=0.1, max_itr=500): 0.7603585657370517
Custom Logistic Regression(lr=0.1, max_itr=500) discrimination: 0.013637288825564827
64.0
4913
275.0
4913
---------------------------

# German Dataset

In [9]:
# load german dataset
df_german = load_german('datasets/german/german.data')
X_german = df_german.iloc[:, :-1]
y_german = df_german.iloc[:, -1]

X_german.loc[X_german['Age'] <= 25, 'Age'] = 1
X_german.loc[X_german['Age'] > 25, 'Age'] = 0

# One hot encoder
X_german_encoded = pd.get_dummies(X_german)

In [None]:
# X_train, X_test, y_train, y_test = train_test_split(X_german_encoded, y_german, test_size=0.3, shuffle=False)
# X_train, X_test, y_train, y_test = train_test_split(X_german, y_german, test_size=0.3, random_state=42)


In [10]:
# run(X_train, y_train, X_test, y_test, 'Age')
runGermanWithSplitting(X_german_encoded, y_german, 'Age');

2
8
10
8
----------------------------------------
val: LR(solver=lbfgs, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.675
test: LR(solver=lbfgs, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.65
LR(solver=lbfgs, fit_intercept=False, max_iter=700, penalty=none, C=1e10) discrimination: 1.0
2
8
10
8
----------------------------------------
val: LR(solver=lbfgs, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.675
test: LR(solver=lbfgs, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.65
LR(solver=lbfgs, fit_intercept=False, max_iter=1400, penalty=none, C=1e10) discrimination: 1.0
2
8
10
8
----------------------------------------
val: LR(solver=lbfgs, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.675
test: LR(solver=lbfgs, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.65
LR(solver=lbfgs, fit_intercept=False, max_iter=2100, penalty=none, C=1e10) discrimination: 1.0
2
8
12
8
-------------------------------------



0
8
0
8
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.725
test: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.65
LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10) discrimination: 0.0




0
8
0
8
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.725
test: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.65
LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10) discrimination: 0.0
0
8
0
8
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.725
test: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.65
LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10) discrimination: 0.0
0
8
0
8
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.725
test: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.65
LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10) discrimination: 0.0


  return 1.0 / (1 + np.exp(-a))


0
8
0
8
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.725
test: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.65
LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10) discrimination: 0.0
8.0
8
52.0
8
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=100): 0.275
test: Custom Logistic Regression(lr=0.1, max_itr=100): 0.35
Custom Logistic Regression(lr=0.1, max_itr=100) discrimination: 5.5
0.0
8
0.0
8
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=500): 0.725
test: Custom Logistic Regression(lr=0.1, max_itr=500): 0.65
Custom Logistic Regression(lr=0.1, max_itr=500) discrimination: 0.0
0.0
8
0.0
8
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=1500): 0.725
test: Custom Logistic Regression(lr=0.1, max_itr=1500): 0.65
Custom Logistic Regression(



4
15
13
15
----------------------------------------
val: LR(solver=lbfgs, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.6
test: LR(solver=lbfgs, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7
LR(solver=lbfgs, fit_intercept=False, max_iter=1400, penalty=none, C=1e10) discrimination: 0.6000000000000001
4
15
13
15
----------------------------------------
val: LR(solver=lbfgs, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.6
test: LR(solver=lbfgs, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7
LR(solver=lbfgs, fit_intercept=False, max_iter=2100, penalty=none, C=1e10) discrimination: 0.6000000000000001
7
15
17
15
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10): 0.6
test: LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10): 0.65
LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10) discrimination: 0.66666666



0
15
0
15
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10): 0.625
test: LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10): 0.7833333333333333
LR(solver=sag, fit_intercept=False, max_iter=1000, penalty=none, C=1e10) discrimination: 0.0
0
15
0
15
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.625
test: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.7833333333333333
LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10) discrimination: 0.0




0
15
0
15
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.625
test: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.7833333333333333
LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10) discrimination: 0.0
0
15
0
15
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.625
test: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.7833333333333333
LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10) discrimination: 0.0
0
15
0
15
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.625
test: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7833333333333333
LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10) discrimination: 0.0


  return 1.0 / (1 + np.exp(-a))


0
15
0
15
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.625
test: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7833333333333333
LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10) discrimination: 0.0
0.0
15
0.0
15
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=100): 0.625
test: Custom Logistic Regression(lr=0.1, max_itr=100): 0.7833333333333333
Custom Logistic Regression(lr=0.1, max_itr=100) discrimination: 0.0
0.0
15
0.0
15
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=500): 0.625
test: Custom Logistic Regression(lr=0.1, max_itr=500): 0.7833333333333333
Custom Logistic Regression(lr=0.1, max_itr=500) discrimination: 0.0
15.0
15
45.0
15
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=1500): 0.375
test: Custom Logistic Regression(lr=0



0
13
0
13
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.675
test: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.5333333333333333
LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10) discrimination: 0.0




0
13
0
13
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.675
test: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.5333333333333333
LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10) discrimination: 0.0
0
13
0
13
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.675
test: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.5333333333333333
LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10) discrimination: 0.0
0
13
0
13
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.675
test: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.5333333333333333
LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10) discrimination: 0.0


  return 1.0 / (1 + np.exp(-a))


0
13
0
13
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.675
test: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.5333333333333333
LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10) discrimination: 0.0
13.0
13
47.0
13
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=100): 0.325
test: Custom Logistic Regression(lr=0.1, max_itr=100): 0.4666666666666667
Custom Logistic Regression(lr=0.1, max_itr=100) discrimination: 2.6153846153846154
13.0
13
47.0
13
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=500): 0.325
test: Custom Logistic Regression(lr=0.1, max_itr=500): 0.4666666666666667
Custom Logistic Regression(lr=0.1, max_itr=500) discrimination: 2.6153846153846154
0.0
13
0.0
13
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=1500): 0.675
test:



9
15
8
15
----------------------------------------
val: LR(solver=lbfgs, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.75
test: LR(solver=lbfgs, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.6666666666666666
LR(solver=lbfgs, fit_intercept=False, max_iter=1400, penalty=none, C=1e10) discrimination: 0.06666666666666665




9
15
9
15
----------------------------------------
val: LR(solver=lbfgs, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.725
test: LR(solver=lbfgs, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.65
LR(solver=lbfgs, fit_intercept=False, max_iter=2100, penalty=none, C=1e10) discrimination: 0.0
9
15
5
15
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10): 0.75
test: LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10): 0.6833333333333333
LR(solver=newton-cg, fit_intercept=False, max_iter=100, penalty=none, C=1e10) discrimination: 0.26666666666666666
9
15
5
15
----------------------------------------
val: LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10): 0.75
test: LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10): 0.6833333333333333
LR(solver=newton-cg, fit_intercept=False, max_iter=500, penalty=none, C=1e10) 



0
15
0
15
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.625
test: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.7166666666666667
LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10) discrimination: 0.0




0
15
0
15
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.625
test: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.7166666666666667
LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10) discrimination: 0.0
0
15
0
15
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.625
test: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.7166666666666667
LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10) discrimination: 0.0
0
15
0
15
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.625
test: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7166666666666667
LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10) discrimination: 0.0


  return 1.0 / (1 + np.exp(-a))


0
15
0
15
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.625
test: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7166666666666667
LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10) discrimination: 0.0
0.0
15
0.0
15
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=100): 0.625
test: Custom Logistic Regression(lr=0.1, max_itr=100): 0.7166666666666667
Custom Logistic Regression(lr=0.1, max_itr=100) discrimination: 0.0
0.0
15
0.0
15
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=500): 0.625
test: Custom Logistic Regression(lr=0.1, max_itr=500): 0.7166666666666667
Custom Logistic Regression(lr=0.1, max_itr=500) discrimination: 0.0
0.0
15
0.0
15
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=1500): 0.625
test: Custom Logistic Regression(lr=0.1



0
8
0
8
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.625
test: LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10): 0.7166666666666667
LR(solver=sag, fit_intercept=False, max_iter=3000, penalty=none, C=1e10) discrimination: 0.0




0
8
0
8
----------------------------------------
val: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.625
test: LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10): 0.7166666666666667
LR(solver=sag, fit_intercept=False, max_iter=5000, penalty=none, C=1e10) discrimination: 0.0
0
8
0
8
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.625
test: LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10): 0.7166666666666667
LR(solver=saga, fit_intercept=False, max_iter=700, penalty=none, C=1e10) discrimination: 0.0
0
8
0
8
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.625
test: LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10): 0.7166666666666667
LR(solver=saga, fit_intercept=False, max_iter=1400, penalty=none, C=1e10) discrimination: 0.0


  return 1.0 / (1 + np.exp(-a))


0
8
0
8
----------------------------------------
val: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.625
test: LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10): 0.7166666666666667
LR(solver=saga, fit_intercept=False, max_iter=2100, penalty=none, C=1e10) discrimination: 0.0
8.0
8
52.0
8
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=100): 0.375
test: Custom Logistic Regression(lr=0.1, max_itr=100): 0.2833333333333333
Custom Logistic Regression(lr=0.1, max_itr=100) discrimination: 5.5
0.0
8
0.0
8
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=500): 0.625
test: Custom Logistic Regression(lr=0.1, max_itr=500): 0.7166666666666667
Custom Logistic Regression(lr=0.1, max_itr=500) discrimination: 0.0
0.0
8
0.0
8
----------------------------------------
val: Custom Logistic Regression(lr=0.1, max_itr=1500): 0.625
test: Custom Logistic Regression(lr=0.1, max_i

# The most similar model

In [47]:
models = []
names = ['LR(solver=lbfgs, fit_intercept=False, max_iter=10000, penalty=none, C=1e10)',
]

models.append(LogisticRegression(solver='lbfgs', fit_intercept=False, max_iter=10000, penalty='none'))

In [48]:
runAdultWithSplitting(X_expand, y_adult, X_expand_test, y_adult_test, 'sex')

----------------------------------------
val: LR(solver=lbfgs, fit_intercept=False, max_iter=10000, penalty=none, C=1e10): 0.7876678269517653
test: LR(solver=lbfgs, fit_intercept=False, max_iter=10000, penalty=none, C=1e10): 0.7944887118193891
LR(solver=lbfgs, fit_intercept=False, max_iter=10000, penalty=none, C=1e10) discrimination: 0.1654793405251374
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
----------------------------------------
val: LR(solver=lbfgs, fit_intercept=False, max_iter=10000, penalty=none, C=1e10): 0.7951268025857782
test: LR(solver=lbfgs, fit_intercept=False, max_iter=10000, penalty=none, C=1e10): 0.7935590969455512
LR(solver=lbfgs, fit_intercept=False, max_iter=10000, penalty=none, C=1e10) discrimination: 0.16792184001628335
--------------------------------------------------------------------------------
----------------------------------------------

In [49]:
runGermanWithSplitting(X_german_encoded, y_german, 'Age');

----------------------------------------
val: LR(solver=lbfgs, fit_intercept=False, max_iter=10000, penalty=none, C=1e10): 0.675
test: LR(solver=lbfgs, fit_intercept=False, max_iter=10000, penalty=none, C=1e10): 0.65
LR(solver=lbfgs, fit_intercept=False, max_iter=10000, penalty=none, C=1e10) discrimination: 1.0
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
----------------------------------------
val: LR(solver=lbfgs, fit_intercept=False, max_iter=10000, penalty=none, C=1e10): 0.6
test: LR(solver=lbfgs, fit_intercept=False, max_iter=10000, penalty=none, C=1e10): 0.7
LR(solver=lbfgs, fit_intercept=False, max_iter=10000, penalty=none, C=1e10) discrimination: 0.6000000000000001
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--------------------------------------