In [9]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from os import path
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
import sklearn.preprocessing
from sklearn import preprocessing
from sklearn.preprocessing import Normalizer
import src.lib.utility_classfier as uclf
import src.lib.optimal_threhold_related as thres
import src.lib.fairness_tests as fair

In [10]:
# Absolute location of the input CSV on the author's machine.
# NOTE(review): machine-specific path; the notebook will not run elsewhere as-is.
data_path = '/Users/lifuchen/Desktop/research/data.csv'
# Read the whole table into a DataFrame that the following cells slice up.
df = pd.read_csv(filepath_or_buffer=data_path)

In [11]:
# Target vector: the values of the `Class` column.
y = df['Class'].values
# Feature table: every column except `GRID` and `Class`.
X = df.drop(columns=['GRID', 'Class'])
# Last expression of the cell: show (rows, columns) of the feature table.
X.shape

(109490, 87)

In [12]:
def save_prediction(
    classifier, characteristic,
    X_train_scaled, y_train,
    X_val_scaled, y_val,
    X_test_scaled, y_test,
    X_val_white_scaled, y_val_white,
    X_test_white_scaled, y_test_white,
    X_val_black_scaled, y_val_black,
    X_test_black_scaled, y_test_black,
    result_path='/Users/lifuchen/Desktop/research/predictions/',
):
    """Score six evaluation sets with one classifier and save the scores to CSV.

    The function named ``classifier`` is looked up on ``uclf`` and called once
    per evaluation set as ``fn(X_train_scaled, y_train, X_eval, y_eval)``.
    NOTE(review): each call presumably re-fits the model on the training data;
    confirm in ``src.lib.utility_classfier``.

    The six results become the columns ``val_score``, ``test_score``,
    ``val_1_score``, ``test_1_score``, ``val_2_score`` and ``test_2_score``
    (``_1`` is filled from the ``*_white`` arguments, ``_2`` from the
    ``*_black`` arguments). Columns of unequal length are padded with missing
    values, which ``get_result`` strips again when reading the file back.

    Parameters
    ----------
    classifier : str
        Name of the scoring function to fetch from ``uclf``.
    characteristic : object
        Tag appended to the file name (converted with ``str``).
    X_train_scaled, y_train
        Training features / labels handed to every scoring call.
    X_*_scaled, y_*
        Feature / label pairs for the overall, white and black validation and
        test subsets.
    result_path : str, optional
        Directory that receives ``<classifier><characteristic>prediction.csv``.
        Defaults to the location the notebook originally hard-coded. The
        directory is created when it does not exist.

    Returns
    -------
    None
    """
    import os  # local import: the file only imports `path` from `os`

    # Make sure the destination exists before the (potentially slow) scoring
    # runs, so a missing directory cannot discard finished work.
    os.makedirs(result_path, exist_ok=True)

    method_to_call = getattr(uclf, classifier)

    # (output column, features, labels) in the order the CSV columns appear.
    evaluation_sets = (
        ('val_score', X_val_scaled, y_val),
        ('test_score', X_test_scaled, y_test),
        ('val_1_score', X_val_white_scaled, y_val_white),
        ('test_1_score', X_test_white_scaled, y_test_white),
        ('val_2_score', X_val_black_scaled, y_val_black),
        ('test_2_score', X_test_black_scaled, y_test_black),
    )
    scores = {
        column: method_to_call(X_train_scaled, y_train, X_eval, y_eval)
        for column, X_eval, y_eval in evaluation_sets
    }

    # orient='index' followed by a transpose tolerates score vectors of
    # different lengths: shorter columns are padded with missing values.
    overall_prediction = pd.DataFrame.from_dict(scores, orient='index').transpose()

    filename = str(classifier) + str(characteristic) + "prediction.csv"
    overall_prediction.to_csv(path.join(result_path, filename), index=False)


In [13]:
def get_result(
    classifier, characteristic, records,
    X_train_scaled, y_train,
    X_val_scaled, y_val,
    X_test_scaled, y_test,
    X_val_white_scaled, y_val_white,
    X_test_white_scaled, y_test_white,
    X_val_black_scaled, y_val_black,
    X_test_black_scaled, y_test_black,
    result_path='/Users/lifuchen/Desktop/research/predictions/',
):
    """Load saved scores for one classifier and append one metrics row to ``records``.

    Reads ``<classifier><characteristic>prediction.csv`` from ``result_path``
    (the file layout written by ``save_prediction``), drops the padding NaNs
    from each score column, and then, for the overall, white and black
    subsets, picks a threshold on the validation scores via
    ``balance_accuracy`` and evaluates the test scores with
    ``thres.calculate_precision_metrics``. AUROC is computed on the overall
    test set only. ``fair.get_EOD`` / ``fair.get_SP`` compare the white and
    black test sets, each at its own threshold.

    Parameters
    ----------
    classifier : str
        Classifier name used as the file-name prefix.
    characteristic : object
        Run tag used in the file name. It is converted with ``str`` so the
        name is built exactly as ``save_prediction`` builds it.
    records : list
        Mutated in place: one dict of metrics is appended.
    y_val, y_test, y_val_white, y_test_white, y_val_black, y_test_black
        True labels matching the saved score columns.
    X_train_scaled, y_train, X_*_scaled
        Not used here; accepted so the call signature mirrors
        ``save_prediction``.
    result_path : str, optional
        Directory holding the prediction CSV. Defaults to the location the
        notebook originally hard-coded.

    Returns
    -------
    None
    """
    filename = str(classifier) + str(characteristic) + "prediction.csv"
    prediction = pd.read_csv(path.join(result_path, filename))

    def _scores(column):
        # Columns shorter than the longest one were NaN-padded on save.
        return prediction[column].dropna()

    y_val_score = _scores('val_score')
    y_test_score = _scores('test_score')

    y_val_score_white = _scores('val_1_score')
    y_test_score_white = _scores('test_1_score')

    y_val_score_black = _scores('val_2_score')
    y_test_score_black = _scores('test_2_score')

    # Overall population.
    threshold, ba_val, ba_test = balance_accuracy(y_val, y_val_score, y_test, y_test_score)
    auroc = roc_auc_score(y_test, y_test_score)
    precision, recall, tpr, tnr, pd_overall = thres.calculate_precision_metrics(y_test, y_test_score, threshold)

    # White subgroup, with its own threshold.
    threshold_white, ba_val_white, ba_test_white = balance_accuracy(y_val_white, y_val_score_white, y_test_white, y_test_score_white)
    precision_white, recall_white, tpr_white, tnr_white, pd_white = thres.calculate_precision_metrics(y_test_white, y_test_score_white, threshold_white)

    # Black subgroup, with its own threshold.
    threshold_black, ba_val_black, ba_test_black = balance_accuracy(y_val_black, y_val_score_black, y_test_black, y_test_score_black)
    precision_black, recall_black, tpr_black, tnr_black, pd_black = thres.calculate_precision_metrics(y_test_black, y_test_score_black, threshold_black)

    eod = fair.get_EOD(y_test_white, y_test_score_white, threshold_white, y_test_black, y_test_score_black, threshold_black)
    sp = fair.get_SP(y_test_white, y_test_score_white, threshold_white, y_test_black, y_test_score_black, threshold_black)

    records.append({
        'auroc': auroc,
        'overall threshold': threshold,
        'white threshold': threshold_white,
        'black threshold': threshold_black,
        'overall ba validation': ba_val,
        'overall ba test': ba_test,
        'white ba validation': ba_val_white,
        'white ba test': ba_test_white,
        'black ba validation': ba_val_black,
        'black ba test': ba_test_black,
        'overall precision': precision,
        'overall recall': recall,
        'overall tpr': tpr,
        'overall tnr': tnr,
        'overall pd': pd_overall,
        'white precision': precision_white,
        'white recall': recall_white,
        'white tpr': tpr_white,
        'white tnr': tnr_white,
        'white pd': pd_white,
        'black precision': precision_black,
        'black recall': recall_black,
        'black tpr': tpr_black,
        'black tnr': tnr_black,
        'black pd': pd_black,
        'eod': eod,
        # NOTE(review): key says 'di' but the value comes from fair.get_SP;
        # key kept unchanged for downstream compatibility - confirm the label.
        'di': sp,
        })

In [14]:
def balance_accuracy(y_val, y_val_score, y_test, y_test_score):
    """Choose a threshold on the validation set and report balanced accuracy.

    The threshold is the first value returned by
    ``thres.get_optimal_threshold_Jvalue`` for the validation labels/scores;
    the second returned value is discarded. That single threshold is then
    passed to ``thres.calculate_balanced_accuracy`` for the validation data
    and for the test data. Each of the three numbers is printed as it is
    obtained.

    Returns
    -------
    tuple
        ``(threshold, ba_val, ba_test)``.
    """
    best_cutoff, _second_value = thres.get_optimal_threshold_Jvalue(y_val, y_val_score)
    print("Optimal threshold by J value is ", best_cutoff)

    # Validation first, then test; both evaluated at the same cutoff.
    splits = (
        ("Balanced accuracy score of val is ", y_val, y_val_score),
        ("Balanced accuracy score of test is ", y_test, y_test_score),
    )
    accuracies = []
    for message, labels, scores in splits:
        accuracy = thres.calculate_balanced_accuracy(labels, scores, best_cutoff)
        print(message, accuracy)
        accuracies.append(accuracy)

    ba_val, ba_test = accuracies
    return best_cutoff, ba_val, ba_test

In [15]:
def fairness_metrics(X, y, attribute, random_state):
    """Run one split / scale / predict / evaluate round for all four classifiers.

    Steps, in order:
      1. ``fair.split_by_trait_no_protected_trait`` produces train, validation
         and test sets plus white/black subsets of validation and test.
      2. The row counts of every part are printed.
      3. A ``MaxAbsScaler`` is fitted on the training features and applied to
         every other feature set.
      4. ``save_prediction`` is called for each classifier name, then
         ``get_result`` is called for each classifier name.

    The metrics rows are appended to the module-level lists ``records_lr``,
    ``records_rf``, ``records_dt`` and ``records_gbt``, which must exist
    before this function is called.

    Returns
    -------
    None
    """
    (
        X_train, y_train,
        X_val, y_val,
        X_test, y_test,
        X_val_white, X_val_black,
        y_val_white, y_val_black,
        X_test_white, X_test_black,
        y_test_white, y_test_black,
    ) = fair.split_by_trait_no_protected_trait(X, y, attribute, random_state)

    print("X train", X_train.shape[0])
    print("Y train", y_train.shape[0])
    # One line per group: overall / white / black row counts.
    size_groups = (
        (X_val, X_val_white, X_val_black),
        (y_val, y_val_white, y_val_black),
        (X_test, X_test_white, X_test_black),
        (y_test, y_test_white, y_test_black),
    )
    for overall, white, black in size_groups:
        print(overall.shape[0], white.shape[0], black.shape[0])

    # Fit on the training features only, then reuse the fitted scaler.
    scaler = preprocessing.MaxAbsScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    (
        X_test_scaled, X_test_white_scaled, X_test_black_scaled,
        X_val_scaled, X_val_white_scaled, X_val_black_scaled,
    ) = (
        scaler.transform(features)
        for features in (X_test, X_test_white, X_test_black, X_val, X_val_white, X_val_black)
    )

    characteristic = attribute + str(random_state)

    # Positional tail shared by every save_prediction / get_result call.
    split_args = (
        X_train_scaled, y_train,
        X_val_scaled, y_val,
        X_test_scaled, y_test,
        X_val_white_scaled, y_val_white,
        X_test_white_scaled, y_test_white,
        X_val_black_scaled, y_val_black,
        X_test_black_scaled, y_test_black,
    )

    for model_name in ("logic_regression", "random_forest", "decision_tree", "gradiant_boosting"):
        save_prediction(model_name, characteristic, *split_args)

    # Each global record list is looked up only when its own call is reached.
    get_result("logic_regression", characteristic, records_lr, *split_args)
    get_result("random_forest", characteristic, records_rf, *split_args)
    get_result("decision_tree", characteristic, records_dt, *split_args)
    get_result("gradiant_boosting", characteristic, records_gbt, *split_args)

In [16]:
# One accumulator per classifier; fairness_metrics hands these module-level
# lists to get_result, which appends one metrics dict per run.
records_lr, records_rf, records_dt, records_gbt = [], [], [], []

# Repeat the whole experiment with random_state 0..9, using the "Race_W"
# column as the attribute passed to the split helper.
for random_state in range(10):
    fairness_metrics(X, y, "Race_W", random_state)

# One DataFrame per classifier, one row per run.
result_lr, result_rf, result_dt, result_gbt = (
    pd.DataFrame(rows)
    for rows in (records_lr, records_rf, records_dt, records_gbt)
)


X train 65694
Y train 65694
21898 18899 2999
21898 18899 2999
21898 18968 2930
21898 18968 2930


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2588873633479377
0.26198310268376185
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.46      0.03      0.06      1994

    accuracy                           0.91     21898
   macro avg       0.69      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19825    79]
 [ 1927    67]]
done in 0.581849s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2588873633479377
0.2622422310280552
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.42      0.03      0.06      1964

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19848    86]
 [ 1901    63]]
done in 0.572001s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2588873633479377
0.2628451217946567
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17180
           1       0.47      0.03      0.06      1719

    accuracy                           0.91     18899
   macro avg       0.69      0.51      0.51     18899
weighted avg       0.87      0.91      0.87     18899

Confusion_matrix
[[17116    64]
 [ 1663    56]]
done in 0.573609s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2588873633479377
0.2627415373134105
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17260
           1       0.42      0.03      0.06      1708

    accuracy                           0.91     18968
   macro avg       0.66      0.51      0.50     18968
weighted avg       0.87      0.91      0.87     18968

Confusion_matrix
[[17187    73]
 [ 1656    52]]
done in 0.621934s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2588873633479377
0.2565508588768923
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2724
           1       0.42      0.04      0.07       275

    accuracy                           0.91      2999
   macro avg       0.67      0.52      0.51      2999
weighted avg       0.87      0.91      0.87      2999

Confusion_matrix
[[2709   15]
 [ 264   11]]
done in 0.679354s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2588873633479377
0.2590098618742601
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95      2674
           1       0.46      0.04      0.08       256

    accuracy                           0.91      2930
   macro avg       0.69      0.52      0.52      2930
weighted avg       0.88      0.91      0.88      2930

Confusion_matrix
[[2661   13]
 [ 245   11]]
done in 0.767632s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.50      0.00      0.00      1994

    accuracy                           0.91     21898
   macro avg       0.70      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19899     5]
 [ 1989     5]]
done in 21.629770s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
    

  _warn_prf(average, modifier, msg_start, len(result))


0.26448598669059525
0.2778694234760347
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17260
           1       0.00      0.00      0.00      1708

    accuracy                           0.91     18968
   macro avg       0.45      0.50      0.48     18968
weighted avg       0.83      0.91      0.87     18968

Confusion_matrix
[[17257     3]
 [ 1708     0]]
done in 0.690709s
0.26448598669059525
0.27820099960359335
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2724
           1       0.00      0.00      0.00       275

    accuracy                           0.91      2999
   macro avg       0.45      0.50      0.48      2999
weighted avg       0.82      0.91      0.86      2999

Confusion_matrix
[[2723    1]
 [ 275    0]]
done in 0.644208s
0.26448598669059525
0.270807273264847
Classification report
              precision    recall  

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.46      0.02      0.03      1994

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19864    40]
 [ 1960    34]]
done in 36.725209s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.45      0.02      0.05      1964

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19876    58]
 [ 1917    47]]
done in 36.344575s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17180
           1       0.48      0.02    

threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7110900495116892
Balanced accuracy score of test is  0.6999248305974746
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.46699999999999997
threshold:0.2, J-value:0.32699999999999996
threshold:0.30000000000000004, J-value:0.165
threshold:0.4, J-value:0.085
threshold:0.5, J-value:0.009
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7335142170604725
Balanced accuracy score of test is  0.6902536579094989
True positive rate of class 1 is  0.674
True positive rate of class 2 is  0.645
Positive prediction rate of class 1 is  0.311
Positive prediction rate of class 2 is  0.297
X train 65694
Y train 65694
21898 18825 30

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26215139120322445
0.2559736159575194
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     19980
           1       0.48      0.04      0.07      1918

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19899    81]
 [ 1842    76]]
done in 1.246605s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26215139120322445
0.25789795973082535
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.44      0.04      0.07      1926

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19884    88]
 [ 1858    68]]
done in 1.308818s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26215139120322445
0.25548109580288664
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     17178
           1       0.47      0.04      0.07      1647

    accuracy                           0.91     18825
   macro avg       0.69      0.52      0.51     18825
weighted avg       0.88      0.91      0.88     18825

Confusion_matrix
[[17109    69]
 [ 1587    60]]
done in 1.148488s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26215139120322445
0.26075571606831777
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17196
           1       0.44      0.04      0.07      1687

    accuracy                           0.91     18883
   macro avg       0.68      0.52      0.51     18883
weighted avg       0.87      0.91      0.87     18883

Confusion_matrix
[[17120    76]
 [ 1627    60]]
done in 1.517511s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26215139120322445
0.2589907626906667
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95      2802
           1       0.57      0.06      0.11       271

    accuracy                           0.91      3073
   macro avg       0.74      0.53      0.53      3073
weighted avg       0.89      0.91      0.88      3073

Confusion_matrix
[[2790   12]
 [ 255   16]]
done in 1.377695s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26215139120322445
0.23999977965756908
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2776
           1       0.40      0.03      0.06       239

    accuracy                           0.92      3015
   macro avg       0.66      0.51      0.51      3015
weighted avg       0.88      0.92      0.89      3015

Confusion_matrix
[[2764   12]
 [ 231    8]]
done in 1.415235s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19980
           1       1.00      0.00      0.00      1918

    accuracy                           0.91     21898
   macro avg       0.96      0.50      0.48     21898
weighted avg       0.92      0.91      0.87     21898

Confusion_matrix
[[19980     0]
 [ 1915     3]]
done in 21.785093s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
  

threshold:0.2, J-value:0.27599999999999997
threshold:0.30000000000000004, J-value:0.155
threshold:0.4, J-value:0.07
threshold:0.5, J-value:0.036000000000000004
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7021389481243496
Balanced accuracy score of test is  0.6985516223231736
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.406
threshold:0.2, J-value:0.27999999999999997
threshold:0.30000000000000004, J-value:0.154
threshold:0.4, J-value:0.066
threshold:0.5, J-value:0.032
threshold:0.6000000000000001, J-value:0.011
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7031298345980297
Balanced accuracy score of test is  0.7005543534269214
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.392
th

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2595091489403858
0.26037470726594736
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.44      0.03      0.06      1948

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19871    79]
 [ 1885    63]]
done in 1.434885s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2595091489403858
0.2625560786274971
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.50      0.03      0.05      2015

    accuracy                           0.91     21898
   macro avg       0.70      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19824    59]
 [ 1957    58]]
done in 1.372619s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2595091489403858
0.26314207108401255
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17239
           1       0.41      0.03      0.05      1697

    accuracy                           0.91     18936
   macro avg       0.66      0.51      0.50     18936
weighted avg       0.87      0.91      0.87     18936

Confusion_matrix
[[17172    67]
 [ 1650    47]]
done in 1.827384s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2595091489403858
0.2634339117791125
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17079
           1       0.52      0.03      0.05      1750

    accuracy                           0.91     18829
   macro avg       0.71      0.51      0.50     18829
weighted avg       0.87      0.91      0.87     18829

Confusion_matrix
[[17037    42]
 [ 1705    45]]
done in 1.650912s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2595091489403858
0.24268301204012596
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2711
           1       0.57      0.06      0.11       251

    accuracy                           0.92      2962
   macro avg       0.75      0.53      0.54      2962
weighted avg       0.89      0.92      0.88      2962

Confusion_matrix
[[2699   12]
 [ 235   16]]
done in 1.522108s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2595091489403858
0.25717037631020556
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2804
           1       0.43      0.05      0.09       265

    accuracy                           0.91      3069
   macro avg       0.68      0.52      0.52      3069
weighted avg       0.88      0.91      0.88      3069

Confusion_matrix
[[2787   17]
 [ 252   13]]
done in 1.462186s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.29      0.00      0.00      1948

    accuracy                           0.91     21898
   macro avg       0.60      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19940    10]
 [ 1944     4]]
done in 20.110685s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
   

  _warn_prf(average, modifier, msg_start, len(result))


0.264372916262498
0.2794088259193761
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.00      0.00      0.00      2015

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.82      0.91      0.86     21898

Confusion_matrix
[[19879     4]
 [ 2015     0]]
done in 0.605906s
0.264372916262498
0.2729337988270996
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17239
           1       0.00      0.00      0.00      1697

    accuracy                           0.91     18936
   macro avg       0.46      0.50      0.48     18936
weighted avg       0.83      0.91      0.87     18936

Confusion_matrix
[[17238     1]
 [ 1697     0]]
done in 0.603317s
0.264372916262498
0.27474664355551137
Classification report
              precision    recall  f

  _warn_prf(average, modifier, msg_start, len(result))


0.264372916262498
0.2525807016254122
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2711
           1       0.00      0.00      0.00       251

    accuracy                           0.92      2962
   macro avg       0.46      0.50      0.48      2962
weighted avg       0.84      0.92      0.87      2962

Confusion_matrix
[[2711    0]
 [ 251    0]]
done in 0.609747s


  _warn_prf(average, modifier, msg_start, len(result))


0.264372916262498
0.296758273731138
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2804
           1       0.00      0.00      0.00       265

    accuracy                           0.91      3069
   macro avg       0.46      0.50      0.48      3069
weighted avg       0.83      0.91      0.87      3069

Confusion_matrix
[[2801    3]
 [ 265    0]]
done in 0.599595s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.44      0.02      0.03      1948

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19905    45]
 [ 1913    35]]
done in 33.233627s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
      

threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7072499009330308
Balanced accuracy score of test is  0.7106360671466541
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.401
threshold:0.2, J-value:0.255
threshold:0.30000000000000004, J-value:0.135
threshold:0.4, J-value:0.05
threshold:0.5, J-value:0.013
threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7004537203623789
Balanced accuracy score of test is  0.7135740466571312
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.504
threshold:0.2, J-value:0.291
threshold:0.30000000000000004, J-value:0.159
threshold:0.4, J-value:0.07200000000000001
threshold:0.5, J-value:0.032999999999999995
threshold:0.6000000000000001, J-value:0.02
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.0
threshold:0.9, J-value:

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25892930913047413
0.26313429063096405
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.46      0.04      0.07      1990

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19819    89]
 [ 1913    77]]
done in 0.912884s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25892930913047413
0.2609482804053149
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
           1       0.43      0.03      0.06      1980

    accuracy                           0.91     21898
   macro avg       0.67      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19825    93]
 [ 1911    69]]
done in 0.917428s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25892930913047413
0.26327010097441067
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17204
           1       0.48      0.04      0.07      1728

    accuracy                           0.91     18932
   macro avg       0.70      0.52      0.51     18932
weighted avg       0.87      0.91      0.87     18932

Confusion_matrix
[[17135    69]
 [ 1664    64]]
done in 0.865002s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25892930913047413
0.26328324901406003
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17156
           1       0.40      0.03      0.06      1726

    accuracy                           0.91     18882
   macro avg       0.65      0.51      0.50     18882
weighted avg       0.86      0.91      0.87     18882

Confusion_matrix
[[17079    77]
 [ 1675    51]]
done in 1.096779s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25892930913047413
0.2622674122013851
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2704
           1       0.39      0.05      0.09       262

    accuracy                           0.91      2966
   macro avg       0.65      0.52      0.52      2966
weighted avg       0.87      0.91      0.88      2966

Confusion_matrix
[[2684   20]
 [ 249   13]]
done in 0.956058s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25892930913047413
0.24632995239791214
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2762
           1       0.53      0.07      0.12       254

    accuracy                           0.92      3016
   macro avg       0.73      0.53      0.54      3016
weighted avg       0.89      0.92      0.89      3016

Confusion_matrix
[[2746   16]
 [ 236   18]]
done in 0.979640s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.45      0.00      0.00      1990

    accuracy                           0.91     21898
   macro avg       0.68      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19902     6]
 [ 1985     5]]
done in 19.987780s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
  

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2762
           1       0.00      0.00      0.00       254

    accuracy                           0.92      3016
   macro avg       0.46      0.50      0.48      3016
weighted avg       0.84      0.92      0.88      3016

Confusion_matrix
[[2762    0]
 [ 254    0]]
done in 19.408689s


  _warn_prf(average, modifier, msg_start, len(result))


0.26428564201629917
0.2712285449549372
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.00      0.00      0.00      1990

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19905     3]
 [ 1990     0]]
done in 0.653973s
0.26428564201629917
0.27044922960654555
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
           1       0.00      0.00      0.00      1980

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19915     3]
 [ 1980     0]]
done in 0.650796s
0.26428564201629917
0.2718989637650279
Classification report
              precision    rec

  _warn_prf(average, modifier, msg_start, len(result))


0.26428564201629917
0.2588664114511214
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2762
           1       0.00      0.00      0.00       254

    accuracy                           0.92      3016
   macro avg       0.46      0.50      0.48      3016
weighted avg       0.84      0.92      0.88      3016

Confusion_matrix
[[2760    2]
 [ 254    0]]
done in 0.608849s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.44      0.02      0.04      1990

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19859    49]
 [ 1951    39]]
done in 33.482938s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
   

threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6984957437377767
Balanced accuracy score of test is  0.70961365842378
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.398
threshold:0.2, J-value:0.27399999999999997
threshold:0.30000000000000004, J-value:0.14600000000000002
threshold:0.4, J-value:0.059000000000000004
threshold:0.5, J-value:0.015000000000000001
threshold:0.6000000000000001, J-value:0.002
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6988955249425198
Balanced accuracy score of test is  0.7075214911518781
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.392
threshold:0.2, J-value:0.263
threshold:0.30000000000000004, J-value:0.132
threshold:0.4, J-value:0.05800000000000001
threshold:0.5, J-value:0.034
threshold:0.6000000000000001, J-value:0.015
threshold:0.7000000000000001, J-value:0.0
threshold

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2631517264955017
0.25484997974541185
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     20006
           1       0.39      0.03      0.06      1892

    accuracy                           0.91     21898
   macro avg       0.65      0.51      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19906   100]
 [ 1828    64]]
done in 0.858252s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2631517264955017
0.25647906239900653
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     19975
           1       0.47      0.04      0.08      1923

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19885    90]
 [ 1844    79]]
done in 0.891979s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2631517264955017
0.25578922182492775
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     17241
           1       0.38      0.03      0.05      1634

    accuracy                           0.91     18875
   macro avg       0.65      0.51      0.50     18875
weighted avg       0.87      0.91      0.88     18875

Confusion_matrix
[[17162    79]
 [ 1586    48]]
done in 0.890070s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2631517264955017
0.25996486608496056
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17310
           1       0.47      0.04      0.07      1699

    accuracy                           0.91     19009
   macro avg       0.69      0.52      0.51     19009
weighted avg       0.87      0.91      0.87     19009

Confusion_matrix
[[17243    67]
 [ 1639    60]]
done in 0.992803s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2631517264955017
0.24898554234850084
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2765
           1       0.43      0.06      0.11       258

    accuracy                           0.91      3023
   macro avg       0.68      0.53      0.53      3023
weighted avg       0.88      0.91      0.88      3023

Confusion_matrix
[[2744   21]
 [ 242   16]]
done in 1.000715s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2631517264955017
0.2335432222237556
Classification report
              precision    recall  f1-score   support

           0       0.93      0.99      0.96      2665
           1       0.45      0.08      0.14       224

    accuracy                           0.92      2889
   macro avg       0.69      0.54      0.55      2889
weighted avg       0.89      0.92      0.90      2889

Confusion_matrix
[[2642   23]
 [ 205   19]]
done in 1.179483s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     20006
           1       0.40      0.00      0.00      1892

    accuracy                           0.91     21898
   macro avg       0.66      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[20000     6]
 [ 1888     4]]
done in 19.435798s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19975
    

threshold:0.2, J-value:0.262
threshold:0.30000000000000004, J-value:0.148
threshold:0.4, J-value:0.069
threshold:0.5, J-value:0.029
threshold:0.6000000000000001, J-value:0.011
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7010668469649379
Balanced accuracy score of test is  0.7041008488900258
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.395
threshold:0.2, J-value:0.26099999999999995
threshold:0.30000000000000004, J-value:0.145
threshold:0.4, J-value:0.064
threshold:0.5, J-value:0.024
threshold:0.6000000000000001, J-value:0.01
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6978414473710832
Balanced accuracy score of test is  0.7017047782550581
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.44299999999999995
threshold:0.2, J-

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2586404020735353
0.2639917869176803
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.43      0.03      0.06      1987

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19820    91]
 [ 1919    68]]
done in 0.895167s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2586404020735353
0.2609678507142087
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.45      0.03      0.06      1971

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19851    76]
 [ 1910    61]]
done in 1.044744s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2586404020735353
0.265962196209266
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17229
           1       0.41      0.03      0.06      1741

    accuracy                           0.91     18970
   macro avg       0.66      0.51      0.50     18970
weighted avg       0.86      0.91      0.87     18970

Confusion_matrix
[[17150    79]
 [ 1686    55]]
done in 1.041442s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2586404020735353
0.2606548438568971
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17198
           1       0.44      0.03      0.05      1694

    accuracy                           0.91     18892
   macro avg       0.68      0.51      0.50     18892
weighted avg       0.87      0.91      0.87     18892

Confusion_matrix
[[17142    56]
 [ 1650    44]]
done in 1.071952s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2586404020735353
0.2512258496699414
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2682
           1       0.52      0.05      0.10       246

    accuracy                           0.92      2928
   macro avg       0.72      0.52      0.53      2928
weighted avg       0.89      0.92      0.88      2928

Confusion_matrix
[[2670   12]
 [ 233   13]]
done in 1.065897s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2586404020735353
0.26293502488198356
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2729
           1       0.46      0.06      0.11       277

    accuracy                           0.91      3006
   macro avg       0.69      0.53      0.53      3006
weighted avg       0.87      0.91      0.87      3006

Confusion_matrix
[[2709   20]
 [ 260   17]]
done in 1.150670s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.22      0.00      0.00      1987

    accuracy                           0.91     21898
   macro avg       0.57      0.50      0.48     21898
weighted avg       0.85      0.91      0.87     21898

Confusion_matrix
[[19904     7]
 [ 1985     2]]
done in 19.663683s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
   

  _warn_prf(average, modifier, msg_start, len(result))


0.26445464621705217
0.2702963274846509
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.32      0.00      0.01      1987

    accuracy                           0.91     21898
   macro avg       0.61      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19898    13]
 [ 1981     6]]
done in 0.664255s
0.26445464621705217
0.26900537003743663
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.30      0.00      0.01      1971

    accuracy                           0.91     21898
   macro avg       0.61      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19913    14]
 [ 1965     6]]
done in 0.639602s
0.26445464621705217
0.2725960626011161
Classification report
              precision    rec

Balanced accuracy score of test is  0.7040617356300095
True positive rate of class 1 is  0.736
True positive rate of class 2 is  0.762
Positive prediction rate of class 1 is  0.376
Positive prediction rate of class 2 is  0.391
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.348
threshold:0.2, J-value:0.21099999999999997
threshold:0.30000000000000004, J-value:0.11000000000000001
threshold:0.4, J-value:0.007
threshold:0.5, J-value:0.002
threshold:0.6000000000000001, J-value:0.002
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6740769195946623
Balanced accuracy score of test is  0.6700628399696437
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.347
threshold:0.2, J-value:0.21699999999999997
threshold:0.30000000000000004, J-value:0.11100000000000002
threshold:0.4, J-value:0.007
threshold:0.5, J-value:0.001
threshold:0.6000000000000001, J-value:0.001
threshold:0

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2615249031494391
0.2575706140794245
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.43      0.03      0.06      1954

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19861    83]
 [ 1891    63]]
done in 1.009826s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2615249031494391
0.25832616083809007
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
           1       0.47      0.04      0.07      1943

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19876    79]
 [ 1873    70]]
done in 0.961090s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2615249031494391
0.25826521100481026
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17147
           1       0.44      0.03      0.06      1695

    accuracy                           0.91     18842
   macro avg       0.67      0.51      0.51     18842
weighted avg       0.87      0.91      0.87     18842

Confusion_matrix
[[17077    70]
 [ 1641    54]]
done in 0.944324s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2615249031494391
0.25989764454343484
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17231
           1       0.43      0.03      0.06      1683

    accuracy                           0.91     18914
   macro avg       0.67      0.51      0.50     18914
weighted avg       0.87      0.91      0.87     18914

Confusion_matrix
[[17163    68]
 [ 1632    51]]
done in 1.125843s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2615249031494391
0.25328802400477807
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95      2797
           1       0.41      0.03      0.06       259

    accuracy                           0.91      3056
   macro avg       0.66      0.52      0.51      3056
weighted avg       0.87      0.91      0.88      3056

Confusion_matrix
[[2784   13]
 [ 250    9]]
done in 1.316333s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2615249031494391
0.24836535560923897
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2724
           1       0.63      0.07      0.13       260

    accuracy                           0.92      2984
   macro avg       0.78      0.53      0.54      2984
weighted avg       0.89      0.92      0.88      2984

Confusion_matrix
[[2713   11]
 [ 241   19]]
done in 1.103375s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.50      0.00      0.01      1954

    accuracy                           0.91     21898
   macro avg       0.71      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19939     5]
 [ 1949     5]]
done in 19.507105s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
   

  _warn_prf(average, modifier, msg_start, len(result))


0.26691180010608717
0.26804236350549815
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.34      0.01      0.03      1954

    accuracy                           0.91     21898
   macro avg       0.63      0.51      0.49     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19890    54]
 [ 1926    28]]
done in 0.642173s
0.26691180010608717
0.2720697371491411
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
           1       0.38      0.01      0.03      1943

    accuracy                           0.91     21898
   macro avg       0.65      0.51      0.49     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19909    46]
 [ 1915    28]]
done in 0.617115s
0.26691180010608717
0.26965226840168327
Classification report
              precision    re

True positive rate of class 1 is  0.737
True positive rate of class 2 is  0.785
Positive prediction rate of class 1 is  0.382
Positive prediction rate of class 2 is  0.394
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.364
threshold:0.2, J-value:0.23500000000000001
threshold:0.30000000000000004, J-value:0.143
threshold:0.4, J-value:0.011
threshold:0.5, J-value:0.011
threshold:0.6000000000000001, J-value:0.002
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6823207591286309
Balanced accuracy score of test is  0.6769141788788026
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.366
threshold:0.2, J-value:0.24000000000000002
threshold:0.30000000000000004, J-value:0.149
threshold:0.4, J-value:0.011
threshold:0.5, J-value:0.011
threshold:0.6000000000000001, J-value:0.001
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25825949338863036
0.2633059221721374
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.41      0.03      0.06      2005

    accuracy                           0.91     21898
   macro avg       0.66      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19795    98]
 [ 1936    69]]
done in 0.896884s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25825949338863036
0.2622847363281984
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.48      0.03      0.06      1991

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19833    74]
 [ 1922    69]]
done in 0.880424s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25825949338863036
0.26182896927128774
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17191
           1       0.41      0.03      0.06      1729

    accuracy                           0.91     18920
   macro avg       0.66      0.51      0.51     18920
weighted avg       0.87      0.91      0.87     18920

Confusion_matrix
[[17108    83]
 [ 1671    58]]
done in 0.939585s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25825949338863036
0.2643884932567561
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17135
           1       0.46      0.03      0.06      1730

    accuracy                           0.91     18865
   macro avg       0.68      0.51      0.50     18865
weighted avg       0.87      0.91      0.87     18865

Confusion_matrix
[[17075    60]
 [ 1679    51]]
done in 1.153805s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25825949338863036
0.2726893838524846
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2702
           1       0.42      0.04      0.07       276

    accuracy                           0.91      2978
   macro avg       0.67      0.52      0.51      2978
weighted avg       0.87      0.91      0.87      2978

Confusion_matrix
[[2687   15]
 [ 265   11]]
done in 1.083728s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25825949338863036
0.24919954857440885
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2772
           1       0.56      0.07      0.12       261

    accuracy                           0.92      3033
   macro avg       0.74      0.53      0.54      3033
weighted avg       0.89      0.92      0.88      3033

Confusion_matrix
[[2758   14]
 [ 243   18]]
done in 0.992812s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.54      0.00      0.01      2005

    accuracy                           0.91     21898
   macro avg       0.72      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19887     6]
 [ 1998     7]]
done in 19.163097s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
  

  _warn_prf(average, modifier, msg_start, len(result))


0.2638844630577774
0.27072063991639456
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.08      0.00      0.00      2005

    accuracy                           0.91     21898
   macro avg       0.50      0.50      0.48     21898
weighted avg       0.83      0.91      0.86     21898

Confusion_matrix
[[19882    11]
 [ 2004     1]]
done in 0.636178s
0.2638844630577774
0.27224900833516935
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.33      0.00      0.00      1991

    accuracy                           0.91     21898
   macro avg       0.62      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19903     4]
 [ 1989     2]]
done in 0.636339s
0.2638844630577774
0.26930585598613827
Classification report
              precision    reca

True positive rate of class 1 is  0.732
True positive rate of class 2 is  0.755
Positive prediction rate of class 1 is  0.371
Positive prediction rate of class 2 is  0.39
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.37400000000000005
threshold:0.2, J-value:0.21000000000000002
threshold:0.30000000000000004, J-value:0.093
threshold:0.4, J-value:0.008
threshold:0.5, J-value:-0.001
threshold:0.6000000000000001, J-value:-0.001
threshold:0.7000000000000001, J-value:-0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.687285719246347
Balanced accuracy score of test is  0.6872702163503284
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.38099999999999995
threshold:0.2, J-value:0.21500000000000002
threshold:0.30000000000000004, J-value:0.10099999999999999
threshold:0.4, J-value:0.008
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.259703992793075
0.2622894173060706
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.50      0.04      0.08      1967

    accuracy                           0.91     21898
   macro avg       0.71      0.52      0.51     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19851    80]
 [ 1886    81]]
done in 0.906977s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.259703992793075
0.25915565572607974
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
           1       0.47      0.04      0.07      1942

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19877    79]
 [ 1873    69]]
done in 0.853088s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.259703992793075
0.2606057245497353
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17217
           1       0.52      0.04      0.08      1688

    accuracy                           0.91     18905
   macro avg       0.72      0.52      0.52     18905
weighted avg       0.88      0.91      0.87     18905

Confusion_matrix
[[17152    65]
 [ 1618    70]]
done in 1.085203s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.259703992793075
0.26030672244598035
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17231
           1       0.44      0.03      0.06      1687

    accuracy                           0.91     18918
   macro avg       0.68      0.51      0.51     18918
weighted avg       0.87      0.91      0.87     18918

Confusion_matrix
[[17166    65]
 [ 1635    52]]
done in 0.983221s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.259703992793075
0.272924302557831
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2714
           1       0.42      0.04      0.07       279

    accuracy                           0.91      2993
   macro avg       0.67      0.52      0.51      2993
weighted avg       0.86      0.91      0.87      2993

Confusion_matrix
[[2699   15]
 [ 268   11]]
done in 1.029004s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.259703992793075
0.251848313374711
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2725
           1       0.55      0.07      0.12       255

    accuracy                           0.92      2980
   macro avg       0.73      0.53      0.54      2980
weighted avg       0.89      0.92      0.88      2980

Confusion_matrix
[[2711   14]
 [ 238   17]]
done in 0.960931s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.28      0.00      0.01      1967

    accuracy                           0.91     21898
   macro avg       0.59      0.50      0.48     21898
weighted avg       0.85      0.91      0.87     21898

Confusion_matrix
[[19918    13]
 [ 1962     5]]
done in 19.871417s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
      

threshold:0.4, J-value:0.07
threshold:0.5, J-value:0.037000000000000005
threshold:0.6000000000000001, J-value:0.009999999999999998
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6964505938982116
Balanced accuracy score of test is  0.7007504821627146
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.391
threshold:0.2, J-value:0.24900000000000003
threshold:0.30000000000000004, J-value:0.152
threshold:0.4, J-value:0.07100000000000001
threshold:0.5, J-value:0.037000000000000005
threshold:0.6000000000000001, J-value:0.009999999999999998
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.695777701114874
Balanced accuracy score of test is  0.7004161727648129
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.403
threshold:0.2, J-value:0.288
thr

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2599848723690683
0.25514036585612926
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.52      0.04      0.07      1925

    accuracy                           0.91     21898
   macro avg       0.72      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19905    68]
 [ 1851    74]]
done in 0.903564s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2599848723690683
0.26502750161480243
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
           1       0.45      0.03      0.06      2032

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19782    84]
 [ 1963    69]]
done in 0.922397s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2599848723690683
0.25503552980805894
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17189
           1       0.50      0.04      0.07      1658

    accuracy                           0.91     18847
   macro avg       0.71      0.52      0.51     18847
weighted avg       0.88      0.91      0.88     18847

Confusion_matrix
[[17130    59]
 [ 1598    60]]
done in 0.900432s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2599848723690683
0.26591197547752504
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17067
           1       0.43      0.03      0.05      1750

    accuracy                           0.91     18817
   macro avg       0.67      0.51      0.50     18817
weighted avg       0.86      0.91      0.87     18817

Confusion_matrix
[[17000    67]
 [ 1699    51]]
done in 1.007260s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2599848723690683
0.25578797155851585
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95      2784
           1       0.61      0.05      0.10       267

    accuracy                           0.91      3051
   macro avg       0.76      0.52      0.53      3051
weighted avg       0.89      0.91      0.88      3051

Confusion_matrix
[[2775    9]
 [ 253   14]]
done in 1.040701s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2599848723690683
0.25962563706600295
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2799
           1       0.51      0.06      0.11       282

    accuracy                           0.91      3081
   macro avg       0.71      0.53      0.53      3081
weighted avg       0.88      0.91      0.88      3081

Confusion_matrix
[[2782   17]
 [ 264   18]]
done in 1.270504s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.55      0.00      0.01      1925

    accuracy                           0.91     21898
   macro avg       0.73      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19968     5]
 [ 1919     6]]
done in 19.445024s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
   

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2799
           1       0.00      0.00      0.00       282

    accuracy                           0.91      3081
   macro avg       0.45      0.50      0.48      3081
weighted avg       0.83      0.91      0.86      3081

Confusion_matrix
[[2799    0]
 [ 282    0]]
done in 19.175401s


  _warn_prf(average, modifier, msg_start, len(result))


0.26553645312445306
0.27413100329676604
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.10      0.00      0.00      1925

    accuracy                           0.91     21898
   macro avg       0.51      0.50      0.48     21898
weighted avg       0.84      0.91      0.87     21898

Confusion_matrix
[[19955    18]
 [ 1923     2]]
done in 0.616123s
0.26553645312445306
0.2781555748145189
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
           1       0.11      0.00      0.00      2032

    accuracy                           0.91     21898
   macro avg       0.51      0.50      0.48     21898
weighted avg       0.83      0.91      0.86     21898

Confusion_matrix
[[19850    16]
 [ 2030     2]]
done in 0.603733s
0.26553645312445306
0.26935524529999305
Classification report
              precision    re

True positive rate of class 1 is  0.742
True positive rate of class 2 is  0.78
Positive prediction rate of class 1 is  0.378
Positive prediction rate of class 2 is  0.389
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.33799999999999997
threshold:0.2, J-value:0.193
threshold:0.30000000000000004, J-value:0.067
threshold:0.4, J-value:0.054
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6691380116403898
Balanced accuracy score of test is  0.674852813060101
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.33599999999999997
threshold:0.2, J-value:0.195
threshold:0.30000000000000004, J-value:0.07
threshold:0.4, J-value:0.059000000000000004
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-valu

In [17]:
def add_mean_sd(records, result_table, overall_records, type):
    """Append mean / standard-deviation summary rows for one classifier.

    Parameters
    ----------
    records : list of dict
        Rows collected so far for this classifier. Two dicts are appended
        in place: first the column-wise means of ``result_table``, then the
        column-wise standard deviations (``Series.std``).
    result_table : pandas.DataFrame
        Table holding every metric column listed in ``stat_columns`` below;
        a missing column raises ``KeyError``.
    overall_records : list of dict
        Cross-classifier summary. One dict is appended in place, holding
        ``type`` plus the means of the ``headline_columns`` subset.
    type : str
        Label stored under the ``'type'`` key of the cross-classifier row
        (the name shadows the builtin but is kept for existing callers).

    Returns
    -------
    tuple
        ``(pandas.DataFrame(records), overall_records)`` -- the DataFrame
        is built from the extended ``records`` list; ``overall_records`` is
        the very same list object that was passed in.
    """
    # Every metric column that receives both a mean row and a std row,
    # in the order the keys appear in the appended dicts.
    stat_columns = (
        'auroc',
        'overall threshold', 'white threshold', 'black threshold',
        'overall ba validation', 'overall ba test',
        'white ba validation', 'white ba test',
        'black ba validation', 'black ba test',
        'overall precision', 'overall recall',
        'overall tpr', 'overall tnr', 'overall pd',
        'white precision', 'white recall',
        'white tpr', 'white tnr', 'white pd',
        'black precision', 'black recall',
        'black tpr', 'black tnr', 'black pd',
        'eod', 'di',
    )
    # Subset reported (means only) in the cross-classifier summary row.
    headline_columns = (
        'auroc',
        'overall threshold', 'white threshold', 'black threshold',
        'overall ba test', 'white ba test', 'black ba test',
        'overall tpr', 'overall pd',
        'white tpr', 'white pd',
        'black tpr', 'black pd',
        'eod', 'di',
    )

    records.append({column: result_table[column].mean() for column in stat_columns})
    records.append({column: result_table[column].std() for column in stat_columns})

    headline_row = {'type': type}
    for column in headline_columns:
        headline_row[column] = result_table[column].mean()
    overall_records.append(headline_row)

    return pd.DataFrame(records), overall_records

In [18]:
# Summarise each classifier's result table with add_mean_sd and write the summaries to CSV.
# overall_table collects one summary dict per classifier. add_mean_sd appends to the list it is
# given and returns that same list, so overall_records below is an alias of overall_table.
overall_table = []
# Each call appends a mean row and a std row to records_<clf> and rebinds result_<clf> to a
# DataFrame built from that extended records list.
# NOTE(review): records_* and result_* come from earlier cells not shown here. This cell is not
# idempotent: running it again appends the summary rows a second time and computes the statistics
# over a result_<clf> that already contains the previous mean/std rows. Re-run the cells that
# build records_*/result_* before re-running this one.
result_lr, overall_records = add_mean_sd (records_lr, result_lr, overall_table, 'lr')
result_rf, overall_records = add_mean_sd (records_rf, result_rf, overall_records, 'rf')
result_dt, overall_records = add_mean_sd (records_dt, result_dt, overall_records, 'dt')
result_gbt, overall_records = add_mean_sd (records_gbt, result_gbt, overall_records, 'gbt')

# Per-classifier tables (existing records rows followed by the mean and std rows), one CSV each.
# NOTE(review): absolute, machine-specific output directory; to_csv does not create it.
result_path='/Users/lifuchen/Desktop/research/resample_data/'
result_lr.to_csv(path.join(result_path,'race-lr-result_no_protected.csv'), index=False)
result_rf.to_csv(path.join(result_path,'race-rf-result_no_protected.csv'), index=False)
result_dt.to_csv(path.join(result_path,'race-dt-result_no_protected.csv'), index=False)
result_gbt.to_csv(path.join(result_path,'race-gbt-result_no_protected.csv'), index=False)

# Cross-classifier summary: one row per classifier ('lr', 'rf', 'dt', 'gbt'), filled in place by
# the four add_mean_sd calls above through the overall_records alias.
overall_result = pd.DataFrame(overall_table)
# result_path is rebound here to a different directory for the combined summary file.
result_path='/Users/lifuchen/Desktop/research/resample_result/'
overall_result.to_csv(path.join(result_path,'race_no_protected.csv'), index=False)
