In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from os import path
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
import sklearn.preprocessing
from sklearn import preprocessing
from sklearn.preprocessing import Normalizer
import src.lib.utility_classfier as uclf
import src.lib.optimal_threhold_related as thres
import src.lib.fairness_tests as fair

In [2]:
# Input table for the whole notebook.
# NOTE(review): absolute, machine-specific path; it must exist on the machine running this cell.
data_path = '/Users/lifuchen/Desktop/research/data.csv'
df = pd.read_csv(data_path)

In [3]:
# Labels: the `Class` column as an array.
y = df['Class'].values
# Features: every column except GRID, the label itself and Race_B.
# (presumably GRID is a record identifier - TODO confirm)
X = df.drop(columns=['GRID', 'Class', 'Race_B'])
X.shape

(109490, 86)

In [4]:
def save_prediction(classifier, characteristic,
                    X_train_scaled, y_train,
                    X_val_scaled, y_val,
                    X_test_scaled, y_test,
                    X_val_white_scaled, y_val_white,
                    X_test_white_scaled, y_test_white,
                    X_val_black_scaled, y_val_black,
                    X_test_black_scaled, y_test_black):
    """Score every evaluation split with one classifier and write the scores to CSV.

    ``classifier`` is the name of a callable on ``uclf``; it is resolved with
    ``getattr`` and invoked once per evaluation split as
    ``fn(X_train_scaled, y_train, X_eval, y_eval)``.

    The six results become the columns ``val_score``, ``test_score``,
    ``val_1_score``, ``test_1_score``, ``val_2_score`` and ``test_2_score``
    (``1`` holds the ``*_white`` splits, ``2`` the ``*_black`` splits) of the
    file ``<classifier><characteristic>prediction.csv`` in the predictions
    folder.

    Returns:
        None. The CSV file is the only output.
    """
    scorer = getattr(uclf, classifier)

    # (output column, features, labels), listed in the order the columns are written.
    eval_splits = (
        ('val_score', X_val_scaled, y_val),
        ('test_score', X_test_scaled, y_test),
        ('val_1_score', X_val_white_scaled, y_val_white),
        ('test_1_score', X_test_white_scaled, y_test_white),
        ('val_2_score', X_val_black_scaled, y_val_black),
        ('test_2_score', X_test_black_scaled, y_test_black),
    )
    scores_by_column = {
        column: scorer(X_train_scaled, y_train, features, labels)
        for column, features, labels in eval_splits
    }

    # Build one row per split, then transpose so each split is a column
    # (presumably so score vectors of different lengths can share one frame).
    score_table = pd.DataFrame.from_dict(scores_by_column, orient='index').transpose()

    out_dir = '/Users/lifuchen/Desktop/research/predictions/'
    out_name = str(classifier) + str(characteristic) + "prediction.csv"
    score_table.to_csv(path.join(out_dir, out_name), index=False)


In [5]:
def get_result(classifier, characteristic, records,
               X_train_scaled, y_train,
               X_val_scaled, y_val,
               X_test_scaled, y_test,
               X_val_white_scaled, y_val_white,
               X_test_white_scaled, y_test_white,
               X_val_black_scaled, y_val_black,
               X_test_black_scaled, y_test_black):
    """Load saved scores for one classifier and append its metrics to ``records``.

    Reads ``<classifier><characteristic>prediction.csv`` from the predictions
    folder, drops missing entries from each score column, and then, for the
    overall, white and black splits in turn:

    * chooses a threshold on the validation split via ``balance_accuracy``;
    * evaluates ``thres.calculate_precision_metrics`` on the test split with
      that threshold.

    AUROC is computed on the overall test split only. ``fair.get_EOD`` and
    ``fair.get_SP`` compare the white and black test splits, each using its own
    threshold. The ``get_SP`` result is stored under the key ``'di'``.

    Only the label arguments (``y_val``, ``y_test``, ``y_val_white``, ...) are
    read here; ``y_train`` and the ``X_*`` arguments are accepted but unused.

    Returns:
        None. One dict of metrics is appended to ``records`` in place.
    """
    predictions_dir = '/Users/lifuchen/Desktop/research/predictions/'
    csv_name = str(classifier) + characteristic + "prediction.csv"
    saved = pd.read_csv(path.join(predictions_dir, csv_name))

    def stored_scores(column):
        # Keep only the rows of this column that actually hold a score.
        return saved[column].dropna()

    val_all = stored_scores('val_score')
    test_all = stored_scores('test_score')
    val_w = stored_scores('val_1_score')
    test_w = stored_scores('test_1_score')
    val_b = stored_scores('val_2_score')
    test_b = stored_scores('test_2_score')

    # Overall population.
    thr_all, ba_val_all, ba_test_all = balance_accuracy(y_val, val_all, y_test, test_all)
    auroc = roc_auc_score(y_test, test_all)
    prec_all, rec_all, tpr_all, tnr_all, pd_all = thres.calculate_precision_metrics(
        y_test, test_all, thr_all)

    # White subgroup, with its own validation-derived threshold.
    thr_w, ba_val_w, ba_test_w = balance_accuracy(y_val_white, val_w, y_test_white, test_w)
    prec_w, rec_w, tpr_w, tnr_w, pd_w = thres.calculate_precision_metrics(
        y_test_white, test_w, thr_w)

    # Black subgroup, with its own validation-derived threshold.
    thr_b, ba_val_b, ba_test_b = balance_accuracy(y_val_black, val_b, y_test_black, test_b)
    prec_b, rec_b, tpr_b, tnr_b, pd_b = thres.calculate_precision_metrics(
        y_test_black, test_b, thr_b)

    # Both fairness measures take the same (white, black) test inputs.
    between_groups = (y_test_white, test_w, thr_w, y_test_black, test_b, thr_b)
    eod = fair.get_EOD(*between_groups)
    sp = fair.get_SP(*between_groups)

    summary = {
        'auroc': auroc,
        'overall threshold': thr_all,
        'white threshold': thr_w,
        'black threshold': thr_b,
        'overall ba validation': ba_val_all,
        'overall ba test': ba_test_all,
        'white ba validation': ba_val_w,
        'white ba test': ba_test_w,
        'black ba validation': ba_val_b,
        'black ba test': ba_test_b,
        'overall precision': prec_all,
        'overall recall': rec_all,
        'overall tpr': tpr_all,
        'overall tnr': tnr_all,
        'overall pd': pd_all,
        'white precision': prec_w,
        'white recall': rec_w,
        'white tpr': tpr_w,
        'white tnr': tnr_w,
        'white pd': pd_w,
        'black precision': prec_b,
        'black recall': rec_b,
        'black tpr': tpr_b,
        'black tnr': tnr_b,
        'black pd': pd_b,
        'eod': eod,
        'di': sp,
    }
    records.append(summary)

In [6]:
def balance_accuracy(y_val, y_val_score, y_test, y_test_score):
    """Choose a threshold on validation data and report balanced accuracy.

    The threshold is the first value returned by
    ``thres.get_optimal_threshold_Jvalue(y_val, y_val_score)``; the second
    returned value is ignored. That same threshold is passed to
    ``thres.calculate_balanced_accuracy`` for the validation split and then
    the test split. The threshold and both scores are printed as they are
    computed.

    Returns:
        tuple: ``(threshold, ba_val, ba_test)``.
    """
    cutoff, _ignored = thres.get_optimal_threshold_Jvalue(y_val, y_val_score)
    print("Optimal threshold by J value is ", cutoff)

    # (message, labels, scores) for each split, validation first.
    splits = (
        ("Balanced accuracy score of val is ", y_val, y_val_score),
        ("Balanced accuracy score of test is ", y_test, y_test_score),
    )
    balanced = []
    for message, labels, scores in splits:
        value = thres.calculate_balanced_accuracy(labels, scores, cutoff)
        print(message, value)
        balanced.append(value)

    return (cutoff, *balanced)

In [7]:
def fairness_metrics(X, y, attribute, random_state):
    """Run the full split / scale / score / evaluate pipeline for one seed.

    Steps:
      1. ``fair.split_by_trait`` produces train, validation and test splits,
         plus white/black subsets of the validation and test splits.
      2. A ``MaxAbsScaler`` is fitted on the training features only and then
         applied to every other feature split.
      3. ``save_prediction`` is called for each of the four classifiers, then
         ``get_result`` is called for each of them.

    Results are appended to the module-level lists ``records_lr``,
    ``records_rf``, ``records_dt`` and ``records_gbt``, which must exist
    before this function is called.

    Returns:
        None.
    """
    (X_train, y_train, X_val, y_val, X_test, y_test,
     X_val_white, X_val_black, y_val_white, y_val_black,
     X_test_white, X_test_black, y_test_white, y_test_black) = fair.split_by_trait(
        X, y, attribute, random_state)

    # Row counts: training set first, then overall / white / black per split.
    print("X train", X_train.shape[0])
    print("Y train", y_train.shape[0])
    size_report = (
        (X_val, X_val_white, X_val_black),
        (y_val, y_val_white, y_val_black),
        (X_test, X_test_white, X_test_black),
        (y_test, y_test_white, y_test_black),
    )
    for overall, white, black in size_report:
        print(overall.shape[0], white.shape[0], black.shape[0])

    # Fit the scaler on training data only; the other splits are just transformed.
    scaler = preprocessing.MaxAbsScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    (X_test_scaled, X_test_white_scaled, X_test_black_scaled,
     X_val_scaled, X_val_white_scaled, X_val_black_scaled) = (
        scaler.transform(part)
        for part in (X_test, X_test_white, X_test_black,
                     X_val, X_val_white, X_val_black)
    )

    characteristic = attribute + str(random_state)

    # Positional arguments shared by every save_prediction / get_result call.
    split_args = (
        X_train_scaled, y_train,
        X_val_scaled, y_val,
        X_test_scaled, y_test,
        X_val_white_scaled, y_val_white,
        X_test_white_scaled, y_test_white,
        X_val_black_scaled, y_val_black,
        X_test_black_scaled, y_test_black,
    )
    model_names = ("logic_regression", "random_forest", "decision_tree", "gradiant_boosting")

    # First write all prediction files ...
    for model_name in model_names:
        save_prediction(model_name, characteristic, *split_args)

    # ... then read them back and collect metrics, one record list per model.
    record_lists = (records_lr, records_rf, records_dt, records_gbt)
    for model_name, model_records in zip(model_names, record_lists):
        get_result(model_name, characteristic, model_records, *split_args)

In [8]:
# One list of per-seed metric records for each classifier. fairness_metrics
# hands these module-level lists to get_result, which appends to them.
records_lr, records_rf, records_dt, records_gbt = [], [], [], []

# Repeat the whole pipeline with seeds 0..9, splitting on the Race_W column.
for random_state in range(10):
    fairness_metrics(X, y, "Race_W", random_state)

# One row per seed, one column per metric.
result_lr, result_rf, result_dt, result_gbt = (
    pd.DataFrame(rows)
    for rows in (records_lr, records_rf, records_dt, records_gbt)
)


X train 65694
Y train 65694
21898 18899 2999
21898 18899 2999
21898 18968 2930
21898 18968 2930


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25880760366778377
0.2619702432901052
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.45      0.03      0.06      1994

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19821    83]
 [ 1926    68]]
done in 0.554614s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25880760366778377
0.26233596708062135
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.41      0.03      0.06      1964

    accuracy                           0.91     21898
   macro avg       0.66      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19841    93]
 [ 1900    64]]
done in 0.514222s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25880760366778377
0.26277255646844816
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17180
           1       0.45      0.03      0.06      1719

    accuracy                           0.91     18899
   macro avg       0.68      0.51      0.51     18899
weighted avg       0.87      0.91      0.87     18899

Confusion_matrix
[[17112    68]
 [ 1663    56]]
done in 0.522433s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25880760366778377
0.26277507724879745
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17260
           1       0.40      0.03      0.06      1708

    accuracy                           0.91     18968
   macro avg       0.66      0.51      0.50     18968
weighted avg       0.87      0.91      0.87     18968

Confusion_matrix
[[17181    79]
 [ 1655    53]]
done in 0.531585s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25880760366778377
0.25691425237396553
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2724
           1       0.44      0.04      0.08       275

    accuracy                           0.91      2999
   macro avg       0.68      0.52      0.52      2999
weighted avg       0.87      0.91      0.87      2999

Confusion_matrix
[[2709   15]
 [ 263   12]]
done in 0.511643s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25880760366778377
0.2594932907427493
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2674
           1       0.44      0.04      0.08       256

    accuracy                           0.91      2930
   macro avg       0.68      0.52      0.52      2930
weighted avg       0.87      0.91      0.88      2930

Confusion_matrix
[[2660   14]
 [ 245   11]]
done in 0.510757s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.38      0.00      0.00      1994

    accuracy                           0.91     21898
   macro avg       0.65      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19896     8]
 [ 1989     5]]
done in 17.584715s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
   

  _warn_prf(average, modifier, msg_start, len(result))


0.26448598669059525
0.2778694234760347
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17260
           1       0.00      0.00      0.00      1708

    accuracy                           0.91     18968
   macro avg       0.45      0.50      0.48     18968
weighted avg       0.83      0.91      0.87     18968

Confusion_matrix
[[17257     3]
 [ 1708     0]]
done in 0.577810s
0.26448598669059525
0.27820099960359335
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2724
           1       0.00      0.00      0.00       275

    accuracy                           0.91      2999
   macro avg       0.45      0.50      0.48      2999
weighted avg       0.82      0.91      0.86      2999

Confusion_matrix
[[2723    1]
 [ 275    0]]
done in 0.532821s
0.26448598669059525
0.270807273264847
Classification report
              precision    recall  

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.46      0.02      0.03      1994

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19864    40]
 [ 1960    34]]
done in 30.482198s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.46      0.02      0.05      1964

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19878    56]
 [ 1917    47]]
done in 30.226808s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17180
           1       0.47      0.02    

threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7110900495116892
Balanced accuracy score of test is  0.6999248305974746
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.46699999999999997
threshold:0.2, J-value:0.32699999999999996
threshold:0.30000000000000004, J-value:0.164
threshold:0.4, J-value:0.085
threshold:0.5, J-value:0.009
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7335142170604725
Balanced accuracy score of test is  0.6902536579094989
True positive rate of class 1 is  0.674
True positive rate of class 2 is  0.645
Positive prediction rate of class 1 is  0.311
Positive prediction rate of class 2 is  0.297
X train 65694
Y train 65694
21898 18825 30

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2620886010458136
0.25604140929807073
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     19980
           1       0.49      0.04      0.07      1918

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19901    79]
 [ 1842    76]]
done in 0.520531s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2620886010458136
0.2579134535597028
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.44      0.03      0.06      1926

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19888    84]
 [ 1860    66]]
done in 0.526739s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2620886010458136
0.2555919075132823
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     17178
           1       0.47      0.04      0.07      1647

    accuracy                           0.91     18825
   macro avg       0.69      0.52      0.51     18825
weighted avg       0.88      0.91      0.88     18825

Confusion_matrix
[[17111    67]
 [ 1587    60]]
done in 0.496659s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2620886010458136
0.2606601132062545
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17196
           1       0.45      0.04      0.07      1687

    accuracy                           0.91     18883
   macro avg       0.68      0.52      0.51     18883
weighted avg       0.87      0.91      0.87     18883

Confusion_matrix
[[17124    72]
 [ 1627    60]]
done in 0.525011s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2620886010458136
0.25879502826931783
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95      2802
           1       0.57      0.06      0.11       271

    accuracy                           0.91      3073
   macro avg       0.74      0.53      0.53      3073
weighted avg       0.89      0.91      0.88      3073

Confusion_matrix
[[2790   12]
 [ 255   16]]
done in 0.487524s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2620886010458136
0.24071107408844694
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2776
           1       0.33      0.03      0.05       239

    accuracy                           0.92      3015
   macro avg       0.63      0.51      0.50      3015
weighted avg       0.88      0.92      0.89      3015

Confusion_matrix
[[2764   12]
 [ 233    6]]
done in 0.488679s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19980
           1       0.83      0.00      0.01      1918

    accuracy                           0.91     21898
   macro avg       0.87      0.50      0.48     21898
weighted avg       0.91      0.91      0.87     21898

Confusion_matrix
[[19979     1]
 [ 1913     5]]
done in 17.164409s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
   

threshold:0.30000000000000004, J-value:0.155
threshold:0.4, J-value:0.07200000000000001
threshold:0.5, J-value:0.036000000000000004
threshold:0.6000000000000001, J-value:0.013999999999999999
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7034173641837875
Balanced accuracy score of test is  0.6987704125339338
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.406
threshold:0.2, J-value:0.28099999999999997
threshold:0.30000000000000004, J-value:0.155
threshold:0.4, J-value:0.066
threshold:0.5, J-value:0.032
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7025392647561872
Balanced accuracy score of test is  0.7004324284896627
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.41800000000000004
t

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25945267743299616
0.26016556622483566
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.46      0.03      0.06      1948

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19873    77]
 [ 1882    66]]
done in 0.524424s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25945267743299616
0.26243869330811537
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.50      0.03      0.05      2015

    accuracy                           0.91     21898
   macro avg       0.71      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19826    57]
 [ 1957    58]]
done in 0.513772s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25945267743299616
0.26292576969632264
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17239
           1       0.44      0.03      0.06      1697

    accuracy                           0.91     18936
   macro avg       0.68      0.51      0.50     18936
weighted avg       0.87      0.91      0.87     18936

Confusion_matrix
[[17175    64]
 [ 1647    50]]
done in 0.512211s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25945267743299616
0.263302347009054
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17079
           1       0.53      0.03      0.05      1750

    accuracy                           0.91     18829
   macro avg       0.72      0.51      0.50     18829
weighted avg       0.87      0.91      0.87     18829

Confusion_matrix
[[17037    42]
 [ 1703    47]]
done in 0.510981s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25945267743299616
0.24251964693514066
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2711
           1       0.55      0.06      0.11       251

    accuracy                           0.92      2962
   macro avg       0.74      0.53      0.54      2962
weighted avg       0.89      0.92      0.88      2962

Confusion_matrix
[[2698   13]
 [ 235   16]]
done in 0.493766s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25945267743299616
0.25713998508557556
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2804
           1       0.42      0.04      0.08       265

    accuracy                           0.91      3069
   macro avg       0.67      0.52      0.51      3069
weighted avg       0.87      0.91      0.88      3069

Confusion_matrix
[[2789   15]
 [ 254   11]]
done in 0.493817s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.30      0.00      0.00      1948

    accuracy                           0.91     21898
   macro avg       0.61      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19943     7]
 [ 1945     3]]
done in 16.744922s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
  

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2804
           1       0.50      0.00      0.01       265

    accuracy                           0.91      3069
   macro avg       0.71      0.50      0.48      3069
weighted avg       0.88      0.91      0.87      3069

Confusion_matrix
[[2803    1]
 [ 264    1]]
done in 16.453057s
0.264372916262498
0.27018076777808153
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.00      0.00      0.00      1948

    accuracy                           0.91     21898
   macro avg       0.46      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19949     1]
 [ 1948     0]]
done in 0.546310s
0.264372916262498
0.2794088259193761
Classification report
              precision    recall  f1-score   support

           0       0.

  _warn_prf(average, modifier, msg_start, len(result))


0.264372916262498
0.2525807016254122
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2711
           1       0.00      0.00      0.00       251

    accuracy                           0.92      2962
   macro avg       0.46      0.50      0.48      2962
weighted avg       0.84      0.92      0.87      2962

Confusion_matrix
[[2711    0]
 [ 251    0]]
done in 0.517319s


  _warn_prf(average, modifier, msg_start, len(result))


0.264372916262498
0.296758273731138
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2804
           1       0.00      0.00      0.00       265

    accuracy                           0.91      3069
   macro avg       0.46      0.50      0.48      3069
weighted avg       0.83      0.91      0.87      3069

Confusion_matrix
[[2801    3]
 [ 265    0]]
done in 0.519005s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.44      0.02      0.03      1948

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19905    45]
 [ 1913    35]]
done in 29.530747s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
      

Balanced accuracy score of val is  0.7072499009330308
Balanced accuracy score of test is  0.7106109200360571
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.401
threshold:0.2, J-value:0.255
threshold:0.30000000000000004, J-value:0.135
threshold:0.4, J-value:0.05
threshold:0.5, J-value:0.013
threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7004537203623789
Balanced accuracy score of test is  0.7135740466571312
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.504
threshold:0.2, J-value:0.291
threshold:0.30000000000000004, J-value:0.159
threshold:0.4, J-value:0.07200000000000001
threshold:0.5, J-value:0.032999999999999995
threshold:0.6000000000000001, J-value:0.02
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2588696445882092
0.26295515784374507
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.46      0.04      0.07      1990

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19820    88]
 [ 1915    75]]
done in 0.518459s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2588696445882092
0.26084547415836146
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
           1       0.43      0.03      0.06      1980

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19831    87]
 [ 1914    66]]
done in 0.554159s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2588696445882092
0.2631389701556421
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17204
           1       0.48      0.04      0.07      1728

    accuracy                           0.91     18932
   macro avg       0.69      0.52      0.51     18932
weighted avg       0.87      0.91      0.87     18932

Confusion_matrix
[[17136    68]
 [ 1666    62]]
done in 0.515184s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2588696445882092
0.26320409624024377
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17156
           1       0.40      0.03      0.05      1726

    accuracy                           0.91     18882
   macro avg       0.66      0.51      0.50     18882
weighted avg       0.86      0.91      0.87     18882

Confusion_matrix
[[17084    72]
 [ 1678    48]]
done in 0.520531s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2588696445882092
0.26178188249349765
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2704
           1       0.39      0.05      0.09       262

    accuracy                           0.91      2966
   macro avg       0.65      0.52      0.52      2966
weighted avg       0.87      0.91      0.88      2966

Confusion_matrix
[[2684   20]
 [ 249   13]]
done in 0.491990s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2588696445882092
0.24607906097861967
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2762
           1       0.55      0.07      0.13       254

    accuracy                           0.92      3016
   macro avg       0.73      0.53      0.54      3016
weighted avg       0.89      0.92      0.89      3016

Confusion_matrix
[[2747   15]
 [ 236   18]]
done in 0.515493s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.38      0.00      0.01      1990

    accuracy                           0.91     21898
   macro avg       0.64      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19898    10]
 [ 1984     6]]
done in 16.485183s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
   

  _warn_prf(average, modifier, msg_start, len(result))


0.26428564201629917
0.2712285449549372
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.00      0.00      0.00      1990

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19905     3]
 [ 1990     0]]
done in 0.542418s
0.26428564201629917
0.27044922960654555
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
           1       0.00      0.00      0.00      1980

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19915     3]
 [ 1980     0]]
done in 0.542179s
0.26428564201629917
0.27193464291926434
Classification report
              precision    re

  _warn_prf(average, modifier, msg_start, len(result))


0.26428564201629917
0.2588664114511214
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2762
           1       0.00      0.00      0.00       254

    accuracy                           0.92      3016
   macro avg       0.46      0.50      0.48      3016
weighted avg       0.84      0.92      0.88      3016

Confusion_matrix
[[2760    2]
 [ 254    0]]
done in 0.519698s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.44      0.02      0.04      1990

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19859    49]
 [ 1951    39]]
done in 29.519114s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
   

Balanced accuracy score of val is  0.6984957437377767
Balanced accuracy score of test is  0.7095634525798197
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.398
threshold:0.2, J-value:0.27399999999999997
threshold:0.30000000000000004, J-value:0.14600000000000002
threshold:0.4, J-value:0.059000000000000004
threshold:0.5, J-value:0.015000000000000001
threshold:0.6000000000000001, J-value:0.002
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6989245879511224
Balanced accuracy score of test is  0.7075214911518781
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.392
threshold:0.2, J-value:0.263
threshold:0.30000000000000004, J-value:0.132
threshold:0.4, J-value:0.054000000000000006
threshold:0.5, J-value:0.034
threshold:0.6000000000000001, J-value:0.008
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshol

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26307168792840674
0.2547183682210981
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     20006
           1       0.38      0.03      0.06      1892

    accuracy                           0.91     21898
   macro avg       0.65      0.51      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19904   102]
 [ 1829    63]]
done in 0.518024s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26307168792840674
0.2562341245754884
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     19975
           1       0.48      0.04      0.08      1923

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.52     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19886    89]
 [ 1841    82]]
done in 0.521250s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26307168792840674
0.2556868567406752
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     17241
           1       0.38      0.03      0.06      1634

    accuracy                           0.91     18875
   macro avg       0.65      0.51      0.51     18875
weighted avg       0.87      0.91      0.88     18875

Confusion_matrix
[[17160    81]
 [ 1584    50]]
done in 0.503699s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26307168792840674
0.2597512527864634
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17310
           1       0.48      0.04      0.07      1699

    accuracy                           0.91     19009
   macro avg       0.70      0.52      0.51     19009
weighted avg       0.87      0.91      0.87     19009

Confusion_matrix
[[17242    68]
 [ 1637    62]]
done in 0.593209s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26307168792840674
0.24867132197332495
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2765
           1       0.38      0.05      0.09       258

    accuracy                           0.91      3023
   macro avg       0.65      0.52      0.52      3023
weighted avg       0.87      0.91      0.88      3023

Confusion_matrix
[[2744   21]
 [ 245   13]]
done in 0.562825s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26307168792840674
0.23309217574806565
Classification report
              precision    recall  f1-score   support

           0       0.93      0.99      0.96      2665
           1       0.49      0.09      0.15       224

    accuracy                           0.92      2889
   macro avg       0.71      0.54      0.56      2889
weighted avg       0.89      0.92      0.90      2889

Confusion_matrix
[[2644   21]
 [ 204   20]]
done in 0.502448s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     20006
           1       0.43      0.00      0.01      1892

    accuracy                           0.91     21898
   macro avg       0.67      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19998     8]
 [ 1886     6]]
done in 16.863882s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19975
  

  _warn_prf(average, modifier, msg_start, len(result))


0.2682128952371694
0.2638709963739227
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     20006
           1       0.56      0.00      0.01      1892

    accuracy                           0.91     21898
   macro avg       0.74      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19999     7]
 [ 1883     9]]
done in 0.543181s
0.2682128952371694
0.26619820202997585
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19975
           1       0.56      0.00      0.01      1923

    accuracy                           0.91     21898
   macro avg       0.74      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19968     7]
 [ 1914     9]]
done in 0.545854s
0.2682128952371694
0.26452887711964745
Classification report
              precision    recal

True positive rate of class 1 is  0.735
True positive rate of class 2 is  0.795
Positive prediction rate of class 1 is  0.386
Positive prediction rate of class 2 is  0.378
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.35400000000000004
threshold:0.2, J-value:0.215
threshold:0.30000000000000004, J-value:0.053000000000000005
threshold:0.4, J-value:0.049999999999999996
threshold:0.5, J-value:0.005
threshold:0.6000000000000001, J-value:0.005
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.001
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6772188480876455
Balanced accuracy score of test is  0.6904182620371148
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.3549999999999999
threshold:0.2, J-value:0.213
threshold:0.30000000000000004, J-value:0.051000000000000004
threshold:0.4, J-value:0.049999999999999996
threshold:0.5, J-value:0.004
threshold:0.6000000000000001, J-value:0.004
threshold:0.7000000000000001,

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25860650985764894
0.26393882410428504
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.44      0.03      0.06      1987

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19826    85]
 [ 1919    68]]
done in 0.595026s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25860650985764894
0.26085852705776025
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.45      0.03      0.06      1971

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19855    72]
 [ 1912    59]]
done in 0.580908s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25860650985764894
0.26590919284162867
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17229
           1       0.43      0.03      0.06      1741

    accuracy                           0.91     18970
   macro avg       0.67      0.51      0.51     18970
weighted avg       0.87      0.91      0.87     18970

Confusion_matrix
[[17155    74]
 [ 1685    56]]
done in 0.528408s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25860650985764894
0.2605670366255002
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17198
           1       0.44      0.03      0.05      1694

    accuracy                           0.91     18892
   macro avg       0.68      0.51      0.50     18892
weighted avg       0.87      0.91      0.87     18892

Confusion_matrix
[[17143    55]
 [ 1651    43]]
done in 0.554948s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25860650985764894
0.2511731496003881
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2682
           1       0.52      0.05      0.09       246

    accuracy                           0.92      2928
   macro avg       0.72      0.52      0.52      2928
weighted avg       0.89      0.92      0.88      2928

Confusion_matrix
[[2671   11]
 [ 234   12]]
done in 0.535269s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25860650985764894
0.2626904755761424
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2729
           1       0.48      0.06      0.10       277

    accuracy                           0.91      3006
   macro avg       0.70      0.53      0.53      3006
weighted avg       0.87      0.91      0.87      3006

Confusion_matrix
[[2712   17]
 [ 261   16]]
done in 0.531376s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.50      0.00      0.00      1987

    accuracy                           0.91     21898
   macro avg       0.70      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19908     3]
 [ 1984     3]]
done in 17.392160s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
   

  _warn_prf(average, modifier, msg_start, len(result))


0.26445464621705217
0.2702963274846509
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.32      0.00      0.01      1987

    accuracy                           0.91     21898
   macro avg       0.61      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19898    13]
 [ 1981     6]]
done in 0.552648s
0.26445464621705217
0.26900537003743663
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.30      0.00      0.01      1971

    accuracy                           0.91     21898
   macro avg       0.61      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19913    14]
 [ 1965     6]]
done in 0.552513s
0.26445464621705217
0.2725960626011161
Classification report
              precision    rec

True positive rate of class 1 is  0.743
True positive rate of class 2 is  0.74
Positive prediction rate of class 1 is  0.375
Positive prediction rate of class 2 is  0.386
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.348
threshold:0.2, J-value:0.21099999999999997
threshold:0.30000000000000004, J-value:0.11000000000000001
threshold:0.4, J-value:0.007
threshold:0.5, J-value:0.002
threshold:0.6000000000000001, J-value:0.002
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6740769195946623
Balanced accuracy score of test is  0.6700628399696437
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.347
threshold:0.2, J-value:0.21699999999999997
threshold:0.30000000000000004, J-value:0.11100000000000002
threshold:0.4, J-value:0.007
threshold:0.5, J-value:0.001
threshold:0.6000000000000001, J-value:0.001
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2614767344183246
0.2574320888106272
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.43      0.03      0.06      1954

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19864    80]
 [ 1893    61]]
done in 0.579522s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2614767344183246
0.25827171441251523
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
           1       0.47      0.04      0.07      1943

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19873    82]
 [ 1871    72]]
done in 0.565664s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2614767344183246
0.25815018639306336
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17147
           1       0.45      0.03      0.06      1695

    accuracy                           0.91     18842
   macro avg       0.68      0.51      0.51     18842
weighted avg       0.87      0.91      0.87     18842

Confusion_matrix
[[17081    66]
 [ 1642    53]]
done in 0.601031s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2614767344183246
0.2597863125598751
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17231
           1       0.43      0.03      0.06      1683

    accuracy                           0.91     18914
   macro avg       0.67      0.51      0.51     18914
weighted avg       0.87      0.91      0.87     18914

Confusion_matrix
[[17160    71]
 [ 1629    54]]
done in 0.676650s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2614767344183246
0.2530046036508553
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2797
           1       0.36      0.03      0.06       259

    accuracy                           0.91      3056
   macro avg       0.64      0.51      0.51      3056
weighted avg       0.87      0.91      0.88      3056

Confusion_matrix
[[2783   14]
 [ 251    8]]
done in 0.627656s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2614767344183246
0.24867147669161593
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2724
           1       0.62      0.07      0.12       260

    accuracy                           0.92      2984
   macro avg       0.77      0.53      0.54      2984
weighted avg       0.89      0.92      0.88      2984

Confusion_matrix
[[2713   11]
 [ 242   18]]
done in 0.609670s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.58      0.00      0.01      1954

    accuracy                           0.91     21898
   macro avg       0.75      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19939     5]
 [ 1947     7]]
done in 17.046681s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
   

threshold:0.30000000000000004, J-value:0.146
threshold:0.4, J-value:0.07
threshold:0.5, J-value:0.027
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7074866945769547
Balanced accuracy score of test is  0.7028318348296019
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.41800000000000004
threshold:0.2, J-value:0.26599999999999996
threshold:0.30000000000000004, J-value:0.149
threshold:0.4, J-value:0.07200000000000001
threshold:0.5, J-value:0.027
threshold:0.6000000000000001, J-value:0.013000000000000001
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7091967376320634
Balanced accuracy score of test is  0.6991537313067934
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.391
threshold:0.2, J-

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2582371104483379
0.2633338805897585
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.42      0.03      0.06      2005

    accuracy                           0.91     21898
   macro avg       0.66      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19799    94]
 [ 1938    67]]
done in 0.552769s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2582371104483379
0.26231292886047025
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.49      0.03      0.06      1991

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19837    70]
 [ 1924    67]]
done in 0.553341s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2582371104483379
0.2618893976431273
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17191
           1       0.42      0.03      0.06      1729

    accuracy                           0.91     18920
   macro avg       0.66      0.51      0.51     18920
weighted avg       0.87      0.91      0.87     18920

Confusion_matrix
[[17113    78]
 [ 1673    56]]
done in 0.541776s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2582371104483379
0.2644301021709705
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17135
           1       0.47      0.03      0.05      1730

    accuracy                           0.91     18865
   macro avg       0.69      0.51      0.50     18865
weighted avg       0.87      0.91      0.87     18865

Confusion_matrix
[[17081    54]
 [ 1683    47]]
done in 0.552509s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2582371104483379
0.2725110522990473
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2702
           1       0.41      0.04      0.07       276

    accuracy                           0.91      2978
   macro avg       0.66      0.52      0.51      2978
weighted avg       0.86      0.91      0.87      2978

Confusion_matrix
[[2686   16]
 [ 265   11]]
done in 0.529106s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2582371104483379
0.24914429236110083
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2772
           1       0.56      0.08      0.13       261

    accuracy                           0.92      3033
   macro avg       0.74      0.54      0.55      3033
weighted avg       0.89      0.92      0.88      3033

Confusion_matrix
[[2756   16]
 [ 241   20]]
done in 0.532639s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.55      0.01      0.01      2005

    accuracy                           0.91     21898
   macro avg       0.73      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19884     9]
 [ 1994    11]]
done in 17.709035s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
   

  _warn_prf(average, modifier, msg_start, len(result))


0.2638844630577774
0.27072063991639456
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.08      0.00      0.00      2005

    accuracy                           0.91     21898
   macro avg       0.50      0.50      0.48     21898
weighted avg       0.83      0.91      0.86     21898

Confusion_matrix
[[19882    11]
 [ 2004     1]]
done in 0.548336s
0.2638844630577774
0.27224900833516935
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.33      0.00      0.00      1991

    accuracy                           0.91     21898
   macro avg       0.62      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19903     4]
 [ 1989     2]]
done in 0.544960s
0.2638844630577774
0.26930585598613827
Classification report
              precision    reca

True positive rate of class 2 is  0.77
Positive prediction rate of class 1 is  0.375
Positive prediction rate of class 2 is  0.386
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.37400000000000005
threshold:0.2, J-value:0.21000000000000002
threshold:0.30000000000000004, J-value:0.093
threshold:0.4, J-value:0.008
threshold:0.5, J-value:-0.001
threshold:0.6000000000000001, J-value:-0.001
threshold:0.7000000000000001, J-value:-0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.687285719246347
Balanced accuracy score of test is  0.6872702163503284
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.38099999999999995
threshold:0.2, J-value:0.21500000000000002
threshold:0.30000000000000004, J-value:0.10099999999999999
threshold:0.4, J-value:0.008
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25971781934116017
0.262211723749739
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.50      0.04      0.08      1967

    accuracy                           0.91     21898
   macro avg       0.71      0.52      0.52     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19849    82]
 [ 1884    83]]
done in 0.527450s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25971781934116017
0.2591531352572
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
           1       0.46      0.03      0.06      1942

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19876    80]
 [ 1875    67]]
done in 0.533969s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25971781934116017
0.26055698870415095
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17217
           1       0.52      0.04      0.08      1688

    accuracy                           0.91     18905
   macro avg       0.72      0.52      0.51     18905
weighted avg       0.88      0.91      0.87     18905

Confusion_matrix
[[17153    64]
 [ 1619    69]]
done in 0.526311s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25971781934116017
0.26027395563350075
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17231
           1       0.45      0.03      0.05      1687

    accuracy                           0.91     18918
   macro avg       0.68      0.51      0.50     18918
weighted avg       0.87      0.91      0.87     18918

Confusion_matrix
[[17170    61]
 [ 1638    49]]
done in 0.541580s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25971781934116017
0.272663700374143
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2714
           1       0.44      0.05      0.09       279

    accuracy                           0.91      2993
   macro avg       0.67      0.52      0.52      2993
weighted avg       0.87      0.91      0.87      2993

Confusion_matrix
[[2696   18]
 [ 265   14]]
done in 0.509136s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25971781934116017
0.2520378064387914
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2725
           1       0.49      0.07      0.12       255

    accuracy                           0.91      2980
   macro avg       0.70      0.53      0.54      2980
weighted avg       0.88      0.91      0.88      2980

Confusion_matrix
[[2706   19]
 [ 237   18]]
done in 0.493963s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.60      0.00      0.01      1967

    accuracy                           0.91     21898
   macro avg       0.76      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19927     4]
 [ 1961     6]]
done in 17.001315s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
   

threshold:0.30000000000000004, J-value:0.149
threshold:0.4, J-value:0.07200000000000001
threshold:0.5, J-value:0.038000000000000006
threshold:0.6000000000000001, J-value:0.009999999999999998
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6973386092542913
Balanced accuracy score of test is  0.6995201750751756
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.392
threshold:0.2, J-value:0.247
threshold:0.30000000000000004, J-value:0.15
threshold:0.4, J-value:0.07200000000000001
threshold:0.5, J-value:0.037000000000000005
threshold:0.6000000000000001, J-value:0.009999999999999998
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6959635948928468
Balanced accuracy score of test is  0.6987705365672221
threshold:0.0, J-value:0.0
threshold:0.1, 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2599660464048286
0.2551448535640254
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.50      0.04      0.07      1925

    accuracy                           0.91     21898
   macro avg       0.71      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19902    71]
 [ 1853    72]]
done in 0.526724s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2599660464048286
0.2649737104347409
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
           1       0.44      0.03      0.06      2032

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19780    86]
 [ 1965    67]]
done in 0.524349s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2599660464048286
0.25506420355805376
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17189
           1       0.49      0.03      0.07      1658

    accuracy                           0.91     18847
   macro avg       0.70      0.52      0.51     18847
weighted avg       0.88      0.91      0.88     18847

Confusion_matrix
[[17129    60]
 [ 1600    58]]
done in 0.528842s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2599660464048286
0.26584342030026387
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17067
           1       0.43      0.03      0.05      1750

    accuracy                           0.91     18817
   macro avg       0.67      0.51      0.50     18817
weighted avg       0.86      0.91      0.87     18817

Confusion_matrix
[[17001    66]
 [ 1701    49]]
done in 0.526616s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2599660464048286
0.25564305437115337
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95      2784
           1       0.56      0.05      0.10       267

    accuracy                           0.91      3051
   macro avg       0.74      0.52      0.53      3051
weighted avg       0.89      0.91      0.88      3051

Confusion_matrix
[[2773   11]
 [ 253   14]]
done in 0.512819s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2599660464048286
0.2596620160045082
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2799
           1       0.47      0.06      0.11       282

    accuracy                           0.91      3081
   macro avg       0.69      0.53      0.53      3081
weighted avg       0.87      0.91      0.87      3081

Confusion_matrix
[[2779   20]
 [ 264   18]]
done in 0.512091s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.50      0.00      0.01      1925

    accuracy                           0.91     21898
   macro avg       0.71      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19968     5]
 [ 1920     5]]
done in 16.822888s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
    

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2799
           1       1.00      0.00      0.01       282

    accuracy                           0.91      3081
   macro avg       0.95      0.50      0.48      3081
weighted avg       0.92      0.91      0.87      3081

Confusion_matrix
[[2799    0]
 [ 281    1]]
done in 16.503383s
0.26553645312445306
0.2694114094281803
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.12      0.00      0.00      1925

    accuracy                           0.91     21898
   macro avg       0.51      0.50      0.48     21898
weighted avg       0.84      0.91      0.87     21898

Confusion_matrix
[[19958    15]
 [ 1923     2]]
done in 0.544256s
0.26553645312445306
0.279841317193865
Classification report
              precision    recall  f1-score   support

           0       

threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.40199999999999997
threshold:0.2, J-value:0.263
threshold:0.30000000000000004, J-value:0.127
threshold:0.4, J-value:0.018
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7008104094020406
Balanced accuracy score of test is  0.7137845583148996
True positive rate of class 1 is  0.75
True positive rate of class 2 is  0.773
Positive prediction rate of class 1 is  0.382
Positive prediction rate of class 2 is  0.385
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.33799999999999997
threshold:0.2, J-value:0.193
threshold:0.30000000000000004, J-value:0.067
threshold:0.4, J-value:0.054
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal

In [9]:
def add_mean_sd(records, result_table, overall_records, type):
    """Append mean/SD summary rows for one model and extend the cross-model table.

    Parameters
    ----------
    records : list of dict
        Per-run metric rows. Two extra dicts are appended IN PLACE: first the
        column-wise means, then the column-wise standard deviations.
    result_table : table indexable by column name (presumably a DataFrame)
        Source of the statistics; each metric column must support ``.mean()``
        and ``.std()``. For pandas objects ``.std()`` is the sample standard
        deviation (ddof=1).
    overall_records : list of dict
        Cross-model summary. One dict holding ``type`` plus the means of the
        headline metrics is appended IN PLACE.
    type : str
        Label stored under the ``'type'`` key of the cross-model row
        (callers in this notebook pass e.g. ``'lr'``).

    Returns
    -------
    (pandas.DataFrame, list)
        A DataFrame built from the extended ``records`` and the same
        ``overall_records`` list that was passed in.
    """
    # Every metric that receives a mean row and an SD row. The order matters:
    # it fixes the key order of the dicts appended to ``records``.
    metric_columns = (
        'auroc',
        'overall threshold', 'white threshold', 'black threshold',
        'overall ba validation', 'overall ba test',
        'white ba validation', 'white ba test',
        'black ba validation', 'black ba test',
        'overall precision', 'overall recall', 'overall tpr', 'overall tnr', 'overall pd',
        'white precision', 'white recall', 'white tpr', 'white tnr', 'white pd',
        'black precision', 'black recall', 'black tpr', 'black tnr', 'black pd',
        'eod', 'di',
    )
    # Subset (means only) that is carried over to the cross-model table.
    headline_columns = (
        'auroc',
        'overall threshold', 'white threshold', 'black threshold',
        'overall ba test', 'white ba test', 'black ba test',
        'overall tpr', 'overall pd',
        'white tpr', 'white pd',
        'black tpr', 'black pd',
        'eod', 'di',
    )

    def column_stats(columns, stat_name):
        # Build {column: statistic}; stat_name is the reducer method to call.
        return {col: getattr(result_table[col], stat_name)() for col in columns}

    # Each dict is fully built before it is appended, so a missing column
    # raises before the corresponding list is touched.
    records.append(column_stats(metric_columns, 'mean'))
    records.append(column_stats(metric_columns, 'std'))

    headline_row = {'type': type}
    headline_row.update(column_stats(headline_columns, 'mean'))
    overall_records.append(headline_row)

    return pd.DataFrame(records), overall_records

In [10]:
# Summarise each model's per-run metrics and collect one headline row per model.
# add_mean_sd appends to the list it is given and returns that same list, so
# `overall_table` and `overall_records` refer to one shared list throughout.
# NOTE: this cell is not idempotent. Each run appends another mean/SD pair to the
# records_* lists and rebinds result_* to the summarised tables, so a second
# run would average rows that already contain means and SDs. Re-run the cells
# that build records_* / result_* before executing this one again.
overall_table = []
result_lr, overall_records = add_mean_sd(records_lr, result_lr, overall_table, 'lr')
result_rf, overall_records = add_mean_sd(records_rf, result_rf, overall_records, 'rf')
result_dt, overall_records = add_mean_sd(records_dt, result_dt, overall_records, 'dt')
result_gbt, overall_records = add_mean_sd(records_gbt, result_gbt, overall_records, 'gbt')

# Per-model tables (individual runs followed by the mean row and the SD row).
result_path = '/Users/lifuchen/Desktop/research/resample_data/'
for model_result, csv_name in [
    (result_lr, 'race-lr-result.csv'),
    (result_rf, 'race-rf-result.csv'),
    (result_dt, 'race-dt-result.csv'),
    (result_gbt, 'race-gbt-result.csv'),
]:
    model_result.to_csv(path.join(result_path, csv_name), index=False)

# Cross-model comparison: one row of headline means per classifier.
result_path = '/Users/lifuchen/Desktop/research/resample_result/'
overall_result = pd.DataFrame(overall_table)
overall_result.to_csv(path.join(result_path, 'race.csv'), index=False)
