In [22]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from os import path
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
import sklearn.preprocessing
from sklearn import preprocessing
from sklearn.preprocessing import Normalizer
import src.lib.utility_classfier as uclf
import src.lib.optimal_threhold_related as thres
import src.lib.fairness_tests as fair

In [23]:
# Read the full dataset from a CSV export into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook
# cannot be re-run on another machine without editing it.
data_path='/Users/lifuchen/Desktop/research/data.csv'
df = pd.read_csv(data_path)

In [24]:
# Labels are the raw values of the `Class` column.
y = df['Class'].values
# Features are every remaining column except `GRID` and the label itself.
# NOTE(review): `GRID` is presumably a record identifier - confirm.
X = df.drop(columns=['GRID', 'Class'])
# Displayed as the cell output: (rows, feature columns).
X.shape

(109490, 87)

In [25]:
def save_prediction(classifier, characteristic, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white, X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black, result_path='/Users/lifuchen/Desktop/research/predictions/'):
    """Score every evaluation split with one classifier and save the scores as CSV.

    The callable named ``classifier`` is looked up on ``uclf`` and invoked once
    per evaluation split as ``f(X_train_scaled, y_train, X_eval, y_eval)``. The
    six results become the columns ``val_score``, ``test_score``,
    ``val_1_score``, ``test_1_score``, ``val_2_score`` and ``test_2_score``
    (``1`` = the ``*_white`` split, ``2`` = the ``*_black`` split).

    Parameters
    ----------
    classifier : str
        Attribute name of the scoring function on ``uclf``.
    characteristic : object
        Tag appended to the classifier name to build the file name; it is
        converted with ``str``.
    X_train_scaled, y_train
        Training data handed to every scoring call.
    X_*_scaled, y_*
        Feature/label pairs for the overall, white and black validation and
        test splits.
    result_path : str, optional
        Directory that receives ``<classifier><characteristic>prediction.csv``.
        Defaults to the location the notebook has always used; it is created
        when it does not exist yet.

    Returns
    -------
    None
        The only effect is the CSV file written to ``result_path``.
    """
    import os  # local import: the notebook only imports `path` from os

    method_to_call = getattr(uclf, classifier)

    # (output column, features, labels) in the order the scores are computed.
    eval_splits = (
        ('val_score', X_val_scaled, y_val),
        ('test_score', X_test_scaled, y_test),
        ('val_1_score', X_val_white_scaled, y_val_white),
        ('test_1_score', X_test_white_scaled, y_test_white),
        ('val_2_score', X_val_black_scaled, y_val_black),
        ('test_2_score', X_test_black_scaled, y_test_black),
    )

    # NOTE(review): the captured notebook output shows a fresh solver warning
    # for every one of these calls, which suggests each call refits the model
    # from scratch - confirm in uclf; fitting once would be much cheaper.
    my_dict = {
        column: method_to_call(X_train_scaled, y_train, X_eval, y_eval)
        for column, X_eval, y_eval in eval_splits
    }

    # The splits have different lengths. Building row-wise (orient='index')
    # and transposing yields one column per split, with the shorter columns
    # left empty at the bottom; get_result filters those blanks out again.
    overall_prediction = pd.DataFrame.from_dict(my_dict, orient='index').transpose()

    # Make sure the target directory exists so a first run does not fail
    # after all the scoring work has already been done.
    os.makedirs(result_path, exist_ok=True)
    filename = str(classifier) + str(characteristic) + "prediction.csv"
    overall_prediction.to_csv(path.join(result_path, filename), index=False)


In [26]:
def get_result(classifier, characteristic, records, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white, X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black, result_path='/Users/lifuchen/Desktop/research/predictions/'):
    """Evaluate saved scores for one classifier and append the metrics to ``records``.

    Reads ``<classifier><characteristic>prediction.csv`` from ``result_path``,
    picks a threshold on each validation split via ``balance_accuracy``, and
    evaluates the matching test split with that threshold. This is done for
    the overall, white and black splits; the two group results are then passed
    to ``fair.get_EOD`` and ``fair.get_SP``.

    Parameters
    ----------
    classifier : str
        Classifier name used as the file-name prefix.
    characteristic : str
        Tag that follows the classifier name in the file name.
    records : list
        Receives one dict of metrics (appended in place).
    y_val, y_test, y_val_white, y_test_white, y_val_black, y_test_black
        True labels for each split; they must line up with the saved scores.
    X_train_scaled, y_train, X_*_scaled
        Not used here; accepted so the call mirrors ``save_prediction``.
    result_path : str, optional
        Directory holding the prediction CSV. Defaults to the location the
        notebook has always used.

    Returns
    -------
    None
    """
    filename = str(classifier) + characteristic + "prediction.csv"
    prediction = pd.read_csv(path.join(result_path, filename))

    def saved_scores(column):
        # Columns shorter than the longest split come back from the CSV with
        # trailing NaN cells; drop them to recover that split's own scores.
        return prediction[column].dropna()

    y_val_score = saved_scores('val_score')
    y_test_score = saved_scores('test_score')

    y_val_score_white = saved_scores('val_1_score')
    y_test_score_white = saved_scores('test_1_score')

    y_val_score_black = saved_scores('val_2_score')
    y_test_score_black = saved_scores('test_2_score')

    # Overall: threshold chosen on validation, then applied to test.
    threshold, ba_val, ba_test = balance_accuracy(y_val, y_val_score, y_test, y_test_score)
    auroc = roc_auc_score(y_test, y_test_score)
    precision, recall, tpr, tnr, pd_overall = thres.calculate_precision_metrics(y_test, y_test_score, threshold)

    # Each group gets its own validation-derived threshold.
    threshold_white, ba_val_white, ba_test_white = balance_accuracy(y_val_white, y_val_score_white, y_test_white, y_test_score_white)
    precision_white, recall_white, tpr_white, tnr_white, pd_white = thres.calculate_precision_metrics(y_test_white, y_test_score_white, threshold_white)

    threshold_black, ba_val_black, ba_test_black = balance_accuracy(y_val_black, y_val_score_black, y_test_black, y_test_score_black)
    precision_black, recall_black, tpr_black, tnr_black, pd_black = thres.calculate_precision_metrics(y_test_black, y_test_score_black, threshold_black)

    # Fairness comparisons use the test splits with the group-specific thresholds.
    eod = fair.get_EOD(y_test_white, y_test_score_white, threshold_white, y_test_black, y_test_score_black, threshold_black)
    sp = fair.get_SP(y_test_white, y_test_score_white, threshold_white, y_test_black, y_test_score_black, threshold_black)

    records.append({
        'auroc': auroc,
        'overall threshold': threshold,
        'white threshold': threshold_white,
        'black threshold': threshold_black,
        'overall ba validation': ba_val,
        'overall ba test': ba_test,
        'white ba validation': ba_val_white,
        'white ba test': ba_test_white,
        'black ba validation': ba_val_black,
        'black ba test': ba_test_black,
        'overall precision': precision,
        'overall recall': recall,
        'overall tpr': tpr,
        'overall tnr': tnr,
        'overall pd': pd_overall,
        'white precision': precision_white,
        'white recall': recall_white,
        'white tpr': tpr_white,
        'white tnr': tnr_white,
        'white pd': pd_white,
        'black precision': precision_black,
        'black recall': recall_black,
        'black tpr': tpr_black,
        'black tnr': tnr_black,
        'black pd': pd_black,
        'eod': eod,
        # NOTE(review): this key is named 'di' but stores the value returned
        # by fair.get_SP - confirm which metric that helper computes.
        'di': sp,
        })

In [27]:
def balance_accuracy(y_val, y_val_score, y_test, y_test_score):
    """Choose a threshold on validation data and report balanced accuracy.

    The threshold comes from ``thres.get_optimal_threshold_Jvalue`` applied to
    the validation labels and scores. That single threshold is then used to
    compute balanced accuracy on the validation split and on the test split.
    Each value is printed as soon as it is computed.

    Returns
    -------
    tuple
        ``(threshold, ba_val, ba_test)``.
    """
    threshold, _j_value = thres.get_optimal_threshold_Jvalue(y_val, y_val_score)
    print("Optimal threshold by J value is ", threshold)

    # Validation first, then test; both use the validation-derived threshold.
    balanced = []
    for message, labels, scores in (
        ("Balanced accuracy score of val is ", y_val, y_val_score),
        ("Balanced accuracy score of test is ", y_test, y_test_score),
    ):
        value = thres.calculate_balanced_accuracy(labels, scores, threshold)
        print(message, value)
        balanced.append(value)

    return (threshold, *balanced)

In [28]:
def fairness_metrics(X, y, attribute, random_state):
    """Run one split / scale / score / evaluate cycle for all four classifiers.

    ``fair.split_by_trait`` supplies the train, validation and test splits
    plus per-group (white / black) validation and test subsets. Features are
    scaled with a ``MaxAbsScaler`` fitted on the training split only. Scores
    for every classifier are written by ``save_prediction`` and then evaluated
    by ``get_result``.

    Results are appended to the module-level lists ``records_lr``,
    ``records_rf``, ``records_dt`` and ``records_gbt``, which must exist
    before this function is called. Nothing is returned.
    """
    (X_train, y_train,
     X_val, y_val,
     X_test, y_test,
     X_val_white, X_val_black,
     y_val_white, y_val_black,
     X_test_white, X_test_black,
     y_test_white, y_test_black) = fair.split_by_trait(X, y, attribute, random_state)

    # Row counts as a sanity check: overall, white, black for each split.
    print("X train", X_train.shape[0])
    print("Y train", y_train.shape[0])
    for overall, white, black in (
        (X_val, X_val_white, X_val_black),
        (y_val, y_val_white, y_val_black),
        (X_test, X_test_white, X_test_black),
        (y_test, y_test_white, y_test_black),
    ):
        print(overall.shape[0], white.shape[0], black.shape[0])

    # Fit the scaler on training data only, then reuse it for every other split.
    max_abs_scaler = preprocessing.MaxAbsScaler()
    X_train_scaled = max_abs_scaler.fit_transform(X_train)
    (X_test_scaled, X_test_white_scaled, X_test_black_scaled,
     X_val_scaled, X_val_white_scaled, X_val_black_scaled) = (
        max_abs_scaler.transform(part)
        for part in (X_test, X_test_white, X_test_black, X_val, X_val_white, X_val_black)
    )

    characteristic = attribute + str(random_state)

    # Positional tail shared by every save_prediction / get_result call.
    split_args = (
        X_train_scaled, y_train,
        X_val_scaled, y_val,
        X_test_scaled, y_test,
        X_val_white_scaled, y_val_white,
        X_test_white_scaled, y_test_white,
        X_val_black_scaled, y_val_black,
        X_test_black_scaled, y_test_black,
    )

    # Write all prediction files first ...
    for classifier_name in ("logic_regression", "random_forest", "decision_tree", "gradiant_boosting"):
        save_prediction(classifier_name, characteristic, *split_args)

    # ... then evaluate each one into its own module-level record list.
    get_result("logic_regression", characteristic, records_lr, *split_args)
    get_result("random_forest", characteristic, records_rf, *split_args)
    get_result("decision_tree", characteristic, records_dt, *split_args)
    get_result("gradiant_boosting", characteristic, records_gbt, *split_args)

In [29]:
# One accumulator per classifier. fairness_metrics hands these module-level
# lists to get_result, which appends one metrics dict per run.
records_lr, records_rf, records_dt, records_gbt = [], [], [], []

# Repeat the whole split / train / evaluate cycle for ten different seeds.
for random_state in range(10):
    fairness_metrics(X, y, "Race_W", random_state)

# One row per seed, one column per metric.
result_lr, result_rf, result_dt, result_gbt = (
    pd.DataFrame(rows)
    for rows in (records_lr, records_rf, records_dt, records_gbt)
)


X train 65694
Y train 65694
21898 18899 2999
21898 18899 2999
21898 18968 2930
21898 18968 2930


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25882185696726207
0.2619870551548744
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.43      0.03      0.06      1994

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19822    82]
 [ 1931    63]]
done in 0.604577s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25882185696726207
0.2623041485961046
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.43      0.03      0.06      1964

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19847    87]
 [ 1899    65]]
done in 0.523775s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25882185696726207
0.26282254893490486
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17180
           1       0.45      0.03      0.06      1719

    accuracy                           0.91     18899
   macro avg       0.68      0.51      0.51     18899
weighted avg       0.87      0.91      0.87     18899

Confusion_matrix
[[17114    66]
 [ 1665    54]]
done in 0.517697s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25882185696726207
0.26278189359925863
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17260
           1       0.42      0.03      0.06      1708

    accuracy                           0.91     18968
   macro avg       0.67      0.51      0.51     18968
weighted avg       0.87      0.91      0.87     18968

Confusion_matrix
[[17186    74]
 [ 1654    54]]
done in 0.513775s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25882185696726207
0.25672196780949413
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2724
           1       0.36      0.03      0.06       275

    accuracy                           0.91      2999
   macro avg       0.64      0.51      0.51      2999
weighted avg       0.86      0.91      0.87      2999

Confusion_matrix
[[2708   16]
 [ 266    9]]
done in 0.550020s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25882185696726207
0.259211361149066
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95      2674
           1       0.46      0.04      0.08       256

    accuracy                           0.91      2930
   macro avg       0.69      0.52      0.52      2930
weighted avg       0.88      0.91      0.88      2930

Confusion_matrix
[[2661   13]
 [ 245   11]]
done in 0.497400s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.45      0.00      0.01      1994

    accuracy                           0.91     21898
   macro avg       0.68      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19893    11]
 [ 1985     9]]
done in 21.349485s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
    

  _warn_prf(average, modifier, msg_start, len(result))


0.26448598669059525
0.2778694234760347
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17260
           1       0.00      0.00      0.00      1708

    accuracy                           0.91     18968
   macro avg       0.45      0.50      0.48     18968
weighted avg       0.83      0.91      0.87     18968

Confusion_matrix
[[17257     3]
 [ 1708     0]]
done in 0.710314s
0.26448598669059525
0.27820099960359335
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2724
           1       0.00      0.00      0.00       275

    accuracy                           0.91      2999
   macro avg       0.45      0.50      0.48      2999
weighted avg       0.82      0.91      0.86      2999

Confusion_matrix
[[2723    1]
 [ 275    0]]
done in 0.734793s
0.26448598669059525
0.270807273264847
Classification report
              precision    recall  

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.46      0.02      0.03      1994

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19864    40]
 [ 1960    34]]
done in 36.620208s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.46      0.02      0.05      1964

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19878    56]
 [ 1917    47]]
done in 36.264064s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17180
           1       0.48      0.02    

threshold:0.4, J-value:0.056
threshold:0.5, J-value:0.016
threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7110900495116892
Balanced accuracy score of test is  0.6999248305974746
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.46699999999999997
threshold:0.2, J-value:0.32699999999999996
threshold:0.30000000000000004, J-value:0.165
threshold:0.4, J-value:0.085
threshold:0.5, J-value:0.009
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7335142170604725
Balanced accuracy score of test is  0.6902536579094989
True positive rate of class 1 is  0.674
True positive rate of class 2 is  0.645
Positive prediction rate of class 1 is  0.311
Positive prediction rate of cl

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2620886582289643
0.2560448666099038
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     19980
           1       0.48      0.04      0.07      1918

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19898    82]
 [ 1842    76]]
done in 1.062733s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2620886582289643
0.25793694183031485
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.43      0.03      0.06      1926

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19885    87]
 [ 1861    65]]
done in 1.134046s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2620886582289643
0.2555946141881812
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     17178
           1       0.46      0.04      0.07      1647

    accuracy                           0.91     18825
   macro avg       0.69      0.52      0.51     18825
weighted avg       0.88      0.91      0.88     18825

Confusion_matrix
[[17108    70]
 [ 1587    60]]
done in 1.157579s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2620886582289643
0.260736585597518
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17196
           1       0.44      0.03      0.06      1687

    accuracy                           0.91     18883
   macro avg       0.68      0.52      0.51     18883
weighted avg       0.87      0.91      0.87     18883

Confusion_matrix
[[17122    74]
 [ 1629    58]]
done in 1.371977s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2620886582289643
0.25880308393464396
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95      2802
           1       0.57      0.06      0.11       271

    accuracy                           0.91      3073
   macro avg       0.74      0.53      0.53      3073
weighted avg       0.89      0.91      0.88      3073

Confusion_matrix
[[2790   12]
 [ 255   16]]
done in 1.208129s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2620886582289643
0.24040272184487665
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2776
           1       0.35      0.03      0.05       239

    accuracy                           0.92      3015
   macro avg       0.64      0.51      0.51      3015
weighted avg       0.88      0.92      0.89      3015

Confusion_matrix
[[2763   13]
 [ 232    7]]
done in 1.203346s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19980
           1       0.75      0.00      0.00      1918

    accuracy                           0.91     21898
   macro avg       0.83      0.50      0.48     21898
weighted avg       0.90      0.91      0.87     21898

Confusion_matrix
[[19979     1]
 [ 1915     3]]
done in 21.285170s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
   

threshold:0.2, J-value:0.27599999999999997
threshold:0.30000000000000004, J-value:0.155
threshold:0.4, J-value:0.07
threshold:0.5, J-value:0.036000000000000004
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7021389481243496
Balanced accuracy score of test is  0.6985516223231736
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.406
threshold:0.2, J-value:0.27999999999999997
threshold:0.30000000000000004, J-value:0.154
threshold:0.4, J-value:0.066
threshold:0.5, J-value:0.032
threshold:0.6000000000000001, J-value:0.011
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7031298345980297
Balanced accuracy score of test is  0.7005543534269214
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.392
th

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2594297514603444
0.26010346671211954
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.47      0.04      0.07      1948

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19873    77]
 [ 1879    69]]
done in 1.355038s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2594297514603444
0.26234536939014164
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.51      0.03      0.06      2015

    accuracy                           0.91     21898
   macro avg       0.71      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19823    60]
 [ 1953    62]]
done in 1.397836s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2594297514603444
0.2628301736188533
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17239
           1       0.45      0.03      0.06      1697

    accuracy                           0.91     18936
   macro avg       0.68      0.51      0.51     18936
weighted avg       0.87      0.91      0.87     18936

Confusion_matrix
[[17175    64]
 [ 1644    53]]
done in 1.428232s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2594297514603444
0.2631953744968694
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17079
           1       0.53      0.03      0.05      1750

    accuracy                           0.91     18829
   macro avg       0.72      0.51      0.50     18829
weighted avg       0.87      0.91      0.87     18829

Confusion_matrix
[[17035    44]
 [ 1700    50]]
done in 1.572817s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2594297514603444
0.24267169021451285
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2711
           1       0.55      0.06      0.11       251

    accuracy                           0.92      2962
   macro avg       0.74      0.53      0.54      2962
weighted avg       0.89      0.92      0.88      2962

Confusion_matrix
[[2698   13]
 [ 235   16]]
done in 1.530497s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2594297514603444
0.2571303983394487
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2804
           1       0.43      0.05      0.08       265

    accuracy                           0.91      3069
   macro avg       0.67      0.52      0.52      3069
weighted avg       0.87      0.91      0.88      3069

Confusion_matrix
[[2788   16]
 [ 253   12]]
done in 2.061455s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.43      0.00      0.01      1948

    accuracy                           0.91     21898
   macro avg       0.67      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19942     8]
 [ 1942     6]]
done in 19.835581s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
    

  _warn_prf(average, modifier, msg_start, len(result))


0.264372916262498
0.27783156880023685
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.00      0.00      0.00      2015

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.82      0.91      0.86     21898

Confusion_matrix
[[19880     3]
 [ 2015     0]]
done in 0.683664s
0.264372916262498
0.27110982457726274
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17239
           1       0.00      0.00      0.00      1697

    accuracy                           0.91     18936
   macro avg       0.46      0.50      0.48     18936
weighted avg       0.83      0.91      0.87     18936

Confusion_matrix
[[17239     0]
 [ 1697     0]]
done in 0.667593s


  _warn_prf(average, modifier, msg_start, len(result))


0.264372916262498
0.27474664355551137
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17079
           1       0.00      0.00      0.00      1750

    accuracy                           0.91     18829
   macro avg       0.45      0.50      0.48     18829
weighted avg       0.82      0.91      0.86     18829

Confusion_matrix
[[17079     0]
 [ 1750     0]]
done in 0.603572s


  _warn_prf(average, modifier, msg_start, len(result))


0.264372916262498
0.2525807016254122
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2711
           1       0.00      0.00      0.00       251

    accuracy                           0.92      2962
   macro avg       0.46      0.50      0.48      2962
weighted avg       0.84      0.92      0.87      2962

Confusion_matrix
[[2711    0]
 [ 251    0]]
done in 0.597418s


  _warn_prf(average, modifier, msg_start, len(result))


0.264372916262498
0.3080123553195742
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2804
           1       0.00      0.00      0.00       265

    accuracy                           0.91      3069
   macro avg       0.46      0.50      0.48      3069
weighted avg       0.83      0.91      0.87      3069

Confusion_matrix
[[2800    4]
 [ 265    0]]
done in 0.594193s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.43      0.02      0.03      1948

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19904    46]
 [ 1913    35]]
done in 33.001511s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
     

Balanced accuracy score of test is  0.7106360671466541
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.401
threshold:0.2, J-value:0.255
threshold:0.30000000000000004, J-value:0.135
threshold:0.4, J-value:0.05
threshold:0.5, J-value:0.013
threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7004537203623789
Balanced accuracy score of test is  0.7135740466571312
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.504
threshold:0.2, J-value:0.291
threshold:0.30000000000000004, J-value:0.159
threshold:0.4, J-value:0.07200000000000001
threshold:0.5, J-value:0.032999999999999995
threshold:0.6000000000000001, J-value:0.02
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7522223904088552
Balanced accuracy score

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.258858590651608
0.2629661801127107
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.46      0.04      0.07      1990

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19823    85]
 [ 1917    73]]
done in 1.196525s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.258858590651608
0.26086137987781655
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
           1       0.43      0.03      0.06      1980

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19830    88]
 [ 1914    66]]
done in 0.954461s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.258858590651608
0.2631449162676813
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17204
           1       0.48      0.04      0.07      1728

    accuracy                           0.91     18932
   macro avg       0.70      0.52      0.51     18932
weighted avg       0.87      0.91      0.87     18932

Confusion_matrix
[[17139    65]
 [ 1667    61]]
done in 1.179350s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.258858590651608
0.2632369222214004
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17156
           1       0.40      0.03      0.05      1726

    accuracy                           0.91     18882
   macro avg       0.66      0.51      0.50     18882
weighted avg       0.86      0.91      0.87     18882

Confusion_matrix
[[17083    73]
 [ 1677    49]]
done in 0.896389s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.258858590651608
0.2618253059097764
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2704
           1       0.38      0.05      0.08       262

    accuracy                           0.91      2966
   macro avg       0.64      0.52      0.52      2966
weighted avg       0.87      0.91      0.88      2966

Confusion_matrix
[[2684   20]
 [ 250   12]]
done in 0.908404s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.258858590651608
0.2459890355371171
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2762
           1       0.53      0.07      0.12       254

    accuracy                           0.92      3016
   macro avg       0.73      0.53      0.54      3016
weighted avg       0.89      0.92      0.89      3016

Confusion_matrix
[[2747   15]
 [ 237   17]]
done in 0.898778s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.47      0.00      0.01      1990

    accuracy                           0.91     21898
   macro avg       0.69      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19900     8]
 [ 1983     7]]
done in 19.833456s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
     

  _warn_prf(average, modifier, msg_start, len(result))


0.26428564201629917
0.2588664114511214
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2762
           1       0.00      0.00      0.00       254

    accuracy                           0.92      3016
   macro avg       0.46      0.50      0.48      3016
weighted avg       0.84      0.92      0.88      3016

Confusion_matrix
[[2760    2]
 [ 254    0]]
done in 0.613278s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.44      0.02      0.04      1990

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19859    49]
 [ 1951    39]]
done in 33.386764s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
   

threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6984957437377767
Balanced accuracy score of test is  0.70961365842378
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.398
threshold:0.2, J-value:0.27399999999999997
threshold:0.30000000000000004, J-value:0.14600000000000002
threshold:0.4, J-value:0.059000000000000004
threshold:0.5, J-value:0.015000000000000001
threshold:0.6000000000000001, J-value:0.002
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6988955249425198
Balanced accuracy score of test is  0.7075214911518781
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.392
threshold:0.2, J-value:0.263
threshold:0.30000000000000004, J-value:0.132
threshold:0.4, J-value:0.05800000000000001
threshold:0.5, J-value:0.034
threshold:0.6000000000000001, J-value:0.015
threshold:0.7000000000000001, J-value:0.0
threshold

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26309747172570896
0.2547885608939852
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     20006
           1       0.38      0.03      0.06      1892

    accuracy                           0.91     21898
   macro avg       0.65      0.51      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19905   101]
 [ 1829    63]]
done in 0.960614s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26309747172570896
0.25632492941221824
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     19975
           1       0.47      0.04      0.08      1923

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.52     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19884    91]
 [ 1841    82]]
done in 1.249491s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26309747172570896
0.2557437468569436
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     17241
           1       0.38      0.03      0.06      1634

    accuracy                           0.91     18875
   macro avg       0.65      0.51      0.50     18875
weighted avg       0.87      0.91      0.88     18875

Confusion_matrix
[[17162    79]
 [ 1585    49]]
done in 0.986320s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26309747172570896
0.2597938656375841
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17310
           1       0.48      0.04      0.07      1699

    accuracy                           0.91     19009
   macro avg       0.70      0.52      0.51     19009
weighted avg       0.87      0.91      0.87     19009

Confusion_matrix
[[17242    68]
 [ 1637    62]]
done in 0.959364s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26309747172570896
0.24882457311666448
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2765
           1       0.39      0.05      0.10       258

    accuracy                           0.91      3023
   macro avg       0.65      0.52      0.52      3023
weighted avg       0.87      0.91      0.88      3023

Confusion_matrix
[[2743   22]
 [ 244   14]]
done in 0.891903s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26309747172570896
0.2335000735077602
Classification report
              precision    recall  f1-score   support

           0       0.93      0.99      0.96      2665
           1       0.47      0.09      0.15       224

    accuracy                           0.92      2889
   macro avg       0.70      0.54      0.55      2889
weighted avg       0.89      0.92      0.90      2889

Confusion_matrix
[[2642   23]
 [ 204   20]]
done in 0.910578s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     20006
           1       0.60      0.00      0.01      1892

    accuracy                           0.91     21898
   macro avg       0.76      0.50      0.48     21898
weighted avg       0.89      0.91      0.87     21898

Confusion_matrix
[[20000     6]
 [ 1883     9]]
done in 19.500383s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19975
   

threshold:0.2, J-value:0.262
threshold:0.30000000000000004, J-value:0.148
threshold:0.4, J-value:0.069
threshold:0.5, J-value:0.029
threshold:0.6000000000000001, J-value:0.011
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7010668469649379
Balanced accuracy score of test is  0.7041008488900258
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.395
threshold:0.2, J-value:0.26099999999999995
threshold:0.30000000000000004, J-value:0.145
threshold:0.4, J-value:0.064
threshold:0.5, J-value:0.024
threshold:0.6000000000000001, J-value:0.01
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6978414473710832
Balanced accuracy score of test is  0.7017047782550581
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.44299999999999995
threshold:0.2, J-

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25860343089778964
0.2639560344941261
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.42      0.03      0.06      1987

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19821    90]
 [ 1922    65]]
done in 0.992911s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25860343089778964
0.2609219422405167
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.45      0.03      0.06      1971

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19853    74]
 [ 1910    61]]
done in 1.055458s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25860343089778964
0.26590016935078564
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17229
           1       0.41      0.03      0.06      1741

    accuracy                           0.91     18970
   macro avg       0.66      0.51      0.50     18970
weighted avg       0.86      0.91      0.87     18970

Confusion_matrix
[[17151    78]
 [ 1686    55]]
done in 0.960066s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25860343089778964
0.26061002316804543
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17198
           1       0.45      0.03      0.05      1694

    accuracy                           0.91     18892
   macro avg       0.68      0.51      0.50     18892
weighted avg       0.87      0.91      0.87     18892

Confusion_matrix
[[17143    55]
 [ 1649    45]]
done in 0.954593s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25860343089778964
0.2513603247158366
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2682
           1       0.45      0.04      0.07       246

    accuracy                           0.92      2928
   macro avg       0.69      0.52      0.52      2928
weighted avg       0.88      0.92      0.88      2928

Confusion_matrix
[[2670   12]
 [ 236   10]]
done in 1.094702s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25860343089778964
0.2628822799374982
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2729
           1       0.46      0.06      0.10       277

    accuracy                           0.91      3006
   macro avg       0.68      0.53      0.53      3006
weighted avg       0.87      0.91      0.87      3006

Confusion_matrix
[[2710   19]
 [ 261   16]]
done in 0.916068s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.50      0.00      0.01      1987

    accuracy                           0.91     21898
   macro avg       0.70      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19906     5]
 [ 1982     5]]
done in 19.743428s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
   

threshold:0.2, J-value:0.248
threshold:0.30000000000000004, J-value:0.145
threshold:0.4, J-value:0.07
threshold:0.5, J-value:0.029
threshold:0.6000000000000001, J-value:0.013000000000000001
threshold:0.7000000000000001, J-value:0.005
threshold:0.8, J-value:0.002
threshold:0.9, J-value:0.001
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6950035736531339
Balanced accuracy score of test is  0.7058600268453218
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.39
threshold:0.2, J-value:0.246
threshold:0.30000000000000004, J-value:0.141
threshold:0.4, J-value:0.065
threshold:0.5, J-value:0.027
threshold:0.6000000000000001, J-value:0.009999999999999998
threshold:0.7000000000000001, J-value:0.005
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6950732320234417
Balanced accuracy score of test is  0.7062750150926367
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.387
threshold:0.2,

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26149046250411156
0.25749546772155224
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.44      0.03      0.06      1954

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19861    83]
 [ 1890    64]]
done in 1.044136s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26149046250411156
0.25831499130545615
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
           1       0.47      0.04      0.07      1943

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19874    81]
 [ 1872    71]]
done in 0.955716s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26149046250411156
0.25820794376035167
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17147
           1       0.44      0.03      0.06      1695

    accuracy                           0.91     18842
   macro avg       0.68      0.51      0.51     18842
weighted avg       0.87      0.91      0.87     18842

Confusion_matrix
[[17077    70]
 [ 1640    55]]
done in 0.941527s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26149046250411156
0.2598420371640909
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17231
           1       0.43      0.03      0.06      1683

    accuracy                           0.91     18914
   macro avg       0.67      0.51      0.51     18914
weighted avg       0.87      0.91      0.87     18914

Confusion_matrix
[[17161    70]
 [ 1630    53]]
done in 0.932380s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26149046250411156
0.25310264261583926
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95      2797
           1       0.41      0.03      0.06       259

    accuracy                           0.91      3056
   macro avg       0.66      0.52      0.51      3056
weighted avg       0.87      0.91      0.88      3056

Confusion_matrix
[[2784   13]
 [ 250    9]]
done in 0.855882s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26149046250411156
0.24863585411704545
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2724
           1       0.62      0.07      0.12       260

    accuracy                           0.92      2984
   macro avg       0.77      0.53      0.54      2984
weighted avg       0.89      0.92      0.88      2984

Confusion_matrix
[[2713   11]
 [ 242   18]]
done in 0.890443s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.36      0.00      0.01      1954

    accuracy                           0.91     21898
   macro avg       0.63      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19935     9]
 [ 1949     5]]
done in 19.603858s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
  

  _warn_prf(average, modifier, msg_start, len(result))


0.26691180010608717
0.26804236350549815
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.34      0.01      0.03      1954

    accuracy                           0.91     21898
   macro avg       0.63      0.51      0.49     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19890    54]
 [ 1926    28]]
done in 0.631112s
0.26691180010608717
0.2720697371491411
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
           1       0.38      0.01      0.03      1943

    accuracy                           0.91     21898
   macro avg       0.65      0.51      0.49     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19909    46]
 [ 1915    28]]
done in 0.656124s
0.26691180010608717
0.26965226840168327
Classification report
              precision    re

True positive rate of class 1 is  0.737
True positive rate of class 2 is  0.785
Positive prediction rate of class 1 is  0.382
Positive prediction rate of class 2 is  0.394
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.364
threshold:0.2, J-value:0.23500000000000001
threshold:0.30000000000000004, J-value:0.143
threshold:0.4, J-value:0.011
threshold:0.5, J-value:0.011
threshold:0.6000000000000001, J-value:0.002
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6823207591286309
Balanced accuracy score of test is  0.6769141788788026
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.366
threshold:0.2, J-value:0.24000000000000002
threshold:0.30000000000000004, J-value:0.149
threshold:0.4, J-value:0.011
threshold:0.5, J-value:0.011
threshold:0.6000000000000001, J-value:0.001
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2582296004331764
0.2634053423664784
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.41      0.03      0.06      2005

    accuracy                           0.91     21898
   macro avg       0.66      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19800    93]
 [ 1940    65]]
done in 0.993990s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2582296004331764
0.26237358850595266
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.49      0.03      0.06      1991

    accuracy                           0.91     21898
   macro avg       0.70      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19838    69]
 [ 1926    65]]
done in 1.022233s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2582296004331764
0.26193518555597833
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17191
           1       0.41      0.03      0.06      1729

    accuracy                           0.91     18920
   macro avg       0.66      0.51      0.51     18920
weighted avg       0.87      0.91      0.87     18920

Confusion_matrix
[[17112    79]
 [ 1673    56]]
done in 1.033473s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2582296004331764
0.2644904135081465
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17135
           1       0.46      0.03      0.05      1730

    accuracy                           0.91     18865
   macro avg       0.69      0.51      0.50     18865
weighted avg       0.87      0.91      0.87     18865

Confusion_matrix
[[17081    54]
 [ 1684    46]]
done in 0.942926s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2582296004331764
0.27274562673674724
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2702
           1       0.39      0.03      0.06       276

    accuracy                           0.91      2978
   macro avg       0.65      0.51      0.51      2978
weighted avg       0.86      0.91      0.87      2978

Confusion_matrix
[[2688   14]
 [ 267    9]]
done in 0.862922s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2582296004331764
0.2492071184543907
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2772
           1       0.56      0.07      0.13       261

    accuracy                           0.92      3033
   macro avg       0.74      0.53      0.54      3033
weighted avg       0.89      0.92      0.88      3033

Confusion_matrix
[[2757   15]
 [ 242   19]]
done in 0.891453s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.44      0.00      0.01      2005

    accuracy                           0.91     21898
   macro avg       0.68      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19883    10]
 [ 1997     8]]
done in 19.166286s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
    

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.96      2772
           1       0.00      0.00      0.00       261

    accuracy                           0.91      3033
   macro avg       0.46      0.50      0.48      3033
weighted avg       0.84      0.91      0.87      3033

Confusion_matrix
[[2772    0]
 [ 261    0]]
done in 19.570604s


  _warn_prf(average, modifier, msg_start, len(result))


0.2638844630577774
0.27072063991639456
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.08      0.00      0.00      2005

    accuracy                           0.91     21898
   macro avg       0.50      0.50      0.48     21898
weighted avg       0.83      0.91      0.86     21898

Confusion_matrix
[[19882    11]
 [ 2004     1]]
done in 0.639992s
0.2638844630577774
0.27224900833516935
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.33      0.00      0.00      1991

    accuracy                           0.91     21898
   macro avg       0.62      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19903     4]
 [ 1989     2]]
done in 0.636164s
0.2638844630577774
0.26930585598613827
Classification report
              precision    reca

Balanced accuracy score of test is  0.6997250833457731
True positive rate of class 1 is  0.732
True positive rate of class 2 is  0.755
Positive prediction rate of class 1 is  0.371
Positive prediction rate of class 2 is  0.39
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.37400000000000005
threshold:0.2, J-value:0.21000000000000002
threshold:0.30000000000000004, J-value:0.093
threshold:0.4, J-value:0.008
threshold:0.5, J-value:-0.001
threshold:0.6000000000000001, J-value:-0.001
threshold:0.7000000000000001, J-value:-0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.687285719246347
Balanced accuracy score of test is  0.6872702163503284
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.38099999999999995
threshold:0.2, J-value:0.21500000000000002
threshold:0.30000000000000004, J-value:0.10099999999999999
threshold:0.4, J-value:0.008
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25968103373351586
0.26227214953462646
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.51      0.04      0.08      1967

    accuracy                           0.91     21898
   macro avg       0.71      0.52      0.52     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19850    81]
 [ 1883    84]]
done in 0.980242s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25968103373351586
0.2590921607891681
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
           1       0.47      0.04      0.07      1942

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19876    80]
 [ 1872    70]]
done in 0.975455s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25968103373351586
0.2606055980332213
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17217
           1       0.52      0.04      0.08      1688

    accuracy                           0.91     18905
   macro avg       0.72      0.52      0.52     18905
weighted avg       0.88      0.91      0.88     18905

Confusion_matrix
[[17151    66]
 [ 1616    72]]
done in 0.941423s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25968103373351586
0.2602277946694193
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17231
           1       0.45      0.03      0.06      1687

    accuracy                           0.91     18918
   macro avg       0.68      0.51      0.51     18918
weighted avg       0.87      0.91      0.87     18918

Confusion_matrix
[[17165    66]
 [ 1633    54]]
done in 0.956084s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25968103373351586
0.27279876367898453
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2714
           1       0.44      0.04      0.08       279

    accuracy                           0.91      2993
   macro avg       0.68      0.52      0.51      2993
weighted avg       0.87      0.91      0.87      2993

Confusion_matrix
[[2699   15]
 [ 267   12]]
done in 0.895694s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25968103373351586
0.25188279107554623
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2725
           1       0.53      0.06      0.11       255

    accuracy                           0.92      2980
   macro avg       0.73      0.53      0.53      2980
weighted avg       0.89      0.92      0.88      2980

Confusion_matrix
[[2711   14]
 [ 239   16]]
done in 0.978265s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.47      0.00      0.01      1967

    accuracy                           0.91     21898
   macro avg       0.69      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19923     8]
 [ 1960     7]]
done in 19.471136s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
  

threshold:0.2, J-value:0.255
threshold:0.30000000000000004, J-value:0.149
threshold:0.4, J-value:0.07
threshold:0.5, J-value:0.037000000000000005
threshold:0.6000000000000001, J-value:0.009999999999999998
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6964505938982116
Balanced accuracy score of test is  0.7007504821627146
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.391
threshold:0.2, J-value:0.24900000000000003
threshold:0.30000000000000004, J-value:0.152
threshold:0.4, J-value:0.07100000000000001
threshold:0.5, J-value:0.037000000000000005
threshold:0.6000000000000001, J-value:0.009999999999999998
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.695777701114874
Balanced accuracy score of test is  0.7004161727648129
threshold:0.0,

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25995560917913285
0.25506086388064353
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.51      0.04      0.07      1925

    accuracy                           0.91     21898
   macro avg       0.71      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19904    69]
 [ 1854    71]]
done in 0.983985s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25995560917913285
0.26490182741307455
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
           1       0.44      0.03      0.06      2032

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19780    86]
 [ 1965    67]]
done in 1.223051s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25995560917913285
0.2549433177304888
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17189
           1       0.49      0.03      0.07      1658

    accuracy                           0.91     18847
   macro avg       0.70      0.52      0.51     18847
weighted avg       0.88      0.91      0.88     18847

Confusion_matrix
[[17129    60]
 [ 1600    58]]
done in 0.920949s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25995560917913285
0.2657918690937799
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17067
           1       0.43      0.03      0.05      1750

    accuracy                           0.91     18817
   macro avg       0.67      0.51      0.50     18817
weighted avg       0.86      0.91      0.87     18817

Confusion_matrix
[[17001    66]
 [ 1700    50]]
done in 0.920628s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25995560917913285
0.25578698393700716
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95      2784
           1       0.59      0.05      0.09       267

    accuracy                           0.91      3051
   macro avg       0.75      0.52      0.52      3051
weighted avg       0.89      0.91      0.88      3051

Confusion_matrix
[[2775    9]
 [ 254   13]]
done in 0.893973s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25995560917913285
0.2594659577909285
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2799
           1       0.46      0.06      0.11       282

    accuracy                           0.91      3081
   macro avg       0.69      0.53      0.53      3081
weighted avg       0.87      0.91      0.87      3081

Confusion_matrix
[[2779   20]
 [ 265   17]]
done in 0.857947s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.44      0.00      0.00      1925

    accuracy                           0.91     21898
   macro avg       0.68      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19968     5]
 [ 1921     4]]
done in 19.451231s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
   

  _warn_prf(average, modifier, msg_start, len(result))


0.26553645312445306
0.2694114094281803
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.12      0.00      0.00      1925

    accuracy                           0.91     21898
   macro avg       0.51      0.50      0.48     21898
weighted avg       0.84      0.91      0.87     21898

Confusion_matrix
[[19958    15]
 [ 1923     2]]
done in 0.623379s
0.26553645312445306
0.2781555748145189
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
           1       0.11      0.00      0.00      2032

    accuracy                           0.91     21898
   macro avg       0.51      0.50      0.48     21898
weighted avg       0.83      0.91      0.86     21898

Confusion_matrix
[[19850    16]
 [ 2030     2]]
done in 0.590243s
0.26553645312445306
0.2730109877358993
Classification report
              precision    reca

Balanced accuracy score of test is  0.715187034883279
True positive rate of class 1 is  0.742
True positive rate of class 2 is  0.78
Positive prediction rate of class 1 is  0.378
Positive prediction rate of class 2 is  0.389
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.33799999999999997
threshold:0.2, J-value:0.193
threshold:0.30000000000000004, J-value:0.067
threshold:0.4, J-value:0.054
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6691380116403898
Balanced accuracy score of test is  0.674852813060101
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.33599999999999997
threshold:0.2, J-value:0.195
threshold:0.30000000000000004, J-value:0.07
threshold:0.4, J-value:0.059000000000000004
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-valu

In [30]:
def add_mean_sd(records, result_table, overall_records, type):
    """Append mean / standard-deviation summary rows for one model.

    Three rows are produced from the metric columns of ``result_table``:

    1. the per-column mean, appended to ``records``;
    2. the per-column standard deviation (``Series.std``), appended to
       ``records`` right after the mean;
    3. a condensed row with the means of the headline metrics plus a
       ``'type'`` tag, appended to ``overall_records``.

    Both lists are modified in place.

    Parameters
    ----------
    records : list of dict
        Rows to extend with the mean and sd rows.
        NOTE(review): presumably the per-run rows ``result_table`` was built
        from -- confirm against the caller.
    result_table : pandas.DataFrame
        Must contain every column named in ``metric_columns`` below; a
        missing column raises ``KeyError`` before anything is appended.
    overall_records : list of dict
        Cross-model summary rows; receives one new row.
    type : object
        Value stored under the ``'type'`` key of the condensed row. The name
        shadows the builtin inside this function; it is kept because it is
        part of the existing signature.

    Returns
    -------
    tuple
        ``(pd.DataFrame(records), overall_records)`` -- the DataFrame is
        built after the two summary rows were appended, and the list is the
        same object that was passed in.
    """
    # Every metric summarised with both mean and sd; order defines key order.
    metric_columns = (
        'auroc',
        'overall threshold', 'white threshold', 'black threshold',
        'overall ba validation', 'overall ba test',
        'white ba validation', 'white ba test',
        'black ba validation', 'black ba test',
        'overall precision', 'overall recall',
        'overall tpr', 'overall tnr', 'overall pd',
        'white precision', 'white recall',
        'white tpr', 'white tnr', 'white pd',
        'black precision', 'black recall',
        'black tpr', 'black tnr', 'black pd',
        'eod', 'di',
    )
    # Subset (mean only) that goes into the cross-model comparison row.
    headline_columns = (
        'auroc',
        'overall threshold', 'white threshold', 'black threshold',
        'overall ba test', 'white ba test', 'black ba test',
        'overall tpr', 'overall pd',
        'white tpr', 'white pd',
        'black tpr', 'black pd',
        'eod', 'di',
    )

    mean_row = {column: result_table[column].mean() for column in metric_columns}
    records.append(mean_row)

    sd_row = {column: result_table[column].std() for column in metric_columns}
    records.append(sd_row)

    condensed_row = {'type': type}
    condensed_row.update((column, mean_row[column]) for column in headline_columns)
    overall_records.append(condensed_row)

    return pd.DataFrame(records), overall_records

In [31]:
# Summarise each classifier in turn. add_mean_sd appends one condensed row per
# model to the list it is given and returns that same list, so
# `overall_records` is just an alias of `overall_table` after the first call.
# NOTE(review): result_* is rebound to the summarised table, so re-running
# this cell without re-running the cells that build records_*/result_* will
# append the summary rows a second time.
overall_table = []
result_lr, overall_records = add_mean_sd(records_lr, result_lr, overall_table, 'lr')
result_rf, overall_records = add_mean_sd(records_rf, result_rf, overall_records, 'rf')
result_dt, overall_records = add_mean_sd(records_dt, result_dt, overall_records, 'dt')
result_gbt, overall_records = add_mean_sd(records_gbt, result_gbt, overall_records, 'gbt')

# One CSV per classifier, each ending with its mean and sd rows.
result_path = '/Users/lifuchen/Desktop/research/resample_data/'
for csv_name, model_result in (
    ('race-lr-result.csv', result_lr),
    ('race-rf-result.csv', result_rf),
    ('race-dt-result.csv', result_dt),
    ('race-gbt-result.csv', result_gbt),
):
    model_result.to_csv(path.join(result_path, csv_name), index=False)

# Cross-model comparison table: one mean-only row per classifier.
result_path = '/Users/lifuchen/Desktop/research/resample_result/'
overall_result = pd.DataFrame(overall_table)
overall_result.to_csv(path.join(result_path, 'race.csv'), index=False)
