In [12]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from os import path
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
import sklearn.preprocessing
from sklearn import preprocessing
from sklearn.preprocessing import Normalizer
import src.lib.utility_classfier as uclf
import src.lib.optimal_threhold_related as thres
import src.lib.fairness_tests as fair

In [13]:
# Load the full dataset from a local CSV file into `df`.
# NOTE(review): this is an absolute, machine-specific path; the notebook cannot be
# re-run on another machine without editing it.
data_path='/Users/lifuchen/Desktop/research/data.csv'
df = pd.read_csv(data_path)

In [14]:
# Target: the values of the `Class` column.
y = df['Class'].values
# Features: every remaining column once `GRID` and the target are removed.
X = df.drop(columns=['GRID', 'Class'])
# Displayed as the cell output: (rows, feature columns).
X.shape

(109490, 87)

In [15]:
def save_prediction(classifier, characteristic, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_male_scaled, y_val_male, X_test_male_scaled, y_test_male, X_val_female_scaled, y_val_female, X_test_female_scaled, y_test_female, result_path='/Users/lifuchen/Desktop/research/predictions/'):
    """Score six evaluation splits with one ``uclf`` classifier and save the scores to CSV.

    The helper ``getattr(uclf, classifier)`` is called once per split as
    ``helper(X_train_scaled, y_train, X_split, y_split)`` and its return value
    becomes one column of the saved file.

    Parameters
    ----------
    classifier : str
        Name of a callable attribute of ``uclf`` (e.g. ``"random_forest"``).
    characteristic : object
        Tag placed in the file name; converted with ``str``.
    X_train_scaled, y_train
        Training data forwarded unchanged to every helper call.
    X_val_scaled, y_val, X_test_scaled, y_test
        Overall validation / test splits (columns ``val_score``, ``test_score``).
    X_val_male_scaled, y_val_male, X_test_male_scaled, y_test_male
        First subgroup splits (columns ``val_1_score``, ``test_1_score``).
    X_val_female_scaled, y_val_female, X_test_female_scaled, y_test_female
        Second subgroup splits (columns ``val_2_score``, ``test_2_score``).
    result_path : str, optional
        Output directory. Defaults to the location that was previously
        hard-coded; it is created if it does not exist.

    Returns
    -------
    None
        The only effect is writing
        ``<result_path>/<classifier><characteristic>prediction.csv``.

    Notes
    -----
    The training data is passed to the helper on every call.
    NOTE(review): presumably the helper refits the model each time; if so, a
    non-deterministic model yields val/test scores from differently fitted
    models. Confirm against ``src.lib.utility_classfier``.
    """
    import os  # local import: the notebook only imports `path` from os at the top

    method_to_call = getattr(uclf, classifier)

    # (output column, features, labels) in the column order get_result reads back.
    splits = (
        ('val_score', X_val_scaled, y_val),
        ('test_score', X_test_scaled, y_test),
        ('val_1_score', X_val_male_scaled, y_val_male),
        ('test_1_score', X_test_male_scaled, y_test_male),
        ('val_2_score', X_val_female_scaled, y_val_female),
        ('test_2_score', X_test_female_scaled, y_test_female),
    )
    my_dict = {
        column: method_to_call(X_train_scaled, y_train, features, labels)
        for column, features, labels in splits
    }

    # The splits differ in length: building row-wise and transposing lets pandas
    # pad the shorter columns with NaN instead of raising on ragged input.
    overall_prediction = pd.DataFrame.from_dict(my_dict, orient='index').transpose()

    if result_path:
        os.makedirs(result_path, exist_ok=True)
    filename = str(classifier) + str(characteristic) + "prediction.csv"
    overall_prediction.to_csv(os.path.join(result_path, filename), index=False)

In [16]:
def get_result (classifier,characteristic, records, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_male_scaled, y_val_male, X_test_male_scaled, y_test_male, X_val_female_scaled, y_val_female, X_test_female_scaled, y_test_female, result_path='/Users/lifuchen/Desktop/research/predictions/'):
    """Read saved scores for one classifier, compute metrics and append them to ``records``.

    Reads ``<result_path>/<classifier><characteristic>prediction.csv`` (the file
    written by ``save_prediction``). For the overall split and for each of the
    two subgroups, a threshold is obtained from the validation scores via
    ``balance_accuracy`` and metrics are evaluated on the test scores via
    ``thres.calculate_precision_metrics``. AUROC is computed for the overall
    test split only. ``fair.get_EOD`` and ``fair.get_SP`` are called with the
    per-subgroup thresholds.

    Parameters
    ----------
    classifier : str
        Classifier name used in the file name.
    characteristic : object
        Tag used in the file name; converted with ``str``.
    records : list
        Mutated in place: one dict of metrics is appended.
    y_val, y_test, y_val_male, y_test_male, y_val_female, y_test_female
        True labels for the respective splits.
    X_train_scaled, y_train, X_val_scaled, X_test_scaled, X_val_male_scaled,
    X_test_male_scaled, X_val_female_scaled, X_test_female_scaled
        Unused here; accepted so the call signature mirrors ``save_prediction``.
    result_path : str, optional
        Directory holding the prediction CSV. Defaults to the location that was
        previously hard-coded.

    Returns
    -------
    None
    """
    filename = str(classifier) + str(characteristic) + "prediction.csv"
    prediction = pd.read_csv(path.join(result_path, filename))

    def _scores(column):
        # Shorter splits were padded with NaN when the file was written; drop the padding.
        return prediction[column].dropna()

    y_val_score = _scores('val_score')
    y_test_score = _scores('test_score')
    y_val_score_male = _scores('val_1_score')
    y_test_score_male = _scores('test_1_score')
    y_val_score_female = _scores('val_2_score')
    y_test_score_female = _scores('test_2_score')

    # Overall split.
    threshold, ba_val, ba_test = balance_accuracy (y_val, y_val_score,y_test, y_test_score)
    auroc = roc_auc_score(y_test, y_test_score)
    precision, recall, tpr, tnr, pd_overall = thres.calculate_precision_metrics(y_test, y_test_score,threshold)

    # First subgroup: its own threshold, chosen on its own validation scores.
    threshold_male, ba_val_male, ba_test_male = balance_accuracy (y_val_male, y_val_score_male,y_test_male, y_test_score_male)
    precision_male, recall_male, tpr_male, tnr_male, pd_male = thres.calculate_precision_metrics(y_test_male, y_test_score_male,threshold_male)

    # Second subgroup.
    threshold_female, ba_val_female, ba_test_female = balance_accuracy (y_val_female, y_val_score_female, y_test_female, y_test_score_female)
    precision_female, recall_female, tpr_female, tnr_female, pd_female = thres.calculate_precision_metrics(y_test_female, y_test_score_female,threshold_female)

    eod = fair.get_EOD(y_test_male, y_test_score_male,threshold_male, y_test_female, y_test_score_female, threshold_female)
    sp = fair.get_SP(y_test_male, y_test_score_male,threshold_male, y_test_female, y_test_score_female, threshold_female)

    # Key names and their order define the columns of the result DataFrames
    # built by the caller, so they are kept exactly as they were.
    records.append({
        'auroc': auroc,
        'overall threshold': threshold,
        'male threshold': threshold_male,
        'female threshold': threshold_female,
        'overall ba validation': ba_val,
        'overall ba test': ba_test,
        'male ba validation': ba_val_male,
        'male ba test': ba_test_male,
        'female ba validation': ba_val_female,
        'female ba test': ba_test_female,
        'overall precision':precision,
        'overall recall':recall,
        'overall tpr':tpr,
        'overall tnr':tnr,
        'overall pd':pd_overall,
        'male precision':precision_male,
        'male recall':recall_male,
        'male tpr':tpr_male,
        'male tnr':tnr_male,
        'male pd':pd_male,
        'female precision':precision_female,
        'female recall':recall_female,
        'female tpr':tpr_female,
        'female tnr':tnr_female,
        'female pd':pd_female,
        'eod': eod,
        # NOTE(review): the key is 'di' but the value comes from fair.get_SP;
        # confirm which fairness metric this column is meant to hold.
        'di': sp,
        })

In [17]:
def balance_accuracy (y_val, y_val_score,y_test, y_test_score):
    """Choose a threshold on the validation split and score both splits with it.

    The threshold is the first element of the pair returned by
    ``thres.get_optimal_threshold_Jvalue(y_val, y_val_score)``. The same
    threshold is then passed to ``thres.calculate_balanced_accuracy`` for the
    validation split and for the test split. All three values are printed.

    Returns
    -------
    tuple
        ``(threshold, ba_val, ba_test)``.
    """
    # The helper returns a pair; only its first element is used here.
    chosen_cutoff, _unused = thres.get_optimal_threshold_Jvalue(y_val, y_val_score)
    print ("Optimal threshold by J value is ",chosen_cutoff)

    val_balanced = thres.calculate_balanced_accuracy(y_val, y_val_score, chosen_cutoff)
    print ("Balanced accuracy score of val is ", val_balanced)

    # The test split reuses the validation-derived threshold; it is not re-tuned.
    test_balanced = thres.calculate_balanced_accuracy(y_test, y_test_score, chosen_cutoff)
    print ("Balanced accuracy score of test is ",test_balanced)

    return chosen_cutoff, val_balanced, test_balanced

In [18]:
def fairness_metrics (X, y, attribute, random_state):
    """Split, scale, score and evaluate four classifiers for one attribute / seed.

    Steps:
      1. ``fair.split_by_trait(X, y, attribute, random_state)`` yields the
         train / validation / test splits plus two subgroup splits of the
         validation and test sets.
      2. Row counts of every split are printed.
      3. A ``MaxAbsScaler`` is fitted on the training features only and then
         applied to every evaluation feature matrix.
      4. ``save_prediction`` is called for each of the four classifiers, then
         ``get_result`` is called for each of them.

    Side effects: relies on the notebook-level lists ``records_lr``,
    ``records_rf``, ``records_dt`` and ``records_gbt``, which must exist before
    this function is called; they are handed to ``get_result`` as the
    ``records`` argument.

    Returns
    -------
    None
    """
    (X_train, y_train, X_val, y_val, X_test, y_test,
     X_val_female, X_val_male, y_val_female, y_val_male,
     X_test_female, X_test_male, y_test_female, y_test_male) = fair.split_by_trait(X, y, attribute, random_state)

    print("X train", X_train.shape[0])
    print("Y train", y_train.shape[0])
    # One line per (whole split, male subgroup, female subgroup) triple.
    size_rows = (
        (X_val, X_val_male, X_val_female),
        (y_val, y_val_male, y_val_female),
        (X_test, X_test_male, X_test_female),
        (y_test, y_test_male, y_test_female),
    )
    for whole, male_part, female_part in size_rows:
        print(whole.shape[0], male_part.shape[0], female_part.shape[0])

    # Fit the scaler on training data only, then reuse it for every other split.
    scaler = preprocessing.MaxAbsScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    (X_test_scaled, X_test_male_scaled, X_test_female_scaled,
     X_val_scaled, X_val_male_scaled, X_val_female_scaled) = [
        scaler.transform(features)
        for features in (X_test, X_test_male, X_test_female, X_val, X_val_male, X_val_female)
    ]

    characteristic = attribute + str(random_state)

    # Positional tail shared by every save_prediction / get_result call.
    shared_args = (
        X_train_scaled, y_train,
        X_val_scaled, y_val, X_test_scaled, y_test,
        X_val_male_scaled, y_val_male, X_test_male_scaled, y_test_male,
        X_val_female_scaled, y_val_female, X_test_female_scaled, y_test_female,
    )
    classifier_names = ("logic_regression", "random_forest", "decision_tree", "gradiant_boosting")

    # All prediction files are written before any of them is read back.
    for clf_name in classifier_names:
        save_prediction (clf_name, characteristic, *shared_args)

    record_lists = (records_lr, records_rf, records_dt, records_gbt)
    for clf_name, clf_records in zip(classifier_names, record_lists):
        get_result (clf_name, characteristic, clf_records, *shared_args)

In [19]:
# Start from empty per-classifier result lists; fairness_metrics reads these
# notebook-level names and one metrics dict is appended to each per call.
records_lr, records_rf, records_dt, records_gbt = [], [], [], []

# Single run: split on the "GENDER" attribute with random_state 0.
fairness_metrics(X, y, "GENDER", 0)

X train 65694
Y train 65694
21898 7782 14116
21898 7782 14116
21898 7707 14191
21898 7707 14191


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2590532945377533
0.26233534084451454
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.46      0.04      0.07      1994

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19822    82]
 [ 1924    70]]
done in 0.702149s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2590532945377533
0.26252435164193133
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.42      0.04      0.07      1964

    accuracy                           0.91     21898
   macro avg       0.67      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19839    95]
 [ 1894    70]]
done in 0.591743s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2590532945377533
0.30587964804898465
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6915
           1       0.46      0.03      0.06       867

    accuracy                           0.89      7782
   macro avg       0.68      0.51      0.50      7782
weighted avg       0.84      0.89      0.84      7782

Confusion_matrix
[[6881   34]
 [ 838   29]]
done in 0.570478s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2590532945377533
0.3014896098314331
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6860
           1       0.51      0.03      0.06       847

    accuracy                           0.89      7707
   macro avg       0.70      0.52      0.50      7707
weighted avg       0.85      0.89      0.85      7707

Confusion_matrix
[[6832   28]
 [ 818   29]]
done in 0.677470s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2590532945377533
0.23832982946273593
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12989
           1       0.46      0.04      0.07      1127

    accuracy                           0.92     14116
   macro avg       0.69      0.52      0.51     14116
weighted avg       0.89      0.92      0.89     14116

Confusion_matrix
[[12941    48]
 [ 1086    41]]
done in 0.988293s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2590532945377533
0.2413626826357662
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96     13074
           1       0.38      0.04      0.07      1117

    accuracy                           0.92     14191
   macro avg       0.65      0.52      0.51     14191
weighted avg       0.88      0.92      0.89     14191

Confusion_matrix
[[13007    67]
 [ 1076    41]]
done in 0.930001s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.43      0.00      0.01      1994

    accuracy                           0.91     21898
   macro avg       0.67      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19892    12]
 [ 1985     9]]
done in 23.554023s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934


  _warn_prf(average, modifier, msg_start, len(result))


0.26448598669059525
0.2520034366722554
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13074
           1       0.00      0.00      0.00      1117

    accuracy                           0.92     14191
   macro avg       0.46      0.50      0.48     14191
weighted avg       0.85      0.92      0.88     14191

Confusion_matrix
[[13073     1]
 [ 1117     0]]
done in 0.664381s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.46      0.02      0.03      1994

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19864    40]
 [ 1960    34]]
done in 36.474388s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934

threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.714224541591011
Balanced accuracy score of test is  0.6986341842540669
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.40599999999999997
threshold:0.2, J-value:0.28
threshold:0.30000000000000004, J-value:0.13099999999999998
threshold:0.4, J-value:0.057999999999999996
threshold:0.5, J-value:0.019999999999999997
threshold:0.6000000000000001, J-value:0.005
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7033498712742721
Balanced accuracy score of test is  0.695231669999759
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.42900000000000005
threshold:0.2, J-value:0.268
threshold:0.30000000000000004, J-value:0.14800000000000002
threshold:0.4, J-value:0.061000000000000006
threshold:0.5, J-value:0.01

In [20]:
# Reset the per-classifier result lists so this cell is independent of earlier runs.
records_lr, records_rf, records_dt, records_gbt = [], [], [], []

# Repeat the whole pipeline for random_state 0..9 on the "GENDER" attribute;
# each iteration appends one metrics dict to every list.
for random_state in range(10):
    fairness_metrics(X, y, "GENDER", random_state)

# One row per random_state, one table per classifier.
result_lr = pd.DataFrame(records_lr)
result_rf = pd.DataFrame(records_rf)
result_dt = pd.DataFrame(records_dt)
result_gbt = pd.DataFrame(records_gbt)

X train 65694
Y train 65694
21898 7782 14116
21898 7782 14116
21898 7707 14191
21898 7707 14191


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2590532945377533
0.26233534084451454
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.46      0.04      0.07      1994

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19822    82]
 [ 1924    70]]
done in 1.459033s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2590532945377533
0.26252435164193133
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.42      0.04      0.07      1964

    accuracy                           0.91     21898
   macro avg       0.67      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19839    95]
 [ 1894    70]]
done in 1.454523s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2590532945377533
0.30587964804898465
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6915
           1       0.46      0.03      0.06       867

    accuracy                           0.89      7782
   macro avg       0.68      0.51      0.50      7782
weighted avg       0.84      0.89      0.84      7782

Confusion_matrix
[[6881   34]
 [ 838   29]]
done in 1.188643s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2590532945377533
0.3014896098314331
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6860
           1       0.51      0.03      0.06       847

    accuracy                           0.89      7707
   macro avg       0.70      0.52      0.50      7707
weighted avg       0.85      0.89      0.85      7707

Confusion_matrix
[[6832   28]
 [ 818   29]]
done in 1.274817s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2590532945377533
0.23832982946273593
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12989
           1       0.46      0.04      0.07      1127

    accuracy                           0.92     14116
   macro avg       0.69      0.52      0.51     14116
weighted avg       0.89      0.92      0.89     14116

Confusion_matrix
[[12941    48]
 [ 1086    41]]
done in 1.526959s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2590532945377533
0.2413626826357662
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96     13074
           1       0.38      0.04      0.07      1117

    accuracy                           0.92     14191
   macro avg       0.65      0.52      0.51     14191
weighted avg       0.88      0.92      0.89     14191

Confusion_matrix
[[13007    67]
 [ 1076    41]]
done in 1.489129s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.43      0.01      0.01      1994

    accuracy                           0.91     21898
   macro avg       0.67      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19891    13]
 [ 1984    10]]
done in 21.933903s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934


  _warn_prf(average, modifier, msg_start, len(result))


0.26448598669059525
0.2520034366722554
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13074
           1       0.00      0.00      0.00      1117

    accuracy                           0.92     14191
   macro avg       0.46      0.50      0.48     14191
weighted avg       0.85      0.92      0.88     14191

Confusion_matrix
[[13073     1]
 [ 1117     0]]
done in 0.657726s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.46      0.02      0.03      1994

    accuracy                           0.91     21898
   macro avg       0.69      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19863    41]
 [ 1959    35]]
done in 35.630253s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934

threshold:0.5, J-value:0.015000000000000001
threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7141491798546766
Balanced accuracy score of test is  0.6986341842540669
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.40599999999999997
threshold:0.2, J-value:0.28
threshold:0.30000000000000004, J-value:0.13099999999999998
threshold:0.4, J-value:0.057999999999999996
threshold:0.5, J-value:0.019000000000000003
threshold:0.6000000000000001, J-value:0.005
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7033498712742721
Balanced accuracy score of test is  0.6953045562971352
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.42900000000000005
threshold:0.2, J-value:0.268
threshold:0.30000000000000004, J-value:0

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26234066472929296
0.2564476749347357
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     19980
           1       0.46      0.04      0.07      1918

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19892    88]
 [ 1843    75]]
done in 1.727447s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26234066472929296
0.2580498997876009
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.43      0.03      0.06      1926

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19884    88]
 [ 1860    66]]
done in 1.717145s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26234066472929296
0.2968087963756622
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6836
           1       0.46      0.03      0.07       829

    accuracy                           0.89      7665
   macro avg       0.68      0.52      0.50      7665
weighted avg       0.85      0.89      0.85      7665

Confusion_matrix
[[6802   34]
 [ 800   29]]
done in 1.594009s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26234066472929296
0.2972506317370367
Classification report
              precision    recall  f1-score   support

           0       0.90      0.99      0.94      6982
           1       0.38      0.03      0.06       825

    accuracy                           0.89      7807
   macro avg       0.64      0.51      0.50      7807
weighted avg       0.84      0.89      0.85      7807

Confusion_matrix
[[6941   41]
 [ 800   25]]
done in 1.268133s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26234066472929296
0.2347117095132012
Classification report
              precision    recall  f1-score   support

           0       0.93      1.00      0.96     13144
           1       0.46      0.04      0.08      1089

    accuracy                           0.92     14233
   macro avg       0.69      0.52      0.52     14233
weighted avg       0.89      0.92      0.89     14233

Confusion_matrix
[[13090    54]
 [ 1043    46]]
done in 1.255158s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26234066472929296
0.236331064053498
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12990
           1       0.47      0.04      0.07      1101

    accuracy                           0.92     14091
   macro avg       0.70      0.52      0.51     14091
weighted avg       0.89      0.92      0.89     14091

Confusion_matrix
[[12943    47]
 [ 1060    41]]
done in 1.222487s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19980
           1       0.00      0.00      0.00      1918

    accuracy                           0.91     21898
   macro avg       0.46      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19979     1]
 [ 1918     0]]
done in 20.361155s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972


threshold:0.2, J-value:0.27399999999999997
threshold:0.30000000000000004, J-value:0.151
threshold:0.4, J-value:0.07
threshold:0.5, J-value:0.035
threshold:0.6000000000000001, J-value:0.013000000000000001
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.700518479898042
Balanced accuracy score of test is  0.6982577269652072
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.39199999999999996
threshold:0.2, J-value:0.29600000000000004
threshold:0.30000000000000004, J-value:0.16799999999999998
threshold:0.4, J-value:0.067
threshold:0.5, J-value:0.030000000000000002
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6960358874926682
Balanced accuracy score of test is  0.6837357534091995
threshold:0.0, J

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25961984362013407
0.2604168124547779
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.47      0.04      0.07      1948

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19872    78]
 [ 1878    70]]
done in 0.973107s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25961984362013407
0.2627724396460385
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.50      0.03      0.06      2015

    accuracy                           0.91     21898
   macro avg       0.71      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19821    62]
 [ 1953    62]]
done in 1.013887s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25961984362013407
0.3015634591817161
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6895
           1       0.48      0.03      0.06       848

    accuracy                           0.89      7743
   macro avg       0.69      0.51      0.50      7743
weighted avg       0.85      0.89      0.85      7743

Confusion_matrix
[[6864   31]
 [ 819   29]]
done in 1.029789s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25961984362013407
0.30557009379295047
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6860
           1       0.44      0.03      0.05       880

    accuracy                           0.89      7740
   macro avg       0.67      0.51      0.50      7740
weighted avg       0.84      0.89      0.84      7740

Confusion_matrix
[[6830   30]
 [ 856   24]]
done in 1.245340s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25961984362013407
0.2379089717195831
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13055
           1       0.47      0.04      0.07      1100

    accuracy                           0.92     14155
   macro avg       0.70      0.52      0.51     14155
weighted avg       0.89      0.92      0.89     14155

Confusion_matrix
[[13008    47]
 [ 1059    41]]
done in 1.148134s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25961984362013407
0.23937550200674634
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13023
           1       0.54      0.03      0.06      1135

    accuracy                           0.92     14158
   macro avg       0.73      0.52      0.51     14158
weighted avg       0.89      0.92      0.89     14158

Confusion_matrix
[[12991    32]
 [ 1097    38]]
done in 1.428525s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.54      0.00      0.01      1948

    accuracy                           0.91     21898
   macro avg       0.72      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19944     6]
 [ 1941     7]]
done in 20.448980s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     1988

  _warn_prf(average, modifier, msg_start, len(result))


0.264372916262498
0.2794088259193761
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.00      0.00      0.00      2015

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.82      0.91      0.86     21898

Confusion_matrix
[[19879     4]
 [ 2015     0]]
done in 0.654952s
0.264372916262498
0.31187540312420786
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6895
           1       0.00      0.00      0.00       848

    accuracy                           0.89      7743
   macro avg       0.45      0.50      0.47      7743
weighted avg       0.79      0.89      0.84      7743

Confusion_matrix
[[6895    0]
 [ 848    0]]
done in 0.619164s


  _warn_prf(average, modifier, msg_start, len(result))


0.264372916262498
0.3392184415550595
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6860
           1       0.00      0.00      0.00       880

    accuracy                           0.89      7740
   macro avg       0.44      0.50      0.47      7740
weighted avg       0.79      0.89      0.83      7740

Confusion_matrix
[[6855    5]
 [ 880    0]]
done in 0.604982s
0.264372916262498
0.24493312822456925
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13055
           1       0.00      0.00      0.00      1100

    accuracy                           0.92     14155
   macro avg       0.46      0.50      0.48     14155
weighted avg       0.85      0.92      0.89     14155

Confusion_matrix
[[13055     0]
 [ 1100     0]]
done in 0.614657s


  _warn_prf(average, modifier, msg_start, len(result))


0.264372916262498
0.249151187225685
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13023
           1       0.00      0.00      0.00      1135

    accuracy                           0.92     14158
   macro avg       0.46      0.50      0.48     14158
weighted avg       0.85      0.92      0.88     14158

Confusion_matrix
[[13023     0]
 [ 1135     0]]
done in 0.615125s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.40      0.02      0.03      1948

    accuracy                           0.91     21898
   macro avg       0.65      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19898    52]
 [ 1914    34]]
done in 33.454288s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.45      0.02      0.03      2015

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19844    39]
 [ 1983    32]]
done in 33.266353s
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6895
           1       0.39      0.01    

threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.40800000000000003
threshold:0.2, J-value:0.254
threshold:0.30000000000000004, J-value:0.131
threshold:0.4, J-value:0.054
threshold:0.5, J-value:0.017
threshold:0.6000000000000001, J-value:0.005
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7039857595487622
Balanced accuracy score of test is  0.7076862656749952
True positive rate of class 1 is  0.75
True positive rate of class 2 is  0.648
Positive prediction rate of class 1 is  0.388
Positive prediction rate of class 2 is  0.266
X train 65694
Y train 65694
21898 7751 14147
21898 7751 14147
21898 7757 14141
21898 7757 14141


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2591785464035789
0.2630908466896804
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.48      0.04      0.07      1990

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19824    84]
 [ 1913    77]]
done in 1.103293s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2591785464035789
0.26119087323907286
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
           1       0.45      0.03      0.06      1980

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19836    82]
 [ 1914    66]]
done in 1.078993s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2591785464035789
0.3015330765425016
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      6908
           1       0.45      0.04      0.07       843

    accuracy                           0.89      7751
   macro avg       0.67      0.52      0.50      7751
weighted avg       0.85      0.89      0.85      7751

Confusion_matrix
[[6872   36]
 [ 813   30]]
done in 1.266868s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2591785464035789
0.30306864427481933
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      6894
           1       0.43      0.03      0.06       863

    accuracy                           0.89      7757
   macro avg       0.66      0.51      0.50      7757
weighted avg       0.84      0.89      0.84      7757

Confusion_matrix
[[6855   39]
 [ 833   30]]
done in 1.133346s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2591785464035789
0.24202873291367014
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13000
           1       0.49      0.04      0.08      1147

    accuracy                           0.92     14147
   macro avg       0.71      0.52      0.52     14147
weighted avg       0.89      0.92      0.89     14147

Confusion_matrix
[[12952    48]
 [ 1100    47]]
done in 1.012906s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2591785464035789
0.23821895683116076
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13024
           1       0.46      0.03      0.06      1117

    accuracy                           0.92     14141
   macro avg       0.69      0.51      0.51     14141
weighted avg       0.89      0.92      0.89     14141

Confusion_matrix
[[12981    43]
 [ 1081    36]]
done in 0.995477s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.56      0.01      0.01      1990

    accuracy                           0.91     21898
   macro avg       0.73      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19900     8]
 [ 1980    10]]
done in 19.811606s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918

  _warn_prf(average, modifier, msg_start, len(result))


0.26428564201629917
0.25009370971580136
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13000
           1       0.00      0.00      0.00      1147

    accuracy                           0.92     14147
   macro avg       0.46      0.50      0.48     14147
weighted avg       0.84      0.92      0.88     14147

Confusion_matrix
[[12998     2]
 [ 1147     0]]
done in 0.625333s
0.26428564201629917
0.24877068299137428
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13024
           1       0.00      0.00      0.00      1117

    accuracy                           0.92     14141
   macro avg       0.46      0.50      0.48     14141
weighted avg       0.85      0.92      0.88     14141

Confusion_matrix
[[13021     3]
 [ 1117     0]]
done in 0.629559s
Classification report
              precision    recall  f1-score   support

           0 

True positive rate of class 1 is  0.689
True positive rate of class 2 is  0.615
Positive prediction rate of class 1 is  0.378
Positive prediction rate of class 2 is  0.286
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.397
threshold:0.2, J-value:0.27299999999999996
threshold:0.30000000000000004, J-value:0.148
threshold:0.4, J-value:0.058
threshold:0.5, J-value:0.020999999999999998
threshold:0.6000000000000001, J-value:0.002
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6985210612031425
Balanced accuracy score of test is  0.7106769573432893
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.385
threshold:0.2, J-value:0.275
threshold:0.30000000000000004, J-value:0.165
threshold:0.4, J-value:0.05800000000000001
threshold:0.5, J-value:0.023
threshold:0.6000000000000001, J-value:0.004
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26348190676177874
0.2545484519972916
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     20006
           1       0.41      0.03      0.06      1892

    accuracy                           0.91     21898
   macro avg       0.66      0.52      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19910    96]
 [ 1826    66]]
done in 1.115174s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26348190676177874
0.2562989959857017
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     19975
           1       0.47      0.04      0.07      1923

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19886    89]
 [ 1845    78]]
done in 1.218992s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26348190676177874
0.2901214376266646
Classification report
              precision    recall  f1-score   support

           0       0.90      0.99      0.94      7042
           1       0.35      0.04      0.06       800

    accuracy                           0.89      7842
   macro avg       0.63      0.51      0.50      7842
weighted avg       0.84      0.89      0.85      7842

Confusion_matrix
[[6990   52]
 [ 772   28]]
done in 1.244796s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26348190676177874
0.29051238587003203
Classification report
              precision    recall  f1-score   support

           0       0.90      0.99      0.94      6943
           1       0.47      0.04      0.08       816

    accuracy                           0.89      7759
   macro avg       0.68      0.52      0.51      7759
weighted avg       0.85      0.89      0.85      7759

Confusion_matrix
[[6904   39]
 [ 782   34]]
done in 1.095844s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26348190676177874
0.2347018844599024
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12964
           1       0.46      0.03      0.06      1092

    accuracy                           0.92     14056
   macro avg       0.69      0.52      0.51     14056
weighted avg       0.89      0.92      0.89     14056

Confusion_matrix
[[12920    44]
 [ 1054    38]]
done in 1.140017s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26348190676177874
0.2375238568589941
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13032
           1       0.47      0.04      0.07      1107

    accuracy                           0.92     14139
   macro avg       0.70      0.52      0.52     14139
weighted avg       0.89      0.92      0.89     14139

Confusion_matrix
[[12982    50]
 [ 1063    44]]
done in 1.342537s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     20006
           1       0.40      0.00      0.00      1892

    accuracy                           0.91     21898
   macro avg       0.66      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[20000     6]
 [ 1888     4]]
done in 20.277069s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19975

threshold:0.2, J-value:0.267
threshold:0.30000000000000004, J-value:0.148
threshold:0.4, J-value:0.07
threshold:0.5, J-value:0.030000000000000002
threshold:0.6000000000000001, J-value:0.011
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7025598979925474
Balanced accuracy score of test is  0.7058304940457943
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.37499999999999994
threshold:0.2, J-value:0.265
threshold:0.30000000000000004, J-value:0.15
threshold:0.4, J-value:0.059
threshold:0.5, J-value:0.028000000000000004
threshold:0.6000000000000001, J-value:0.007
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6878731184322635
Balanced accuracy score of test is  0.6978600078228037
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.415000

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25891442369580603
0.2639875999666128
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.43      0.03      0.06      1987

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19823    88]
 [ 1921    66]]
done in 1.166254s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25891442369580603
0.26099789634872095
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.44      0.03      0.06      1971

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19850    77]
 [ 1910    61]]
done in 1.153343s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25891442369580603
0.2968648033990215
Classification report
              precision    recall  f1-score   support

           0       0.90      0.99      0.94      6979
           1       0.38      0.03      0.05       835

    accuracy                           0.89      7814
   macro avg       0.64      0.51      0.50      7814
weighted avg       0.84      0.89      0.85      7814

Confusion_matrix
[[6943   36]
 [ 813   22]]
done in 1.080049s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25891442369580603
0.30226197387542697
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6927
           1       0.46      0.03      0.06       851

    accuracy                           0.89      7778
   macro avg       0.68      0.51      0.50      7778
weighted avg       0.85      0.89      0.85      7778

Confusion_matrix
[[6895   32]
 [ 824   27]]
done in 1.107181s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25891442369580603
0.2457468680991858
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12932
           1       0.46      0.04      0.07      1152

    accuracy                           0.92     14084
   macro avg       0.69      0.52      0.51     14084
weighted avg       0.88      0.92      0.88     14084

Confusion_matrix
[[12880    52]
 [ 1108    44]]
done in 1.098693s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25891442369580603
0.23826758508790516
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13000
           1       0.43      0.03      0.06      1120

    accuracy                           0.92     14120
   macro avg       0.68      0.51      0.51     14120
weighted avg       0.88      0.92      0.89     14120

Confusion_matrix
[[12955    45]
 [ 1086    34]]
done in 1.032431s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.50      0.00      0.01      1987

    accuracy                           0.91     21898
   macro avg       0.70      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19906     5]
 [ 1982     5]]
done in 20.019800s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     1992

threshold:0.2, J-value:0.249
threshold:0.30000000000000004, J-value:0.146
threshold:0.4, J-value:0.07200000000000001
threshold:0.5, J-value:0.029
threshold:0.6000000000000001, J-value:0.013000000000000001
threshold:0.7000000000000001, J-value:0.006
threshold:0.8, J-value:0.002
threshold:0.9, J-value:0.001
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6934189427805268
Balanced accuracy score of test is  0.7051436500201891
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.39299999999999996
threshold:0.2, J-value:0.268
threshold:0.30000000000000004, J-value:0.143
threshold:0.4, J-value:0.066
threshold:0.5, J-value:0.020999999999999998
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.006
threshold:0.8, J-value:0.002
threshold:0.9, J-value:0.001
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6966550635653753
Balanced accuracy score of test is  0.698471316704318
threshold:0.0, J-value:0.0
threshold:0.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26184366533688136
0.2579746772215873
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.43      0.03      0.06      1954

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19857    87]
 [ 1889    65]]
done in 1.119402s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26184366533688136
0.25872967546761316
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
           1       0.48      0.04      0.07      1943

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19875    80]
 [ 1869    74]]
done in 1.086002s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26184366533688136
0.3015232464352372
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      6814
           1       0.37      0.03      0.05       830

    accuracy                           0.89      7644
   macro avg       0.63      0.51      0.49      7644
weighted avg       0.84      0.89      0.84      7644

Confusion_matrix
[[6778   36]
 [ 809   21]]
done in 1.067224s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26184366533688136
0.296342785357318
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      7022
           1       0.48      0.04      0.08       859

    accuracy                           0.89      7881
   macro avg       0.69      0.52      0.51      7881
weighted avg       0.85      0.89      0.85      7881

Confusion_matrix
[[6984   38]
 [ 824   35]]
done in 1.156510s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26184366533688136
0.2346208633399303
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13130
           1       0.46      0.04      0.07      1124

    accuracy                           0.92     14254
   macro avg       0.69      0.52      0.52     14254
weighted avg       0.89      0.92      0.89     14254

Confusion_matrix
[[13079    51]
 [ 1080    44]]
done in 1.040682s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26184366533688136
0.23758186073972826
Classification report
              precision    recall  f1-score   support

           0       0.93      1.00      0.96     12933
           1       0.48      0.04      0.07      1084

    accuracy                           0.92     14017
   macro avg       0.70      0.52      0.51     14017
weighted avg       0.89      0.92      0.89     14017

Confusion_matrix
[[12891    42]
 [ 1045    39]]
done in 1.090993s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.42      0.00      0.01      1954

    accuracy                           0.91     21898
   macro avg       0.66      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19937     7]
 [ 1949     5]]
done in 19.636623s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     1995

threshold:0.2, J-value:0.264
threshold:0.30000000000000004, J-value:0.14
threshold:0.4, J-value:0.07
threshold:0.5, J-value:0.029
threshold:0.6000000000000001, J-value:0.009999999999999998
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7076613391600883
Balanced accuracy score of test is  0.7040711131698406
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.38000000000000006
threshold:0.2, J-value:0.25
threshold:0.30000000000000004, J-value:0.134
threshold:0.4, J-value:0.059
threshold:0.5, J-value:0.02
threshold:0.6000000000000001, J-value:0.004
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6899820002051058
Balanced accuracy score of test is  0.7129958099424095
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.43000000000000005
thres

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.258519738060419
0.2637604789981243
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19893
           1       0.39      0.03      0.06      2005

    accuracy                           0.91     21898
   macro avg       0.65      0.51      0.51     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19790   103]
 [ 1939    66]]
done in 1.083304s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.258519738060419
0.26258995883227126
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.47      0.03      0.06      1991

    accuracy                           0.91     21898
   macro avg       0.69      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19832    75]
 [ 1924    67]]
done in 1.222680s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.258519738060419
0.305855997808892
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      6922
           1       0.31      0.02      0.04       873

    accuracy                           0.88      7795
   macro avg       0.60      0.51      0.49      7795
weighted avg       0.83      0.88      0.84      7795

Confusion_matrix
[[6876   46]
 [ 852   21]]
done in 1.105272s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.258519738060419
0.30629268702943246
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6882
           1       0.47      0.03      0.06       880

    accuracy                           0.89      7762
   macro avg       0.68      0.51      0.50      7762
weighted avg       0.84      0.89      0.84      7762

Confusion_matrix
[[6851   31]
 [ 852   28]]
done in 1.098124s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.258519738060419
0.24049347416724204
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12971
           1       0.44      0.04      0.07      1132

    accuracy                           0.92     14103
   macro avg       0.68      0.52      0.52     14103
weighted avg       0.88      0.92      0.89     14103

Confusion_matrix
[[12914    57]
 [ 1087    45]]
done in 1.104129s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.258519738060419
0.23859303068665969
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13025
           1       0.47      0.04      0.07      1111

    accuracy                           0.92     14136
   macro avg       0.70      0.52      0.51     14136
weighted avg       0.89      0.92      0.89     14136

Confusion_matrix
[[12981    44]
 [ 1072    39]]
done in 1.314383s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.67      0.00      0.01      2005

    accuracy                           0.91     21898
   macro avg       0.79      0.50      0.48     21898
weighted avg       0.89      0.91      0.87     21898

Confusion_matrix
[[19889     4]
 [ 1997     8]]
done in 20.264659s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907


threshold:0.2, J-value:0.26599999999999996
threshold:0.30000000000000004, J-value:0.146
threshold:0.4, J-value:0.069
threshold:0.5, J-value:0.028
threshold:0.6000000000000001, J-value:0.008
threshold:0.7000000000000001, J-value:0.006
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.700800028280979
Balanced accuracy score of test is  0.6949334495812358
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.39299999999999996
threshold:0.2, J-value:0.26
threshold:0.30000000000000004, J-value:0.13799999999999998
threshold:0.4, J-value:0.063
threshold:0.5, J-value:0.017
threshold:0.6000000000000001, J-value:0.006
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6962299264625331
Balanced accuracy score of test is  0.6943412987767826
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.396
thre

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2600573491720898
0.26247496339359394
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.51      0.04      0.08      1967

    accuracy                           0.91     21898
   macro avg       0.71      0.52      0.52     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19851    80]
 [ 1883    84]]
done in 1.094696s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2600573491720898
0.2594083793707783
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
           1       0.46      0.04      0.07      1942

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19867    89]
 [ 1866    76]]
done in 1.308936s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2600573491720898
0.3072785126223686
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      6937
           1       0.53      0.05      0.10       867

    accuracy                           0.89      7804
   macro avg       0.71      0.52      0.52      7804
weighted avg       0.85      0.89      0.85      7804

Confusion_matrix
[[6897   40]
 [ 821   46]]
done in 1.161553s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2600573491720898
0.29697382173014536
Classification report
              precision    recall  f1-score   support

           0       0.90      0.99      0.94      7008
           1       0.48      0.04      0.07       841

    accuracy                           0.89      7849
   macro avg       0.69      0.52      0.51      7849
weighted avg       0.85      0.89      0.85      7849

Confusion_matrix
[[6972   36]
 [ 808   33]]
done in 1.116300s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2600573491720898
0.2376667543556092
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12994
           1       0.49      0.03      0.06      1100

    accuracy                           0.92     14094
   macro avg       0.71      0.52      0.51     14094
weighted avg       0.89      0.92      0.89     14094

Confusion_matrix
[[12954    40]
 [ 1062    38]]
done in 1.042694s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2600573491720898
0.23842103813092694
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12948
           1       0.45      0.04      0.07      1101

    accuracy                           0.92     14049
   macro avg       0.69      0.52      0.52     14049
weighted avg       0.89      0.92      0.89     14049

Confusion_matrix
[[12895    53]
 [ 1058    43]]
done in 1.046195s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.27      0.00      0.00      1967

    accuracy                           0.91     21898
   macro avg       0.59      0.50      0.48     21898
weighted avg       0.85      0.91      0.87     21898

Confusion_matrix
[[19920    11]
 [ 1963     4]]
done in 20.064865s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956

threshold:0.2, J-value:0.254
threshold:0.30000000000000004, J-value:0.148
threshold:0.4, J-value:0.067
threshold:0.5, J-value:0.03899999999999999
threshold:0.6000000000000001, J-value:0.011
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.002
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6972065190744368
Balanced accuracy score of test is  0.6992557932291412
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.37400000000000005
threshold:0.2, J-value:0.26
threshold:0.30000000000000004, J-value:0.16
threshold:0.4, J-value:0.06999999999999999
threshold:0.5, J-value:0.047
threshold:0.6000000000000001, J-value:0.01
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.002
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6870177286798853
Balanced accuracy score of test is  0.6933577525125014
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.396
thres

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2603858817110064
0.25506577819525156
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     19973
           1       0.53      0.04      0.08      1925

    accuracy                           0.91     21898
   macro avg       0.72      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19904    69]
 [ 1847    78]]
done in 0.564105s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2603858817110064
0.26527655323060295
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
           1       0.45      0.04      0.07      2032

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19777    89]
 [ 1960    72]]
done in 0.567468s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2603858817110064
0.28404033314363986
Classification report
              precision    recall  f1-score   support

           0       0.90      1.00      0.95      6911
           1       0.52      0.04      0.07       785

    accuracy                           0.90      7696
   macro avg       0.71      0.52      0.51      7696
weighted avg       0.86      0.90      0.86      7696

Confusion_matrix
[[6882   29]
 [ 754   31]]
done in 0.558458s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2603858817110064
0.30511993142195837
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      6966
           1       0.41      0.04      0.07       874

    accuracy                           0.89      7840
   macro avg       0.65      0.51      0.50      7840
weighted avg       0.84      0.89      0.84      7840

Confusion_matrix
[[6919   47]
 [ 842   32]]
done in 0.616953s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2603858817110064
0.23936459703183824
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13062
           1       0.54      0.04      0.08      1140

    accuracy                           0.92     14202
   macro avg       0.73      0.52      0.52     14202
weighted avg       0.89      0.92      0.89     14202

Confusion_matrix
[[13022    40]
 [ 1093    47]]
done in 0.567110s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2603858817110064
0.24305631670903327
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12900
           1       0.49      0.03      0.06      1158

    accuracy                           0.92     14058
   macro avg       0.70      0.52      0.51     14058
weighted avg       0.88      0.92      0.88     14058

Confusion_matrix
[[12858    42]
 [ 1118    40]]
done in 0.562716s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.58      0.00      0.01      1925

    accuracy                           0.91     21898
   macro avg       0.75      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19968     5]
 [ 1918     7]]
done in 17.634176s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866

threshold:0.2, J-value:0.264
threshold:0.30000000000000004, J-value:0.159
threshold:0.4, J-value:0.08
threshold:0.5, J-value:0.038
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.005
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7040064346608181
Balanced accuracy score of test is  0.7045495667428464
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.40399999999999997
threshold:0.2, J-value:0.272
threshold:0.30000000000000004, J-value:0.162
threshold:0.4, J-value:0.089
threshold:0.5, J-value:0.035
threshold:0.6000000000000001, J-value:0.013999999999999999
threshold:0.7000000000000001, J-value:0.005
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7022537688002234
Balanced accuracy score of test is  0.7025041867297912
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.4
threshold:0.2, J-v

In [21]:
def add_mean_sd(records, result_table, overall_records, type):
    """Append mean / standard-deviation summary rows for one classifier.

    Parameters
    ----------
    records
        Sequence of per-run metric rows (anything with ``append`` that
        ``pd.DataFrame`` accepts). It is extended in place with one dict of
        column means followed by one dict of column standard deviations.
    result_table
        Table the statistics are read from; indexed by column name, each
        column providing ``.mean()`` and ``.std()``. Every metric column
        listed below must be present (a missing one raises ``KeyError``).
    overall_records
        Cross-classifier summary list. It is extended in place with a single
        dict holding ``'type'`` plus the means of a reduced set of columns.
    type
        Label stored under the ``'type'`` key of the summary row. Note that
        the name shadows the builtin ``type`` inside this function.

    Returns
    -------
    tuple
        ``(pd.DataFrame(records), overall_records)``. The frame is built
        after the two new rows were appended; the second element is the very
        same object that was passed in as ``overall_records``.
    """
    # Columns summarised with both mean and SD; the order here fixes the
    # key order of the appended dicts.
    metric_columns = (
        'auroc',
        'overall threshold',
        'male threshold',
        'female threshold',
        'overall ba validation',
        'overall ba test',
        'male ba validation',
        'male ba test',
        'female ba validation',
        'female ba test',
        'overall precision',
        'overall recall',
        'overall tpr',
        'overall tnr',
        'overall pd',
        'male precision',
        'male recall',
        'male tpr',
        'male tnr',
        'male pd',
        'female precision',
        'female recall',
        'female tpr',
        'female tnr',
        'female pd',
        'eod',
        'di',
    )
    # Reduced column set reported in the cross-classifier summary row.
    summary_columns = (
        'auroc',
        'overall threshold',
        'male threshold',
        'female threshold',
        'overall ba test',
        'male ba test',
        'female ba test',
        'overall tpr',
        'overall pd',
        'male tpr',
        'male pd',
        'female tpr',
        'female pd',
        'eod',
        'di',
    )

    # Row of per-column means.
    mean_row = {}
    for column in metric_columns:
        mean_row[column] = result_table[column].mean()
    records.append(mean_row)

    # Row of per-column standard deviations.
    sd_row = {}
    for column in metric_columns:
        sd_row[column] = result_table[column].std()
    records.append(sd_row)

    # One labelled row of means for the cross-classifier comparison table.
    summary_row = {'type': type}
    for column in summary_columns:
        summary_row[column] = result_table[column].mean()
    overall_records.append(summary_row)

    return pd.DataFrame(records), overall_records

In [22]:
# Summarise each classifier's result table and write the summaries to CSV.
# records_* / result_* are expected to exist already (they are not defined in
# this cell).
#
# add_mean_sd returns the same list it receives as its third argument, so
# `overall_records` is just another name for `overall_table` throughout.
#
# NOTE(review): add_mean_sd appends the mean/SD rows to records_* in place and
# each result_* is rebound to a frame that already contains those rows.
# Running this cell a second time would therefore fold the summary rows into
# the next mean/SD computation -- re-run the cells that build records_* and
# result_* first.
overall_table = []
result_lr, overall_records = add_mean_sd (records_lr, result_lr, overall_table, 'lr')
result_rf, overall_records = add_mean_sd (records_rf, result_rf, overall_records, 'rf')
result_dt, overall_records = add_mean_sd (records_dt, result_dt, overall_records, 'dt')
result_gbt, overall_records = add_mean_sd (records_gbt, result_gbt, overall_records, 'gbt')

# Per-classifier tables (original rows plus the appended mean and SD rows).
# NOTE(review): absolute, machine-specific output directory.
result_path='/Users/lifuchen/Desktop/research/resample_data/'
result_lr.to_csv(path.join(result_path,'gender-lr-result.csv'), index=False)
result_rf.to_csv(path.join(result_path,'gender-rf-result.csv'), index=False)
result_dt.to_csv(path.join(result_path,'gender-dt-result.csv'), index=False)
result_gbt.to_csv(path.join(result_path,'gender-gbt-result.csv'), index=False)

# Cross-classifier comparison: one row of means per classifier, labelled by
# its 'type' value. `result_path` is rebound to a different directory here.
overall_result = pd.DataFrame(overall_table)
result_path='/Users/lifuchen/Desktop/research/resample_result/'
overall_result.to_csv(path.join(result_path,'gender.csv'), index=False)
