In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from os import path
import imblearn
from imblearn.over_sampling import RandomOverSampler, SMOTE
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
import sklearn.preprocessing
from sklearn import preprocessing
from sklearn.preprocessing import Normalizer
import src.lib.utility_classfier as uclf
import src.lib.optimal_threhold_related as thres
import src.lib.fairness_tests as fair

In [2]:
# Location of the input CSV that the rest of the notebook works from.
# NOTE(review): this is an absolute, machine-specific path; adjust it before re-running elsewhere.
data_path = '/Users/lifuchen/Desktop/research/data.csv'
df = pd.read_csv(filepath_or_buffer=data_path)

In [3]:
# Target vector: the values of the 'Class' column.
y = df['Class'].values
# Feature table: every column except 'GRID' and 'Class'.
# 'Race_B' is deliberately NOT dropped here, so race information is still
# included when training the model.
X = df.drop(columns=['GRID', 'Class'])
X.shape

(109490, 87)

In [4]:
def save_prediction(classifier, characteristic, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white, X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black, result_path=None):
    """Score six evaluation sets with one classifier helper and write the scores to a CSV file.

    The helper is looked up by name on the ``uclf`` module and called once per
    evaluation set as ``helper(X_train_scaled, y_train, X_eval, y_eval)``; whatever
    it returns is stored as one column of the output file.

    Parameters
    ----------
    classifier : str
        Name of the attribute of ``uclf`` to call (e.g. "random_forest").
    characteristic : object
        Tag appended (via ``str``) to the classifier name to build the file name.
    X_train_scaled, y_train
        Training data handed unchanged to every helper call.
    X_val_scaled, y_val, X_test_scaled, y_test
        Overall validation / test sets -> columns ``val_score`` / ``test_score``.
    X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white
        First subgroup -> columns ``val_1_score`` / ``test_1_score``.
    X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black
        Second subgroup -> columns ``val_2_score`` / ``test_2_score``.
    result_path : str, optional
        Directory that receives ``<classifier><characteristic>prediction.csv``.
        Defaults to the directory this notebook has always used. The directory
        is created if it does not exist.

    Returns
    -------
    None
        The only result is the CSV file written to ``result_path``.

    Raises
    ------
    AttributeError
        If ``uclf`` has no attribute named ``classifier``.
    """
    import os  # local import: the notebook itself only imports `path` from os

    if result_path is None:
        result_path = '/Users/lifuchen/Desktop/research/predictions/'

    # Create the output directory before any scoring so that a bad location
    # is reported immediately instead of after all six helper calls.
    os.makedirs(result_path, exist_ok=True)

    method_to_call = getattr(uclf, classifier)

    # (output column, features, labels) in the order the columns are written.
    evaluation_sets = (
        ('val_score', X_val_scaled, y_val),
        ('test_score', X_test_scaled, y_test),
        ('val_1_score', X_val_white_scaled, y_val_white),
        ('test_1_score', X_test_white_scaled, y_test_white),
        ('val_2_score', X_val_black_scaled, y_val_black),
        ('test_2_score', X_test_black_scaled, y_test_black),
    )

    # NOTE(review): the helper receives the training data on every call, so it
    # presumably refits the model six times -- confirm against uclf before optimising.
    scores_by_column = {}
    for column, features, labels in evaluation_sets:
        scores_by_column[column] = method_to_call(X_train_scaled, y_train, features, labels)

    # Build one row per score set and transpose, so score sets of different
    # lengths can share a table (get_result later discards the missing cells).
    overall_prediction = pd.DataFrame.from_dict(scores_by_column, orient='index').transpose()

    filename = str(classifier) + str(characteristic) + "prediction.csv"
    overall_prediction.to_csv(path.join(result_path, filename), index=False)

In [5]:
def get_result (classifier, characteristic, records, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white, X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black, result_path=None):
    """Load saved prediction scores, compute performance and fairness metrics, and append them to ``records``.

    The scores are read from ``<classifier><characteristic>prediction.csv`` in
    ``result_path`` (the file written by ``save_prediction``). For the overall
    population and for each of the two subgroups, a threshold is chosen on the
    validation scores via ``balance_accuracy`` and then used for the test-set
    metrics from ``thres.calculate_precision_metrics``. The two subgroup
    thresholds are also passed to ``fair.get_EOD`` and ``fair.get_SP``.

    Parameters
    ----------
    classifier : str
        Classifier name used as the file-name prefix.
    characteristic : object
        Tag appended (via ``str``) to the classifier name in the file name.
    records : list
        Mutated in place: one dict of metrics is appended.
    y_val, y_test, y_val_white, y_test_white, y_val_black, y_test_black
        True labels matching the saved score columns.
    X_train_scaled, y_train, X_val_scaled, X_test_scaled, X_val_white_scaled,
    X_test_white_scaled, X_val_black_scaled, X_test_black_scaled
        Accepted but not used by this function.
    result_path : str, optional
        Directory containing the prediction file. Defaults to the directory
        this notebook has always used.

    Returns
    -------
    None
    """
    if result_path is None:
        result_path = '/Users/lifuchen/Desktop/research/predictions/'
    # Build the name exactly as save_prediction does (str() on both parts).
    filename = str(classifier) + str(characteristic) + "prediction.csv"
    prediction = pd.read_csv(path.join(result_path, filename))

    # Columns hold score sets of different lengths, so the shorter ones carry
    # missing values in the file; drop those to recover each set.
    y_val_score = prediction['val_score'].dropna()
    y_test_score = prediction['test_score'].dropna()

    y_val_score_white = prediction['val_1_score'].dropna()
    y_test_score_white = prediction['test_1_score'].dropna()

    y_val_score_black = prediction['val_2_score'].dropna()
    y_test_score_black = prediction['test_2_score'].dropna()

    # Overall population.
    threshold, ba_val, ba_test = balance_accuracy (y_val, y_val_score,y_test, y_test_score)
    auroc = roc_auc_score(y_test, y_test_score)
    precision, recall, tpr, tnr, pd_overall = thres.calculate_precision_metrics(y_test, y_test_score,threshold)

    # First subgroup ("white"), with its own validation-derived threshold.
    threshold_white, ba_val_white, ba_test_white = balance_accuracy (y_val_white, y_val_score_white,y_test_white, y_test_score_white)
    precision_white, recall_white, tpr_white, tnr_white, pd_white = thres.calculate_precision_metrics(y_test_white, y_test_score_white,threshold_white)

    # Second subgroup ("black"), with its own validation-derived threshold.
    threshold_black, ba_val_black, ba_test_black = balance_accuracy (y_val_black, y_val_score_black, y_test_black, y_test_score_black)
    precision_black, recall_black, tpr_black, tnr_black, pd_black = thres.calculate_precision_metrics(y_test_black, y_test_score_black,threshold_black)

    # Between-group fairness measures, each group evaluated at its own threshold.
    eod = fair.get_EOD(y_test_white, y_test_score_white,threshold_white, y_test_black, y_test_score_black, threshold_black)
    sp = fair.get_SP(y_test_white, y_test_score_white,threshold_white, y_test_black, y_test_score_black, threshold_black)

    records.append({
        'auroc': auroc,
        'overall threshold': threshold,
        'white threshold': threshold_white,
        'black threshold': threshold_black,
        'overall ba validation': ba_val,
        'overall ba test': ba_test,
        'white ba validation': ba_val_white,
        'white ba test': ba_test_white,
        'black ba validation': ba_val_black,
        'black ba test': ba_test_black,
        'overall precision':precision,
        'overall recall':recall,
        'overall tpr':tpr,
        'overall tnr':tnr,
        'overall pd':pd_overall,
        'white precision':precision_white,
        'white recall':recall_white,
        'white tpr':tpr_white,
        'white tnr':tnr_white,
        'white pd':pd_white,
        'black precision':precision_black,
        'black recall':recall_black,
        'black tpr':tpr_black,
        'black tnr':tnr_black,
        'black pd':pd_black,
        'eod': eod,
        # NOTE(review): stored under 'di' although the value comes from
        # fair.get_SP -- confirm which metric name is intended.
        'di': sp,
        })

In [6]:
def balance_accuracy (y_val, y_val_score,y_test, y_test_score):
    """Choose a threshold on the validation split and report balanced accuracy for both splits.

    The threshold is the first element returned by
    ``thres.get_optimal_threshold_Jvalue(y_val, y_val_score)``; the second
    element is discarded. That single threshold is then passed to
    ``thres.calculate_balanced_accuracy`` for the validation data and for the
    test data. Each value is printed as soon as it is computed.

    Returns
    -------
    tuple
        ``(threshold, balanced accuracy on validation, balanced accuracy on test)``.
    """
    chosen_cutoff, _ignored = thres.get_optimal_threshold_Jvalue(y_val, y_val_score)
    print("Optimal threshold by J value is ", chosen_cutoff)

    # Validation first, then test, both evaluated at the same cutoff.
    accuracies = []
    for message, labels, scores in (
        ("Balanced accuracy score of val is ", y_val, y_val_score),
        ("Balanced accuracy score of test is ", y_test, y_test_score),
    ):
        accuracy = thres.calculate_balanced_accuracy(labels, scores, chosen_cutoff)
        print(message, accuracy)
        accuracies.append(accuracy)

    return chosen_cutoff, accuracies[0], accuracies[1]

In [7]:
def fairness_metrics (X, y, attribute, random_state):
    """Run one split / scale / train / evaluate round for all four classifiers.

    Steps, in order:
      1. Split the data with ``fair.split_by_trait_balance_size``.
      2. Fit a ``MaxAbsScaler`` on the training features only and apply it to
         every validation and test feature set.
      3. Call ``save_prediction`` for each classifier.
      4. Call ``get_result`` for each classifier, which appends one metrics
         record to that classifier's module-level list.

    Parameters
    ----------
    X, y
        Full feature table and labels, passed straight to the split helper.
    attribute : str
        Name of the trait used for the split; also part of the output file tag.
    random_state
        Forwarded to the split helper and appended to the output file tag.

    Returns
    -------
    None
        Results are appended to the module-level lists ``records_lr``,
        ``records_rf``, ``records_dt`` and ``records_gbt``.

    Raises
    ------
    NameError
        If any of the four module-level ``records_*`` lists is not defined.
    """
    # Pair each classifier helper name with the module-level list collecting its
    # results. Resolving the lists here makes a missing `records_*` global fail
    # before the split and the (slow) training rather than after them.
    classifier_records = (
        ("logic_regression", records_lr),
        ("random_forest", records_rf),
        ("decision_tree", records_dt),
        ("gradiant_boosting", records_gbt),
    )

    # call this split method that resamples by size, and drop the "attribute"
    # a copy of attribute is included in the data, so we still have them in our model
    X_train, y_train, X_val, y_val, X_test, y_test, X_val_white, X_val_black, y_val_white, y_val_black, X_test_white, X_test_black, y_test_white, y_test_black \
        = fair.split_by_trait_balance_size (X, y, attribute, random_state)

    # Row counts: features and labels of each split should line up.
    print("X train", X_train.shape[0])
    print("Y train", y_train.shape[0])
    print(X_val.shape[0], X_val_white.shape[0], X_val_black.shape[0])
    print(y_val.shape[0], y_val_white.shape[0], y_val_black.shape[0])
    print(X_test.shape[0], X_test_white.shape[0], X_test_black.shape[0])
    print(y_test.shape[0], y_test_white.shape[0], y_test_black.shape[0])

    # The scaler is fitted on the training features only; every other set is
    # transformed with those training-derived scales.
    max_abs_scaler = preprocessing.MaxAbsScaler()
    X_train_scaled = max_abs_scaler.fit_transform(X_train)
    X_test_scaled = max_abs_scaler.transform(X_test)
    X_test_white_scaled = max_abs_scaler.transform(X_test_white)
    X_test_black_scaled = max_abs_scaler.transform(X_test_black)
    X_val_scaled = max_abs_scaler.transform(X_val)
    X_val_white_scaled = max_abs_scaler.transform(X_val_white)
    X_val_black_scaled = max_abs_scaler.transform(X_val_black)

    characteristic = attribute + "resample-by-size" + str(random_state)

    # Positional data arguments shared by every save_prediction / get_result call.
    shared_args = (
        X_train_scaled, y_train,
        X_val_scaled, y_val,
        X_test_scaled, y_test,
        X_val_white_scaled, y_val_white,
        X_test_white_scaled, y_test_white,
        X_val_black_scaled, y_val_black,
        X_test_black_scaled, y_test_black,
    )

    # First write the prediction files for all classifiers ...
    for classifier, _ in classifier_records:
        save_prediction (classifier, characteristic, *shared_args)

    # ... then read them back and append one metrics record per classifier.
    for classifier, records in classifier_records:
        get_result (classifier, characteristic, records, *shared_args)

In [8]:
# One accumulator list per classifier; fairness_metrics hands these
# module-level lists to get_result, which appends one record per run.
records_lr, records_rf, records_dt, records_gbt = [], [], [], []

# Repeat the whole experiment for random states 0 through 9 on the "Race_W" attribute.
for random_state in range(0, 10):
    fairness_metrics(X, y, "Race_W", random_state)

# Turn each classifier's accumulated records into a DataFrame (one row per run).
result_lr, result_rf, result_dt, result_gbt = map(
    pd.DataFrame, (records_lr, records_rf, records_dt, records_gbt)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(56639,)
(56639,)
(113278, 87)
X train 113278
Y train 113278
21898 18899 2999
21898 18899 2999
21898 18968 2930
21898 18968 2930


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1731260358552146
0.2667016844794276
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19904
           1       0.44      0.05      0.08      1994

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.52     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19787   117]
 [ 1901    93]]
done in 0.799961s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1731260358552146
0.2657082130833344
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19934
           1       0.40      0.04      0.08      1964

    accuracy                           0.91     21898
   macro avg       0.66      0.52      0.52     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19807   127]
 [ 1878    86]]
done in 0.794769s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1731260358552146
0.26364194059681995
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17180
           1       0.44      0.05      0.09      1719

    accuracy                           0.91     18899
   macro avg       0.68      0.52      0.52     18899
weighted avg       0.87      0.91      0.87     18899

Confusion_matrix
[[17066   114]
 [ 1628    91]]
done in 0.994599s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1731260358552146
0.2633637462137018
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17260
           1       0.40      0.05      0.09      1708

    accuracy                           0.91     18968
   macro avg       0.66      0.52      0.52     18968
weighted avg       0.87      0.91      0.87     18968

Confusion_matrix
[[17134   126]
 [ 1625    83]]
done in 0.874548s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1731260358552146
0.28598347828983123
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2724
           1       0.40      0.01      0.01       275

    accuracy                           0.91      2999
   macro avg       0.65      0.50      0.48      2999
weighted avg       0.86      0.91      0.87      2999

Confusion_matrix
[[2721    3]
 [ 273    2]]
done in 0.788218s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1731260358552146
0.2808856354666762
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2674
           1       0.75      0.01      0.02       256

    accuracy                           0.91      2930
   macro avg       0.83      0.51      0.49      2930
weighted avg       0.90      0.91      0.87      2930

Confusion_matrix
[[2673    1]
 [ 253    3]]
done in 0.885872s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.33      0.00      0.00      1994

    accuracy                           0.91     21898
   macro avg       0.62      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19898     6]
 [ 1991     3]]
done in 36.769476s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
    

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2674
           1       0.00      0.00      0.00       256

    accuracy                           0.91      2930
   macro avg       0.46      0.50      0.48      2930
weighted avg       0.83      0.91      0.87      2930

Confusion_matrix
[[2673    1]
 [ 256    0]]
done in 35.418866s
0.17802488713209083
0.2776372921532345
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.20      0.00      0.00      1994

    accuracy                           0.91     21898
   macro avg       0.55      0.50      0.48     21898
weighted avg       0.84      0.91      0.87     21898

Confusion_matrix
[[19896     8]
 [ 1992     2]]
done in 1.330433s
0.17802488713209083
0.27594846222380204
Classification report
              precision    recall  f1-score   support

           0     

threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.41000000000000003
threshold:0.2, J-value:0.19
threshold:0.30000000000000004, J-value:0.048
threshold:0.4, J-value:0.003
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7050607395541316
Balanced accuracy score of test is  0.6753342370979806
True positive rate of class 1 is  0.721
True positive rate of class 2 is  0.594
Positive prediction rate of class 1 is  0.378
Positive prediction rate of class 2 is  0.274
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.347
threshold:0.2, J-value:0.19699999999999998
threshold:0.30000000000000004, J-value:0.061
threshold:0.4, J-value:0.006999999999999999
threshold:0.5, J-value:0.001
threshold:0.6000000000000001, J-value:0.001
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.001
threshold:0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(56798,)
(56798,)
(113596, 87)
X train 113596
Y train 113596
21898 18825 3073
21898 18825 3073
21898 18883 3015
21898 18883 3015


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17753872475285545
0.25986421597830484
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     19980
           1       0.40      0.05      0.09      1918

    accuracy                           0.91     21898
   macro avg       0.66      0.52      0.52     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19843   137]
 [ 1826    92]]
done in 0.842691s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17753872475285545
0.2610447220990318
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     19972
           1       0.47      0.05      0.09      1926

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.52     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19862   110]
 [ 1827    99]]
done in 0.828663s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17753872475285545
0.2560328910018886
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     17178
           1       0.39      0.05      0.09      1647

    accuracy                           0.91     18825
   macro avg       0.65      0.52      0.52     18825
weighted avg       0.87      0.91      0.88     18825

Confusion_matrix
[[17044   134]
 [ 1560    87]]
done in 0.827872s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17753872475285545
0.26169989477288685
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17196
           1       0.47      0.06      0.10      1687

    accuracy                           0.91     18883
   macro avg       0.69      0.53      0.53     18883
weighted avg       0.88      0.91      0.88     18883

Confusion_matrix
[[17087   109]
 [ 1590    97]]
done in 0.861307s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17753872475285545
0.28333466592332157
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2802
           1       0.62      0.02      0.04       271

    accuracy                           0.91      3073
   macro avg       0.77      0.51      0.49      3073
weighted avg       0.89      0.91      0.87      3073

Confusion_matrix
[[2799    3]
 [ 266    5]]
done in 0.814126s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17753872475285545
0.25694136369093723
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2776
           1       0.67      0.01      0.02       239

    accuracy                           0.92      3015
   macro avg       0.79      0.50      0.49      3015
weighted avg       0.90      0.92      0.88      3015

Confusion_matrix
[[2775    1]
 [ 237    2]]
done in 0.797263s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19980
           1       0.67      0.00      0.00      1918

    accuracy                           0.91     21898
   macro avg       0.79      0.50      0.48     21898
weighted avg       0.89      0.91      0.87     21898

Confusion_matrix
[[19979     1]
 [ 1916     2]]
done in 37.260652s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
  

  _warn_prf(average, modifier, msg_start, len(result))


0.1832928240263281
0.2772487383669442
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19980
           1       0.29      0.00      0.00      1918

    accuracy                           0.91     21898
   macro avg       0.60      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19975     5]
 [ 1916     2]]
done in 1.353134s
0.1832928240263281
0.2690594181589989
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.00      0.00      0.00      1926

    accuracy                           0.91     21898
   macro avg       0.46      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19969     3]
 [ 1926     0]]
done in 1.350626s
0.1832928240263281
0.27083990057749635
Classification report
              precision    recall

  _warn_prf(average, modifier, msg_start, len(result))


0.1832928240263281
0.31693267502469796
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2802
           1       0.50      0.01      0.01       271

    accuracy                           0.91      3073
   macro avg       0.71      0.50      0.48      3073
weighted avg       0.88      0.91      0.87      3073

Confusion_matrix
[[2800    2]
 [ 269    2]]
done in 1.327838s
0.1832928240263281
0.269734583505693
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2776
           1       0.00      0.00      0.00       239

    accuracy                           0.92      3015
   macro avg       0.46      0.50      0.48      3015
weighted avg       0.85      0.92      0.88      3015

Confusion_matrix
[[2773    3]
 [ 239    0]]
done in 1.320775s
Classification report
              precision    recall  f1-score   support

           0       0.91  

True positive rate of class 2 is  0.473
Positive prediction rate of class 1 is  0.357
Positive prediction rate of class 2 is  0.188
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.389
threshold:0.2, J-value:0.255
threshold:0.30000000000000004, J-value:0.152
threshold:0.4, J-value:0.07
threshold:0.5, J-value:0.027
threshold:0.6000000000000001, J-value:0.007
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6949083337769468
Balanced accuracy score of test is  0.7005163407378845
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.4099999999999999
threshold:0.2, J-value:0.27499999999999997
threshold:0.30000000000000004, J-value:0.165
threshold:0.4, J-value:0.077
threshold:0.5, J-value:0.029
threshold:0.6000000000000001, J-value:0.008
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(56741,)
(56741,)
(113482, 87)
X train 113482
Y train 113482
21898 18936 2962
21898 18936 2962
21898 18829 3069
21898 18829 3069


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17489165492078745
0.2641569447526818
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19950
           1       0.45      0.04      0.08      1948

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.52     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19848   102]
 [ 1863    85]]
done in 0.792731s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17489165492078745
0.2657526303424164
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.45      0.04      0.07      2015

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19787    96]
 [ 1936    79]]
done in 0.833583s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17489165492078745
0.26425946123054017
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17239
           1       0.45      0.05      0.09      1697

    accuracy                           0.91     18936
   macro avg       0.68      0.52      0.52     18936
weighted avg       0.87      0.91      0.87     18936

Confusion_matrix
[[17140    99]
 [ 1615    82]]
done in 0.810414s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17489165492078745
0.2639013769848382
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17079
           1       0.46      0.04      0.08      1750

    accuracy                           0.91     18829
   macro avg       0.68      0.52      0.51     18829
weighted avg       0.87      0.91      0.87     18829

Confusion_matrix
[[16990    89]
 [ 1675    75]]
done in 0.796853s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17489165492078745
0.2635015591940306
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2711
           1       0.50      0.01      0.02       251

    accuracy                           0.92      2962
   macro avg       0.71      0.51      0.49      2962
weighted avg       0.88      0.92      0.88      2962

Confusion_matrix
[[2708    3]
 [ 248    3]]
done in 0.785210s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17489165492078745
0.2771104828904256
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2804
           1       0.36      0.02      0.03       265

    accuracy                           0.91      3069
   macro avg       0.64      0.51      0.49      3069
weighted avg       0.87      0.91      0.87      3069

Confusion_matrix
[[2797    7]
 [ 261    4]]
done in 0.787037s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.36      0.00      0.00      1948

    accuracy                           0.91     21898
   macro avg       0.64      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19943     7]
 [ 1944     4]]
done in 37.434834s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
   

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2804
           1       0.00      0.00      0.00       265

    accuracy                           0.91      3069
   macro avg       0.46      0.50      0.48      3069
weighted avg       0.83      0.91      0.87      3069

Confusion_matrix
[[2804    0]
 [ 265    0]]
done in 36.529062s


  _warn_prf(average, modifier, msg_start, len(result))


0.17982010145576538
0.2746317824300106
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.27      0.00      0.00      1948

    accuracy                           0.91     21898
   macro avg       0.59      0.50      0.48     21898
weighted avg       0.85      0.91      0.87     21898

Confusion_matrix
[[19939    11]
 [ 1944     4]]
done in 1.344346s
0.17982010145576538
0.28190597651858007
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.11      0.00      0.00      2015

    accuracy                           0.91     21898
   macro avg       0.51      0.50      0.48     21898
weighted avg       0.83      0.91      0.86     21898

Confusion_matrix
[[19867    16]
 [ 2013     2]]
done in 1.340986s
0.17982010145576538
0.27418214331617036
Classification report
              precision    re

True positive rate of class 2 is  0.611
Positive prediction rate of class 1 is  0.372
Positive prediction rate of class 2 is  0.278
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.3350000000000001
threshold:0.2, J-value:0.128
threshold:0.30000000000000004, J-value:0.05700000000000001
threshold:0.4, J-value:0.054000000000000006
threshold:0.5, J-value:0.001
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6671920561156485
Balanced accuracy score of test is  0.6788031572790152
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.34299999999999997
threshold:0.2, J-value:0.128
threshold:0.30000000000000004, J-value:0.05500000000000001
threshold:0.4, J-value:0.052
threshold:0.5, J-value:0.001
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(56692,)
(56692,)
(113384, 87)
X train 113384
Y train 113384
21898 18932 2966
21898 18932 2966
21898 18882 3016
21898 18882 3016


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17397062861609122
0.2671729214421902
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19908
           1       0.44      0.05      0.08      1990

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.52     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19792   116]
 [ 1897    93]]
done in 0.776713s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17397062861609122
0.2645216605571823
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19918
           1       0.40      0.04      0.07      1980

    accuracy                           0.91     21898
   macro avg       0.66      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19798   120]
 [ 1900    80]]
done in 0.784896s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17397062861609122
0.26441424311467004
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17204
           1       0.44      0.05      0.09      1728

    accuracy                           0.91     18932
   macro avg       0.67      0.52      0.52     18932
weighted avg       0.87      0.91      0.87     18932

Confusion_matrix
[[17089   115]
 [ 1639    89]]
done in 0.802063s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17397062861609122
0.26416227029732664
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17156
           1       0.40      0.04      0.08      1726

    accuracy                           0.91     18882
   macro avg       0.66      0.52      0.52     18882
weighted avg       0.86      0.91      0.87     18882

Confusion_matrix
[[17040   116]
 [ 1649    77]]
done in 0.851027s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17397062861609122
0.2847815856689641
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2704
           1       0.80      0.02      0.03       262

    accuracy                           0.91      2966
   macro avg       0.86      0.51      0.49      2966
weighted avg       0.90      0.91      0.87      2966

Confusion_matrix
[[2703    1]
 [ 258    4]]
done in 0.769713s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17397062861609122
0.26677166284053583
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2762
           1       0.43      0.01      0.02       254

    accuracy                           0.92      3016
   macro avg       0.67      0.51      0.49      3016
weighted avg       0.88      0.92      0.88      3016

Confusion_matrix
[[2758    4]
 [ 251    3]]
done in 0.774901s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.54      0.00      0.01      1990

    accuracy                           0.91     21898
   macro avg       0.72      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19902     6]
 [ 1983     7]]
done in 36.496458s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
  

  _warn_prf(average, modifier, msg_start, len(result))


0.17831308395288786
0.2766792274590046
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.43      0.01      0.02      1990

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19878    30]
 [ 1967    23]]
done in 1.343692s
0.17831308395288786
0.2755851218120846
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
           1       0.36      0.01      0.02      1980

    accuracy                           0.91     21898
   macro avg       0.64      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19890    28]
 [ 1964    16]]
done in 1.341754s
0.17831308395288786
0.2724470521302851
Classification report
              precision    reca

True positive rate of class 2 is  0.638
Positive prediction rate of class 1 is  0.368
Positive prediction rate of class 2 is  0.259
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.341
threshold:0.2, J-value:0.203
threshold:0.30000000000000004, J-value:0.08299999999999999
threshold:0.4, J-value:0.01
threshold:0.5, J-value:0.01
threshold:0.6000000000000001, J-value:0.001
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6703613001717448
Balanced accuracy score of test is  0.6676883048782838
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.35800000000000004
threshold:0.2, J-value:0.21999999999999997
threshold:0.30000000000000004, J-value:0.089
threshold:0.4, J-value:0.009999999999999998
threshold:0.5, J-value:0.009999999999999998
threshold:0.6000000000000001, J-value:0.001
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-va

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(56622,)
(56622,)
(113244, 87)
X train 113244
Y train 113244
21898 18875 3023
21898 18875 3023
21898 19009 2889
21898 19009 2889


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17743529608073927
0.2583120677076889
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     20006
           1       0.38      0.04      0.07      1892

    accuracy                           0.91     21898
   macro avg       0.65      0.52      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19880   126]
 [ 1814    78]]
done in 0.811637s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17743529608073927
0.2579707892695004
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     19975
           1       0.47      0.05      0.09      1923

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.52     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19863   112]
 [ 1823   100]]
done in 0.787224s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17743529608073927
0.256878035048346
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     17241
           1       0.38      0.05      0.08      1634

    accuracy                           0.91     18875
   macro avg       0.65      0.52      0.52     18875
weighted avg       0.87      0.91      0.88     18875

Confusion_matrix
[[17117   124]
 [ 1558    76]]
done in 0.803355s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17743529608073927
0.26018869748861834
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17310
           1       0.47      0.06      0.10      1699

    accuracy                           0.91     19009
   macro avg       0.69      0.52      0.53     19009
weighted avg       0.87      0.91      0.88     19009

Confusion_matrix
[[17202   108]
 [ 1604    95]]
done in 0.805991s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17743529608073927
0.26726587731572615
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2765
           1       0.50      0.01      0.02       258

    accuracy                           0.91      3023
   macro avg       0.71      0.50      0.49      3023
weighted avg       0.88      0.91      0.88      3023

Confusion_matrix
[[2763    2]
 [ 256    2]]
done in 0.794437s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17743529608073927
0.24337742916662278
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2665
           1       0.56      0.02      0.04       224

    accuracy                           0.92      2889
   macro avg       0.74      0.51      0.50      2889
weighted avg       0.90      0.92      0.89      2889

Confusion_matrix
[[2661    4]
 [ 219    5]]
done in 0.796126s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     20006
           1       0.60      0.00      0.00      1892

    accuracy                           0.91     21898
   macro avg       0.76      0.50      0.48     21898
weighted avg       0.89      0.91      0.87     21898

Confusion_matrix
[[20004     2]
 [ 1889     3]]
done in 37.186396s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19975
  

  _warn_prf(average, modifier, msg_start, len(result))


0.18263253560275988
0.2703512314355614
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17310
           1       0.00      0.00      0.00      1699

    accuracy                           0.91     19009
   macro avg       0.46      0.50      0.48     19009
weighted avg       0.83      0.91      0.87     19009

Confusion_matrix
[[17310     0]
 [ 1699     0]]
done in 1.328466s


  _warn_prf(average, modifier, msg_start, len(result))


0.18263253560275988
0.28829262148570395
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2765
           1       0.17      0.00      0.01       258

    accuracy                           0.91      3023
   macro avg       0.54      0.50      0.48      3023
weighted avg       0.85      0.91      0.87      3023

Confusion_matrix
[[2760    5]
 [ 257    1]]
done in 1.308712s
0.18263253560275988
0.2573780654716651
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2665
           1       0.40      0.01      0.02       224

    accuracy                           0.92      2889
   macro avg       0.66      0.50      0.49      2889
weighted avg       0.88      0.92      0.89      2889

Confusion_matrix
[[2662    3]
 [ 222    2]]
done in 1.307726s
Classification report
              precision    recall  f1-score   support

           0       0.9

True positive rate of class 2 is  0.442
Positive prediction rate of class 1 is  0.373
Positive prediction rate of class 2 is  0.149
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.394
threshold:0.2, J-value:0.26399999999999996
threshold:0.30000000000000004, J-value:0.147
threshold:0.4, J-value:0.076
threshold:0.5, J-value:0.027
threshold:0.6000000000000001, J-value:0.007
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6970903443554671
Balanced accuracy score of test is  0.7028183695558086
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.403
threshold:0.2, J-value:0.279
threshold:0.30000000000000004, J-value:0.154
threshold:0.4, J-value:0.082
threshold:0.5, J-value:0.029
threshold:0.6000000000000001, J-value:0.007
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced ac

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(56644,)
(56644,)
(113288, 87)
X train 113288
Y train 113288
21898 18970 2928
21898 18970 2928
21898 18892 3006
21898 18892 3006


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1730505169500745
0.26745226880549217
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19911
           1       0.42      0.05      0.09      1987

    accuracy                           0.91     21898
   macro avg       0.67      0.52      0.52     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19778   133]
 [ 1890    97]]
done in 0.791643s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1730505169500745
0.26539005603331006
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.44      0.04      0.07      1971

    accuracy                           0.91     21898
   macro avg       0.67      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19834    93]
 [ 1899    72]]
done in 0.811316s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1730505169500745
0.2666289973147923
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17229
           1       0.41      0.05      0.09      1741

    accuracy                           0.91     18970
   macro avg       0.66      0.52      0.52     18970
weighted avg       0.87      0.91      0.87     18970

Confusion_matrix
[[17099   130]
 [ 1649    92]]
done in 0.790847s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1730505169500745
0.26158378912263236
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17198
           1       0.43      0.04      0.07      1694

    accuracy                           0.91     18892
   macro avg       0.67      0.52      0.51     18892
weighted avg       0.87      0.91      0.87     18892

Confusion_matrix
[[17108    90]
 [ 1627    67]]
done in 0.807779s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1730505169500745
0.27278610083369437
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2682
           1       0.62      0.02      0.04       246

    accuracy                           0.92      2928
   macro avg       0.77      0.51      0.50      2928
weighted avg       0.89      0.92      0.88      2928

Confusion_matrix
[[2679    3]
 [ 241    5]]
done in 0.782522s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1730505169500745
0.2893115445484544
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2729
           1       0.62      0.02      0.04       277

    accuracy                           0.91      3006
   macro avg       0.77      0.51      0.49      3006
weighted avg       0.88      0.91      0.87      3006

Confusion_matrix
[[2726    3]
 [ 272    5]]
done in 0.791604s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.83      0.00      0.01      1987

    accuracy                           0.91     21898
   macro avg       0.87      0.50      0.48     21898
weighted avg       0.90      0.91      0.87     21898

Confusion_matrix
[[19910     1]
 [ 1982     5]]
done in 36.822724s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
    

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2729
           1       0.00      0.00      0.00       277

    accuracy                           0.91      3006
   macro avg       0.45      0.50      0.48      3006
weighted avg       0.82      0.91      0.86      3006

Confusion_matrix
[[2729    0]
 [ 277    0]]
done in 37.230814s


  _warn_prf(average, modifier, msg_start, len(result))


0.17790572732510343
0.27757807533933104
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.00      0.00      0.00      1987

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19911     0]
 [ 1987     0]]
done in 1.347628s


  _warn_prf(average, modifier, msg_start, len(result))


0.17790572732510343
0.2746917796012997
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.00      0.00      0.00      1971

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19927     0]
 [ 1971     0]]
done in 1.351738s


  _warn_prf(average, modifier, msg_start, len(result))


0.17790572732510343
0.27395759849037643
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17229
           1       0.00      0.00      0.00      1741

    accuracy                           0.91     18970
   macro avg       0.45      0.50      0.48     18970
weighted avg       0.82      0.91      0.86     18970

Confusion_matrix
[[17229     0]
 [ 1741     0]]
done in 1.345593s


  _warn_prf(average, modifier, msg_start, len(result))


0.17790572732510343
0.26793679299108564
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17198
           1       0.00      0.00      0.00      1694

    accuracy                           0.91     18892
   macro avg       0.46      0.50      0.48     18892
weighted avg       0.83      0.91      0.87     18892

Confusion_matrix
[[17198     0]
 [ 1694     0]]
done in 1.338981s


  _warn_prf(average, modifier, msg_start, len(result))


0.17790572732510343
0.3010345117548604
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2682
           1       0.00      0.00      0.00       246

    accuracy                           0.92      2928
   macro avg       0.46      0.50      0.48      2928
weighted avg       0.84      0.92      0.88      2928

Confusion_matrix
[[2682    0]
 [ 246    0]]
done in 1.316322s


  _warn_prf(average, modifier, msg_start, len(result))


0.17790572732510343
0.31714527495730904
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2729
           1       0.00      0.00      0.00       277

    accuracy                           0.91      3006
   macro avg       0.45      0.50      0.48      3006
weighted avg       0.82      0.91      0.86      3006

Confusion_matrix
[[2729    0]
 [ 277    0]]
done in 1.319428s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.42      0.03      0.05      1987

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19836    75]
 [ 1932    55]]
done in 76.573744s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.41      0.03      0.05      1971

    accuracy                           0.91     21898
   macro avg       0.66      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19854    73]
 [ 1920    51]]
done in 210.724410s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17229
           1       0.41      0.03   

threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7061583249512955
Balanced accuracy score of test is  0.7131655228024785
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.277
threshold:0.2, J-value:0.135
threshold:0.30000000000000004, J-value:0.102
threshold:0.4, J-value:0.051000000000000004
threshold:0.5, J-value:0.033999999999999996
threshold:0.6000000000000001, J-value:0.026000000000000002
threshold:0.7000000000000001, J-value:0.007
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6387070078754479
Balanced accuracy score of test is  0.6717387652080278
True positive rate of class 1 is  0.69
True positive rate of class 2 is  0.455
Positive prediction rate of class 1 is  0.302
Positive prediction rate of class 2 is  0.143


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(56750,)
(56750,)
(113500, 87)
X train 113500
Y train 113500
21898 18842 3056
21898 18842 3056
21898 18914 2984
21898 18914 2984


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17611450529530606
0.26129715262397923
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19944
           1       0.49      0.05      0.09      1954

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.52     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19840   104]
 [ 1856    98]]
done in 0.821114s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17611450529530606
0.2618953925049787
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19955
           1       0.46      0.05      0.09      1943

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.52     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19843   112]
 [ 1849    94]]
done in 0.821832s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17611450529530606
0.2587867792960299
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17147
           1       0.49      0.06      0.10      1695

    accuracy                           0.91     18842
   macro avg       0.70      0.53      0.53     18842
weighted avg       0.88      0.91      0.88     18842

Confusion_matrix
[[17046   101]
 [ 1599    96]]
done in 0.799297s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17611450529530606
0.2608533882202139
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17231
           1       0.46      0.05      0.10      1683

    accuracy                           0.91     18914
   macro avg       0.69      0.52      0.53     18914
weighted avg       0.87      0.91      0.88     18914

Confusion_matrix
[[17121   110]
 [ 1591    92]]
done in 0.806696s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17611450529530606
0.27677504995553076
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2797
           1       0.40      0.01      0.02       259

    accuracy                           0.91      3056
   macro avg       0.66      0.50      0.49      3056
weighted avg       0.87      0.91      0.88      3056

Confusion_matrix
[[2794    3]
 [ 257    2]]
done in 0.785735s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17611450529530606
0.2685001073313997
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2724
           1       0.50      0.01      0.02       260

    accuracy                           0.91      2984
   macro avg       0.71      0.50      0.48      2984
weighted avg       0.88      0.91      0.87      2984

Confusion_matrix
[[2722    2]
 [ 258    2]]
done in 0.784367s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.58      0.00      0.01      1954

    accuracy                           0.91     21898
   macro avg       0.75      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19939     5]
 [ 1947     7]]
done in 36.855218s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
   

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2724
           1       0.00      0.00      0.00       260

    accuracy                           0.91      2984
   macro avg       0.46      0.50      0.48      2984
weighted avg       0.83      0.91      0.87      2984

Confusion_matrix
[[2724    0]
 [ 260    0]]
done in 35.447702s


  _warn_prf(average, modifier, msg_start, len(result))


0.180940134302841
0.2755987548575981
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.30      0.01      0.01      1954

    accuracy                           0.91     21898
   macro avg       0.61      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19921    23]
 [ 1944    10]]
done in 1.332631s
0.180940134302841
0.274748676067297
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
           1       0.47      0.01      0.02      1943

    accuracy                           0.91     21898
   macro avg       0.69      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19938    17]
 [ 1928    15]]
done in 1.332577s
0.180940134302841
0.2735857457972069
Classification report
              precision    recall  f1-

  _warn_prf(average, modifier, msg_start, len(result))


0.180940134302841
0.2822082782452647
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2724
           1       0.50      0.01      0.02       260

    accuracy                           0.91      2984
   macro avg       0.71      0.50      0.48      2984
weighted avg       0.88      0.91      0.87      2984

Confusion_matrix
[[2722    2]
 [ 258    2]]
done in 1.309011s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.46      0.03      0.06      1954

    accuracy                           0.91     21898
   macro avg       0.69      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19870    74]
 [ 1890    64]]
done in 74.606743s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
     

threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.42199999999999993
threshold:0.2, J-value:0.27199999999999996
threshold:0.30000000000000004, J-value:0.156
threshold:0.4, J-value:0.07300000000000001
threshold:0.5, J-value:0.032
threshold:0.6000000000000001, J-value:0.009000000000000001
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7113599169286302
Balanced accuracy score of test is  0.7023485839009844
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.256
threshold:0.2, J-value:0.10599999999999998
threshold:0.30000000000000004, J-value:0.055999999999999994
threshold:0.4, J-value:0.011
threshold:0.5, J-value:0.009000000000000001
threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6280888375990271
Bala

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(56721,)
(56721,)
(113442, 87)
X train 113442
Y train 113442
21898 18920 2978
21898 18920 2978
21898 18865 3033
21898 18865 3033


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17362600467476288
0.2680569816124494
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19893
           1       0.40      0.05      0.08      2005

    accuracy                           0.91     21898
   macro avg       0.66      0.52      0.52     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19751   142]
 [ 1910    95]]
done in 0.833466s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17362600467476288
0.2661492324005403
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.49      0.05      0.08      1991

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.52     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19811    96]
 [ 1899    92]]
done in 0.880934s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17362600467476288
0.2627365965615911
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17191
           1       0.40      0.05      0.10      1729

    accuracy                           0.91     18920
   macro avg       0.66      0.52      0.52     18920
weighted avg       0.87      0.91      0.87     18920

Confusion_matrix
[[17052   139]
 [ 1635    94]]
done in 0.821356s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17362600467476288
0.265342507586929
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17135
           1       0.48      0.05      0.09      1730

    accuracy                           0.91     18865
   macro avg       0.70      0.52      0.52     18865
weighted avg       0.87      0.91      0.87     18865

Confusion_matrix
[[17040    95]
 [ 1641    89]]
done in 0.864166s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17362600467476288
0.3018587563479224
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2702
           1       0.25      0.00      0.01       276

    accuracy                           0.91      2978
   macro avg       0.58      0.50      0.48      2978
weighted avg       0.85      0.91      0.86      2978

Confusion_matrix
[[2699    3]
 [ 275    1]]
done in 0.801588s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.17362600467476288
0.2711669915857623
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.96      2772
           1       0.75      0.01      0.02       261

    accuracy                           0.91      3033
   macro avg       0.83      0.51      0.49      3033
weighted avg       0.90      0.91      0.88      3033

Confusion_matrix
[[2771    1]
 [ 258    3]]
done in 0.785382s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.58      0.00      0.01      2005

    accuracy                           0.91     21898
   macro avg       0.75      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19888     5]
 [ 1998     7]]
done in 36.256729s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
   

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.96      2772
           1       0.00      0.00      0.00       261

    accuracy                           0.91      3033
   macro avg       0.46      0.50      0.48      3033
weighted avg       0.84      0.91      0.87      3033

Confusion_matrix
[[2772    0]
 [ 261    0]]
done in 35.161406s


  _warn_prf(average, modifier, msg_start, len(result))


0.17892383530199343
0.2785841470540735
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.00      0.00      0.00      2005

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.83      0.91      0.86     21898

Confusion_matrix
[[19889     4]
 [ 2005     0]]
done in 1.313439s
0.17892383530199343
0.27940210033435886
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.33      0.00      0.00      1991

    accuracy                           0.91     21898
   macro avg       0.62      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19905     2]
 [ 1990     1]]
done in 1.323520s
0.17892383530199343
0.27168782638829375
Classification report
              precision    re

True positive rate of class 2 is  0.621
Positive prediction rate of class 1 is  0.364
Positive prediction rate of class 2 is  0.273
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.35800000000000004
threshold:0.2, J-value:0.17
threshold:0.30000000000000004, J-value:0.093
threshold:0.4, J-value:0.012
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.679143505033726
Balanced accuracy score of test is  0.6769740342315524
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.37499999999999994
threshold:0.2, J-value:0.17700000000000002
threshold:0.30000000000000004, J-value:0.097
threshold:0.4, J-value:0.011
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(56683,)
(56683,)
(113366, 87)
X train 113366
Y train 113366
21898 18905 2993
21898 18905 2993
21898 18918 2980
21898 18918 2980


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1754539191675366
0.26730104934705473
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19931
           1       0.46      0.05      0.09      1967

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.52     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19819   112]
 [ 1870    97]]
done in 0.806984s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1754539191675366
0.2627757611224452
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19956
           1       0.45      0.05      0.08      1942

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.52     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19846   110]
 [ 1853    89]]
done in 0.778797s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1754539191675366
0.261944599303299
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17217
           1       0.47      0.06      0.10      1688

    accuracy                           0.91     18905
   macro avg       0.69      0.52      0.53     18905
weighted avg       0.87      0.91      0.88     18905

Confusion_matrix
[[17109   108]
 [ 1594    94]]
done in 0.781121s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1754539191675366
0.2616090730574051
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17231
           1       0.44      0.05      0.09      1687

    accuracy                           0.91     18918
   macro avg       0.68      0.52      0.52     18918
weighted avg       0.87      0.91      0.88     18918

Confusion_matrix
[[17124   107]
 [ 1603    84]]
done in 0.781417s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1754539191675366
0.30113455689039015
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2714
           1       0.43      0.01      0.02       279

    accuracy                           0.91      2993
   macro avg       0.67      0.50      0.49      2993
weighted avg       0.86      0.91      0.86      2993

Confusion_matrix
[[2710    4]
 [ 276    3]]
done in 0.765520s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1754539191675366
0.27018227280513946
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2725
           1       0.62      0.02      0.04       255

    accuracy                           0.92      2980
   macro avg       0.77      0.51      0.50      2980
weighted avg       0.89      0.92      0.88      2980

Confusion_matrix
[[2722    3]
 [ 250    5]]
done in 0.774626s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.33      0.00      0.00      1967

    accuracy                           0.91     21898
   macro avg       0.62      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19925     6]
 [ 1964     3]]
done in 58.433589s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
   

  _warn_prf(average, modifier, msg_start, len(result))


0.18006857852899236
0.2760777061641584
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.00      0.00      0.00      1967

    accuracy                           0.91     21898
   macro avg       0.46      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19931     0]
 [ 1967     0]]
done in 1.322460s


  _warn_prf(average, modifier, msg_start, len(result))


0.18006857852899236
0.27232079344852467
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
           1       0.00      0.00      0.00      1942

    accuracy                           0.91     21898
   macro avg       0.46      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19955     1]
 [ 1942     0]]
done in 1.318746s
0.18006857852899236
0.2702946169059894
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17217
           1       0.00      0.00      0.00      1688

    accuracy                           0.91     18905
   macro avg       0.46      0.50      0.48     18905
weighted avg       0.83      0.91      0.87     18905

Confusion_matrix
[[17217     0]
 [ 1688     0]]
done in 1.314662s


  _warn_prf(average, modifier, msg_start, len(result))


0.18006857852899236
0.2700644872130904
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17231
           1       0.00      0.00      0.00      1687

    accuracy                           0.91     18918
   macro avg       0.46      0.50      0.48     18918
weighted avg       0.83      0.91      0.87     18918

Confusion_matrix
[[17231     0]
 [ 1687     0]]
done in 1.309747s


  _warn_prf(average, modifier, msg_start, len(result))


0.18006857852899236
0.3126060397510899
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2714
           1       0.00      0.00      0.00       279

    accuracy                           0.91      2993
   macro avg       0.45      0.50      0.48      2993
weighted avg       0.82      0.91      0.86      2993

Confusion_matrix
[[2714    0]
 [ 279    0]]
done in 1.285725s


  _warn_prf(average, modifier, msg_start, len(result))


0.18006857852899236
0.28664455229481567
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.96      2725
           1       0.00      0.00      0.00       255

    accuracy                           0.91      2980
   macro avg       0.46      0.50      0.48      2980
weighted avg       0.84      0.91      0.87      2980

Confusion_matrix
[[2724    1]
 [ 255    0]]
done in 1.291886s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.50      0.04      0.07      1967

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19860    71]
 [ 1897    70]]
done in 93.834406s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
  

threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6924656460314266
Balanced accuracy score of test is  0.698993114408857
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.396
threshold:0.2, J-value:0.264
threshold:0.30000000000000004, J-value:0.156
threshold:0.4, J-value:0.085
threshold:0.5, J-value:0.035
threshold:0.6000000000000001, J-value:0.004
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6982691766679412
Balanced accuracy score of test is  0.7055332751928991
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.322
threshold:0.2, J-value:0.11699999999999999
threshold:0.30000000000000004, J-value:0.051000000000000004
threshold:0.4, J-value:0.032
threshold:0.5, J-value:0.016
threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(56842,)
(56842,)
(113684, 87)
X train 113684
Y train 113684
21898 18847 3051
21898 18847 3051
21898 18817 3081
21898 18817 3081


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1739422667345199
0.2591406496755032
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     19973
           1       0.45      0.05      0.09      1925

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.52     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19854   119]
 [ 1826    99]]
done in 0.777150s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1739422667345199
0.26921504500209614
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19866
           1       0.44      0.04      0.07      2032

    accuracy                           0.91     21898
   macro avg       0.67      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19762   104]
 [ 1951    81]]
done in 0.767604s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1739422667345199
0.25532338295532875
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     17189
           1       0.46      0.06      0.10      1658

    accuracy                           0.91     18847
   macro avg       0.69      0.53      0.53     18847
weighted avg       0.88      0.91      0.88     18847

Confusion_matrix
[[17072   117]
 [ 1560    98]]
done in 0.775417s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1739422667345199
0.2666054554408122
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     17067
           1       0.43      0.04      0.08      1750

    accuracy                           0.91     18817
   macro avg       0.67      0.52      0.52     18817
weighted avg       0.87      0.91      0.87     18817

Confusion_matrix
[[16965   102]
 [ 1673    77]]
done in 0.762061s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1739422667345199
0.28272112357754475
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2784
           1       0.33      0.00      0.01       267

    accuracy                           0.91      3051
   macro avg       0.62      0.50      0.48      3051
weighted avg       0.86      0.91      0.87      3051

Confusion_matrix
[[2782    2]
 [ 266    1]]
done in 0.732087s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.1739422667345199
0.2851529374963121
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2799
           1       0.67      0.01      0.03       282

    accuracy                           0.91      3081
   macro avg       0.79      0.51      0.49      3081
weighted avg       0.89      0.91      0.87      3081

Confusion_matrix
[[2797    2]
 [ 278    4]]
done in 0.759439s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.50      0.00      0.00      1925

    accuracy                           0.91     21898
   macro avg       0.71      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19971     2]
 [ 1923     2]]
done in 77.889152s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
    

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2799
           1       0.00      0.00      0.00       282

    accuracy                           0.91      3081
   macro avg       0.45      0.50      0.48      3081
weighted avg       0.83      0.91      0.86      3081

Confusion_matrix
[[2799    0]
 [ 282    0]]
done in 50.158307s


  _warn_prf(average, modifier, msg_start, len(result))


0.17903596856741097
0.2693001524165527
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.00      0.00      0.00      1925

    accuracy                           0.91     21898
   macro avg       0.46      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19973     0]
 [ 1925     0]]
done in 1.313430s


  _warn_prf(average, modifier, msg_start, len(result))


0.17903596856741097
0.2799662891324194
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
           1       0.00      0.00      0.00      2032

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.82      0.91      0.86     21898

Confusion_matrix
[[19866     0]
 [ 2032     0]]
done in 1.318717s


  _warn_prf(average, modifier, msg_start, len(result))


0.17903596856741097
0.263948709136459
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17189
           1       0.00      0.00      0.00      1658

    accuracy                           0.91     18847
   macro avg       0.46      0.50      0.48     18847
weighted avg       0.83      0.91      0.87     18847

Confusion_matrix
[[17189     0]
 [ 1658     0]]
done in 1.329964s


  _warn_prf(average, modifier, msg_start, len(result))


0.17903596856741097
0.2767657151162528
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17067
           1       0.00      0.00      0.00      1750

    accuracy                           0.91     18817
   macro avg       0.45      0.50      0.48     18817
weighted avg       0.82      0.91      0.86     18817

Confusion_matrix
[[17067     0]
 [ 1750     0]]
done in 1.309482s


  _warn_prf(average, modifier, msg_start, len(result))


0.17903596856741097
0.30235772419627255
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2784
           1       0.00      0.00      0.00       267

    accuracy                           0.91      3051
   macro avg       0.46      0.50      0.48      3051
weighted avg       0.83      0.91      0.87      3051

Confusion_matrix
[[2784    0]
 [ 267    0]]
done in 1.288791s


  _warn_prf(average, modifier, msg_start, len(result))


0.17903596856741097
0.29951357938305456
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2799
           1       0.00      0.00      0.00       282

    accuracy                           0.91      3081
   macro avg       0.45      0.50      0.48      3081
weighted avg       0.83      0.91      0.86      3081

Confusion_matrix
[[2799    0]
 [ 282    0]]
done in 1.289296s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.44      0.03      0.06      1925

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19897    76]
 [ 1865    60]]
done in 109.624362s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
           1       0.41      0.02      0.05      2032

    accuracy                           0.91     21898
   macro avg       0.66      0.51      0.50     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19795    71]
 [ 1982    50]]
done in 80.815404s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17189
           1       0.43      0.03   

threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7083908404686392
Balanced accuracy score of test is  0.7049141450920322
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.251
threshold:0.2, J-value:0.142
threshold:0.30000000000000004, J-value:0.08600000000000001
threshold:0.4, J-value:0.04
threshold:0.5, J-value:0.018
threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6256659240604417
Balanced accuracy score of test is  0.6591886920100644
True positive rate of class 1 is  0.675
True positive rate of class 2 is  0.422
Positive prediction rate of class 1 is  0.304
Positive prediction rate of class 2 is  0.133


In [9]:
def add_mean_sd(records, result_table, overall_records, type):
    """Append mean / standard-deviation summary rows for one classifier.

    Parameters
    ----------
    records : list of dict
        Metric rows; mutated in place. A row holding the per-column means of
        ``result_table`` is appended, followed by a row holding the
        per-column standard deviations (``Series.std()``).
        Presumably these are the rows ``result_table`` was built from --
        confirm against the caller.
    result_table : pandas.DataFrame
        Must contain every column named in ``all_metrics`` below; a missing
        column raises ``KeyError``.
    overall_records : list of dict
        Cross-classifier summary; mutated in place. One row is appended with
        the ``'type'`` label and the means of the ``headline_metrics`` subset.
    type : object
        Label stored under the ``'type'`` key of the summary row. The name
        shadows the builtin but is kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(pandas.DataFrame(records), overall_records)`` -- the frame is
        built after the two summary rows were appended, and the second item
        is the very same list object that was passed in.
    """
    # Order matters: it fixes the key order of the appended dicts.
    all_metrics = (
        'auroc',
        'overall threshold', 'white threshold', 'black threshold',
        'overall ba validation', 'overall ba test',
        'white ba validation', 'white ba test',
        'black ba validation', 'black ba test',
        'overall precision', 'overall recall',
        'overall tpr', 'overall tnr', 'overall pd',
        'white precision', 'white recall',
        'white tpr', 'white tnr', 'white pd',
        'black precision', 'black recall',
        'black tpr', 'black tnr', 'black pd',
        'eod', 'di',
    )
    # Subset reported in the cross-classifier comparison table.
    headline_metrics = (
        'auroc',
        'overall threshold', 'white threshold', 'black threshold',
        'overall ba test', 'white ba test', 'black ba test',
        'overall tpr', 'overall pd',
        'white tpr', 'white pd',
        'black tpr', 'black pd',
        'eod', 'di',
    )

    # Mean row first, then the standard-deviation row.
    records.append({col: result_table[col].mean() for col in all_metrics})
    records.append({col: result_table[col].std() for col in all_metrics})

    # One headline row per classifier, tagged with its label.
    headline_row = {'type': type}
    for col in headline_metrics:
        headline_row[col] = result_table[col].mean()
    overall_records.append(headline_row)

    return pd.DataFrame(records), overall_records



In [10]:
# Summarise each classifier. Every add_mean_sd call appends a mean row and a
# standard-deviation row to that model's records, and one headline row to the
# shared list, which it hands back as `overall_records`.
overall_table = []
result_lr, overall_records = add_mean_sd(records_lr, result_lr, overall_table, 'lr')
result_rf, overall_records = add_mean_sd(records_rf, result_rf, overall_records, 'rf')
result_dt, overall_records = add_mean_sd(records_dt, result_dt, overall_records, 'dt')
result_gbt, overall_records = add_mean_sd(records_gbt, result_gbt, overall_records, 'gbt')

# Write one CSV per classifier (per-run rows plus the mean / sd rows).
result_path = '/Users/lifuchen/Desktop/research/resample_data/'
for csv_name, summary_frame in (
    ('race-lr-resample-size-result.csv', result_lr),
    ('race-rf-resample-size-result.csv', result_rf),
    ('race-dt-resample-size-result.csv', result_dt),
    ('race-gbt-resample-size-result.csv', result_gbt),
):
    summary_frame.to_csv(path.join(result_path, csv_name), index=False)

# `overall_table` is the same list object add_mean_sd returned above (it
# returns the list it was given), so it now holds one row per classifier.
result_path = '/Users/lifuchen/Desktop/research/resample_result/'
overall_result = pd.DataFrame(overall_table)
overall_result.to_csv(path.join(result_path, 'race-resample-size.csv'), index=False)