In [11]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from os import path
import imblearn
from imblearn.over_sampling import RandomOverSampler, SMOTE
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
import sklearn.preprocessing
from sklearn import preprocessing
from sklearn.preprocessing import Normalizer
import src.lib.utility_classfier as uclf
import src.lib.optimal_threhold_related as thres
import src.lib.fairness_tests as fair

In [12]:
# Load the full dataset into `df`; later cells derive the labels and features from it.
# NOTE(review): this is an absolute path on one specific machine, so the notebook
# cannot be re-run elsewhere without editing this line.
data_path='/Users/lifuchen/Desktop/research/data.csv'
df = pd.read_csv(data_path)

In [13]:
# Labels: the raw values of the `Class` column.
y = df['Class'].values

# Features: everything except `GRID`, the label itself and `Race_B`.
# NOTE(review): presumably `GRID` is a record identifier and `Race_B` is dropped
# because `Race_W` already encodes the group — confirm against the data dictionary.
X = df.drop(columns=['GRID', 'Class', 'Race_B'])

# Shown as the cell output so the feature count can be eyeballed.
X.shape

(109490, 87)

In [14]:
def save_prediction(classifier, characteristic, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white, X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black, result_path='/Users/lifuchen/Desktop/research/predictions/'):
    """Run one classifier helper on six evaluation sets and save its outputs as a CSV.

    The helper is looked up by name on the ``uclf`` module and called once per
    evaluation set, always with the same training data.  Whatever each call
    returns is stored as one column of the output file.

    Parameters
    ----------
    classifier : str
        Name of a callable attribute of ``uclf``; also the filename prefix.
    characteristic : object
        Run label; ``str(characteristic)`` is appended to the filename.
    X_train_scaled, y_train
        Training data handed to every helper call.
    X_val_scaled, y_val, X_test_scaled, y_test
        Full validation / test sets (columns ``val_score`` / ``test_score``).
    X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white
        First subgroup (columns ``val_1_score`` / ``test_1_score``).
    X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black
        Second subgroup (columns ``val_2_score`` / ``test_2_score``).
    result_path : str, optional
        Directory the CSV is written to.  Defaults to the location the notebook
        originally hard-coded; it is created if it does not exist.

    Returns
    -------
    None
        The only effect is the file ``<classifier><characteristic>prediction.csv``.
    """
    import os  # local import: the notebook only imports `path` from `os`

    method_to_call = getattr(uclf, classifier)

    # (output column, features, labels), in the order the columns appear in the CSV.
    evaluation_sets = [
        ('val_score', X_val_scaled, y_val),
        ('test_score', X_test_scaled, y_test),
        ('val_1_score', X_val_white_scaled, y_val_white),
        ('test_1_score', X_test_white_scaled, y_test_white),
        ('val_2_score', X_val_black_scaled, y_val_black),
        ('test_2_score', X_test_black_scaled, y_test_black),
    ]

    # NOTE(review): the training data is passed on every call, so the helper
    # presumably refits the model six times.  If the helper is not seeded, the
    # six columns may come from six different fitted models — confirm in `uclf`.
    scores_by_column = {}
    for column, X_eval, y_eval in evaluation_sets:
        scores_by_column[column] = method_to_call(X_train_scaled, y_train, X_eval, y_eval)

    # The evaluation sets differ in size; building row-wise and transposing lets
    # pandas pad the shorter columns with NaN instead of raising.
    overall_prediction = pd.DataFrame.from_dict(scores_by_column, orient='index').transpose()

    # Make sure the target directory exists so `to_csv` cannot fail on a fresh checkout.
    os.makedirs(result_path, exist_ok=True)
    filename = str(classifier) + str(characteristic) + "prediction.csv"
    overall_prediction.to_csv(path.join(result_path, filename), index=False)

In [15]:
def get_result (classifier, characteristic, records, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white, X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black, result_path='/Users/lifuchen/Desktop/research/predictions/'):
    """Evaluate one classifier's saved predictions and append the metrics to ``records``.

    Reads ``<classifier><characteristic>prediction.csv`` from ``result_path``,
    picks a threshold on each validation set via ``balance_accuracy`` (overall
    and per subgroup), and computes test-set metrics at those thresholds.

    Parameters
    ----------
    classifier : str
        Filename prefix of the prediction file.
    characteristic : object
        Run label; ``str(characteristic)`` is used so the filename is built the
        same way as in ``save_prediction``.
    records : list
        Mutated in place: one dict of metrics is appended.
    y_val, y_test, y_val_white, y_test_white, y_val_black, y_test_black
        True labels matching the score columns of the prediction file.
    X_train_scaled, y_train, X_val_scaled, X_test_scaled, X_val_white_scaled,
    X_test_white_scaled, X_val_black_scaled, X_test_black_scaled
        Unused here; accepted so the call signature mirrors ``save_prediction``.
    result_path : str, optional
        Directory holding the prediction file.  Defaults to the location the
        notebook originally hard-coded.

    Returns
    -------
    None
    """
    filename = str(classifier) + str(characteristic) + "prediction.csv"
    prediction = pd.read_csv(path.join(result_path, filename))

    def _scores(column):
        # Keep only the non-missing entries; columns for smaller evaluation
        # sets carry NaN in the rows they do not use.
        return prediction[column].dropna()

    y_val_score = _scores('val_score')
    y_test_score = _scores('test_score')

    y_val_score_white = _scores('val_1_score')
    y_test_score_white = _scores('test_1_score')

    y_val_score_black = _scores('val_2_score')
    y_test_score_black = _scores('test_2_score')

    # Overall: threshold from validation, then metrics on test.
    threshold, ba_val, ba_test = balance_accuracy(y_val, y_val_score, y_test, y_test_score)
    auroc = roc_auc_score(y_test, y_test_score)
    precision, recall, tpr, tnr, pd_overall = thres.calculate_precision_metrics(y_test, y_test_score, threshold)

    # Each subgroup gets its own threshold, chosen on that subgroup's validation set.
    threshold_white, ba_val_white, ba_test_white = balance_accuracy(y_val_white, y_val_score_white, y_test_white, y_test_score_white)
    precision_white, recall_white, tpr_white, tnr_white, pd_white = thres.calculate_precision_metrics(y_test_white, y_test_score_white, threshold_white)

    threshold_black, ba_val_black, ba_test_black = balance_accuracy(y_val_black, y_val_score_black, y_test_black, y_test_score_black)
    precision_black, recall_black, tpr_black, tnr_black, pd_black = thres.calculate_precision_metrics(y_test_black, y_test_score_black, threshold_black)

    # Fairness metrics compare the two subgroups on the test set, each at its own threshold.
    eod = fair.get_EOD(y_test_white, y_test_score_white, threshold_white, y_test_black, y_test_score_black, threshold_black)
    sp = fair.get_SP(y_test_white, y_test_score_white, threshold_white, y_test_black, y_test_score_black, threshold_black)

    records.append({
        'auroc': auroc,
        'overall threshold': threshold,
        'white threshold': threshold_white,
        'black threshold': threshold_black,
        'overall ba validation': ba_val,
        'overall ba test': ba_test,
        'white ba validation': ba_val_white,
        'white ba test': ba_test_white,
        'black ba validation': ba_val_black,
        'black ba test': ba_test_black,
        'overall precision': precision,
        'overall recall': recall,
        'overall tpr': tpr,
        'overall tnr': tnr,
        'overall pd': pd_overall,
        'white precision': precision_white,
        'white recall': recall_white,
        'white tpr': tpr_white,
        'white tnr': tnr_white,
        'white pd': pd_white,
        'black precision': precision_black,
        'black recall': recall_black,
        'black tpr': tpr_black,
        'black tnr': tnr_black,
        'black pd': pd_black,
        'eod': eod,
        # NOTE(review): the key says 'di' but the value comes from fair.get_SP;
        # the key is kept because downstream code may read it — confirm the intended name.
        'di': sp,
        })

In [16]:
def balance_accuracy (y_val, y_val_score,y_test, y_test_score):
    """Pick a threshold on the validation set and report balanced accuracy on both splits.

    The threshold is the first element returned by
    ``thres.get_optimal_threshold_Jvalue`` for the validation labels/scores.
    That same threshold is then applied to the validation and the test scores.
    Each value is printed as it is computed.

    Returns
    -------
    tuple
        ``(threshold, balanced_accuracy_val, balanced_accuracy_test)``.
    """
    chosen_threshold, _ = thres.get_optimal_threshold_Jvalue(y_val, y_val_score)
    print ("Optimal threshold by J value is ", chosen_threshold)

    # Validation first, then test — both scored at the validation-derived threshold.
    splits = (
        ("Balanced accuracy score of val is ", y_val, y_val_score),
        ("Balanced accuracy score of test is ", y_test, y_test_score),
    )
    accuracies = []
    for message, labels, scores in splits:
        accuracy = thres.calculate_balanced_accuracy(labels, scores, chosen_threshold)
        print (message, accuracy)
        accuracies.append(accuracy)

    return chosen_threshold, accuracies[0], accuracies[1]

In [17]:
def fairness_metrics (X, y, attribute, random_state):
    """Run the split / scale / predict / evaluate pipeline once for every classifier.

    The data is split by ``fair.split_by_trait_balance_proportion``, the features
    are scaled with a ``MaxAbsScaler`` fitted on the training split only, and
    then each of the four classifiers is first run through ``save_prediction``
    and afterwards through ``get_result``.

    Results are appended to the module-level lists ``records_lr``,
    ``records_rf``, ``records_dt`` and ``records_gbt``, which must exist before
    this function is called.  Nothing is returned.
    """
    (X_train, y_train, X_val, y_val, X_test, y_test,
     X_val_white, X_val_black, y_val_white, y_val_black,
     X_test_white, X_test_black, y_test_white, y_test_black) = fair.split_by_trait_balance_proportion(X, y, attribute, random_state)

    # Row counts, as a quick sanity check that features and labels line up.
    print("X train", X_train.shape[0])
    print("Y train", y_train.shape[0])
    size_checks = (
        (X_val, X_val_white, X_val_black),
        (y_val, y_val_white, y_val_black),
        (X_test, X_test_white, X_test_black),
        (y_test, y_test_white, y_test_black),
    )
    for whole, white, black in size_checks:
        print(whole.shape[0], white.shape[0], black.shape[0])

    # Fit the scaler on the training split only, then reuse it for every other split.
    scaler = preprocessing.MaxAbsScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    (X_test_scaled, X_test_white_scaled, X_test_black_scaled,
     X_val_scaled, X_val_white_scaled, X_val_black_scaled) = (
        scaler.transform(part)
        for part in (X_test, X_test_white, X_test_black, X_val, X_val_white, X_val_black)
    )

    characteristic = attribute + "resample-by-proportion" + str(random_state)

    # Positional arguments shared by every save_prediction / get_result call.
    split_args = (
        X_train_scaled, y_train,
        X_val_scaled, y_val,
        X_test_scaled, y_test,
        X_val_white_scaled, y_val_white,
        X_test_white_scaled, y_test_white,
        X_val_black_scaled, y_val_black,
        X_test_black_scaled, y_test_black,
    )

    # First write the predictions for all classifiers ...
    for classifier in ("logic_regression", "random_forest", "decision_tree", "gradiant_boosting"):
        save_prediction (classifier, characteristic, *split_args)

    # ... then evaluate them, each into its own module-level record list.
    record_targets = (
        ("logic_regression", records_lr),
        ("random_forest", records_rf),
        ("decision_tree", records_dt),
        ("gradiant_boosting", records_gbt),
    )
    for classifier, target_records in record_targets:
        get_result (classifier, characteristic, target_records, *split_args)

In [18]:
# One list of per-run metric dicts per classifier; fairness_metrics appends to
# these module-level lists, so they must be (re)created before the loop runs.
records_lr, records_rf, records_dt, records_gbt = [], [], [], []

# Repeat the whole pipeline for ten different seeds (0-9), grouping by "Race_W".
for random_state in range(10):
    fairness_metrics(X, y, "Race_W", random_state)

# One row per seed for each classifier.
result_lr, result_rf, result_dt, result_gbt = (
    pd.DataFrame(run_records)
    for run_records in (records_lr, records_rf, records_dt, records_gbt)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(9055, 88)
(56639, 88)
0.09307098020280058 0.0988475865280149
0.0987445678416224
(65741, 87)
X train 65741
Y train 65741
21898 18899 2999
21898 18899 2999
21898 18968 2930
21898 18968 2930


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2604923064941778
0.26193884229301617
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.44      0.03      0.06      1994

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19825    79]
 [ 1931    63]]
done in 1.231599s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2604923064941778
0.2624918992182111
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.42      0.03      0.06      1964

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19845    89]
 [ 1899    65]]
done in 1.164268s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2604923064941778
0.2627803492598507
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17180
           1       0.45      0.03      0.06      1719

    accuracy                           0.91     18899
   macro avg       0.68      0.51      0.50     18899
weighted avg       0.87      0.91      0.87     18899

Confusion_matrix
[[17118    62]
 [ 1668    51]]
done in 1.814597s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2604923064941778
0.26294153278235627
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17260
           1       0.42      0.03      0.06      1708

    accuracy                           0.91     18968
   macro avg       0.67      0.51      0.50     18968
weighted avg       0.87      0.91      0.87     18968

Confusion_matrix
[[17188    72]
 [ 1656    52]]
done in 1.525741s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2604923064941778
0.256635861243931
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2724
           1       0.41      0.04      0.08       275

    accuracy                           0.91      2999
   macro avg       0.66      0.52      0.51      2999
weighted avg       0.87      0.91      0.87      2999

Confusion_matrix
[[2707   17]
 [ 263   12]]
done in 1.552738s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2604923064941778
0.2595810973599497
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2674
           1       0.43      0.05      0.09       256

    accuracy                           0.91      2930
   macro avg       0.67      0.52      0.52      2930
weighted avg       0.87      0.91      0.88      2930

Confusion_matrix
[[2657   17]
 [ 243   13]]
done in 1.582875s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.60      0.00      0.01      1994

    accuracy                           0.91     21898
   macro avg       0.75      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19898     6]
 [ 1985     9]]
done in 22.704571s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
    

  _warn_prf(average, modifier, msg_start, len(result))


0.2660099341853185
0.277763224668282
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17260
           1       0.00      0.00      0.00      1708

    accuracy                           0.91     18968
   macro avg       0.45      0.50      0.48     18968
weighted avg       0.83      0.91      0.87     18968

Confusion_matrix
[[17257     3]
 [ 1708     0]]
done in 0.696257s
0.2660099341853185
0.2786664340137434
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2724
           1       0.00      0.00      0.00       275

    accuracy                           0.91      2999
   macro avg       0.45      0.50      0.48      2999
weighted avg       0.82      0.91      0.86      2999

Confusion_matrix
[[2723    1]
 [ 275    0]]
done in 0.709711s
0.2660099341853185
0.270704920430903
Classification report
              precision    recall  f1-sc

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.41      0.02      0.03      1994

    accuracy                           0.91     21898
   macro avg       0.66      0.51      0.49     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19855    49]
 [ 1960    34]]
done in 37.624756s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.45      0.02      0.04      1964

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19882    52]
 [ 1922    42]]
done in 39.034406s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17180
           1       0.43      0.02    

threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7146670845125459
Balanced accuracy score of test is  0.6984215103893884
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.45199999999999996
threshold:0.2, J-value:0.33799999999999997
threshold:0.30000000000000004, J-value:0.163
threshold:0.4, J-value:0.07200000000000001
threshold:0.5, J-value:0.013999999999999999
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7263903350687491
Balanced accuracy score of test is  0.6898592347606582
True positive rate of class 1 is  0.675
True positive rate of class 2 is  0.652
Positive prediction rate of class 1 is  0.314
Positive prediction rate of class 2 is  0.306


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(8896, 88)
(56798, 88)
0.09772951628825272 0.10052315442743655
0.10044422507403751
(65716, 87)
X train 65716
Y train 65716
21898 18825 3073
21898 18825 3073
21898 18883 3015
21898 18883 3015


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26278023499042535
0.2560416876620835
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     19980
           1       0.47      0.04      0.07      1918

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19900    80]
 [ 1846    72]]
done in 1.289603s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26278023499042535
0.25784192743203266
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.44      0.03      0.06      1926

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19888    84]
 [ 1860    66]]
done in 1.442554s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26278023499042535
0.2555892412007264
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17178
           1       0.45      0.03      0.06      1647

    accuracy                           0.91     18825
   macro avg       0.68      0.52      0.51     18825
weighted avg       0.87      0.91      0.88     18825

Confusion_matrix
[[17110    68]
 [ 1591    56]]
done in 1.432050s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26278023499042535
0.26065060756239294
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17196
           1       0.45      0.03      0.06      1687

    accuracy                           0.91     18883
   macro avg       0.68      0.52      0.51     18883
weighted avg       0.87      0.91      0.87     18883

Confusion_matrix
[[17125    71]
 [ 1629    58]]
done in 1.196534s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26278023499042535
0.2588133455322581
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95      2802
           1       0.57      0.06      0.11       271

    accuracy                           0.91      3073
   macro avg       0.74      0.53      0.53      3073
weighted avg       0.89      0.91      0.88      3073

Confusion_matrix
[[2790   12]
 [ 255   16]]
done in 1.112309s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26278023499042535
0.24025111253929854
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2776
           1       0.38      0.03      0.06       239

    accuracy                           0.92      3015
   macro avg       0.65      0.51      0.51      3015
weighted avg       0.88      0.92      0.89      3015

Confusion_matrix
[[2763   13]
 [ 231    8]]
done in 1.560827s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19980
           1       0.78      0.00      0.01      1918

    accuracy                           0.91     21898
   macro avg       0.85      0.50      0.48     21898
weighted avg       0.90      0.91      0.87     21898

Confusion_matrix
[[19978     2]
 [ 1911     7]]
done in 21.937592s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
  

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2776
           1       0.00      0.00      0.00       239

    accuracy                           0.92      3015
   macro avg       0.46      0.50      0.48      3015
weighted avg       0.85      0.92      0.88      3015

Confusion_matrix
[[2776    0]
 [ 239    0]]
done in 21.785367s


  _warn_prf(average, modifier, msg_start, len(result))


0.267873312153786
0.26841213982862927
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19980
           1       0.40      0.00      0.00      1918

    accuracy                           0.91     21898
   macro avg       0.66      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19977     3]
 [ 1916     2]]
done in 0.683696s
0.267873312153786
0.2693590617881709
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.14      0.00      0.00      1926

    accuracy                           0.91     21898
   macro avg       0.53      0.50      0.48     21898
weighted avg       0.84      0.91      0.87     21898

Confusion_matrix
[[19966     6]
 [ 1925     1]]
done in 0.679691s
0.267873312153786
0.2653321701780912
Classification report
              precision    recall  f

Balanced accuracy score of val is  0.7035927157986783
Balanced accuracy score of test is  0.7088817177721776
True positive rate of class 1 is  0.743
True positive rate of class 2 is  0.753
Positive prediction rate of class 1 is  0.382
Positive prediction rate of class 2 is  0.368
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.35800000000000004
threshold:0.2, J-value:0.21799999999999997
threshold:0.30000000000000004, J-value:0.10700000000000001
threshold:0.4, J-value:0.001
threshold:0.5, J-value:0.001
threshold:0.6000000000000001, J-value:0.001
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.001
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6794684152348386
Balanced accuracy score of test is  0.6874879244233724
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.35100000000000003
threshold:0.2, J-value:0.22399999999999998
threshold:0.30000000000000004, J-value:0.11500000000000002
threshold:0.4, J-value:0.

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


0.09624096975633648 0.09822707389772771
0.09820007346638912
(65710, 87)
X train 65710
Y train 65710
21898 18936 2962
21898 18936 2962
21898 18829 3069
21898 18829 3069


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26003082919346904
0.2600490790326492
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.47      0.03      0.06      1948

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19872    78]
 [ 1880    68]]
done in 1.181340s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26003082919346904
0.26235407891595325
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.52      0.03      0.06      2015

    accuracy                           0.91     21898
   macro avg       0.72      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19823    60]
 [ 1950    65]]
done in 1.146051s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26003082919346904
0.262752981090605
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17239
           1       0.44      0.03      0.06      1697

    accuracy                           0.91     18936
   macro avg       0.68      0.51      0.50     18936
weighted avg       0.87      0.91      0.87     18936

Confusion_matrix
[[17174    65]
 [ 1645    52]]
done in 1.113778s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26003082919346904
0.2631951918283704
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17079
           1       0.54      0.03      0.06      1750

    accuracy                           0.91     18829
   macro avg       0.72      0.51      0.50     18829
weighted avg       0.87      0.91      0.87     18829

Confusion_matrix
[[17034    45]
 [ 1698    52]]
done in 1.140813s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26003082919346904
0.24276309342513688
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2711
           1       0.55      0.06      0.11       251

    accuracy                           0.92      2962
   macro avg       0.74      0.53      0.54      2962
weighted avg       0.89      0.92      0.88      2962

Confusion_matrix
[[2698   13]
 [ 235   16]]
done in 1.607748s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26003082919346904
0.2571936634620913
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2804
           1       0.46      0.05      0.09       265

    accuracy                           0.91      3069
   macro avg       0.69      0.52      0.52      3069
weighted avg       0.88      0.91      0.88      3069

Confusion_matrix
[[2789   15]
 [ 252   13]]
done in 1.108017s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.33      0.00      0.00      1948

    accuracy                           0.91     21898
   macro avg       0.62      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19942     8]
 [ 1944     4]]
done in 21.812113s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
   

  _warn_prf(average, modifier, msg_start, len(result))


0.26492006915398814
0.25148262740733723
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2711
           1       0.00      0.00      0.00       251

    accuracy                           0.92      2962
   macro avg       0.46      0.50      0.48      2962
weighted avg       0.84      0.92      0.87      2962

Confusion_matrix
[[2711    0]
 [ 251    0]]
done in 0.598833s


  _warn_prf(average, modifier, msg_start, len(result))


0.26492006915398814
0.29744700761607434
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2804
           1       0.00      0.00      0.00       265

    accuracy                           0.91      3069
   macro avg       0.46      0.50      0.48      3069
weighted avg       0.83      0.91      0.87      3069

Confusion_matrix
[[2801    3]
 [ 265    0]]
done in 0.644059s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.41      0.02      0.03      1948

    accuracy                           0.91     21898
   macro avg       0.66      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19901    49]
 [ 1914    34]]
done in 35.975742s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
  

threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7053028104141257
Balanced accuracy score of test is  0.7155300692674977
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.3950000000000001
threshold:0.2, J-value:0.263
threshold:0.30000000000000004, J-value:0.13
threshold:0.4, J-value:0.046
threshold:0.5, J-value:0.012
threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6977241651333741
Balanced accuracy score of test is  0.7176981924334814
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.511
threshold:0.2, J-value:0.3
threshold:0.30000000000000004, J-value:0.167
threshold:0.4, J-value:0.066
threshold:0.5, J-value:0.038
threshold:0.6000000000000001, J-value:0.011
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(9002, 88)
(56692, 88)
0.09566699123661149 0.09817139315047264
0.0981012658227848
(65714, 87)
X train 65714
Y train 65714
21898 18932 2966
21898 18932 2966
21898 18882 3016
21898 18882 3016


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2594885582453179
0.26296014904277215
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.46      0.04      0.07      1990

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19820    88]
 [ 1916    74]]
done in 1.177897s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2594885582453179
0.2608636681910417
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
           1       0.44      0.03      0.06      1980

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19833    85]
 [ 1912    68]]
done in 1.106302s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2594885582453179
0.2631469728160943
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17204
           1       0.47      0.04      0.07      1728

    accuracy                           0.91     18932
   macro avg       0.69      0.52      0.51     18932
weighted avg       0.87      0.91      0.87     18932

Confusion_matrix
[[17136    68]
 [ 1667    61]]
done in 1.188234s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2594885582453179
0.2632120757564372
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17156
           1       0.42      0.03      0.05      1726

    accuracy                           0.91     18882
   macro avg       0.66      0.51      0.50     18882
weighted avg       0.87      0.91      0.87     18882

Confusion_matrix
[[17086    70]
 [ 1676    50]]
done in 1.564604s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2594885582453179
0.26176765151191084
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2704
           1       0.39      0.05      0.09       262

    accuracy                           0.91      2966
   macro avg       0.65      0.52      0.52      2966
weighted avg       0.87      0.91      0.88      2966

Confusion_matrix
[[2684   20]
 [ 249   13]]
done in 1.089544s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2594885582453179
0.24616120411617515
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2762
           1       0.55      0.07      0.13       254

    accuracy                           0.92      3016
   macro avg       0.73      0.53      0.54      3016
weighted avg       0.89      0.92      0.89      3016

Confusion_matrix
[[2747   15]
 [ 236   18]]
done in 1.144924s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.53      0.00      0.01      1990

    accuracy                           0.91     21898
   macro avg       0.72      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19901     7]
 [ 1982     8]]
done in 21.620444s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
   

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2762
           1       0.00      0.00      0.00       254

    accuracy                           0.92      3016
   macro avg       0.46      0.50      0.48      3016
weighted avg       0.84      0.92      0.88      3016

Confusion_matrix
[[2760    2]
 [ 254    0]]
done in 22.550440s
0.2649198813059806
0.2711701793277212
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.00      0.00      0.00      1990

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19905     3]
 [ 1990     0]]
done in 0.701912s
0.2649198813059806
0.27020473121508737
Classification report
              precision    recall  f1-score   support

           0       

  _warn_prf(average, modifier, msg_start, len(result))


0.2649198813059806
0.25836094684665445
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2762
           1       0.00      0.00      0.00       254

    accuracy                           0.92      3016
   macro avg       0.46      0.50      0.48      3016
weighted avg       0.84      0.92      0.88      3016

Confusion_matrix
[[2760    2]
 [ 254    0]]
done in 0.750100s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.45      0.02      0.03      1990

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19865    43]
 [ 1955    35]]
done in 37.408461s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
   

Balanced accuracy score of test is  0.7110860842585915
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.404
threshold:0.2, J-value:0.27799999999999997
threshold:0.30000000000000004, J-value:0.14200000000000002
threshold:0.4, J-value:0.056
threshold:0.5, J-value:0.015000000000000001
threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7019873715845583
Balanced accuracy score of test is  0.7088171808720305
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.397
threshold:0.2, J-value:0.27
threshold:0.30000000000000004, J-value:0.134
threshold:0.4, J-value:0.05800000000000001
threshold:0.5, J-value:0.025
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6983038980983

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(9072, 88)
(56622, 88)
0.09936984973339796 0.10088853459840959
0.10082404265632573
(65706, 87)
X train 65706
Y train 65706
21898 18875 3023
21898 18875 3023
21898 19009 2889
21898 19009 2889


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26344522424062633
0.25475182747424246
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     20006
           1       0.38      0.03      0.06      1892

    accuracy                           0.91     21898
   macro avg       0.65      0.51      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19905   101]
 [ 1829    63]]
done in 1.353804s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26344522424062633
0.25633865636730396
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     19975
           1       0.47      0.04      0.08      1923

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.52     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19886    89]
 [ 1843    80]]
done in 1.336801s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26344522424062633
0.2557042936416006
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     17241
           1       0.38      0.03      0.06      1634

    accuracy                           0.91     18875
   macro avg       0.65      0.51      0.50     18875
weighted avg       0.87      0.91      0.88     18875

Confusion_matrix
[[17162    79]
 [ 1585    49]]
done in 1.374882s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26344522424062633
0.2597323527117874
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17310
           1       0.48      0.04      0.07      1699

    accuracy                           0.91     19009
   macro avg       0.70      0.52      0.51     19009
weighted avg       0.87      0.91      0.87     19009

Confusion_matrix
[[17245    65]
 [ 1638    61]]
done in 1.322292s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26344522424062633
0.24880482155003328
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2765
           1       0.39      0.05      0.10       258

    accuracy                           0.91      3023
   macro avg       0.65      0.52      0.52      3023
weighted avg       0.87      0.91      0.88      3023

Confusion_matrix
[[2743   22]
 [ 244   14]]
done in 1.264627s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26344522424062633
0.23400886273203747
Classification report
              precision    recall  f1-score   support

           0       0.93      0.99      0.96      2665
           1       0.44      0.08      0.14       224

    accuracy                           0.92      2889
   macro avg       0.68      0.54      0.55      2889
weighted avg       0.89      0.92      0.90      2889

Confusion_matrix
[[2641   24]
 [ 205   19]]
done in 1.414232s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     20006
           1       0.50      0.00      0.00      1892

    accuracy                           0.91     21898
   macro avg       0.71      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[20003     3]
 [ 1889     3]]
done in 24.664388s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19975
  

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2665
           1       1.00      0.01      0.02       224

    accuracy                           0.92      2889
   macro avg       0.96      0.50      0.49      2889
weighted avg       0.93      0.92      0.89      2889

Confusion_matrix
[[2665    0]
 [ 222    2]]
done in 32.961406s
0.26876779382967964
0.26675807524478196
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     20006
           1       0.50      0.00      0.01      1892

    accuracy                           0.91     21898
   macro avg       0.71      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19997     9]
 [ 1883     9]]
done in 1.037151s
0.26876779382967964
0.2668530991218994
Classification report
              precision    recall  f1-score   support

           0     

Balanced accuracy score of test is  0.6935820132752164
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.429
threshold:0.2, J-value:0.272
threshold:0.30000000000000004, J-value:0.12
threshold:0.4, J-value:0.031000000000000003
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7145920069529137
Balanced accuracy score of test is  0.7151643326186009
True positive rate of class 1 is  0.737
True positive rate of class 2 is  0.79
Positive prediction rate of class 1 is  0.384
Positive prediction rate of class 2 is  0.393
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.35500000000000004
threshold:0.2, J-value:0.216
threshold:0.30000000000000004, J-value:0.053000000000000005
threshold:0.4, J-value:0.049999999999999996
threshold:0.5, J-value:0.004
threshold:0.6000000000000001, J-value:0.005
threshold:0.70

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


0.09865796155241205
(65731, 87)
X train 65731
Y train 65731
21898 18970 2928
21898 18970 2928
21898 18892 3006
21898 18892 3006


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2598358182856293
0.2641010244032931
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.43      0.03      0.06      1987

    accuracy                           0.91     21898
   macro avg       0.67      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19820    91]
 [ 1918    69]]
done in 1.200746s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2598358182856293
0.2609736692584873
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.44      0.03      0.06      1971

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19850    77]
 [ 1911    60]]
done in 1.194408s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2598358182856293
0.2660176601109082
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17229
           1       0.42      0.03      0.06      1741

    accuracy                           0.91     18970
   macro avg       0.66      0.51      0.50     18970
weighted avg       0.87      0.91      0.87     18970

Confusion_matrix
[[17152    77]
 [ 1686    55]]
done in 1.225098s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2598358182856293
0.2606531919068507
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17198
           1       0.44      0.03      0.05      1694

    accuracy                           0.91     18892
   macro avg       0.68      0.51      0.50     18892
weighted avg       0.87      0.91      0.87     18892

Confusion_matrix
[[17143    55]
 [ 1651    43]]
done in 1.343849s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2598358182856293
0.25168347680306796
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2682
           1       0.50      0.06      0.10       246

    accuracy                           0.92      2928
   macro avg       0.71      0.53      0.53      2928
weighted avg       0.88      0.92      0.88      2928

Confusion_matrix
[[2668   14]
 [ 232   14]]
done in 1.443593s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2598358182856293
0.2629877937186066
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2729
           1       0.44      0.06      0.11       277

    accuracy                           0.91      3006
   macro avg       0.67      0.53      0.53      3006
weighted avg       0.87      0.91      0.87      3006

Confusion_matrix
[[2707   22]
 [ 260   17]]
done in 1.112765s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.36      0.00      0.00      1987

    accuracy                           0.91     21898
   macro avg       0.63      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19902     9]
 [ 1982     5]]
done in 22.579595s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
    

  _warn_prf(average, modifier, msg_start, len(result))


0.2655504065465148
0.27059828342160114
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.32      0.00      0.01      1987

    accuracy                           0.91     21898
   macro avg       0.61      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19898    13]
 [ 1981     6]]
done in 0.659331s
0.2655504065465148
0.268983489796189
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.30      0.00      0.01      1971

    accuracy                           0.91     21898
   macro avg       0.61      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19913    14]
 [ 1965     6]]
done in 0.690035s
0.2655504065465148
0.27302170531286035
Classification report
              precision    recall

Balanced accuracy score of val is  0.6951992506502246
Balanced accuracy score of test is  0.7090086026142528
True positive rate of class 1 is  0.743
True positive rate of class 2 is  0.762
Positive prediction rate of class 1 is  0.375
Positive prediction rate of class 2 is  0.382
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.348
threshold:0.2, J-value:0.20400000000000001
threshold:0.30000000000000004, J-value:0.10600000000000001
threshold:0.4, J-value:0.004
threshold:0.5, J-value:0.002
threshold:0.6000000000000001, J-value:0.002
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6740769195946623
Balanced accuracy score of test is  0.6700628399696437
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.347
threshold:0.2, J-value:0.20899999999999996
threshold:0.30000000000000004, J-value:0.10700000000000001
threshold:0.4, J-value:0.003
threshold:0.5, J-value:0.001
t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(65724, 87)
X train 65724
Y train 65724
21898 18842 3056
21898 18842 3056
21898 18914 2984
21898 18914 2984


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.262571921734257
0.2575944814978284
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.43      0.03      0.06      1954

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19863    81]
 [ 1892    62]]
done in 1.171351s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.262571921734257
0.2583033257535221
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
           1       0.48      0.04      0.07      1943

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19880    75]
 [ 1873    70]]
done in 1.138417s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.262571921734257
0.25828492523572344
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17147
           1       0.45      0.03      0.06      1695

    accuracy                           0.91     18842
   macro avg       0.68      0.51      0.51     18842
weighted avg       0.87      0.91      0.87     18842

Confusion_matrix
[[17080    67]
 [ 1641    54]]
done in 1.718944s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.262571921734257
0.2598675641915402
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17231
           1       0.44      0.03      0.06      1683

    accuracy                           0.91     18914
   macro avg       0.68      0.51      0.50     18914
weighted avg       0.87      0.91      0.87     18914

Confusion_matrix
[[17166    65]
 [ 1632    51]]
done in 1.143627s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.262571921734257
0.2533374982159503
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2797
           1       0.36      0.03      0.06       259

    accuracy                           0.91      3056
   macro avg       0.64      0.51      0.51      3056
weighted avg       0.87      0.91      0.88      3056

Confusion_matrix
[[2783   14]
 [ 251    8]]
done in 1.101697s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.262571921734257
0.24838844444766595
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2724
           1       0.66      0.07      0.13       260

    accuracy                           0.92      2984
   macro avg       0.79      0.53      0.54      2984
weighted avg       0.90      0.92      0.88      2984

Confusion_matrix
[[2714   10]
 [ 241   19]]
done in 1.188740s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.60      0.00      0.01      1954

    accuracy                           0.91     21898
   macro avg       0.76      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19940     4]
 [ 1948     6]]
done in 22.040329s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
    

threshold:0.2, J-value:0.26899999999999996
threshold:0.30000000000000004, J-value:0.142
threshold:0.4, J-value:0.069
threshold:0.5, J-value:0.028
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7070112076352169
Balanced accuracy score of test is  0.7048491891109088
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.41500000000000004
threshold:0.2, J-value:0.26899999999999996
threshold:0.30000000000000004, J-value:0.144
threshold:0.4, J-value:0.07100000000000001
threshold:0.5, J-value:0.028
threshold:0.6000000000000001, J-value:0.013000000000000001
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.707709597024377
Balanced accuracy score of test is  0.7007828992316596
threshold:0.0, J-value:0.0
thr

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(8973, 88)
(56721, 88)
0.09320175438596491 0.0980099887723102
0.097953216374269
(65733, 87)
X train 65733
Y train 65733
21898 18920 2978
21898 18920 2978
21898 18865 3033
21898 18865 3033


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2595395326493107
0.26320790039113456
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.42      0.03      0.06      2005

    accuracy                           0.91     21898
   macro avg       0.66      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19807    86]
 [ 1943    62]]
done in 1.154223s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2595395326493107
0.26226383268674364
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.50      0.03      0.06      1991

    accuracy                           0.91     21898
   macro avg       0.70      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19840    67]
 [ 1925    66]]
done in 1.047240s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2595395326493107
0.2618165843439168
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17191
           1       0.43      0.03      0.06      1729

    accuracy                           0.91     18920
   macro avg       0.67      0.51      0.50     18920
weighted avg       0.87      0.91      0.87     18920

Confusion_matrix
[[17118    73]
 [ 1675    54]]
done in 1.542957s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2595395326493107
0.2643483883870174
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17135
           1       0.46      0.03      0.05      1730

    accuracy                           0.91     18865
   macro avg       0.69      0.51      0.50     18865
weighted avg       0.87      0.91      0.87     18865

Confusion_matrix
[[17082    53]
 [ 1684    46]]
done in 1.132809s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2595395326493107
0.27204728911288084
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2702
           1       0.38      0.03      0.05       276

    accuracy                           0.91      2978
   macro avg       0.65      0.51      0.50      2978
weighted avg       0.86      0.91      0.87      2978

Confusion_matrix
[[2689   13]
 [ 268    8]]
done in 1.085877s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2595395326493107
0.2492980749268803
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2772
           1       0.59      0.08      0.14       261

    accuracy                           0.92      3033
   macro avg       0.75      0.54      0.55      3033
weighted avg       0.89      0.92      0.89      3033

Confusion_matrix
[[2758   14]
 [ 241   20]]
done in 1.006927s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.60      0.00      0.01      2005

    accuracy                           0.91     21898
   macro avg       0.75      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19889     4]
 [ 1999     6]]
done in 19.975559s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
    

threshold:0.2, J-value:0.26699999999999996
threshold:0.30000000000000004, J-value:0.145
threshold:0.4, J-value:0.069
threshold:0.5, J-value:0.027
threshold:0.6000000000000001, J-value:0.006999999999999999
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7049561538269643
Balanced accuracy score of test is  0.6983989236539563
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.41200000000000003
threshold:0.2, J-value:0.27199999999999996
threshold:0.30000000000000004, J-value:0.147
threshold:0.4, J-value:0.067
threshold:0.5, J-value:0.027
threshold:0.6000000000000001, J-value:0.008
threshold:0.7000000000000001, J-value:0.005
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7062568450228456
Balanced accuracy score of test is  0.6983336172624399
threshold:0.0, J-value:0.0
threshold:0.1, J-v

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(9011, 88)
(56683, 88)
0.09316996239233288 0.09987193418193108
0.09984229042824215
(65749, 87)
X train 65749
Y train 65749
21898 18905 2993
21898 18905 2993
21898 18918 2980
21898 18918 2980


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26151932101491215
0.26203843910485985
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.51      0.04      0.08      1967

    accuracy                           0.91     21898
   macro avg       0.71      0.52      0.51     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19853    78]
 [ 1886    81]]
done in 1.250330s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26151932101491215
0.25905267214653277
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
           1       0.46      0.04      0.07      1942

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19876    80]
 [ 1873    69]]
done in 1.074485s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26151932101491215
0.2604508455560751
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17217
           1       0.53      0.04      0.07      1688

    accuracy                           0.91     18905
   macro avg       0.72      0.52      0.51     18905
weighted avg       0.88      0.91      0.87     18905

Confusion_matrix
[[17157    60]
 [ 1620    68]]
done in 1.029674s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26151932101491215
0.2601346174858
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17231
           1       0.44      0.03      0.06      1687

    accuracy                           0.91     18918
   macro avg       0.68      0.51      0.50     18918
weighted avg       0.87      0.91      0.87     18918

Confusion_matrix
[[17166    65]
 [ 1636    51]]
done in 0.983116s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26151932101491215
0.27206632284684995
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2714
           1       0.42      0.05      0.08       279

    accuracy                           0.91      2993
   macro avg       0.66      0.52      0.52      2993
weighted avg       0.86      0.91      0.87      2993

Confusion_matrix
[[2696   18]
 [ 266   13]]
done in 1.016968s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26151932101491215
0.25218413458671396
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2725
           1       0.55      0.07      0.12       255

    accuracy                           0.92      2980
   macro avg       0.73      0.53      0.54      2980
weighted avg       0.89      0.92      0.88      2980

Confusion_matrix
[[2710   15]
 [ 237   18]]
done in 0.977593s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.43      0.00      0.01      1967

    accuracy                           0.91     21898
   macro avg       0.67      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19923     8]
 [ 1961     6]]
done in 20.163188s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
  

threshold:0.2, J-value:0.256
threshold:0.30000000000000004, J-value:0.148
threshold:0.4, J-value:0.069
threshold:0.5, J-value:0.037000000000000005
threshold:0.6000000000000001, J-value:0.011
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6980308041390484
Balanced accuracy score of test is  0.7009267711312983
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.395
threshold:0.2, J-value:0.24900000000000003
threshold:0.30000000000000004, J-value:0.149
threshold:0.4, J-value:0.068
threshold:0.5, J-value:0.037
threshold:0.6000000000000001, J-value:0.011
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6976362259884765
Balanced accuracy score of test is  0.7008078483875627
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.402
threshold:0.2, 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(8852, 88)
(56842, 88)
0.09297444128904803 0.09886328487472935
0.09877762686751451
(65741, 87)
X train 65741
Y train 65741
21898 18847 3051
21898 18847 3051
21898 18817 3081
21898 18817 3081


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2617168716935776
0.25522205244473595
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.51      0.04      0.07      1925

    accuracy                           0.91     21898
   macro avg       0.71      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19905    68]
 [ 1855    70]]
done in 1.157667s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2617168716935776
0.26502121084684266
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
           1       0.44      0.03      0.06      2032

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19782    84]
 [ 1966    66]]
done in 1.027222s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2617168716935776
0.2551576320881057
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17189
           1       0.49      0.03      0.06      1658

    accuracy                           0.91     18847
   macro avg       0.70      0.51      0.51     18847
weighted avg       0.88      0.91      0.88     18847

Confusion_matrix
[[17132    57]
 [ 1603    55]]
done in 1.078338s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2617168716935776
0.26590495315534435
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17067
           1       0.42      0.03      0.05      1750

    accuracy                           0.91     18817
   macro avg       0.66      0.51      0.50     18817
weighted avg       0.86      0.91      0.87     18817

Confusion_matrix
[[17002    65]
 [ 1703    47]]
done in 1.036468s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2617168716935776
0.2556199975320547
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95      2784
           1       0.58      0.06      0.10       267

    accuracy                           0.91      3051
   macro avg       0.75      0.53      0.53      3051
weighted avg       0.89      0.91      0.88      3051

Confusion_matrix
[[2773   11]
 [ 252   15]]
done in 1.032419s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2617168716935776
0.2596238142161781
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2799
           1       0.50      0.07      0.12       282

    accuracy                           0.91      3081
   macro avg       0.71      0.53      0.54      3081
weighted avg       0.88      0.91      0.88      3081

Confusion_matrix
[[2780   19]
 [ 263   19]]
done in 1.054105s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.38      0.00      0.01      1925

    accuracy                           0.91     21898
   macro avg       0.65      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19965     8]
 [ 1920     5]]
done in 19.901672s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
    

threshold:0.30000000000000004, J-value:0.156
threshold:0.4, J-value:0.078
threshold:0.5, J-value:0.032999999999999995
threshold:0.6000000000000001, J-value:0.011
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7040347586124385
Balanced accuracy score of test is  0.7064896816544867
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.40700000000000003
threshold:0.2, J-value:0.262
threshold:0.30000000000000004, J-value:0.151
threshold:0.4, J-value:0.07300000000000001
threshold:0.5, J-value:0.030000000000000002
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7039884261268725
Balanced accuracy score of test is  0.7059168654630071
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.40900000000000003
t

In [19]:
def add_mean_sd(records, result_table, overall_records, type):
    """Append mean / standard-deviation summary rows for one classifier.

    Three dicts are built from the columns of ``result_table``:

    * a row holding the ``.mean()`` of every summary column,
    * a row holding the ``.std()`` of every summary column,
    * a shorter "headline" row of means, tagged with ``type``.

    The first two are appended to ``records`` (mean first, then std) and the
    third to ``overall_records``. Both lists are modified in place.

    Parameters
    ----------
    records : list
        List of row dicts; receives the mean row and the std row.
    result_table : DataFrame-like
        Indexed by column name; each selected column must provide
        ``.mean()`` and ``.std()``. A missing column raises ``KeyError``.
    overall_records : list
        Receives the headline row for this classifier.
    type : str
        Label stored under the ``'type'`` key of the headline row
        (callers in this notebook pass e.g. ``'lr'``). The name shadows the
        builtin but is kept because it is part of the signature.

    Returns
    -------
    tuple
        ``(pd.DataFrame(records), overall_records)`` where the second item is
        the same list object that was passed in.
    """
    # The tuple order is the key order of the generated dicts, and therefore
    # the column order of the resulting DataFrame -- do not reorder.
    summary_columns = (
        'auroc',
        'overall threshold', 'white threshold', 'black threshold',
        'overall ba validation', 'overall ba test',
        'white ba validation', 'white ba test',
        'black ba validation', 'black ba test',
        'overall precision', 'overall recall',
        'overall tpr', 'overall tnr', 'overall pd',
        'white precision', 'white recall',
        'white tpr', 'white tnr', 'white pd',
        'black precision', 'black recall',
        'black tpr', 'black tnr', 'black pd',
        'eod', 'di',
    )
    # Subset reported in the cross-classifier comparison table.
    headline_columns = (
        'auroc',
        'overall threshold', 'white threshold', 'black threshold',
        'overall ba test', 'white ba test', 'black ba test',
        'overall tpr', 'overall pd',
        'white tpr', 'white pd',
        'black tpr', 'black pd',
        'eod', 'di',
    )

    mean_row = {column: result_table[column].mean() for column in summary_columns}
    records.append(mean_row)

    std_row = {column: result_table[column].std() for column in summary_columns}
    records.append(std_row)

    headline_row = {'type': type}
    for column in headline_columns:
        headline_row[column] = result_table[column].mean()
    overall_records.append(headline_row)

    return pd.DataFrame(records), overall_records

In [20]:
# Collapse each classifier's per-run table into mean/std summary rows. Every
# call also appends one headline row to the shared list: add_mean_sd returns
# the list it was given, so overall_records and overall_table are one object.
# NOTE: result_* are rebound from per-run tables to summary frames and
# records_* grow by two rows per call, so this cell is not idempotent --
# regenerate the per-run results before re-running it.
overall_table = []
result_lr, overall_records = add_mean_sd(records_lr, result_lr, overall_table, 'lr')
result_rf, overall_records = add_mean_sd(records_rf, result_rf, overall_records, 'rf')
result_dt, overall_records = add_mean_sd(records_dt, result_dt, overall_records, 'dt')
result_gbt, overall_records = add_mean_sd(records_gbt, result_gbt, overall_records, 'gbt')

# One CSV per classifier: the rows from records_* plus the mean and std rows.
result_path = '/Users/lifuchen/Desktop/research/resample_data/'
for _clf_summary, _csv_name in (
    (result_lr, 'race-lr-resample-proportion-result.csv'),
    (result_rf, 'race-rf-resample-proportion-result.csv'),
    (result_dt, 'race-dt-resample-proportion-result.csv'),
    (result_gbt, 'race-gbt-resample-proportion-result.csv'),
):
    _clf_summary.to_csv(path.join(result_path, _csv_name), index=False)

# Cross-classifier comparison table: one headline row per classifier.
overall_result = pd.DataFrame(overall_table)
result_path = '/Users/lifuchen/Desktop/research/resample_result/'
overall_result.to_csv(path.join(result_path, 'race-resample-proportion.csv'), index=False)