In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from os import path
import imblearn
from imblearn.over_sampling import RandomOverSampler, SMOTE
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
import sklearn.preprocessing
from sklearn import preprocessing
from sklearn.preprocessing import Normalizer
import src.lib.utility_classfier as uclf
import src.lib.optimal_threhold_related as thres
import src.lib.fairness_tests as fair

In [2]:
# Absolute location of the CSV file that holds the full data set.
data_path = '/Users/lifuchen/Desktop/research/data.csv'

# Read the whole table into memory; later cells derive X and y from `df`.
df = pd.read_csv(filepath_or_buffer=data_path)

In [3]:
# Target vector: the raw values of the 'Class' column.
y = df['Class'].values

# Feature table: every column except 'GRID', 'Class' and 'Race_B'.
X = df.drop(columns=['GRID', 'Class', 'Race_B'])

# Display (rows, columns) of the feature table as the cell output.
X.shape

(109490, 86)

In [4]:
def save_prediction(classifier, characteristic, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white, X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black, result_path='/Users/lifuchen/Desktop/research/predictions/'):
    """Score every evaluation split with one classifier and store the scores as CSV.

    ``classifier`` names a function in ``uclf``. It is looked up with
    ``getattr`` and called once per evaluation split as
    ``f(X_train_scaled, y_train, X_eval, y_eval)``. The six results are
    written as the columns ``val_score``, ``test_score``, ``val_1_score``,
    ``test_1_score``, ``val_2_score`` and ``test_2_score`` of
    ``<classifier><characteristic>prediction.csv`` inside ``result_path``.
    Splits have different sizes, so shorter columns are padded with missing
    values.

    Args:
        classifier: Name of the scoring function in ``uclf``.
        characteristic: Tag appended to the classifier name in the file name.
        X_train_scaled, y_train: Training features and labels.
        X_val_scaled, y_val, X_test_scaled, y_test: Overall validation/test data.
        X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white:
            Validation/test data of the first group (columns ``*_1_score``).
        X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black:
            Validation/test data of the second group (columns ``*_2_score``).
        result_path: Directory that receives the CSV file. It is created when
            missing. Defaults to the directory the notebook always used.

    Returns:
        None. The result is the CSV file written to disk.

    Raises:
        AttributeError: If ``uclf`` has no attribute called ``classifier``.
    """
    import os  # local import: the notebook's import cell only exposes os.path

    # Resolve the scoring function first so an unknown name fails immediately.
    method_to_call = getattr(uclf, classifier)

    # Make sure the target directory exists *before* the scoring calls, so a
    # missing folder cannot discard their results at write time.
    if result_path:
        os.makedirs(result_path, exist_ok=True)

    # NOTE(review): the training data is handed to the helper on every call;
    # presumably the model is refit each time, so the six score vectors may
    # come from six separate fits -- confirm against uclf.
    y_val_score = method_to_call(X_train_scaled, y_train, X_val_scaled, y_val)
    y_test_score = method_to_call(X_train_scaled, y_train, X_test_scaled, y_test)

    y_val_score_white = method_to_call(X_train_scaled, y_train, X_val_white_scaled, y_val_white)
    y_test_score_white = method_to_call(X_train_scaled, y_train, X_test_white_scaled, y_test_white)

    y_val_score_black = method_to_call(X_train_scaled, y_train, X_val_black_scaled, y_val_black)
    y_test_score_black = method_to_call(X_train_scaled, y_train, X_test_black_scaled, y_test_black)

    my_dict = dict(
        val_score=y_val_score,
        test_score=y_test_score,
        val_1_score=y_val_score_white,
        test_1_score=y_test_score_white,
        val_2_score=y_val_score_black,
        test_2_score=y_test_score_black,
    )
    # Build one row per split (rows may differ in length) and flip the frame
    # so that every split becomes a column padded with missing values.
    overall_prediction = pd.DataFrame.from_dict(my_dict, orient='index').transpose()

    filename = str(classifier) + str(characteristic) + "prediction.csv"
    overall_prediction.to_csv(path.join(result_path, filename), index=False)

In [5]:
def get_result(classifier, characteristic, records, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_white_scaled, y_val_white, X_test_white_scaled, y_test_white, X_val_black_scaled, y_val_black, X_test_black_scaled, y_test_black, result_path='/Users/lifuchen/Desktop/research/predictions/'):
    """Load stored scores, compute performance and fairness metrics, append them to ``records``.

    The scores are read from ``<classifier><characteristic>prediction.csv``
    in ``result_path`` (the file layout written by ``save_prediction``). For
    the overall data and for each of the two groups a threshold is obtained
    from the validation scores via ``balance_accuracy`` and then used for the
    test-set metrics. One dict holding all metrics is appended to ``records``.

    Args:
        classifier: Classifier name used as file-name prefix.
        characteristic: Tag that follows the classifier name in the file name.
        records: List that receives one result dict per call (mutated in place).
        y_val, y_test: Labels of the overall validation/test split.
        y_val_white, y_test_white: Labels of the first group's splits.
        y_val_black, y_test_black: Labels of the second group's splits.
        X_train_scaled, y_train, X_val_scaled, X_test_scaled,
        X_val_white_scaled, X_test_white_scaled, X_val_black_scaled,
        X_test_black_scaled: Not used here; kept so the signature mirrors
            ``save_prediction``.
        result_path: Directory containing the prediction file. Defaults to
            the directory the notebook always used.

    Returns:
        None. The result is the dict appended to ``records``.
    """
    # str() on both parts keeps the name identical to the one save_prediction builds.
    filename = str(classifier) + str(characteristic) + "prediction.csv"
    prediction = pd.read_csv(path.join(result_path, filename))

    def stored_scores(column):
        # Columns in the file share one length; missing values are dropped
        # to recover the scores of the individual split.
        return prediction[column].dropna()

    y_val_score = stored_scores('val_score')
    y_test_score = stored_scores('test_score')

    y_val_score_white = stored_scores('val_1_score')
    y_test_score_white = stored_scores('test_1_score')

    y_val_score_black = stored_scores('val_2_score')
    y_test_score_black = stored_scores('test_2_score')

    # Overall: threshold from the validation split, metrics on the test split.
    threshold, ba_val, ba_test = balance_accuracy(y_val, y_val_score, y_test, y_test_score)
    auroc = roc_auc_score(y_test, y_test_score)
    precision, recall, tpr, tnr, pd_overall = thres.calculate_precision_metrics(y_test, y_test_score, threshold)

    # Each group gets its own threshold from its own validation scores.
    threshold_white, ba_val_white, ba_test_white = balance_accuracy(y_val_white, y_val_score_white, y_test_white, y_test_score_white)
    precision_white, recall_white, tpr_white, tnr_white, pd_white = thres.calculate_precision_metrics(y_test_white, y_test_score_white, threshold_white)

    threshold_black, ba_val_black, ba_test_black = balance_accuracy(y_val_black, y_val_score_black, y_test_black, y_test_score_black)
    precision_black, recall_black, tpr_black, tnr_black, pd_black = thres.calculate_precision_metrics(y_test_black, y_test_score_black, threshold_black)

    # Group-comparison metrics on the test split, each group at its own threshold.
    eod = fair.get_EOD(y_test_white, y_test_score_white, threshold_white, y_test_black, y_test_score_black, threshold_black)
    sp = fair.get_SP(y_test_white, y_test_score_white, threshold_white, y_test_black, y_test_score_black, threshold_black)

    # Key order defines the column order of the result tables built later.
    records.append({
        'auroc': auroc,
        'overall threshold': threshold,
        'white threshold': threshold_white,
        'black threshold': threshold_black,
        'overall ba validation': ba_val,
        'overall ba test': ba_test,
        'white ba validation': ba_val_white,
        'white ba test': ba_test_white,
        'black ba validation': ba_val_black,
        'black ba test': ba_test_black,
        'overall precision': precision,
        'overall recall': recall,
        'overall tpr': tpr,
        'overall tnr': tnr,
        'overall pd': pd_overall,
        'white precision': precision_white,
        'white recall': recall_white,
        'white tpr': tpr_white,
        'white tnr': tnr_white,
        'white pd': pd_white,
        'black precision': precision_black,
        'black recall': recall_black,
        'black tpr': tpr_black,
        'black tnr': tnr_black,
        'black pd': pd_black,
        'eod': eod,
        # NOTE(review): this key is 'di' but the value comes from fair.get_SP;
        # confirm the label matches the metric that helper returns.
        'di': sp,
        })

In [6]:
def balance_accuracy (y_val, y_val_score,y_test, y_test_score):
    """Choose a threshold on the validation split and report balanced accuracy on both splits.

    The threshold is the first element of the pair returned by
    ``thres.get_optimal_threshold_Jvalue`` for the validation data. It is then
    passed to ``thres.calculate_balanced_accuracy`` for the validation split
    and for the test split. The threshold and both scores are printed.

    Returns:
        Tuple ``(threshold, ba_val, ba_test)``.
    """
    best_threshold, _ = thres.get_optimal_threshold_Jvalue(y_val, y_val_score)
    print("Optimal threshold by J value is ", best_threshold)

    # Validation first, then test -- each score is printed right after it is computed.
    evaluations = (
        ("Balanced accuracy score of val is ", y_val, y_val_score),
        ("Balanced accuracy score of test is ", y_test, y_test_score),
    )
    balanced_scores = []
    for message, labels, scores in evaluations:
        balanced_scores.append(thres.calculate_balanced_accuracy(labels, scores, best_threshold))
        print(message, balanced_scores[-1])

    ba_val, ba_test = balanced_scores
    return best_threshold, ba_val, ba_test

In [7]:
def fairness_metrics (X, y, attribute, random_state):
    """Run one split/scale/score/evaluate cycle for all four classifiers.

    Steps:
      1. Resolve the module-level accumulators ``records_lr``, ``records_rf``,
         ``records_dt`` and ``records_gbt``.
      2. Split the data with
         ``fair.split_by_trait_balance_proportion_no_protected_trait`` and
         print the row counts of every part.
      3. Fit a ``MaxAbsScaler`` on the training features only and apply it to
         every validation/test part.
      4. Call ``save_prediction`` for each classifier, then ``get_result`` for
         each classifier with its accumulator list.

    Args:
        X: Feature table handed to the split helper.
        y: Labels handed to the split helper.
        attribute: Attribute name passed to the split helper; also the first
            part of the tag used in the prediction file names.
        random_state: Seed passed to the split helper; also the last part of
            the tag used in the prediction file names.

    Returns:
        None. Results are appended to the module-level ``records_*`` lists.

    Raises:
        NameError: If any of the ``records_*`` lists is not defined.
    """
    # Look the accumulators up first: a missing list then fails immediately
    # instead of only after every classifier has been scored and saved.
    # Insertion order fixes the processing order of the classifiers.
    records_by_classifier = {
        "logic_regression": records_lr,
        "random_forest": records_rf,
        "decision_tree": records_dt,
        "gradiant_boosting": records_gbt,
    }

    X_train, y_train, X_val, y_val, X_test, y_test, X_val_white, X_val_black, y_val_white, y_val_black, X_test_white, X_test_black, y_test_white, y_test_black \
        = fair.split_by_trait_balance_proportion_no_protected_trait(X, y, attribute, random_state)

    print("X train", X_train.shape[0])
    print("Y train", y_train.shape[0])
    print(X_val.shape[0], X_val_white.shape[0], X_val_black.shape[0])
    print(y_val.shape[0], y_val_white.shape[0], y_val_black.shape[0])
    print(X_test.shape[0], X_test_white.shape[0], X_test_black.shape[0])
    print(y_test.shape[0], y_test_white.shape[0], y_test_black.shape[0])

    # The scaler is fitted on the training part only; all other parts are
    # transformed with those training statistics.
    max_abs_scaler = preprocessing.MaxAbsScaler()
    X_train_scaled = max_abs_scaler.fit_transform(X_train)
    X_test_scaled = max_abs_scaler.transform(X_test)
    X_test_white_scaled = max_abs_scaler.transform(X_test_white)
    X_test_black_scaled = max_abs_scaler.transform(X_test_black)
    X_val_scaled = max_abs_scaler.transform(X_val)
    X_val_white_scaled = max_abs_scaler.transform(X_val_white)
    X_val_black_scaled = max_abs_scaler.transform(X_val_black)

    characteristic = attribute + "resample-by-proportion" + str(random_state)

    # Positional tail shared by save_prediction and get_result.
    split_args = (
        X_train_scaled, y_train,
        X_val_scaled, y_val,
        X_test_scaled, y_test,
        X_val_white_scaled, y_val_white,
        X_test_white_scaled, y_test_white,
        X_val_black_scaled, y_val_black,
        X_test_black_scaled, y_test_black,
    )

    # First store the scores of every classifier ...
    for classifier in records_by_classifier:
        save_prediction(classifier, characteristic, *split_args)

    # ... then evaluate each stored file into its accumulator list.
    for classifier, records in records_by_classifier.items():
        get_result(classifier, characteristic, records, *split_args)

In [8]:
# One accumulator list per classifier; fairness_metrics hands these
# module-level lists to get_result, which appends one dict per run.
records_lr, records_rf, records_dt, records_gbt = [], [], [], []

# Repeat the full cycle for the seeds 0..9 with "Race_W" as the attribute.
for random_state in range(10):
    fairness_metrics(X, y, "Race_W", random_state)

# Turn each accumulator into a table with one row per seed.
result_lr, result_rf, result_dt, result_gbt = (
    pd.DataFrame(rows)
    for rows in (records_lr, records_rf, records_dt, records_gbt)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(9055, 87)
(56639, 87)
0.09307098020280058 0.0988475865280149
0.0987445678416224
(65741, 86)
X train 65741
Y train 65741
21898 18899 2999
21898 18899 2999
21898 18968 2930
21898 18968 2930


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26052423943952174
0.2619197793527146
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.46      0.03      0.06      1994

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19826    78]
 [ 1928    66]]
done in 0.633689s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26052423943952174
0.2624419486803707
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.42      0.03      0.06      1964

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19846    88]
 [ 1900    64]]
done in 0.633162s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26052423943952174
0.26280953220326864
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17180
           1       0.47      0.03      0.06      1719

    accuracy                           0.91     18899
   macro avg       0.69      0.51      0.50     18899
weighted avg       0.87      0.91      0.87     18899

Confusion_matrix
[[17120    60]
 [ 1666    53]]
done in 0.600093s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26052423943952174
0.2629030196799557
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17260
           1       0.41      0.03      0.05      1708

    accuracy                           0.91     18968
   macro avg       0.66      0.51      0.50     18968
weighted avg       0.87      0.91      0.87     18968

Confusion_matrix
[[17188    72]
 [ 1658    50]]
done in 0.618986s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26052423943952174
0.25631276397338126
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2724
           1       0.42      0.05      0.08       275

    accuracy                           0.91      2999
   macro avg       0.67      0.52      0.52      2999
weighted avg       0.87      0.91      0.87      2999

Confusion_matrix
[[2706   18]
 [ 262   13]]
done in 0.594428s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26052423943952174
0.25945710406599304
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2674
           1       0.47      0.05      0.10       256

    accuracy                           0.91      2930
   macro avg       0.69      0.52      0.53      2930
weighted avg       0.88      0.91      0.88      2930

Confusion_matrix
[[2658   16]
 [ 242   14]]
done in 0.610357s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.25      0.00      0.00      1994

    accuracy                           0.91     21898
   macro avg       0.58      0.50      0.48     21898
weighted avg       0.85      0.91      0.87     21898

Confusion_matrix
[[19892    12]
 [ 1990     4]]
done in 18.857104s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
  

  _warn_prf(average, modifier, msg_start, len(result))


0.2659754894839782
0.2783021940103029
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17260
           1       0.00      0.00      0.00      1708

    accuracy                           0.91     18968
   macro avg       0.45      0.50      0.48     18968
weighted avg       0.83      0.91      0.87     18968

Confusion_matrix
[[17257     3]
 [ 1708     0]]
done in 0.592916s
0.2659754894839782
0.278556531900902
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2724
           1       0.00      0.00      0.00       275

    accuracy                           0.91      2999
   macro avg       0.45      0.50      0.48      2999
weighted avg       0.82      0.91      0.86      2999

Confusion_matrix
[[2723    1]
 [ 275    0]]
done in 0.561499s
0.2659754894839782
0.26937169011682616
Classification report
              precision    recall  f1-

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.41      0.02      0.03      1994

    accuracy                           0.91     21898
   macro avg       0.66      0.51      0.49     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19860    44]
 [ 1963    31]]
done in 32.414456s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.44      0.02      0.04      1964

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19880    54]
 [ 1921    43]]
done in 32.726181s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17180
           1       0.45      0.02    

threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.714171645940292
Balanced accuracy score of test is  0.698445933660967
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.45199999999999996
threshold:0.2, J-value:0.32599999999999996
threshold:0.30000000000000004, J-value:0.167
threshold:0.4, J-value:0.059000000000000004
threshold:0.5, J-value:0.008
threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7260058737151248
Balanced accuracy score of test is  0.6966739902767389
True positive rate of class 1 is  0.68
True positive rate of class 2 is  0.656
Positive prediction rate of class 1 is  0.319
Positive prediction rate of class 2 is  0.297


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(8896, 87)
(56798, 87)
0.09772951628825272 0.10052315442743655
0.10044422507403751
(65716, 86)
X train 65716
Y train 65716
21898 18825 3073
21898 18825 3073
21898 18883 3015
21898 18883 3015


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2628977766867542
0.25605708671099653
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     19980
           1       0.47      0.04      0.07      1918

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19901    79]
 [ 1847    71]]
done in 0.564627s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2628977766867542
0.2579563507090466
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.43      0.03      0.06      1926

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19886    86]
 [ 1860    66]]
done in 0.547873s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2628977766867542
0.2555714900540008
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17178
           1       0.45      0.03      0.06      1647

    accuracy                           0.91     18825
   macro avg       0.68      0.51      0.51     18825
weighted avg       0.87      0.91      0.88     18825

Confusion_matrix
[[17112    66]
 [ 1592    55]]
done in 0.529280s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2628977766867542
0.26084811991730744
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17196
           1       0.44      0.03      0.06      1687

    accuracy                           0.91     18883
   macro avg       0.68      0.51      0.51     18883
weighted avg       0.87      0.91      0.87     18883

Confusion_matrix
[[17124    72]
 [ 1630    57]]
done in 0.579354s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2628977766867542
0.25903182054371543
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95      2802
           1       0.55      0.06      0.11       271

    accuracy                           0.91      3073
   macro avg       0.73      0.53      0.53      3073
weighted avg       0.88      0.91      0.88      3073

Confusion_matrix
[[2789   13]
 [ 255   16]]
done in 0.607253s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2628977766867542
0.23984514740570043
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2776
           1       0.39      0.04      0.07       239

    accuracy                           0.92      3015
   macro avg       0.66      0.52      0.51      3015
weighted avg       0.88      0.92      0.89      3015

Confusion_matrix
[[2762   14]
 [ 230    9]]
done in 0.556355s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19980
           1       0.67      0.00      0.00      1918

    accuracy                           0.91     21898
   macro avg       0.79      0.50      0.48     21898
weighted avg       0.89      0.91      0.87     21898

Confusion_matrix
[[19978     2]
 [ 1914     4]]
done in 18.950767s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
   

  _warn_prf(average, modifier, msg_start, len(result))


0.26792162888937726
0.2685942867524345
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19980
           1       0.40      0.00      0.00      1918

    accuracy                           0.91     21898
   macro avg       0.66      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19977     3]
 [ 1916     2]]
done in 0.630462s
0.26792162888937726
0.26931751906249957
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.14      0.00      0.00      1926

    accuracy                           0.91     21898
   macro avg       0.53      0.50      0.48     21898
weighted avg       0.84      0.91      0.87     21898

Confusion_matrix
[[19966     6]
 [ 1925     1]]
done in 0.624156s
0.26792162888937726
0.2654946532081715
Classification report
              precision    rec

True positive rate of class 1 is  0.738
True positive rate of class 2 is  0.774
Positive prediction rate of class 1 is  0.384
Positive prediction rate of class 2 is  0.373
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.35800000000000004
threshold:0.2, J-value:0.20999999999999996
threshold:0.30000000000000004, J-value:0.10700000000000001
threshold:0.4, J-value:0.001
threshold:0.5, J-value:0.001
threshold:0.6000000000000001, J-value:0.001
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.001
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6794684152348386
Balanced accuracy score of test is  0.6874879244233724
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.35100000000000003
threshold:0.2, J-value:0.21699999999999997
threshold:0.30000000000000004, J-value:0.11500000000000002
threshold:0.4, J-value:0.001
threshold:0.5, J-value:0.001
threshold:0.6000000000000001, J-value:0.001
threshold:0.7000000000000001, J-

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(8953, 87)
(56741, 87)
0.09624096975633648 0.09822707389772771
0.09820007346638912
(65710, 86)
X train 65710
Y train 65710
21898 18936 2962
21898 18936 2962
21898 18829 3069
21898 18829 3069


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.259968929643947
0.2601665091026239
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.46      0.03      0.06      1948

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19872    78]
 [ 1881    67]]
done in 0.613166s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.259968929643947
0.2624529489116322
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.50      0.03      0.05      2015

    accuracy                           0.91     21898
   macro avg       0.71      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19826    57]
 [ 1957    58]]
done in 0.573044s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.259968929643947
0.26291187565653557
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17239
           1       0.44      0.03      0.06      1697

    accuracy                           0.91     18936
   macro avg       0.68      0.51      0.50     18936
weighted avg       0.87      0.91      0.87     18936

Confusion_matrix
[[17174    65]
 [ 1646    51]]
done in 0.577484s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.259968929643947
0.2632969628273859
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17079
           1       0.53      0.03      0.05      1750

    accuracy                           0.91     18829
   macro avg       0.72      0.51      0.50     18829
weighted avg       0.87      0.91      0.87     18829

Confusion_matrix
[[17039    40]
 [ 1705    45]]
done in 0.714199s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.259968929643947
0.24261544189638803
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2711
           1       0.55      0.06      0.11       251

    accuracy                           0.92      2962
   macro avg       0.74      0.53      0.54      2962
weighted avg       0.89      0.92      0.88      2962

Confusion_matrix
[[2698   13]
 [ 235   16]]
done in 0.558590s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.259968929643947
0.2572747351547967
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2804
           1       0.43      0.05      0.09       265

    accuracy                           0.91      3069
   macro avg       0.68      0.52      0.52      3069
weighted avg       0.88      0.91      0.88      3069

Confusion_matrix
[[2787   17]
 [ 252   13]]
done in 0.612691s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.33      0.00      0.01      1948

    accuracy                           0.91     21898
   macro avg       0.62      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19938    12]
 [ 1942     6]]
done in 19.591834s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
     

  _warn_prf(average, modifier, msg_start, len(result))


0.26451451233769135
0.2812937742410675
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.00      0.00      0.00      2015

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.82      0.91      0.86     21898

Confusion_matrix
[[19878     5]
 [ 2015     0]]
done in 0.628619s
0.26451451233769135
0.271372572295425
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17239
           1       0.00      0.00      0.00      1697

    accuracy                           0.91     18936
   macro avg       0.46      0.50      0.48     18936
weighted avg       0.83      0.91      0.87     18936

Confusion_matrix
[[17239     0]
 [ 1697     0]]
done in 0.611576s


  _warn_prf(average, modifier, msg_start, len(result))


0.26451451233769135
0.27511831675186643
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17079
           1       0.00      0.00      0.00      1750

    accuracy                           0.91     18829
   macro avg       0.45      0.50      0.48     18829
weighted avg       0.82      0.91      0.86     18829

Confusion_matrix
[[17079     0]
 [ 1750     0]]
done in 0.754245s


  _warn_prf(average, modifier, msg_start, len(result))


0.26451451233769135
0.24997201422938214
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2711
           1       0.00      0.00      0.00       251

    accuracy                           0.92      2962
   macro avg       0.46      0.50      0.48      2962
weighted avg       0.84      0.92      0.87      2962

Confusion_matrix
[[2711    0]
 [ 251    0]]
done in 0.606412s


  _warn_prf(average, modifier, msg_start, len(result))


0.26451451233769135
0.31918158429781796
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95      2804
           1       0.00      0.00      0.00       265

    accuracy                           0.91      3069
   macro avg       0.46      0.50      0.48      3069
weighted avg       0.83      0.91      0.87      3069

Confusion_matrix
[[2799    5]
 [ 265    0]]
done in 0.578845s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.46      0.02      0.04      1948

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19906    44]
 [ 1911    37]]
done in 33.556010s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
  

threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.3990000000000001
threshold:0.2, J-value:0.256
threshold:0.30000000000000004, J-value:0.129
threshold:0.4, J-value:0.043
threshold:0.5, J-value:0.013
threshold:0.6000000000000001, J-value:0.002
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6995637914237232
Balanced accuracy score of test is  0.7175458750512325
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.502
threshold:0.2, J-value:0.285
threshold:0.30000000000000004, J-value:0.156
threshold:0.4, J-value:0.07100000000000001
threshold:0.5, J-value:0.041999999999999996
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7507836599011553
Balanced accuracy score of test is  0.6971933625817566
True pos

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(9002, 87)
(56692, 87)
0.09566699123661149 0.09817139315047264
0.0981012658227848
(65714, 86)
X train 65714
Y train 65714
21898 18932 2966
21898 18932 2966
21898 18882 3016
21898 18882 3016


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25950205031724244
0.2629677218436161
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.46      0.04      0.07      1990

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19822    86]
 [ 1916    74]]
done in 0.688490s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25950205031724244
0.2608569571200985
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
           1       0.44      0.03      0.06      1980

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19831    87]
 [ 1911    69]]
done in 0.769708s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25950205031724244
0.2631295050127756
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17204
           1       0.48      0.04      0.07      1728

    accuracy                           0.91     18932
   macro avg       0.69      0.52      0.51     18932
weighted avg       0.87      0.91      0.87     18932

Confusion_matrix
[[17137    67]
 [ 1667    61]]
done in 0.694592s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25950205031724244
0.26319439247262666
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17156
           1       0.41      0.03      0.05      1726

    accuracy                           0.91     18882
   macro avg       0.66      0.51      0.50     18882
weighted avg       0.87      0.91      0.87     18882

Confusion_matrix
[[17085    71]
 [ 1676    50]]
done in 0.693827s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25950205031724244
0.26193505867486067
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2704
           1       0.41      0.05      0.09       262

    accuracy                           0.91      2966
   macro avg       0.66      0.52      0.52      2966
weighted avg       0.87      0.91      0.88      2966

Confusion_matrix
[[2685   19]
 [ 249   13]]
done in 0.678557s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25950205031724244
0.2462231857917041
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2762
           1       0.54      0.07      0.13       254

    accuracy                           0.92      3016
   macro avg       0.73      0.53      0.54      3016
weighted avg       0.89      0.92      0.89      3016

Confusion_matrix
[[2746   16]
 [ 235   19]]
done in 0.665298s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.44      0.00      0.01      1990

    accuracy                           0.91     21898
   macro avg       0.67      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19899     9]
 [ 1983     7]]
done in 20.794431s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
   

  _warn_prf(average, modifier, msg_start, len(result))


0.26488991426822145
0.2593327616459042
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2762
           1       0.00      0.00      0.00       254

    accuracy                           0.92      3016
   macro avg       0.46      0.50      0.48      3016
weighted avg       0.84      0.92      0.88      3016

Confusion_matrix
[[2760    2]
 [ 254    0]]
done in 0.577392s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.51      0.02      0.04      1990

    accuracy                           0.91     21898
   macro avg       0.71      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19865    43]
 [ 1946    44]]
done in 33.302562s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
   

threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.702490425807963
Balanced accuracy score of test is  0.7109545855177947
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.406
threshold:0.2, J-value:0.27099999999999996
threshold:0.30000000000000004, J-value:0.14400000000000002
threshold:0.4, J-value:0.061000000000000006
threshold:0.5, J-value:0.017
threshold:0.6000000000000001, J-value:0.002
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7032687004314242
Balanced accuracy score of test is  0.7093143904466599
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.395
threshold:0.2, J-value:0.271
threshold:0.30000000000000004, J-value:0.152
threshold:0.4, J-value:0.068
threshold:0.5, J-value:0.034999999999999996
threshold:0.6000000000000001, J-value:0.008
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


0.09936984973339796 0.10088853459840959
0.10082404265632573
(65706, 86)
X train 65706
Y train 65706
21898 18875 3023
21898 18875 3023
21898 19009 2889
21898 19009 2889


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.263464598857356
0.25485150505345416
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     20006
           1       0.40      0.04      0.07      1892

    accuracy                           0.91     21898
   macro avg       0.66      0.52      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19904   102]
 [ 1825    67]]
done in 0.563977s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.263464598857356
0.25643792329866016
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     19975
           1       0.47      0.04      0.08      1923

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.52     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19880    95]
 [ 1839    84]]
done in 0.647398s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.263464598857356
0.2557917755637069
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     17241
           1       0.39      0.03      0.06      1634

    accuracy                           0.91     18875
   macro avg       0.65      0.51      0.51     18875
weighted avg       0.87      0.91      0.88     18875

Confusion_matrix
[[17162    79]
 [ 1583    51]]
done in 0.615009s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.263464598857356
0.25990447202063816
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17310
           1       0.46      0.04      0.07      1699

    accuracy                           0.91     19009
   macro avg       0.69      0.52      0.51     19009
weighted avg       0.87      0.91      0.87     19009

Confusion_matrix
[[17238    72]
 [ 1637    62]]
done in 0.695251s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.263464598857356
0.24898064634322484
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2765
           1       0.41      0.06      0.11       258

    accuracy                           0.91      3023
   macro avg       0.66      0.53      0.53      3023
weighted avg       0.88      0.91      0.88      3023

Confusion_matrix
[[2742   23]
 [ 242   16]]
done in 0.769689s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.263464598857356
0.23362877665411882
Classification report
              precision    recall  f1-score   support

           0       0.93      0.99      0.96      2665
           1       0.49      0.10      0.16       224

    accuracy                           0.92      2889
   macro avg       0.71      0.54      0.56      2889
weighted avg       0.89      0.92      0.90      2889

Confusion_matrix
[[2642   23]
 [ 202   22]]
done in 0.771905s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     20006
           1       0.50      0.00      0.01      1892

    accuracy                           0.91     21898
   macro avg       0.71      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19999     7]
 [ 1885     7]]
done in 19.481995s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19975
    

threshold:0.2, J-value:0.266
threshold:0.30000000000000004, J-value:0.152
threshold:0.4, J-value:0.07100000000000001
threshold:0.5, J-value:0.030000000000000002
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7015204106844057
Balanced accuracy score of test is  0.7060154105788763
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.399
threshold:0.2, J-value:0.263
threshold:0.30000000000000004, J-value:0.148
threshold:0.4, J-value:0.066
threshold:0.5, J-value:0.026
threshold:0.6000000000000001, J-value:0.01
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6994874376832374
Balanced accuracy score of test is  0.703331538006691
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.42800000000000005
thr

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


0.09865796155241205
(65731, 86)
X train 65731
Y train 65731
21898 18970 2928
21898 18970 2928
21898 18892 3006
21898 18892 3006


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2597885829565758
0.26394558318736044
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.43      0.03      0.06      1987

    accuracy                           0.91     21898
   macro avg       0.67      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19820    91]
 [ 1918    69]]
done in 0.661306s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2597885829565758
0.2608880096839597
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.44      0.03      0.06      1971

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19852    75]
 [ 1911    60]]
done in 0.670689s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2597885829565758
0.265881722628289
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17229
           1       0.42      0.03      0.06      1741

    accuracy                           0.91     18970
   macro avg       0.67      0.51      0.51     18970
weighted avg       0.87      0.91      0.87     18970

Confusion_matrix
[[17152    77]
 [ 1685    56]]
done in 0.615333s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2597885829565758
0.2605472600309407
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17198
           1       0.44      0.03      0.05      1694

    accuracy                           0.91     18892
   macro avg       0.68      0.51      0.50     18892
weighted avg       0.87      0.91      0.87     18892

Confusion_matrix
[[17143    55]
 [ 1651    43]]
done in 0.547099s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2597885829565758
0.25140167430948684
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2682
           1       0.48      0.05      0.10       246

    accuracy                           0.92      2928
   macro avg       0.70      0.52      0.53      2928
weighted avg       0.88      0.92      0.88      2928

Confusion_matrix
[[2668   14]
 [ 233   13]]
done in 0.552607s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2597885829565758
0.26302954077006585
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2729
           1       0.46      0.06      0.11       277

    accuracy                           0.91      3006
   macro avg       0.69      0.53      0.53      3006
weighted avg       0.87      0.91      0.87      3006

Confusion_matrix
[[2709   20]
 [ 260   17]]
done in 0.622625s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.50      0.00      0.00      1987

    accuracy                           0.91     21898
   macro avg       0.70      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19907     4]
 [ 1983     4]]
done in 19.127515s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
   

  _warn_prf(average, modifier, msg_start, len(result))


0.2656621451580536
0.27092013518165314
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.32      0.00      0.01      1987

    accuracy                           0.91     21898
   macro avg       0.61      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19898    13]
 [ 1981     6]]
done in 0.648963s
0.2656621451580536
0.26946899774235417
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.32      0.00      0.01      1971

    accuracy                           0.91     21898
   macro avg       0.61      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19914    13]
 [ 1965     6]]
done in 0.652368s
0.2656621451580536
0.27309495825452984
Classification report
              precision    reca

True positive rate of class 1 is  0.738
True positive rate of class 2 is  0.751
Positive prediction rate of class 1 is  0.379
Positive prediction rate of class 2 is  0.39
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.346
threshold:0.2, J-value:0.20400000000000001
threshold:0.30000000000000004, J-value:0.10600000000000001
threshold:0.4, J-value:0.004
threshold:0.5, J-value:0.002
threshold:0.6000000000000001, J-value:0.002
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.672955143594835
Balanced accuracy score of test is  0.6669901456908278
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.346
threshold:0.2, J-value:0.20899999999999996
threshold:0.30000000000000004, J-value:0.10700000000000001
threshold:0.4, J-value:0.003
threshold:0.5, J-value:0.001
threshold:0.6000000000000001, J-value:0.001
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(8944, 87)
(56750, 87)
0.09594412449454724 0.09967833197690192
0.09962014459012376
(65724, 86)
X train 65724
Y train 65724
21898 18842 3056
21898 18842 3056
21898 18914 2984
21898 18914 2984


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26260772139083455
0.25769002306734395
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.42      0.03      0.06      1954

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19860    84]
 [ 1893    61]]
done in 0.673687s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26260772139083455
0.25838909838441804
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
           1       0.47      0.04      0.07      1943

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19874    81]
 [ 1871    72]]
done in 0.730400s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26260772139083455
0.2584035388263612
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17147
           1       0.42      0.03      0.06      1695

    accuracy                           0.91     18842
   macro avg       0.67      0.51      0.50     18842
weighted avg       0.87      0.91      0.87     18842

Confusion_matrix
[[17078    69]
 [ 1644    51]]
done in 0.540645s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26260772139083455
0.2599797563647646
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17231
           1       0.43      0.03      0.06      1683

    accuracy                           0.91     18914
   macro avg       0.67      0.51      0.51     18914
weighted avg       0.87      0.91      0.87     18914

Confusion_matrix
[[17163    68]
 [ 1631    52]]
done in 0.530653s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26260772139083455
0.2532907874876963
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2797
           1       0.40      0.04      0.07       259

    accuracy                           0.91      3056
   macro avg       0.66      0.52      0.51      3056
weighted avg       0.87      0.91      0.88      3056

Confusion_matrix
[[2782   15]
 [ 249   10]]
done in 0.529196s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.26260772139083455
0.24830675755322662
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96      2724
           1       0.61      0.08      0.14       260

    accuracy                           0.92      2984
   macro avg       0.76      0.54      0.55      2984
weighted avg       0.89      0.92      0.88      2984

Confusion_matrix
[[2711   13]
 [ 240   20]]
done in 0.519074s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.43      0.00      0.00      1954

    accuracy                           0.91     21898
   macro avg       0.67      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19940     4]
 [ 1951     3]]
done in 19.385488s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
  

threshold:0.30000000000000004, J-value:0.147
threshold:0.4, J-value:0.07400000000000001
threshold:0.5, J-value:0.027
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7060576933735853
Balanced accuracy score of test is  0.70496770074407
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.41500000000000004
threshold:0.2, J-value:0.27099999999999996
threshold:0.30000000000000004, J-value:0.148
threshold:0.4, J-value:0.076
threshold:0.5, J-value:0.026
threshold:0.6000000000000001, J-value:0.013000000000000001
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7074112227204876
Balanced accuracy score of test is  0.70107307391682
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.393
threshold:0.2, J-val

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(8973, 87)
(56721, 87)
0.09320175438596491 0.0980099887723102
0.097953216374269
(65733, 86)
X train 65733
Y train 65733
21898 18920 2978
21898 18920 2978
21898 18865 3033
21898 18865 3033


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25956929144131974
0.2633176829237143
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.42      0.03      0.06      2005

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19803    90]
 [ 1939    66]]
done in 0.609400s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25956929144131974
0.26230421956300937
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.49      0.03      0.06      1991

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19837    70]
 [ 1924    67]]
done in 0.622782s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25956929144131974
0.261926883185371
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17191
           1       0.42      0.03      0.06      1729

    accuracy                           0.91     18920
   macro avg       0.66      0.51      0.50     18920
weighted avg       0.87      0.91      0.87     18920

Confusion_matrix
[[17116    75]
 [ 1675    54]]
done in 0.663796s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25956929144131974
0.26440701415101514
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17135
           1       0.47      0.03      0.05      1730

    accuracy                           0.91     18865
   macro avg       0.69      0.51      0.50     18865
weighted avg       0.87      0.91      0.87     18865

Confusion_matrix
[[17082    53]
 [ 1683    47]]
done in 0.640148s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25956929144131974
0.2721537914023766
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2702
           1       0.44      0.04      0.08       276

    accuracy                           0.91      2978
   macro avg       0.68      0.52      0.51      2978
weighted avg       0.87      0.91      0.87      2978

Confusion_matrix
[[2687   15]
 [ 264   12]]
done in 0.634659s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.25956929144131974
0.24922501748495804
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2772
           1       0.54      0.08      0.13       261

    accuracy                           0.91      3033
   macro avg       0.73      0.54      0.54      3033
weighted avg       0.89      0.91      0.88      3033

Confusion_matrix
[[2755   17]
 [ 241   20]]
done in 0.645759s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.63      0.01      0.01      2005

    accuracy                           0.91     21898
   macro avg       0.77      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19886     7]
 [ 1993    12]]
done in 17.746685s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
  

  _warn_prf(average, modifier, msg_start, len(result))


0.2651559464007411
0.2709021114332017
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.08      0.00      0.00      2005

    accuracy                           0.91     21898
   macro avg       0.50      0.50      0.48     21898
weighted avg       0.83      0.91      0.86     21898

Confusion_matrix
[[19882    11]
 [ 2004     1]]
done in 0.609815s
0.2651559464007411
0.27202870881314756
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.33      0.00      0.00      1991

    accuracy                           0.91     21898
   macro avg       0.62      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19903     4]
 [ 1989     2]]
done in 0.599353s
0.2651559464007411
0.2696438972228694
Classification report
              precision    recall

threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6922207382615132
Balanced accuracy score of test is  0.7041411653480618
True positive rate of class 1 is  0.734
True positive rate of class 2 is  0.77
Positive prediction rate of class 1 is  0.375
Positive prediction rate of class 2 is  0.397
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.37400000000000005
threshold:0.2, J-value:0.21000000000000002
threshold:0.30000000000000004, J-value:0.098
threshold:0.4, J-value:0.008
threshold:0.5, J-value:-0.001
threshold:0.6000000000000001, J-value:-0.001
threshold:0.7000000000000001, J-value:-0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.687285719246347
Balanced accuracy score of test is  0.6872702163503284
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.38099999999999995
threshold:0.2, J-value:0.21500000000000002
threshold:0.30000000000000004, J-valu

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(9011, 87)
(56683, 87)
0.09316996239233288 0.09987193418193108
0.09984229042824215
(65749, 86)
X train 65749
Y train 65749
21898 18905 2993
21898 18905 2993
21898 18918 2980
21898 18918 2980


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2616645787273284
0.2622271780959184
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.50      0.04      0.08      1967

    accuracy                           0.91     21898
   macro avg       0.71      0.52      0.52     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19847    84]
 [ 1882    85]]
done in 0.811297s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2616645787273284
0.2594175931917696
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
           1       0.44      0.03      0.06      1942

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19871    85]
 [ 1875    67]]
done in 1.181156s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2616645787273284
0.2606922108291378
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17217
           1       0.52      0.04      0.07      1688

    accuracy                           0.91     18905
   macro avg       0.72      0.52      0.51     18905
weighted avg       0.88      0.91      0.87     18905

Confusion_matrix
[[17154    63]
 [ 1620    68]]
done in 0.882652s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2616645787273284
0.2604733500510281
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17231
           1       0.42      0.03      0.05      1687

    accuracy                           0.91     18918
   macro avg       0.67      0.51      0.50     18918
weighted avg       0.87      0.91      0.87     18918

Confusion_matrix
[[17163    68]
 [ 1638    49]]
done in 0.826409s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2616645787273284
0.2719226529300268
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2714
           1       0.45      0.06      0.11       279

    accuracy                           0.91      2993
   macro avg       0.68      0.53      0.53      2993
weighted avg       0.87      0.91      0.87      2993

Confusion_matrix
[[2693   21]
 [ 262   17]]
done in 0.798110s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2616645787273284
0.25271530853960406
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96      2725
           1       0.51      0.07      0.12       255

    accuracy                           0.91      2980
   macro avg       0.72      0.53      0.54      2980
weighted avg       0.88      0.91      0.88      2980

Confusion_matrix
[[2708   17]
 [ 237   18]]
done in 0.801922s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.47      0.00      0.01      1967

    accuracy                           0.91     21898
   macro avg       0.69      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19923     8]
 [ 1960     7]]
done in 24.858357s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
   

threshold:0.2, J-value:0.26
threshold:0.30000000000000004, J-value:0.154
threshold:0.4, J-value:0.07200000000000001
threshold:0.5, J-value:0.03899999999999999
threshold:0.6000000000000001, J-value:0.009999999999999998
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6952195674976993
Balanced accuracy score of test is  0.7013302592170334
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.386
threshold:0.2, J-value:0.254
threshold:0.30000000000000004, J-value:0.155
threshold:0.4, J-value:0.07100000000000001
threshold:0.5, J-value:0.036000000000000004
threshold:0.6000000000000001, J-value:0.009999999999999998
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6930072042484186
Balanced accuracy score of test is  0.7013550005354556
threshold:0.0,

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(8852, 87)
(56842, 87)
0.09297444128904803 0.09886328487472935
0.09877762686751451
(65741, 86)
X train 65741
Y train 65741
21898 18847 3051
21898 18847 3051
21898 18817 3081
21898 18817 3081


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2615714069393591
0.25524203471459056
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.50      0.04      0.07      1925

    accuracy                           0.91     21898
   macro avg       0.71      0.52      0.51     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19901    72]
 [ 1852    73]]
done in 0.594829s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2615714069393591
0.26498524637218457
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
           1       0.44      0.04      0.07      2032

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19775    91]
 [ 1960    72]]
done in 0.553952s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2615714069393591
0.2551367281798623
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17189
           1       0.49      0.03      0.06      1658

    accuracy                           0.91     18847
   macro avg       0.70      0.51      0.51     18847
weighted avg       0.88      0.91      0.88     18847

Confusion_matrix
[[17131    58]
 [ 1603    55]]
done in 0.533497s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2615714069393591
0.2658462786043097
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     17067
           1       0.44      0.03      0.06      1750

    accuracy                           0.91     18817
   macro avg       0.67      0.51      0.50     18817
weighted avg       0.87      0.91      0.87     18817

Confusion_matrix
[[17000    67]
 [ 1698    52]]
done in 0.565340s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2615714069393591
0.2558925467631066
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      2784
           1       0.56      0.07      0.12       267

    accuracy                           0.91      3051
   macro avg       0.74      0.53      0.54      3051
weighted avg       0.89      0.91      0.88      3051

Confusion_matrix
[[2770   14]
 [ 249   18]]
done in 0.578154s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2615714069393591
0.2597265500035059
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      2799
           1       0.45      0.07      0.12       282

    accuracy                           0.91      3081
   macro avg       0.68      0.53      0.54      3081
weighted avg       0.87      0.91      0.88      3081

Confusion_matrix
[[2775   24]
 [ 262   20]]
done in 0.525899s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.38      0.00      0.01      1925

    accuracy                           0.91     21898
   macro avg       0.65      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19965     8]
 [ 1920     5]]
done in 19.015445s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
    

threshold:0.30000000000000004, J-value:0.16
threshold:0.4, J-value:0.079
threshold:0.5, J-value:0.034
threshold:0.6000000000000001, J-value:0.012
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.70201313331439
Balanced accuracy score of test is  0.7067917052123242
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.403
threshold:0.2, J-value:0.26
threshold:0.30000000000000004, J-value:0.155
threshold:0.4, J-value:0.076
threshold:0.5, J-value:0.030000000000000002
threshold:0.6000000000000001, J-value:0.013000000000000001
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7017581305855198
Balanced accuracy score of test is  0.7062684210966861
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.40700000000000003
threshold:0.2, J-value:0.28800000

In [9]:
def add_mean_sd(records, result_table, overall_records, type):
    """Append mean / standard-deviation summary rows for one model's results.

    Three rows are produced from ``result_table``:

    * a row holding ``.mean()`` of every detail column, appended to ``records``;
    * a row holding ``.std()`` of every detail column, appended to ``records``
      right after the mean row;
    * a condensed row holding the means of a subset of those columns plus a
      ``'type'`` entry, appended to ``overall_records``.

    Both lists are modified in place.

    Parameters
    ----------
    records : list of dict
        List that receives the mean row and then the std row. Presumably it
        already holds the per-run rows that ``result_table`` was built from --
        confirm against the caller.
    result_table : pandas.DataFrame
        Table indexed by column name; every name in ``detail_columns`` below
        must be present, otherwise the column lookup raises ``KeyError``.
    overall_records : list of dict
        List that receives the condensed row.
    type : object
        Value stored under the ``'type'`` key of the condensed row. The name
        shadows the ``type`` builtin inside this function; it is kept so that
        existing callers continue to work.

    Returns
    -------
    tuple
        ``(pd.DataFrame(records), overall_records)`` -- the DataFrame is built
        after the two summary rows were appended, and ``overall_records`` is the
        very same list object that was passed in.
    """
    # Columns summarised in the mean and std rows. The order is the key order
    # of the dictionaries appended to `records`.
    detail_columns = (
        'auroc',
        'overall threshold', 'white threshold', 'black threshold',
        'overall ba validation', 'overall ba test',
        'white ba validation', 'white ba test',
        'black ba validation', 'black ba test',
        'overall precision', 'overall recall', 'overall tpr', 'overall tnr', 'overall pd',
        'white precision', 'white recall', 'white tpr', 'white tnr', 'white pd',
        'black precision', 'black recall', 'black tpr', 'black tnr', 'black pd',
        'eod', 'di',
    )
    # Subset reported in the cross-model table (means only, no validation
    # balanced accuracy, precision, recall or tnr).
    overall_columns = (
        'auroc',
        'overall threshold', 'white threshold', 'black threshold',
        'overall ba test', 'white ba test', 'black ba test',
        'overall tpr', 'overall pd',
        'white tpr', 'white pd',
        'black tpr', 'black pd',
        'eod', 'di',
    )

    # Build every row before touching the caller's lists, so a missing column
    # raises without leaving either list partially extended.
    mean_row = {column: result_table[column].mean() for column in detail_columns}
    sd_row = {column: result_table[column].std() for column in detail_columns}

    # 'type' goes first so it is the leading key of the condensed row; the
    # means are reused from mean_row instead of being recomputed.
    overall_row = {'type': type}
    overall_row.update((column, mean_row[column]) for column in overall_columns)

    records.append(mean_row)
    records.append(sd_row)
    overall_records.append(overall_row)

    pd_result = pd.DataFrame(records)
    return pd_result, overall_records

In [10]:
# Cross-model summary: add_mean_sd appends one condensed row per model to this
# list in place, so `overall_table` and `overall_records` are the same object.
overall_table = []
overall_records = overall_table

# Each result_* name is rebound to the DataFrame returned by add_mean_sd, which
# is built from the matching records_* list after the mean and std rows were
# appended to it.
# NOTE(review): re-running this cell without re-running the cells that create
# records_* / result_* would fold the summary rows back into the statistics.
result_lr = add_mean_sd(records_lr, result_lr, overall_records, 'lr')[0]
result_rf = add_mean_sd(records_rf, result_rf, overall_records, 'rf')[0]
result_dt = add_mean_sd(records_dt, result_dt, overall_records, 'dt')[0]
result_gbt = add_mean_sd(records_gbt, result_gbt, overall_records, 'gbt')[0]

# Per-model tables, written in the order lr, rf, dt, gbt.
result_path = '/Users/lifuchen/Desktop/research/resample_data/'
per_model_csv = (
    (result_lr, 'race-lr-resample-proportion-result.csv'),
    (result_rf, 'race-rf-resample-proportion-result.csv'),
    (result_dt, 'race-dt-resample-proportion-result.csv'),
    (result_gbt, 'race-gbt-resample-proportion-result.csv'),
)
for summary_table, csv_name in per_model_csv:
    summary_table.to_csv(path.join(result_path, csv_name), index=False)

# Cross-model table: one row of means per model, in the order appended above.
overall_result = pd.DataFrame(overall_table)
result_path = '/Users/lifuchen/Desktop/research/resample_result/'
overall_result.to_csv(path.join(result_path, 'race-resample-proportion.csv'), index=False)