In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import imblearn
from os import path
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
import sklearn.preprocessing
from sklearn import preprocessing
from sklearn.preprocessing import Normalizer
import src.lib.utility_classfier as uclf
import src.lib.optimal_threhold_related as thres
import src.lib.fairness_tests as fair

In [2]:
# Read the full dataset from the local CSV export into a DataFrame.
data_path = '/Users/lifuchen/Desktop/research/data.csv'
df = pd.read_csv(filepath_or_buffer=data_path)
# Cell output: (number of rows, number of columns).
df.shape

(109490, 89)

In [3]:
# Feature matrix: every column except the 'GRID' column and the 'Class' label.
X = df.drop(columns=['GRID', 'Class'])
# Label vector taken from the 'Class' column as a NumPy array.
y = df['Class'].values
# Cell output: shape of the feature matrix.
X.shape

(109490, 87)

In [4]:
def save_prediction(classifier, characteristic, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_male_scaled, y_val_male, X_test_male_scaled, y_test_male, X_val_female_scaled, y_val_female, X_test_female_scaled, y_test_female, result_path='/Users/lifuchen/Desktop/research/predictions/'):
    """Score all six evaluation splits with one classifier and save the scores as CSV.

    The classifier is looked up by name on the ``uclf`` module and called once
    per split as ``fn(X_train_scaled, y_train, X_eval, y_eval)``; whatever it
    returns is stored as that split's score column.

    NOTE(review): every call receives the training data again, so the helper
    presumably re-fits the model for each split -- confirm against
    ``utility_classfier``. If the fit is not seeded, the validation and test
    scores would come from different fitted models.

    Parameters
    ----------
    classifier : str
        Name of the function on ``uclf`` to call (e.g. "random_forest").
    characteristic : object
        Tag appended to the classifier name to build the output file name;
        converted with ``str()``.
    X_train_scaled, y_train
        Training features and labels, forwarded to every classifier call.
    X_val_scaled, y_val, X_test_scaled, y_test
        Overall validation / test splits.
    X_val_male_scaled, y_val_male, X_test_male_scaled, y_test_male
        Validation / test splits of the first subgroup (columns ``*_1_score``).
    X_val_female_scaled, y_val_female, X_test_female_scaled, y_test_female
        Validation / test splits of the second subgroup (columns ``*_2_score``).
    result_path : str, optional
        Directory the CSV is written to. Defaults to the location that was
        previously hard-coded; it is created if it does not exist.

    Returns
    -------
    None
        The result is the file ``<classifier><characteristic>prediction.csv``
        inside ``result_path``.
    """
    # Local import: the notebook's import cell only exposes `path` from os.
    import os

    method_to_call = getattr(uclf, classifier)

    # (output column, features, labels) in the order the splits are scored
    # and the order the columns appear in the CSV.
    splits = (
        ('val_score', X_val_scaled, y_val),
        ('test_score', X_test_scaled, y_test),
        ('val_1_score', X_val_male_scaled, y_val_male),
        ('test_1_score', X_test_male_scaled, y_test_male),
        ('val_2_score', X_val_female_scaled, y_val_female),
        ('test_2_score', X_test_female_scaled, y_test_female),
    )
    my_dict = {}
    for column, X_eval, y_eval in splits:
        my_dict[column] = method_to_call(X_train_scaled, y_train, X_eval, y_eval)

    # The splits have different lengths. Building with orient='index' and then
    # transposing pads the shorter columns with NaN instead of raising.
    overall_prediction = pd.DataFrame.from_dict(my_dict, orient='index').transpose()

    # Make sure the target directory exists so to_csv does not fail on a fresh setup.
    os.makedirs(result_path, exist_ok=True)
    filename = str(classifier) + str(characteristic) + "prediction.csv"
    overall_prediction.to_csv(path.join(result_path, filename), index=False)

In [5]:
def get_result (classifier, characteristic, records, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_male_scaled, y_val_male, X_test_male_scaled, y_test_male, X_val_female_scaled, y_val_female, X_test_female_scaled, y_test_female, result_path='/Users/lifuchen/Desktop/research/predictions/'):
    """Load saved scores for one classifier, compute metrics, and append them to ``records``.

    Reads ``<classifier><characteristic>prediction.csv`` from ``result_path``
    (the file written by ``save_prediction``). For the overall population and
    for each of the two subgroups it then:

    * picks a threshold on that group's own validation scores via
      ``balance_accuracy`` (defined elsewhere in this notebook),
    * computes balanced accuracy on validation and test with that threshold,
    * computes precision / recall / tpr / tnr / pd on the test split via
      ``thres.calculate_precision_metrics``.

    AUROC is computed on the overall test split only. ``fair.get_EOD`` and
    ``fair.get_SP`` are evaluated on the two subgroup test splits, each with
    its own subgroup threshold.

    Parameters
    ----------
    classifier : str
        Classifier name used as the file-name prefix.
    characteristic : object
        File-name tag; converted with ``str()`` so the name is built the same
        way ``save_prediction`` builds it.
    records : list
        List that receives one dict of metrics (mutated in place).
    y_val, y_test, y_val_male, y_test_male, y_val_female, y_test_female
        True labels of the corresponding splits.
    X_train_scaled, y_train, X_val_scaled, X_test_scaled, X_val_male_scaled,
    X_test_male_scaled, X_val_female_scaled, X_test_female_scaled
        Not used here; kept so the call signature mirrors ``save_prediction``.
    result_path : str, optional
        Directory holding the prediction CSV. Defaults to the location that
        was previously hard-coded.

    Returns
    -------
    None
    """
    filename = str(classifier) + str(characteristic) + "prediction.csv"
    prediction = pd.read_csv(path.join(result_path, filename))

    # Columns shorter than the longest split were NaN-padded when the CSV was
    # written; dropping NaN restores each split's own scores.
    y_val_score = prediction['val_score'].dropna()
    y_test_score = prediction['test_score'].dropna()

    y_val_score_male = prediction['val_1_score'].dropna()
    y_test_score_male = prediction['test_1_score'].dropna()

    y_val_score_female = prediction['val_2_score'].dropna()
    y_test_score_female = prediction['test_2_score'].dropna()

    # Overall population.
    threshold, ba_val, ba_test = balance_accuracy(y_val, y_val_score, y_test, y_test_score)
    auroc = roc_auc_score(y_test, y_test_score)
    precision, recall, tpr, tnr, pd_overall = thres.calculate_precision_metrics(y_test, y_test_score, threshold)

    # First subgroup ("male"): threshold chosen on its own validation scores.
    threshold_male, ba_val_male, ba_test_male = balance_accuracy(y_val_male, y_val_score_male, y_test_male, y_test_score_male)
    precision_male, recall_male, tpr_male, tnr_male, pd_male = thres.calculate_precision_metrics(y_test_male, y_test_score_male, threshold_male)

    # Second subgroup ("female"): threshold chosen on its own validation scores.
    threshold_female, ba_val_female, ba_test_female = balance_accuracy(y_val_female, y_val_score_female, y_test_female, y_test_score_female)
    precision_female, recall_female, tpr_female, tnr_female, pd_female = thres.calculate_precision_metrics(y_test_female, y_test_score_female, threshold_female)

    # Group-fairness metrics on the test split, using the per-group thresholds.
    eod = fair.get_EOD(y_test_male, y_test_score_male, threshold_male, y_test_female, y_test_score_female, threshold_female)
    sp = fair.get_SP(y_test_male, y_test_score_male, threshold_male, y_test_female, y_test_score_female, threshold_female)

    records.append({
        'auroc': auroc,
        'overall threshold': threshold,
        'male threshold': threshold_male,
        'female threshold': threshold_female,
        'overall ba validation': ba_val,
        'overall ba test': ba_test,
        'male ba validation': ba_val_male,
        'male ba test': ba_test_male,
        'female ba validation': ba_val_female,
        'female ba test': ba_test_female,
        'overall precision': precision,
        'overall recall': recall,
        'overall tpr': tpr,
        'overall tnr': tnr,
        'overall pd': pd_overall,
        'male precision': precision_male,
        'male recall': recall_male,
        'male tpr': tpr_male,
        'male tnr': tnr_male,
        'male pd': pd_male,
        'female precision': precision_female,
        'female recall': recall_female,
        'female tpr': tpr_female,
        'female tnr': tnr_female,
        'female pd': pd_female,
        'eod': eod,
        # NOTE(review): this key is named 'di' but stores the fair.get_SP result.
        # The key is left unchanged so existing result tables keep their columns.
        'di': sp,
        })

In [6]:
def balance_accuracy (y_val, y_val_score,y_test, y_test_score):
    """Pick a threshold on the validation split and report balanced accuracy.

    The threshold comes from ``thres.get_optimal_threshold_Jvalue`` applied to
    the validation labels and scores; the same threshold is then used to
    compute balanced accuracy on both the validation and the test split.
    Each value is printed as it is computed.

    Returns a tuple ``(threshold, validation balanced accuracy,
    test balanced accuracy)``.
    """
    cutoff, _ = thres.get_optimal_threshold_Jvalue(y_val, y_val_score)
    print("Optimal threshold by J value is ", cutoff)

    # (message, labels, scores) for each split, evaluated in this order.
    reports = (
        ("Balanced accuracy score of val is ", y_val, y_val_score),
        ("Balanced accuracy score of test is ", y_test, y_test_score),
    )
    accuracies = []
    for message, truth, scores in reports:
        value = thres.calculate_balanced_accuracy(truth, scores, cutoff)
        print(message, value)
        accuracies.append(value)

    val_accuracy, test_accuracy = accuracies
    return cutoff, val_accuracy, test_accuracy

In [7]:
def fairness_metrics (X, y, attribute, random_state):
    """Run one split / scale / score / evaluate cycle for all four classifiers.

    The data is split by ``fair.split_by_trait_balance_proportion_no_protected_trait``
    into train, validation and test sets plus two subgroup views of the
    validation and test sets (named male / female here). Features are scaled
    with a ``MaxAbsScaler`` fitted on the training set only. Scores for each
    classifier are written with ``save_prediction`` and then evaluated with
    ``get_result``, which appends to the notebook-level lists ``records_lr``,
    ``records_rf``, ``records_dt`` and ``records_gbt``.
    """
    (X_train, y_train,
     X_val, y_val,
     X_test, y_test,
     X_val_female, X_val_male, y_val_female, y_val_male,
     X_test_female, X_test_male, y_test_female, y_test_male) = \
        fair.split_by_trait_balance_proportion_no_protected_trait(X, y, attribute, random_state)

    # Row counts: training set first, then overall / male / female for each split.
    print("X train", X_train.shape[0])
    print("Y train", y_train.shape[0])
    size_checks = (
        (X_val, X_val_male, X_val_female),
        (y_val, y_val_male, y_val_female),
        (X_test, X_test_male, X_test_female),
        (y_test, y_test_male, y_test_female),
    )
    for whole, male_part, female_part in size_checks:
        print(whole.shape[0], male_part.shape[0], female_part.shape[0])

    # Fit the scaler on the training data only, then apply it to every other split.
    scaler = preprocessing.MaxAbsScaler()
    train_scaled = scaler.fit_transform(X_train)
    (test_scaled, test_male_scaled, test_female_scaled,
     val_scaled, val_male_scaled, val_female_scaled) = (
        scaler.transform(part)
        for part in (X_test, X_test_male, X_test_female, X_val, X_val_male, X_val_female)
    )

    characteristic = "".join([attribute, "resample-by-proportion", str(random_state)])

    # Positional arguments shared by save_prediction and get_result.
    split_args = (
        train_scaled, y_train,
        val_scaled, y_val,
        test_scaled, y_test,
        val_male_scaled, y_val_male,
        test_male_scaled, y_test_male,
        val_female_scaled, y_val_female,
        test_female_scaled, y_test_female,
    )

    # Write all prediction files first ...
    for clf_name in ("logic_regression", "random_forest", "decision_tree", "gradiant_boosting"):
        save_prediction(clf_name, characteristic, *split_args)

    # ... then read them back and append one metrics record per classifier.
    get_result("logic_regression", characteristic, records_lr, *split_args)
    get_result("random_forest", characteristic, records_rf, *split_args)
    get_result("decision_tree", characteristic, records_dt, *split_args)
    get_result("gradiant_boosting", characteristic, records_gbt, *split_args)

In [8]:
# One list of metric records per classifier; fairness_metrics appends to these.
records_lr, records_rf, records_dt, records_gbt = [], [], [], []

# Repeat the whole split / train / evaluate cycle for seeds 0 through 9.
for random_state in range(0, 10):
    fairness_metrics(X, y, "GENDER", random_state)

# Turn each list of records into a table with one row per seed.
result_lr, result_rf, result_dt, result_gbt = (
    pd.DataFrame(rows)
    for rows in (records_lr, records_rf, records_dt, records_gbt)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(23364, 88)
(42330, 88)
0.12175917034760898 0.08538461538461538
0.12174358974358974
(67112, 87)
X train 67112
Y train 67112
21898 7782 14116
21898 7782 14116
21898 7707 14191
21898 7707 14191


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2900474063766968
0.26632413896430224
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.44      0.04      0.07      1994

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19816    88]
 [ 1924    70]]
done in 0.660612s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2900474063766968
0.26813915239089947
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.39      0.03      0.06      1964

    accuracy                           0.91     21898
   macro avg       0.65      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19836    98]
 [ 1902    62]]
done in 0.644881s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2900474063766968
0.30757232922912725
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6915
           1       0.50      0.03      0.05       867

    accuracy                           0.89      7782
   macro avg       0.70      0.51      0.50      7782
weighted avg       0.85      0.89      0.84      7782

Confusion_matrix
[[6892   23]
 [ 844   23]]
done in 0.670199s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2900474063766968
0.30475578647059626
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6860
           1       0.51      0.02      0.05       847

    accuracy                           0.89      7707
   macro avg       0.70      0.51      0.49      7707
weighted avg       0.85      0.89      0.84      7707

Confusion_matrix
[[6840   20]
 [ 826   21]]
done in 0.544270s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2900474063766968
0.2435844523221325
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96     12989
           1       0.42      0.04      0.08      1127

    accuracy                           0.92     14116
   macro avg       0.67      0.52      0.52     14116
weighted avg       0.88      0.92      0.89     14116

Confusion_matrix
[[12924    65]
 [ 1080    47]]
done in 0.583437s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2900474063766968
0.24825299927609265
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96     13074
           1       0.34      0.04      0.07      1117

    accuracy                           0.92     14191
   macro avg       0.63      0.52      0.51     14191
weighted avg       0.88      0.92      0.89     14191

Confusion_matrix
[[12996    78]
 [ 1076    41]]
done in 0.577790s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.38      0.00      0.01      1994

    accuracy                           0.91     21898
   macro avg       0.65      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19891    13]
 [ 1986     8]]
done in 17.403412s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934

threshold:0.2, J-value:0.28
threshold:0.30000000000000004, J-value:0.15
threshold:0.4, J-value:0.079
threshold:0.5, J-value:0.031000000000000003
threshold:0.6000000000000001, J-value:0.011
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7029095727697562
Balanced accuracy score of test is  0.6906734688831596
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.381
threshold:0.2, J-value:0.241
threshold:0.30000000000000004, J-value:0.10699999999999998
threshold:0.4, J-value:0.049999999999999996
threshold:0.5, J-value:0.024
threshold:0.6000000000000001, J-value:0.006
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6907807859650177
Balanced accuracy score of test is  0.6872436932269956
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.418
threshol

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(23381, 88)
(42313, 88)
0.12489776280971855 0.0869274833671556
0.12489403786380333
(67172, 87)
X train 67172
Y train 67172
21898 7665 14233
21898 7665 14233
21898 7807 14091
21898 7807 14091


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29384293083333485
0.2613568837094544
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19980
           1       0.42      0.03      0.06      1918

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19893    87]
 [ 1854    64]]
done in 0.595534s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29384293083333485
0.2618675763395544
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.43      0.03      0.06      1926

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19886    86]
 [ 1862    64]]
done in 0.606153s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29384293083333485
0.29908473402159264
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6836
           1       0.44      0.02      0.04       829

    accuracy                           0.89      7665
   macro avg       0.67      0.51      0.49      7665
weighted avg       0.84      0.89      0.84      7665

Confusion_matrix
[[6813   23]
 [ 811   18]]
done in 0.594872s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29384293083333485
0.2983175136242497
Classification report
              precision    recall  f1-score   support

           0       0.90      1.00      0.94      6982
           1       0.36      0.02      0.04       825

    accuracy                           0.89      7807
   macro avg       0.63      0.51      0.49      7807
weighted avg       0.84      0.89      0.85      7807

Confusion_matrix
[[6953   29]
 [ 809   16]]
done in 0.613644s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29384293083333485
0.24103903275445265
Classification report
              precision    recall  f1-score   support

           0       0.93      1.00      0.96     13144
           1       0.42      0.04      0.08      1089

    accuracy                           0.92     14233
   macro avg       0.67      0.52      0.52     14233
weighted avg       0.89      0.92      0.89     14233

Confusion_matrix
[[13080    64]
 [ 1043    46]]
done in 0.677310s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29384293083333485
0.24167279524654345
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12990
           1       0.46      0.04      0.08      1101

    accuracy                           0.92     14091
   macro avg       0.69      0.52      0.52     14091
weighted avg       0.89      0.92      0.89     14091

Confusion_matrix
[[12933    57]
 [ 1053    48]]
done in 0.597819s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19980
           1       0.58      0.00      0.01      1918

    accuracy                           0.91     21898
   macro avg       0.75      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19975     5]
 [ 1911     7]]
done in 18.085458s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     1997

  _warn_prf(average, modifier, msg_start, len(result))


0.30658506233917926
0.2696422537016649
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.00      0.00      0.00      1926

    accuracy                           0.91     21898
   macro avg       0.46      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19972     0]
 [ 1926     0]]
done in 0.636676s


  _warn_prf(average, modifier, msg_start, len(result))


0.30658506233917926
0.3067476162817166
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6836
           1       0.00      0.00      0.00       829

    accuracy                           0.89      7665
   macro avg       0.45      0.50      0.47      7665
weighted avg       0.80      0.89      0.84      7665

Confusion_matrix
[[6836    0]
 [ 829    0]]
done in 0.571423s


  _warn_prf(average, modifier, msg_start, len(result))


0.30658506233917926
0.30759040710100444
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6982
           1       0.00      0.00      0.00       825

    accuracy                           0.89      7807
   macro avg       0.45      0.50      0.47      7807
weighted avg       0.80      0.89      0.84      7807

Confusion_matrix
[[6982    0]
 [ 825    0]]
done in 0.558284s


  _warn_prf(average, modifier, msg_start, len(result))


0.30658506233917926
0.24867592756009804
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13144
           1       0.00      0.00      0.00      1089

    accuracy                           0.92     14233
   macro avg       0.46      0.50      0.48     14233
weighted avg       0.85      0.92      0.89     14233

Confusion_matrix
[[13144     0]
 [ 1089     0]]
done in 0.567335s


  _warn_prf(average, modifier, msg_start, len(result))


0.30658506233917926
0.24861739857508453
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12990
           1       0.00      0.00      0.00      1101

    accuracy                           0.92     14091
   macro avg       0.46      0.50      0.48     14091
weighted avg       0.85      0.92      0.88     14091

Confusion_matrix
[[12990     0]
 [ 1101     0]]
done in 0.586580s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19980
           1       0.43      0.01      0.02      1918

    accuracy                           0.91     21898
   macro avg       0.67      0.50      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19959    21]
 [ 1902    16]]
done in 32.332005s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.42      0.01      0.02      1926

    accuracy                           0.91     21898
   macro avg       0.67      0.50      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19942    30]
 [ 1904    22]]
done in 31.979195s
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6836
           1       0.33      0.00    

threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.40199999999999997
threshold:0.2, J-value:0.28400000000000003
threshold:0.30000000000000004, J-value:0.139
threshold:0.4, J-value:0.045
threshold:0.5, J-value:0.009000000000000001
threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7006200862160028
Balanced accuracy score of test is  0.7034520370941386
True positive rate of class 1 is  0.784
True positive rate of class 2 is  0.731
Positive prediction rate of class 1 is  0.441
Positive prediction rate of class 2 is  0.356


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(23370, 88)
(42324, 88)
0.12097083653108212 0.0856483262793382
0.12096960369372836
(67071, 87)
X train 67071
Y train 67071
21898 7743 14155
21898 7743 14155
21898 7740 14158
21898 7740 14158


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2893187733670113
0.26466851013847176
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.41      0.03      0.06      1948

    accuracy                           0.91     21898
   macro avg       0.66      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19860    90]
 [ 1886    62]]
done in 0.701703s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2893187733670113
0.2673361408383091
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.44      0.03      0.05      2015

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19809    74]
 [ 1957    58]]
done in 0.644388s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2893187733670113
0.30377746124965377
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6895
           1       0.43      0.02      0.04       848

    accuracy                           0.89      7743
   macro avg       0.66      0.51      0.49      7743
weighted avg       0.84      0.89      0.84      7743

Confusion_matrix
[[6868   27]
 [ 828   20]]
done in 0.605811s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2893187733670113
0.3080505385005307
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6860
           1       0.45      0.02      0.04       880

    accuracy                           0.89      7740
   macro avg       0.67      0.51      0.49      7740
weighted avg       0.84      0.89      0.84      7740

Confusion_matrix
[[6839   21]
 [ 863   17]]
done in 0.582326s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2893187733670113
0.24327531985561188
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13055
           1       0.40      0.04      0.07      1100

    accuracy                           0.92     14155
   macro avg       0.66      0.52      0.51     14155
weighted avg       0.88      0.92      0.89     14155

Confusion_matrix
[[12992    63]
 [ 1058    42]]
done in 0.594825s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2893187733670113
0.24507809323938298
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13023
           1       0.44      0.04      0.07      1135

    accuracy                           0.92     14158
   macro avg       0.68      0.52      0.51     14158
weighted avg       0.88      0.92      0.89     14158

Confusion_matrix
[[12970    53]
 [ 1094    41]]
done in 0.600231s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.45      0.00      0.01      1948

    accuracy                           0.91     21898
   macro avg       0.68      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19944     6]
 [ 1943     5]]
done in 18.697857s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883

  _warn_prf(average, modifier, msg_start, len(result))


0.3000629835927724
0.2759129739704951
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.00      0.00      0.00      2015

    accuracy                           0.91     21898
   macro avg       0.45      0.50      0.48     21898
weighted avg       0.82      0.91      0.86     21898

Confusion_matrix
[[19883     0]
 [ 2015     0]]
done in 0.622639s


  _warn_prf(average, modifier, msg_start, len(result))


0.3000629835927724
0.3112362245271311
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6895
           1       0.00      0.00      0.00       848

    accuracy                           0.89      7743
   macro avg       0.45      0.50      0.47      7743
weighted avg       0.79      0.89      0.84      7743

Confusion_matrix
[[6895    0]
 [ 848    0]]
done in 0.556140s


  _warn_prf(average, modifier, msg_start, len(result))


0.3000629835927724
0.31776697127669407
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6860
           1       0.00      0.00      0.00       880

    accuracy                           0.89      7740
   macro avg       0.44      0.50      0.47      7740
weighted avg       0.79      0.89      0.83      7740

Confusion_matrix
[[6860    0]
 [ 880    0]]
done in 0.554997s


  _warn_prf(average, modifier, msg_start, len(result))


0.3000629835927724
0.24958001747988381
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13055
           1       0.00      0.00      0.00      1100

    accuracy                           0.92     14155
   macro avg       0.46      0.50      0.48     14155
weighted avg       0.85      0.92      0.89     14155

Confusion_matrix
[[13055     0]
 [ 1100     0]]
done in 0.699804s


  _warn_prf(average, modifier, msg_start, len(result))


0.3000629835927724
0.2530319216220009
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13023
           1       0.00      0.00      0.00      1135

    accuracy                           0.92     14158
   macro avg       0.46      0.50      0.48     14158
weighted avg       0.85      0.92      0.88     14158

Confusion_matrix
[[13023     0]
 [ 1135     0]]
done in 0.582101s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.48      0.01      0.02      1948

    accuracy                           0.91     21898
   macro avg       0.69      0.50      0.49     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19928    22]
 [ 1928    20]]
done in 32.462374s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.42      0.01      0.02      2015

    accuracy                           0.91     21898
   macro avg       0.67      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19857    26]
 [ 1996    19]]
done in 32.584423s
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6895
           1       0.62      0.01    

threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.39599999999999996
threshold:0.2, J-value:0.313
threshold:0.30000000000000004, J-value:0.148
threshold:0.4, J-value:0.049
threshold:0.5, J-value:0.008
threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6978933532954981
Balanced accuracy score of test is  0.7108439456995941
True positive rate of class 1 is  0.791
True positive rate of class 2 is  0.741
Positive prediction rate of class 1 is  0.435
Positive prediction rate of class 2 is  0.353


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(23345, 88)
(42349, 88)
0.12230181241286477 0.08478700786393094
0.1222879684418146
(67158, 87)
X train 67158
Y train 67158
21898 7751 14147
21898 7751 14147
21898 7757 14141
21898 7757 14141


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2911272230137969
0.26575357908367847
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.46      0.04      0.07      1990

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19820    88]
 [ 1916    74]]
done in 1.486704s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2911272230137969
0.2648087868792824
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
           1       0.43      0.03      0.06      1980

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19840    78]
 [ 1921    59]]
done in 0.865046s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2911272230137969
0.3027345783222792
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6908
           1       0.46      0.03      0.05       843

    accuracy                           0.89      7751
   macro avg       0.68      0.51      0.50      7751
weighted avg       0.85      0.89      0.85      7751

Confusion_matrix
[[6880   28]
 [ 819   24]]
done in 0.751957s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2911272230137969
0.30498106185217216
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6894
           1       0.45      0.02      0.04       863

    accuracy                           0.89      7757
   macro avg       0.67      0.51      0.49      7757
weighted avg       0.84      0.89      0.84      7757

Confusion_matrix
[[6870   24]
 [ 843   20]]
done in 1.200068s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2911272230137969
0.2454920589664526
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13000
           1       0.45      0.04      0.08      1147

    accuracy                           0.92     14147
   macro avg       0.69      0.52      0.52     14147
weighted avg       0.88      0.92      0.89     14147

Confusion_matrix
[[12940    60]
 [ 1097    50]]
done in 0.894958s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2911272230137969
0.24277241484302564
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13024
           1       0.42      0.03      0.06      1117

    accuracy                           0.92     14141
   macro avg       0.67      0.52      0.51     14141
weighted avg       0.88      0.92      0.89     14141

Confusion_matrix
[[12970    54]
 [ 1078    39]]
done in 0.627408s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.69      0.01      0.01      1990

    accuracy                           0.91     21898
   macro avg       0.80      0.50      0.48     21898
weighted avg       0.89      0.91      0.87     21898

Confusion_matrix
[[19903     5]
 [ 1979    11]]
done in 18.107810s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918

  _warn_prf(average, modifier, msg_start, len(result))


0.302941220949911
0.2728952533315954
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
           1       0.33      0.00      0.00      1980

    accuracy                           0.91     21898
   macro avg       0.62      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19916     2]
 [ 1979     1]]
done in 0.595148s
0.302941220949911
0.30958774797380123
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6908
           1       0.00      0.00      0.00       843

    accuracy                           0.89      7751
   macro avg       0.45      0.50      0.47      7751
weighted avg       0.79      0.89      0.84      7751

Confusion_matrix
[[6908    0]
 [ 843    0]]
done in 0.564240s


  _warn_prf(average, modifier, msg_start, len(result))


0.302941220949911
0.31146757757447097
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6894
           1       0.00      0.00      0.00       863

    accuracy                           0.89      7757
   macro avg       0.44      0.50      0.47      7757
weighted avg       0.79      0.89      0.84      7757

Confusion_matrix
[[6894    0]
 [ 863    0]]
done in 0.575934s


  _warn_prf(average, modifier, msg_start, len(result))


0.302941220949911
0.25294718862601745
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13000
           1       0.00      0.00      0.00      1147

    accuracy                           0.92     14147
   macro avg       0.46      0.50      0.48     14147
weighted avg       0.84      0.92      0.88     14147

Confusion_matrix
[[13000     0]
 [ 1147     0]]
done in 0.573356s


  _warn_prf(average, modifier, msg_start, len(result))


0.302941220949911
0.2517365291146387
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13024
           1       0.33      0.00      0.00      1117

    accuracy                           0.92     14141
   macro avg       0.63      0.50      0.48     14141
weighted avg       0.87      0.92      0.88     14141

Confusion_matrix
[[13022     2]
 [ 1116     1]]
done in 0.578001s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.54      0.02      0.03      1990

    accuracy                           0.91     21898
   macro avg       0.73      0.51      0.49     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19881    27]
 [ 1958    32]]
done in 32.363167s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19918
 

threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.377
threshold:0.2, J-value:0.289
threshold:0.30000000000000004, J-value:0.153
threshold:0.4, J-value:0.048
threshold:0.5, J-value:0.016
threshold:0.6000000000000001, J-value:0.002
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6885803315014277
Balanced accuracy score of test is  0.7051011829185605
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.407
threshold:0.2, J-value:0.32299999999999995
threshold:0.30000000000000004, J-value:0.158
threshold:0.4, J-value:0.049
threshold:0.5, J-value:0.013999999999999999
threshold:0.6000000000000001, J-value:0.005
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7035697471665214
Balanced accuracy score of test is  0.7084750499869121
True positive rate 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(23252, 88)
(42442, 88)
0.12775244931613153 0.08639004786648578
0.12772928558630045
(67309, 87)
X train 67309
Y train 67309
21898 7842 14056
21898 7842 14056
21898 7759 14139
21898 7759 14139


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29752136737961843
0.2592216992631809
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.95     20006
           1       0.42      0.03      0.06      1892

    accuracy                           0.91     21898
   macro avg       0.67      0.52      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19916    90]
 [ 1826    66]]
done in 0.570570s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29752136737961843
0.26239128801752915
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19975
           1       0.43      0.04      0.07      1923

    accuracy                           0.91     21898
   macro avg       0.67      0.52      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19879    96]
 [ 1852    71]]
done in 0.577667s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29752136737961843
0.2928411058415271
Classification report
              precision    recall  f1-score   support

           0       0.90      1.00      0.95      7042
           1       0.38      0.03      0.05       800

    accuracy                           0.90      7842
   macro avg       0.64      0.51      0.50      7842
weighted avg       0.85      0.90      0.85      7842

Confusion_matrix
[[7008   34]
 [ 779   21]]
done in 0.598521s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29752136737961843
0.29441151762554985
Classification report
              precision    recall  f1-score   support

           0       0.90      1.00      0.94      6943
           1       0.52      0.03      0.06       816

    accuracy                           0.90      7759
   macro avg       0.71      0.52      0.50      7759
weighted avg       0.86      0.90      0.85      7759

Confusion_matrix
[[6917   26]
 [ 788   28]]
done in 0.625347s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29752136737961843
0.24046505538246157
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12964
           1       0.45      0.04      0.08      1092

    accuracy                           0.92     14056
   macro avg       0.69      0.52      0.52     14056
weighted avg       0.89      0.92      0.89     14056

Confusion_matrix
[[12908    56]
 [ 1047    45]]
done in 1.069592s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29752136737961843
0.24481968029925819
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96     13032
           1       0.38      0.04      0.07      1107

    accuracy                           0.92     14139
   macro avg       0.65      0.52      0.51     14139
weighted avg       0.88      0.92      0.89     14139

Confusion_matrix
[[12962    70]
 [ 1064    43]]
done in 0.782944s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     20006
           1       0.38      0.00      0.00      1892

    accuracy                           0.91     21898
   macro avg       0.64      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[20001     5]
 [ 1889     3]]
done in 42.321842s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     1997

threshold:0.2, J-value:0.295
threshold:0.30000000000000004, J-value:0.16
threshold:0.4, J-value:0.075
threshold:0.5, J-value:0.031000000000000003
threshold:0.6000000000000001, J-value:0.009999999999999998
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.699130562099869
Balanced accuracy score of test is  0.6961581331839006
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.369
threshold:0.2, J-value:0.272
threshold:0.30000000000000004, J-value:0.15
threshold:0.4, J-value:0.059
threshold:0.5, J-value:0.020999999999999998
threshold:0.6000000000000001, J-value:0.008
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.684486119000284
Balanced accuracy score of test is  0.6839039284877138
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.413
threshold

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(23261, 88)
(42433, 88)
0.12388268831231579 0.08438322557563058
0.12386598860238686
(67239, 87)
X train 67239
Y train 67239
21898 7814 14084
21898 7814 14084
21898 7778 14120
21898 7778 14120


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29281746550269033
0.2684413978458392
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.39      0.03      0.05      1987

    accuracy                           0.91     21898
   macro avg       0.65      0.51      0.50     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19822    89]
 [ 1930    57]]
done in 0.757685s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29281746550269033
0.2655069707345037
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.38      0.03      0.05      1971

    accuracy                           0.91     21898
   macro avg       0.65      0.51      0.50     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19847    80]
 [ 1921    50]]
done in 0.656072s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29281746550269033
0.29880350828427577
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6979
           1       0.35      0.02      0.04       835

    accuracy                           0.89      7814
   macro avg       0.62      0.51      0.49      7814
weighted avg       0.84      0.89      0.85      7814

Confusion_matrix
[[6947   32]
 [ 818   17]]
done in 0.662179s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29281746550269033
0.30433381114839797
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6927
           1       0.52      0.02      0.04       851

    accuracy                           0.89      7778
   macro avg       0.70      0.51      0.49      7778
weighted avg       0.85      0.89      0.84      7778

Confusion_matrix
[[6911   16]
 [ 834   17]]
done in 0.702791s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29281746550269033
0.2515960747156245
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12932
           1       0.41      0.03      0.06      1152

    accuracy                           0.92     14084
   macro avg       0.67      0.52      0.51     14084
weighted avg       0.88      0.92      0.88     14084

Confusion_matrix
[[12875    57]
 [ 1112    40]]
done in 0.677231s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29281746550269033
0.2441192111920625
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13000
           1       0.34      0.03      0.05      1120

    accuracy                           0.92     14120
   macro avg       0.63      0.51      0.51     14120
weighted avg       0.88      0.92      0.89     14120

Confusion_matrix
[[12936    64]
 [ 1087    33]]
done in 0.624617s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.33      0.00      0.00      1987

    accuracy                           0.91     21898
   macro avg       0.62      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19907     4]
 [ 1985     2]]
done in 19.632073s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927

  _warn_prf(average, modifier, msg_start, len(result))


0.3054786536678037
0.2731869826446992
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       1.00      0.00      0.00      1971

    accuracy                           0.91     21898
   macro avg       0.96      0.50      0.48     21898
weighted avg       0.92      0.91      0.87     21898

Confusion_matrix
[[19927     0]
 [ 1970     1]]
done in 0.601841s
0.3054786536678037
0.307866868236583
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6979
           1       0.00      0.00      0.00       835

    accuracy                           0.89      7814
   macro avg       0.45      0.50      0.47      7814
weighted avg       0.80      0.89      0.84      7814

Confusion_matrix
[[6979    0]
 [ 835    0]]
done in 0.569481s


  _warn_prf(average, modifier, msg_start, len(result))


0.3054786536678037
0.3117481834809884
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6927
           1       0.00      0.00      0.00       851

    accuracy                           0.89      7778
   macro avg       0.45      0.50      0.47      7778
weighted avg       0.79      0.89      0.84      7778

Confusion_matrix
[[6927    0]
 [ 851    0]]
done in 0.575021s


  _warn_prf(average, modifier, msg_start, len(result))


0.3054786536678037
0.2586900523241105
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12932
           1       0.00      0.00      0.00      1152

    accuracy                           0.92     14084
   macro avg       0.46      0.50      0.48     14084
weighted avg       0.84      0.92      0.88     14084

Confusion_matrix
[[12932     0]
 [ 1152     0]]
done in 0.640487s


  _warn_prf(average, modifier, msg_start, len(result))


0.3054786536678037
0.2519455506259558
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13000
           1       1.00      0.00      0.00      1120

    accuracy                           0.92     14120
   macro avg       0.96      0.50      0.48     14120
weighted avg       0.93      0.92      0.88     14120

Confusion_matrix
[[13000     0]
 [ 1119     1]]
done in 0.621642s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.53      0.01      0.02      1987

    accuracy                           0.91     21898
   macro avg       0.72      0.51      0.49     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19889    22]
 [ 1962    25]]
done in 32.873380s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927


Balanced accuracy score of val is  0.699258934265534
Balanced accuracy score of test is  0.7030921488496431
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.39
threshold:0.2, J-value:0.296
threshold:0.30000000000000004, J-value:0.149
threshold:0.4, J-value:0.044
threshold:0.5, J-value:0.012
threshold:0.6000000000000001, J-value:0.004
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6951402539526192
Balanced accuracy score of test is  0.6928243795417615
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.39499999999999996
threshold:0.2, J-value:0.29700000000000004
threshold:0.30000000000000004, J-value:0.138
threshold:0.4, J-value:0.043
threshold:0.5, J-value:0.011
threshold:0.6000000000000001, J-value:0.004
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(23328, 88)
(42366, 88)
0.12332065295902152 0.08630769230769231
0.12330769230769231
(67137, 87)
X train 67137
Y train 67137
21898 7644 14254
21898 7644 14254
21898 7881 14017
21898 7881 14017


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2931188218278894
0.26198865113994235
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.44      0.03      0.06      1954

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19868    76]
 [ 1894    60]]
done in 0.709687s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2931188218278894
0.2634831313226838
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955
           1       0.45      0.03      0.06      1943

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19877    78]
 [ 1880    63]]
done in 0.761163s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2931188218278894
0.3026873797485269
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6814
           1       0.37      0.02      0.03       830

    accuracy                           0.89      7644
   macro avg       0.63      0.51      0.49      7644
weighted avg       0.84      0.89      0.84      7644

Confusion_matrix
[[6792   22]
 [ 817   13]]
done in 0.622228s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2931188218278894
0.29873830964435155
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      7022
           1       0.42      0.02      0.04       859

    accuracy                           0.89      7881
   macro avg       0.66      0.51      0.49      7881
weighted avg       0.84      0.89      0.84      7881

Confusion_matrix
[[6996   26]
 [ 840   19]]
done in 0.725680s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2931188218278894
0.24016312276306423
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13130
           1       0.47      0.04      0.08      1124

    accuracy                           0.92     14254
   macro avg       0.69      0.52      0.52     14254
weighted avg       0.89      0.92      0.89     14254

Confusion_matrix
[[13076    54]
 [ 1077    47]]
done in 0.841808s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2931188218278894
0.24366105382014666
Classification report
              precision    recall  f1-score   support

           0       0.93      1.00      0.96     12933
           1       0.46      0.04      0.07      1084

    accuracy                           0.92     14017
   macro avg       0.69      0.52      0.52     14017
weighted avg       0.89      0.92      0.89     14017

Confusion_matrix
[[12881    52]
 [ 1040    44]]
done in 0.815245s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.45      0.00      0.01      1954

    accuracy                           0.91     21898
   macro avg       0.68      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19938     6]
 [ 1949     5]]
done in 18.953298s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955

threshold:0.30000000000000004, J-value:0.154
threshold:0.4, J-value:0.065
threshold:0.5, J-value:0.027
threshold:0.6000000000000001, J-value:0.009999999999999998
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7017226791823656
Balanced accuracy score of test is  0.6920526537256434
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.365
threshold:0.2, J-value:0.253
threshold:0.30000000000000004, J-value:0.118
threshold:0.4, J-value:0.048999999999999995
threshold:0.5, J-value:0.013000000000000001
threshold:0.6000000000000001, J-value:0.005
threshold:0.7000000000000001, J-value:0.001
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6822286504397396
Balanced accuracy score of test is  0.696607436001073
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.426
threshold:0.2, J-value:0.324


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(23296, 88)
(42398, 88)
0.12005384874272802 0.085263777612819
0.1200501702203906
(67053, 87)
X train 67053
Y train 67053
21898 7795 14103
21898 7795 14103
21898 7762 14136
21898 7762 14136


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2896755097159808
0.2675567031128867
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.42      0.03      0.06      2005

    accuracy                           0.91     21898
   macro avg       0.66      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19806    87]
 [ 1943    62]]
done in 0.873394s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2896755097159808
0.26570775704499433
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.47      0.03      0.06      1991

    accuracy                           0.91     21898
   macro avg       0.69      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19839    68]
 [ 1931    60]]
done in 0.824732s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2896755097159808
0.30772431727222116
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6922
           1       0.34      0.02      0.03       873

    accuracy                           0.89      7795
   macro avg       0.62      0.51      0.49      7795
weighted avg       0.83      0.89      0.84      7795

Confusion_matrix
[[6893   29]
 [ 858   15]]
done in 0.688888s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2896755097159808
0.3074825621617685
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6882
           1       0.58      0.03      0.05       880

    accuracy                           0.89      7762
   macro avg       0.74      0.51      0.50      7762
weighted avg       0.85      0.89      0.84      7762

Confusion_matrix
[[6864   18]
 [ 855   25]]
done in 0.990580s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2896755097159808
0.2453552883520548
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12971
           1       0.45      0.04      0.08      1132

    accuracy                           0.92     14103
   macro avg       0.69      0.52      0.52     14103
weighted avg       0.88      0.92      0.89     14103

Confusion_matrix
[[12913    58]
 [ 1085    47]]
done in 0.674091s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2896755097159808
0.24276944087943103
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13025
           1       0.41      0.03      0.06      1111

    accuracy                           0.92     14136
   macro avg       0.67      0.51      0.51     14136
weighted avg       0.88      0.92      0.89     14136

Confusion_matrix
[[12975    50]
 [ 1076    35]]
done in 0.652389s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.50      0.00      0.01      2005

    accuracy                           0.91     21898
   macro avg       0.70      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19885     8]
 [ 1997     8]]
done in 19.102137s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907

threshold:0.30000000000000004, J-value:0.165
threshold:0.4, J-value:0.07300000000000001
threshold:0.5, J-value:0.027
threshold:0.6000000000000001, J-value:0.005
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7018502604896295
Balanced accuracy score of test is  0.6977655162300781
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.388
threshold:0.2, J-value:0.266
threshold:0.30000000000000004, J-value:0.131
threshold:0.4, J-value:0.051000000000000004
threshold:0.5, J-value:0.013000000000000001
threshold:0.6000000000000001, J-value:0.001
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6938525934376607
Balanced accuracy score of test is  0.6958221050962987
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.409
threshold:0.2, J-value:0.314
thre

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(23200, 88)
(42494, 88)
0.12305160228482913 0.08621967741110913
0.12302855243986606
(67134, 87)
X train 67134
Y train 67134
21898 7804 14094
21898 7804 14094
21898 7849 14049
21898 7849 14049


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29196351864161635
0.26569594508936734
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.51      0.03      0.06      1967

    accuracy                           0.91     21898
   macro avg       0.71      0.52      0.51     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19866    65]
 [ 1899    68]]
done in 0.762245s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29196351864161635
0.2626298436035843
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
           1       0.42      0.03      0.06      1942

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19876    80]
 [ 1883    59]]
done in 0.646569s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29196351864161635
0.3068925974834796
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6937
           1       0.53      0.03      0.06       867

    accuracy                           0.89      7804
   macro avg       0.71      0.51      0.50      7804
weighted avg       0.85      0.89      0.84      7804

Confusion_matrix
[[6913   24]
 [ 840   27]]
done in 0.610850s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29196351864161635
0.2976828937985205
Classification report
              precision    recall  f1-score   support

           0       0.90      1.00      0.94      7008
           1       0.54      0.03      0.06       841

    accuracy                           0.89      7849
   macro avg       0.72      0.51      0.50      7849
weighted avg       0.86      0.89      0.85      7849

Confusion_matrix
[[6985   23]
 [ 814   27]]
done in 0.581048s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29196351864161635
0.242884913779331
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12994
           1       0.50      0.04      0.07      1100

    accuracy                           0.92     14094
   macro avg       0.71      0.52      0.51     14094
weighted avg       0.89      0.92      0.89     14094

Confusion_matrix
[[12953    41]
 [ 1059    41]]
done in 0.587410s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29196351864161635
0.24304614433815233
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12948
           1       0.36      0.03      0.05      1101

    accuracy                           0.92     14049
   macro avg       0.64      0.51      0.51     14049
weighted avg       0.88      0.92      0.89     14049

Confusion_matrix
[[12891    57]
 [ 1069    32]]
done in 0.675523s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.37      0.00      0.01      1967

    accuracy                           0.91     21898
   macro avg       0.64      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19919    12]
 [ 1960     7]]
done in 18.039911s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     1995

threshold:0.30000000000000004, J-value:0.151
threshold:0.4, J-value:0.069
threshold:0.5, J-value:0.032
threshold:0.6000000000000001, J-value:0.009000000000000001
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6938424983580236
Balanced accuracy score of test is  0.6981046768389942
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.367
threshold:0.2, J-value:0.271
threshold:0.30000000000000004, J-value:0.14600000000000002
threshold:0.4, J-value:0.061
threshold:0.5, J-value:0.028
threshold:0.6000000000000001, J-value:0.009000000000000001
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6830570537706386
Balanced accuracy score of test is  0.6898843143083631
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.39599999999999996
threshold:0.2, 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(23317, 88)
(42377, 88)
0.12501206214416674 0.08378302345208563
0.12500959054755634
(67306, 87)
X train 67306
Y train 67306
21898 7696 14202
21898 7696 14202
21898 7840 14058
21898 7840 14058


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29494459237783427
0.2605526667929227
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.43      0.03      0.06      1925

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19884    89]
 [ 1859    66]]
done in 0.609032s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29494459237783427
0.2689075301841586
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
           1       0.48      0.04      0.07      2032

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19787    79]
 [ 1959    73]]
done in 0.605621s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29494459237783427
0.2882783214318874
Classification report
              precision    recall  f1-score   support

           0       0.90      1.00      0.95      6911
           1       0.48      0.03      0.05       785

    accuracy                           0.90      7696
   macro avg       0.69      0.51      0.50      7696
weighted avg       0.86      0.90      0.85      7696

Confusion_matrix
[[6888   23]
 [ 764   21]]
done in 0.583947s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29494459237783427
0.30604269132342016
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6966
           1       0.40      0.02      0.04       874

    accuracy                           0.89      7840
   macro avg       0.64      0.51      0.49      7840
weighted avg       0.84      0.89      0.84      7840

Confusion_matrix
[[6937   29]
 [ 855   19]]
done in 0.605736s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29494459237783427
0.2455282590967199
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96     13062
           1       0.41      0.04      0.07      1140

    accuracy                           0.92     14202
   macro avg       0.66      0.52      0.51     14202
weighted avg       0.88      0.92      0.89     14202

Confusion_matrix
[[12996    66]
 [ 1095    45]]
done in 0.629856s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.29494459237783427
0.24819763807064243
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12900
           1       0.52      0.05      0.09      1158

    accuracy                           0.92     14058
   macro avg       0.72      0.52      0.52     14058
weighted avg       0.89      0.92      0.89     14058

Confusion_matrix
[[12850    50]
 [ 1104    54]]
done in 1.153846s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.39      0.00      0.01      1925

    accuracy                           0.91     21898
   macro avg       0.65      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19962    11]
 [ 1918     7]]
done in 19.752912s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     1986

threshold:0.30000000000000004, J-value:0.173
threshold:0.4, J-value:0.079
threshold:0.5, J-value:0.030000000000000002
threshold:0.6000000000000001, J-value:0.009999999999999998
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6994431053350594
Balanced accuracy score of test is  0.7041274967478959
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.394
threshold:0.2, J-value:0.276
threshold:0.30000000000000004, J-value:0.158
threshold:0.4, J-value:0.069
threshold:0.5, J-value:0.024
threshold:0.6000000000000001, J-value:0.009000000000000001
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6968231205306412
Balanced accuracy score of test is  0.6965309108445007
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.39699999999999996
threshold:0.2, J-val

In [9]:
def add_mean_sd(records, result_table, overall_records, type):
    """Append mean / standard-deviation summary rows for one classifier.

    Both list arguments are modified in place:

    * ``records`` gains two dicts, first the per-column means and then the
      per-column standard deviations of ``result_table``.
    * ``overall_records`` gains one dict holding ``type`` plus the means of a
      smaller set of headline columns.

    Parameters
    ----------
    records : list of dict
        Rows to extend; the returned frame is built from this list after the
        two summary rows have been added.
    result_table : column-indexable table (used as ``result_table[name]``)
        Source of the statistics; every metric column named below must exist.
    overall_records : list of dict
        Cross-classifier accumulator that receives the headline means.
    type : str
        Label stored under the ``'type'`` key of the headline row (the callers
        in this notebook pass short classifier tags such as ``'lr'``).

    Returns
    -------
    tuple
        ``(pd.DataFrame(records), overall_records)`` where the second item is
        the very list object that was passed in.
    """
    groups = ("overall", "male", "female")
    threshold_columns = [group + " threshold" for group in groups]

    # Column order matters: it fixes the key order of the appended dicts.
    full_columns = (
        ["auroc"]
        + threshold_columns
        + [group + " ba " + split
           for group in groups
           for split in ("validation", "test")]
        + [group + " " + metric
           for group in groups
           for metric in ("precision", "recall", "tpr", "tnr", "pd")]
        + ["eod", "di"]
    )
    headline_columns = (
        ["auroc"]
        + threshold_columns
        + [group + " ba test" for group in groups]
        + [group + " " + metric
           for group in groups
           for metric in ("tpr", "pd")]
        + ["eod", "di"]
    )

    # Mean row first, standard-deviation row second.
    column_means = {name: result_table[name].mean() for name in full_columns}
    records.append(column_means)

    column_sds = {name: result_table[name].std() for name in full_columns}
    records.append(column_sds)

    # Headline row: the classifier label followed by a subset of the means.
    headline = {'type': type}
    headline.update((name, column_means[name]) for name in headline_columns)
    overall_records.append(headline)

    pd_result = pd.DataFrame(records)
    return pd_result, overall_records

In [10]:
# add_mean_sd appends to the list it is given and hands that same list back,
# so overall_table and overall_records refer to one list for the whole cell.
# NOTE(review): each result_* name is both the input table and the rebound
# output, so re-running this cell on its own would feed tables that already
# contain the appended mean/sd rows back into add_mean_sd.
overall_table = []
result_lr, overall_records = add_mean_sd(records_lr, result_lr, overall_table, 'lr')
result_rf, overall_records = add_mean_sd(records_rf, result_rf, overall_records, 'rf')
result_dt, overall_records = add_mean_sd(records_dt, result_dt, overall_records, 'dt')
result_gbt, overall_records = add_mean_sd(records_gbt, result_gbt, overall_records, 'gbt')

# Per-classifier tables, each ending with its mean row and its sd row.
result_path = '/Users/lifuchen/Desktop/research/resample_data/'
per_model_outputs = (
    (result_lr, 'gender-lr-resample-proportion-result.csv'),
    (result_rf, 'gender-rf-resample-proportion-result.csv'),
    (result_dt, 'gender-dt-resample-proportion-result.csv'),
    (result_gbt, 'gender-gbt-resample-proportion-result.csv'),
)
for model_table, csv_name in per_model_outputs:
    model_table.to_csv(path.join(result_path, csv_name), index=False)

# Cross-classifier summary: one row of headline means per classifier.
result_path = '/Users/lifuchen/Desktop/research/resample_result/'
overall_result = pd.DataFrame(overall_table)
overall_result.to_csv(path.join(result_path, 'gender-resample-proportion.csv'), index=False)