In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import imblearn
from os import path
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
import sklearn.preprocessing
from sklearn import preprocessing
from sklearn.preprocessing import Normalizer
import src.lib.utility_classfier as uclf
import src.lib.optimal_threhold_related as thres
import src.lib.fairness_tests as fair

In [2]:
# Load the full dataset from a local CSV file into `df`.
# NOTE(review): this is an absolute, user-specific path; the notebook will not
# run on another machine without editing this line.
data_path='/Users/lifuchen/Desktop/research/data.csv'
df = pd.read_csv(data_path)
# Last expression of the cell: show (n_rows, n_columns) of the loaded frame.
df.shape

(109490, 89)

In [3]:
# Labels: the `Class` column as an array.
y = df['Class'].values
# Features: every column except `GRID` and the label itself.
X = df.drop(columns=['GRID', 'Class'])
# Duplicate the GENDER column: the split helper used later drops the sensitive
# attribute it splits on, so this copy keeps gender available as a feature.
X['gender_copy'] = X['GENDER']
X.shape

(109490, 88)

In [4]:
def save_prediction(classifier, characteristic, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_male_scaled, y_val_male, X_test_male_scaled, y_test_male, X_val_female_scaled, y_val_female, X_test_female_scaled, y_test_female, result_path='/Users/lifuchen/Desktop/research/predictions/'):
    """Score every evaluation split with one classifier and save the scores as CSV.

    Parameters
    ----------
    classifier : str
        Name of a function in ``uclf``. It is called as
        ``f(X_train, y_train, X_eval, y_eval)`` and its return value is stored
        as the scores for that evaluation split.
    characteristic : object
        Tag for this run; ``str(characteristic)`` becomes part of the filename.
    X_train_scaled, y_train
        Training data handed unchanged to every classifier call.
    X_val_scaled, y_val, X_test_scaled, y_test
        Overall validation / test splits.
    X_val_male_scaled, y_val_male, X_test_male_scaled, y_test_male
        Male validation / test splits (written as the ``*_1_score`` columns).
    X_val_female_scaled, y_val_female, X_test_female_scaled, y_test_female
        Female validation / test splits (written as the ``*_2_score`` columns).
    result_path : str, optional
        Directory the CSV is written to. Defaults to the location the notebook
        originally hard-coded, so existing calls behave as before.

    Returns
    -------
    None
        The only effect is the file
        ``<result_path>/<classifier><characteristic>prediction.csv``.
    """
    method_to_call = getattr(uclf, classifier)

    # (output column, features, labels), listed in the order the classifier is
    # invoked and the columns are written.
    # NOTE(review): the classifier function is called once per split with the
    # same training data; the notebook output shows a convergence warning and a
    # "done in ..." line for each call, which suggests the model is refit every
    # time. For unseeded models the val/test scores may then come from
    # different fits -- confirm in `uclf`.
    splits = (
        ('val_score', X_val_scaled, y_val),
        ('test_score', X_test_scaled, y_test),
        ('val_1_score', X_val_male_scaled, y_val_male),
        ('test_1_score', X_test_male_scaled, y_test_male),
        ('val_2_score', X_val_female_scaled, y_val_female),
        ('test_2_score', X_test_female_scaled, y_test_female),
    )
    my_dict = {
        column: method_to_call(X_train_scaled, y_train, X_eval, y_eval)
        for column, X_eval, y_eval in splits
    }

    # The splits have different lengths. Building the frame row-wise and
    # transposing pads the shorter columns with NaN instead of raising.
    overall_prediction = pd.DataFrame.from_dict(my_dict, orient='index')
    overall_prediction = overall_prediction.transpose()

    filename = str(classifier) + str(characteristic) + "prediction.csv"
    overall_prediction.to_csv(path.join(result_path, filename), index=False)

In [5]:
def get_result(classifier, characteristic, records, X_train_scaled, y_train, X_val_scaled, y_val, X_test_scaled, y_test, X_val_male_scaled, y_val_male, X_test_male_scaled, y_test_male, X_val_female_scaled, y_val_female, X_test_female_scaled, y_test_female, result_path='/Users/lifuchen/Desktop/research/predictions/'):
    """Read saved scores for one classifier/run and append its metrics to ``records``.

    The scores are read from
    ``<result_path>/<classifier><characteristic>prediction.csv``, the file
    layout written by ``save_prediction``. For the overall, male and female
    splits a threshold is chosen via ``balance_accuracy`` and precision-style
    metrics are computed via ``thres.calculate_precision_metrics``; two
    fairness values come from ``fair.get_EOD`` and ``fair.get_SP``.

    Parameters
    ----------
    classifier, characteristic
        Identify the prediction file; both are converted with ``str``.
    records : list
        Mutated in place: one dict of metrics is appended.
    y_val, y_test, y_val_male, y_test_male, y_val_female, y_test_female
        True labels for each split, matched against the saved scores.
    X_train_scaled, y_train, X_val_scaled, X_test_scaled, X_val_male_scaled,
    X_test_male_scaled, X_val_female_scaled, X_test_female_scaled
        Not used by this function; kept so the signature mirrors
        ``save_prediction``.
    result_path : str, optional
        Directory containing the prediction CSV. Defaults to the location the
        notebook originally hard-coded.

    Returns
    -------
    None
    """
    # str() on both parts, matching how save_prediction builds the filename
    # (plain concatenation raised TypeError for a non-string characteristic).
    filename = str(classifier) + str(characteristic) + "prediction.csv"
    prediction = pd.read_csv(path.join(result_path, filename))

    # Shorter columns were padded with NaN when the file was written; drop the
    # padding so each score series lines up with its label array.
    y_val_score = prediction['val_score'].dropna()
    y_test_score = prediction['test_score'].dropna()

    y_val_score_male = prediction['val_1_score'].dropna()
    y_test_score_male = prediction['test_1_score'].dropna()

    y_val_score_female = prediction['val_2_score'].dropna()
    y_test_score_female = prediction['test_2_score'].dropna()

    # Overall split: threshold chosen on validation, metrics reported on test.
    threshold, ba_val, ba_test = balance_accuracy(y_val, y_val_score, y_test, y_test_score)
    auroc = roc_auc_score(y_test, y_test_score)
    precision, recall, tpr, tnr, pd_overall = thres.calculate_precision_metrics(y_test, y_test_score, threshold)

    # Each gender group gets its own threshold from its own validation split.
    threshold_male, ba_val_male, ba_test_male = balance_accuracy(y_val_male, y_val_score_male, y_test_male, y_test_score_male)
    precision_male, recall_male, tpr_male, tnr_male, pd_male = thres.calculate_precision_metrics(y_test_male, y_test_score_male, threshold_male)

    threshold_female, ba_val_female, ba_test_female = balance_accuracy(y_val_female, y_val_score_female, y_test_female, y_test_score_female)
    precision_female, recall_female, tpr_female, tnr_female, pd_female = thres.calculate_precision_metrics(y_test_female, y_test_score_female, threshold_female)

    eod = fair.get_EOD(y_test_male, y_test_score_male, threshold_male, y_test_female, y_test_score_female, threshold_female)
    sp = fair.get_SP(y_test_male, y_test_score_male, threshold_male, y_test_female, y_test_score_female, threshold_female)

    records.append({
        'auroc': auroc,
        'overall threshold': threshold,
        'male threshold': threshold_male,
        'female threshold': threshold_female,
        'overall ba validation': ba_val,
        'overall ba test': ba_test,
        'male ba validation': ba_val_male,
        'male ba test': ba_test_male,
        'female ba validation': ba_val_female,
        'female ba test': ba_test_female,
        'overall precision': precision,
        'overall recall': recall,
        'overall tpr': tpr,
        'overall tnr': tnr,
        'overall pd': pd_overall,
        'male precision': precision_male,
        'male recall': recall_male,
        'male tpr': tpr_male,
        'male tnr': tnr_male,
        'male pd': pd_male,
        'female precision': precision_female,
        'female recall': recall_female,
        'female tpr': tpr_female,
        'female tnr': tnr_female,
        'female pd': pd_female,
        'eod': eod,
        # NOTE(review): this value comes from fair.get_SP; the 'di' key is kept
        # unchanged so existing consumers of the records still work.
        'di': sp,
        })

In [6]:
def balance_accuracy(y_val, y_val_score, y_test, y_test_score):
    """Choose a threshold on the validation split and report balanced accuracy.

    The threshold is the first element returned by
    ``thres.get_optimal_threshold_Jvalue(y_val, y_val_score)``. That same
    threshold is then passed to ``thres.calculate_balanced_accuracy`` for the
    validation split and for the test split. Each value is printed as soon as
    it is computed.

    Returns
    -------
    tuple
        ``(threshold, ba_val, ba_test)``.
    """
    cutoff, _j_value = thres.get_optimal_threshold_Jvalue(y_val, y_val_score)
    print("Optimal threshold by J value is ", cutoff)

    # Validation first, then test, both evaluated at the validation-derived cutoff.
    balanced = []
    for message, labels, scores in (
        ("Balanced accuracy score of val is ", y_val, y_val_score),
        ("Balanced accuracy score of test is ", y_test, y_test_score),
    ):
        value = thres.calculate_balanced_accuracy(labels, scores, cutoff)
        print(message, value)
        balanced.append(value)

    ba_val, ba_test = balanced
    return cutoff, ba_val, ba_test

In [7]:
def fairness_metrics(X, y, attribute, random_state):
    """Run one split / scale / predict / evaluate round for a sensitive attribute.

    Steps:
      1. ``fair.split_by_trait_balance_size`` produces train, validation and
         test sets plus male/female subsets of validation and test. Per the
         original author's note, this split resamples by size and drops
         ``attribute``; a copy of the attribute kept in ``X`` remains a feature.
      2. A ``MaxAbsScaler`` is fitted on the training features only and then
         applied to every evaluation feature set.
      3. ``save_prediction`` is called for each of the four classifiers, then
         ``get_result`` is called for each of them.

    Results are appended to the module-level lists ``records_lr``,
    ``records_rf``, ``records_dt`` and ``records_gbt``, which must exist before
    this function is called.

    Parameters
    ----------
    X, y
        Full feature table and labels, forwarded to the split helper.
    attribute : str
        Sensitive attribute name; also used in the prediction filename tag.
    random_state
        Forwarded to the split helper and appended to the filename tag.
    """
    (X_train, y_train,
     X_val, y_val,
     X_test, y_test,
     X_val_female, X_val_male,
     y_val_female, y_val_male,
     X_test_female, X_test_male,
     y_test_female, y_test_male) = fair.split_by_trait_balance_size(X, y, attribute, random_state)

    # Row counts: train first, then overall / male / female for each split.
    print("X train", X_train.shape[0])
    print("Y train", y_train.shape[0])
    for overall, male, female in (
        (X_val, X_val_male, X_val_female),
        (y_val, y_val_male, y_val_female),
        (X_test, X_test_male, X_test_female),
        (y_test, y_test_male, y_test_female),
    ):
        print(overall.shape[0], male.shape[0], female.shape[0])

    # Fit the scaler on training data only, then reuse it for every other set.
    scaler = preprocessing.MaxAbsScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    (X_test_scaled, X_test_male_scaled, X_test_female_scaled,
     X_val_scaled, X_val_male_scaled, X_val_female_scaled) = [
        scaler.transform(features)
        for features in (X_test, X_test_male, X_test_female, X_val, X_val_male, X_val_female)
    ]

    characteristic = attribute + "resample-by-size" + str(random_state)

    # Positional data arguments shared by save_prediction and get_result.
    split_args = (
        X_train_scaled, y_train,
        X_val_scaled, y_val,
        X_test_scaled, y_test,
        X_val_male_scaled, y_val_male,
        X_test_male_scaled, y_test_male,
        X_val_female_scaled, y_val_female,
        X_test_female_scaled, y_test_female,
    )

    for classifier in ("logic_regression", "random_forest", "decision_tree", "gradiant_boosting"):
        save_prediction(classifier, characteristic, *split_args)

    # The records_* lists are module-level globals, looked up at call time.
    get_result("logic_regression", characteristic, records_lr, *split_args)
    get_result("random_forest", characteristic, records_rf, *split_args)
    get_result("decision_tree", characteristic, records_dt, *split_args)
    get_result("gradiant_boosting", characteristic, records_gbt, *split_args)

In [8]:
# One list of metric dicts per classifier; fairness_metrics appends to these
# module-level lists on every run.
records_lr, records_rf, records_dt, records_gbt = [], [], [], []

# Repeat the experiment with ten different split seeds (0..9).
for random_state in range(10):
    fairness_metrics(X, y, "GENDER", random_state)

# One row per seed, one column per metric, for each classifier.
result_lr, result_rf, result_dt, result_gbt = (
    pd.DataFrame(rows) for rows in (records_lr, records_rf, records_dt, records_gbt)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(42330,)
(42330,)
(84660, 88)
X train 84660
Y train 84660
21898 7782 14116
21898 7782 14116
21898 7707 14191
21898 7707 14191


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2211159646901643
0.2638848476879455
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19904
           1       0.45      0.04      0.08      1994

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.52     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19797   107]
 [ 1907    87]]
done in 0.664993s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2211159646901643
0.26339766107062257
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19934
           1       0.40      0.04      0.08      1964

    accuracy                           0.91     21898
   macro avg       0.66      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19809   125]
 [ 1880    84]]
done in 0.655637s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2211159646901643
0.31004486375897117
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      6915
           1       0.45      0.04      0.07       867

    accuracy                           0.89      7782
   macro avg       0.67      0.52      0.50      7782
weighted avg       0.84      0.89      0.84      7782

Confusion_matrix
[[6877   38]
 [ 836   31]]
done in 0.638693s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2211159646901643
0.30438388697817514
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6860
           1       0.49      0.04      0.07       847

    accuracy                           0.89      7707
   macro avg       0.69      0.52      0.50      7707
weighted avg       0.85      0.89      0.85      7707

Confusion_matrix
[[6829   31]
 [ 817   30]]
done in 0.816236s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2211159646901643
0.23843732395142514
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96     12989
           1       0.45      0.05      0.09      1127

    accuracy                           0.92     14116
   macro avg       0.69      0.52      0.52     14116
weighted avg       0.89      0.92      0.89     14116

Confusion_matrix
[[12920    69]
 [ 1071    56]]
done in 0.717617s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2211159646901643
0.24113842330940016
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96     13074
           1       0.36      0.05      0.09      1117

    accuracy                           0.92     14191
   macro avg       0.64      0.52      0.52     14191
weighted avg       0.88      0.92      0.89     14191

Confusion_matrix
[[12980    94]
 [ 1063    54]]
done in 0.658634s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.40      0.00      0.00      1994

    accuracy                           0.91     21898
   macro avg       0.65      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19898     6]
 [ 1990     4]]
done in 26.518023s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934

  _warn_prf(average, modifier, msg_start, len(result))


0.2279434818472589
0.3189396784660774
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6860
           1       0.00      0.00      0.00       847

    accuracy                           0.89      7707
   macro avg       0.45      0.50      0.47      7707
weighted avg       0.79      0.89      0.84      7707

Confusion_matrix
[[6859    1]
 [ 847    0]]
done in 0.830851s
0.2279434818472589
0.24534045517840228
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12989
           1       0.00      0.00      0.00      1127

    accuracy                           0.92     14116
   macro avg       0.46      0.50      0.48     14116
weighted avg       0.85      0.92      0.88     14116

Confusion_matrix
[[12989     0]
 [ 1127     0]]
done in 0.843870s


  _warn_prf(average, modifier, msg_start, len(result))


0.2279434818472589
0.2480698469431118
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13074
           1       0.00      0.00      0.00      1117

    accuracy                           0.92     14191
   macro avg       0.46      0.50      0.48     14191
weighted avg       0.85      0.92      0.88     14191

Confusion_matrix
[[13074     0]
 [ 1117     0]]
done in 0.844304s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19904
           1       0.44      0.02      0.05      1994

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19842    62]
 [ 1946    48]]
done in 46.861980s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19934
           1       0.46      0.03      0.06      1964

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19855    79]
 [ 1897    67]]
done in 46.710815s
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6915
           1       0.37      0.02    

threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.42500000000000004
threshold:0.2, J-value:0.264
threshold:0.30000000000000004, J-value:0.147
threshold:0.4, J-value:0.07
threshold:0.5, J-value:0.024
threshold:0.6000000000000001, J-value:0.003
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7127408264299537
Balanced accuracy score of test is  0.6910936972092883
True positive rate of class 1 is  0.642
True positive rate of class 2 is  0.592
Positive prediction rate of class 1 is  0.304
Positive prediction rate of class 2 is  0.24


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(42313,)
(42313,)
(84626, 88)
X train 84626
Y train 84626
21898 7665 14233
21898 7665 14233
21898 7807 14091
21898 7807 14091


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22395623753595287
0.2574016970759688
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     19980
           1       0.45      0.05      0.08      1918

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.52     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19873   107]
 [ 1831    87]]
done in 0.628504s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22395623753595287
0.25934955502292095
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     19972
           1       0.45      0.04      0.08      1926

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.52     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19867   105]
 [ 1840    86]]
done in 0.619321s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22395623753595287
0.2993283026023977
Classification report
              precision    recall  f1-score   support

           0       0.90      0.99      0.94      6836
           1       0.51      0.04      0.08       829

    accuracy                           0.89      7665
   macro avg       0.70      0.52      0.51      7665
weighted avg       0.85      0.89      0.85      7665

Confusion_matrix
[[6801   35]
 [ 792   37]]
done in 0.611958s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22395623753595287
0.3000982378133813
Classification report
              precision    recall  f1-score   support

           0       0.90      0.99      0.94      6982
           1       0.40      0.04      0.06       825

    accuracy                           0.89      7807
   macro avg       0.65      0.51      0.50      7807
weighted avg       0.84      0.89      0.85      7807

Confusion_matrix
[[6939   43]
 [ 796   29]]
done in 0.613082s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22395623753595287
0.2348226602348195
Classification report
              precision    recall  f1-score   support

           0       0.93      0.99      0.96     13144
           1       0.41      0.05      0.08      1089

    accuracy                           0.92     14233
   macro avg       0.67      0.52      0.52     14233
weighted avg       0.89      0.92      0.89     14233

Confusion_matrix
[[13072    72]
 [ 1039    50]]
done in 0.615082s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22395623753595287
0.23677309014852424
Classification report
              precision    recall  f1-score   support

           0       0.93      1.00      0.96     12990
           1       0.48      0.05      0.09      1101

    accuracy                           0.92     14091
   macro avg       0.70      0.52      0.53     14091
weighted avg       0.89      0.92      0.89     14091

Confusion_matrix
[[12928    62]
 [ 1044    57]]
done in 0.620571s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19980
           1       0.00      0.00      0.00      1918

    accuracy                           0.91     21898
   macro avg       0.46      0.50      0.48     21898
weighted avg       0.83      0.91      0.87     21898

Confusion_matrix
[[19980     0]
 [ 1918     0]]
done in 25.982166s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19972
           1       0.50      0.00      0.00      1926

    accuracy                           0.91     21898
   macro avg       0.71      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19970     2]
 [ 1924     2]]
done in 25.817233s
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6836
           1       0.00      0.00      0.00       829

    accuracy                           0.89      7665
   macro avg       0.45      0.50      0.47      7665
weighted avg       0.80      0.89      0.84      7665

Confusion_matrix
[[6834    2]
 [ 829    0]]
done in 25.500187s
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6982
           1       1.00      0.00      0.

threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.389
threshold:0.2, J-value:0.25
threshold:0.30000000000000004, J-value:0.14
threshold:0.4, J-value:0.08
threshold:0.5, J-value:0.041
threshold:0.6000000000000001, J-value:0.019000000000000003
threshold:0.7000000000000001, J-value:0.007
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6942469080222912
Balanced accuracy score of test is  0.6943471502916727
True positive rate of class 1 is  0.602
True positive rate of class 2 is  0.589
Positive prediction rate of class 1 is  0.287
Positive prediction rate of class 2 is  0.231
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.39199999999999996
threshold:0.2, J-value:0.28700000000000003
threshold:0.30000000000000004, J-value:0.09699999999999999
threshold:0.4, J-value:0.016
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(42324,)
(42324,)
(84648, 88)
X train 84648
Y train 84648
21898 7743 14155
21898 7743 14155
21898 7740 14158
21898 7740 14158


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22135260162616016
0.2620542652277454
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.46      0.04      0.07      1948

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19861    89]
 [ 1871    77]]
done in 0.632555s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22135260162616016
0.2643760709996185
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19883
           1       0.45      0.03      0.06      2015

    accuracy                           0.91     21898
   macro avg       0.68      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19801    82]
 [ 1947    68]]
done in 0.643028s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22135260162616016
0.30552468199496025
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6895
           1       0.45      0.03      0.06       848

    accuracy                           0.89      7743
   macro avg       0.67      0.51      0.50      7743
weighted avg       0.84      0.89      0.84      7743

Confusion_matrix
[[6865   30]
 [ 823   25]]
done in 0.628009s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22135260162616016
0.30985204808104294
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6860
           1       0.43      0.03      0.05       880

    accuracy                           0.89      7740
   macro avg       0.66      0.51      0.49      7740
weighted avg       0.84      0.89      0.84      7740

Confusion_matrix
[[6829   31]
 [ 857   23]]
done in 0.624664s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22135260162616016
0.23827528698482442
Classification report
              precision    recall  f1-score   support

           0       0.93      1.00      0.96     13055
           1       0.47      0.05      0.09      1100

    accuracy                           0.92     14155
   macro avg       0.70      0.52      0.52     14155
weighted avg       0.89      0.92      0.89     14155

Confusion_matrix
[[12996    59]
 [ 1048    52]]
done in 0.635705s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22135260162616016
0.23951492799847254
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13023
           1       0.47      0.04      0.07      1135

    accuracy                           0.92     14158
   macro avg       0.70      0.52      0.52     14158
weighted avg       0.89      0.92      0.89     14158

Confusion_matrix
[[12972    51]
 [ 1090    45]]
done in 0.636587s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19950
           1       0.50      0.00      0.00      1948

    accuracy                           0.91     21898
   macro avg       0.71      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19947     3]
 [ 1945     3]]
done in 25.198119s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     1988

threshold:0.30000000000000004, J-value:0.133
threshold:0.4, J-value:0.076
threshold:0.5, J-value:0.036000000000000004
threshold:0.6000000000000001, J-value:0.016
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.002
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6888008265015721
Balanced accuracy score of test is  0.696947452772416
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.373
threshold:0.2, J-value:0.22799999999999998
threshold:0.30000000000000004, J-value:0.11499999999999999
threshold:0.4, J-value:0.064
threshold:0.5, J-value:0.025
threshold:0.6000000000000001, J-value:0.013
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.004
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6867343029540137
Balanced accuracy score of test is  0.6937417174662073
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.375
threshold:0.2, J-value:0.243
th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(42349,)
(42349,)
(84698, 88)
X train 84698
Y train 84698
21898 7751 14147
21898 7751 14147
21898 7757 14141
21898 7757 14141


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22053116933271968
0.2649419071406302
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19908
           1       0.43      0.04      0.08      1990

    accuracy                           0.91     21898
   macro avg       0.67      0.52      0.52     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19795   113]
 [ 1903    87]]
done in 0.619980s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22053116933271968
0.2626009795074523
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19918
           1       0.44      0.04      0.07      1980

    accuracy                           0.91     21898
   macro avg       0.67      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19818   100]
 [ 1903    77]]
done in 0.631037s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22053116933271968
0.3047093388682754
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      6908
           1       0.44      0.04      0.07       843

    accuracy                           0.89      7751
   macro avg       0.67      0.52      0.51      7751
weighted avg       0.84      0.89      0.85      7751

Confusion_matrix
[[6867   41]
 [ 811   32]]
done in 0.607563s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22053116933271968
0.30597312232704654
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      6894
           1       0.42      0.04      0.07       863

    accuracy                           0.89      7757
   macro avg       0.66      0.52      0.51      7757
weighted avg       0.84      0.89      0.84      7757

Confusion_matrix
[[6849   45]
 [ 830   33]]
done in 0.614925s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22053116933271968
0.24315372849349806
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96     13000
           1       0.43      0.05      0.09      1147

    accuracy                           0.92     14147
   macro avg       0.68      0.52      0.52     14147
weighted avg       0.88      0.92      0.89     14147

Confusion_matrix
[[12928    72]
 [ 1092    55]]
done in 0.623975s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22053116933271968
0.23880933027107643
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13024
           1       0.44      0.04      0.07      1117

    accuracy                           0.92     14141
   macro avg       0.68      0.52      0.52     14141
weighted avg       0.89      0.92      0.89     14141

Confusion_matrix
[[12969    55]
 [ 1073    44]]
done in 0.617126s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19908
           1       0.42      0.00      0.00      1990

    accuracy                           0.91     21898
   macro avg       0.66      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19901     7]
 [ 1985     5]]
done in 25.476221s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     1991

threshold:0.30000000000000004, J-value:0.145
threshold:0.4, J-value:0.078
threshold:0.5, J-value:0.038
threshold:0.6000000000000001, J-value:0.019000000000000003
threshold:0.7000000000000001, J-value:0.005
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.001
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6896155228624536
Balanced accuracy score of test is  0.6938086558932025
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.36
threshold:0.2, J-value:0.24100000000000002
threshold:0.30000000000000004, J-value:0.131
threshold:0.4, J-value:0.077
threshold:0.5, J-value:0.032
threshold:0.6000000000000001, J-value:0.018000000000000002
threshold:0.7000000000000001, J-value:0.004
threshold:0.8, J-value:0.002
threshold:0.9, J-value:0.002
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6801370460504128
Balanced accuracy score of test is  0.6869588851003492
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.38499999999999995
threshold:0.

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(42442,)
(42442,)
(84884, 88)
X train 84884
Y train 84884
21898 7842 14056
21898 7842 14056
21898 7759 14139
21898 7759 14139


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2252408452125645
0.2557495334973233
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     20006
           1       0.41      0.04      0.08      1892

    accuracy                           0.91     21898
   macro avg       0.66      0.52      0.52     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19889   117]
 [ 1810    82]]
done in 0.638149s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2252408452125645
0.25692384737393564
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     19975
           1       0.46      0.05      0.09      1923

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.52     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19867   108]
 [ 1830    93]]
done in 0.634951s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2252408452125645
0.2922321801989826
Classification report
              precision    recall  f1-score   support

           0       0.90      0.99      0.94      7042
           1       0.34      0.04      0.07       800

    accuracy                           0.89      7842
   macro avg       0.62      0.51      0.50      7842
weighted avg       0.84      0.89      0.85      7842

Confusion_matrix
[[6985   57]
 [ 771   29]]
done in 0.609076s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2252408452125645
0.29295321579170447
Classification report
              precision    recall  f1-score   support

           0       0.90      0.99      0.94      6943
           1       0.48      0.05      0.09       816

    accuracy                           0.89      7759
   macro avg       0.69      0.52      0.52      7759
weighted avg       0.85      0.89      0.85      7759

Confusion_matrix
[[6899   44]
 [ 776   40]]
done in 0.605516s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2252408452125645
0.2353954558483184
Classification report
              precision    recall  f1-score   support

           0       0.93      1.00      0.96     12964
           1       0.47      0.05      0.09      1092

    accuracy                           0.92     14056
   macro avg       0.70      0.52      0.52     14056
weighted avg       0.89      0.92      0.89     14056

Confusion_matrix
[[12904    60]
 [ 1039    53]]
done in 0.639200s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2252408452125645
0.23715216128910158
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13032
           1       0.45      0.05      0.09      1107

    accuracy                           0.92     14139
   macro avg       0.69      0.52      0.52     14139
weighted avg       0.89      0.92      0.89     14139

Confusion_matrix
[[12968    64]
 [ 1054    53]]
done in 0.618290s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     20006
           1       0.67      0.00      0.01      1892

    accuracy                           0.91     21898
   macro avg       0.79      0.50      0.48     21898
weighted avg       0.89      0.91      0.87     21898

Confusion_matrix
[[20003     3]
 [ 1886     6]]
done in 25.901927s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19975

threshold:0.30000000000000004, J-value:0.145
threshold:0.4, J-value:0.077
threshold:0.5, J-value:0.037
threshold:0.6000000000000001, J-value:0.016
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6928875618498382
Balanced accuracy score of test is  0.6961336095496385
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.367
threshold:0.2, J-value:0.24100000000000002
threshold:0.30000000000000004, J-value:0.131
threshold:0.4, J-value:0.066
threshold:0.5, J-value:0.027999999999999997
threshold:0.6000000000000001, J-value:0.015
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6832332788980404
Balanced accuracy score of test is  0.6947641580036883
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.39199999999999996
threshold:0.2, J-value:0.249
t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(42433,)
(42433,)
(84866, 88)
X train 84866
Y train 84866
21898 7814 14084
21898 7814 14084
21898 7778 14120
21898 7778 14120


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22083003987003127
0.2653232742786787
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19911
           1       0.43      0.04      0.07      1987

    accuracy                           0.91     21898
   macro avg       0.67      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19805   106]
 [ 1908    79]]
done in 0.625565s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22083003987003127
0.2624720399619388
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19927
           1       0.42      0.03      0.06      1971

    accuracy                           0.91     21898
   macro avg       0.67      0.51      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19836    91]
 [ 1905    66]]
done in 0.610441s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22083003987003127
0.3000591030983827
Classification report
              precision    recall  f1-score   support

           0       0.90      0.99      0.94      6979
           1       0.38      0.03      0.05       835

    accuracy                           0.89      7814
   macro avg       0.64      0.51      0.50      7814
weighted avg       0.84      0.89      0.85      7814

Confusion_matrix
[[6939   40]
 [ 811   24]]
done in 0.589842s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22083003987003127
0.3060602060369958
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      6927
           1       0.43      0.03      0.06       851

    accuracy                           0.89      7778
   macro avg       0.66      0.51      0.50      7778
weighted avg       0.84      0.89      0.84      7778

Confusion_matrix
[[6892   35]
 [ 825   26]]
done in 0.604867s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22083003987003127
0.2460513510752446
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96     12932
           1       0.45      0.05      0.09      1152

    accuracy                           0.92     14084
   macro avg       0.69      0.52      0.52     14084
weighted avg       0.88      0.92      0.89     14084

Confusion_matrix
[[12866    66]
 [ 1097    55]]
done in 0.604765s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22083003987003127
0.23846150485345488
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13000
           1       0.42      0.04      0.07      1120

    accuracy                           0.92     14120
   macro avg       0.67      0.52      0.51     14120
weighted avg       0.88      0.92      0.89     14120

Confusion_matrix
[[12944    56]
 [ 1080    40]]
done in 0.604234s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19911
           1       0.30      0.00      0.00      1987

    accuracy                           0.91     21898
   macro avg       0.60      0.50      0.48     21898
weighted avg       0.85      0.91      0.87     21898

Confusion_matrix
[[19904     7]
 [ 1984     3]]
done in 25.286492s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     1992

threshold:0.30000000000000004, J-value:0.138
threshold:0.4, J-value:0.076
threshold:0.5, J-value:0.035
threshold:0.6000000000000001, J-value:0.017
threshold:0.7000000000000001, J-value:0.008
threshold:0.8, J-value:0.002
threshold:0.9, J-value:0.001
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6867376256146596
Balanced accuracy score of test is  0.6937801412497064
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.373
threshold:0.2, J-value:0.24800000000000003
threshold:0.30000000000000004, J-value:0.125
threshold:0.4, J-value:0.065
threshold:0.5, J-value:0.023
threshold:0.6000000000000001, J-value:0.014
threshold:0.7000000000000001, J-value:0.006
threshold:0.8, J-value:0.002
threshold:0.9, J-value:0.001
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6861903932498952
Balanced accuracy score of test is  0.6846494846287717
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.36799999999999994
threshold:0.2, J-value:0.2319999999999999

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(42366,)
(42366,)
(84732, 88)
X train 84732
Y train 84732
21898 7644 14254
21898 7644 14254
21898 7881 14017
21898 7881 14017


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22323307696418437
0.2590295274060498
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19944
           1       0.43      0.04      0.08      1954

    accuracy                           0.91     21898
   macro avg       0.67      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19837   107]
 [ 1872    82]]
done in 0.611741s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22323307696418437
0.25977448995017227
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19955
           1       0.44      0.04      0.08      1943

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.52     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19848   107]
 [ 1859    84]]
done in 0.599841s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22323307696418437
0.30464190856173173
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      6814
           1       0.35      0.03      0.05       830

    accuracy                           0.89      7644
   macro avg       0.62      0.51      0.49      7644
weighted avg       0.83      0.89      0.84      7644

Confusion_matrix
[[6775   39]
 [ 809   21]]
done in 0.583402s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22323307696418437
0.29917661289027725
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      7022
           1       0.45      0.04      0.07       859

    accuracy                           0.89      7881
   macro avg       0.67      0.52      0.51      7881
weighted avg       0.85      0.89      0.85      7881

Confusion_matrix
[[6979   43]
 [ 824   35]]
done in 0.587618s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22323307696418437
0.2345689520199102
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96     13130
           1       0.47      0.05      0.10      1124

    accuracy                           0.92     14254
   macro avg       0.70      0.52      0.53     14254
weighted avg       0.89      0.92      0.89     14254

Confusion_matrix
[[13062    68]
 [ 1063    61]]
done in 0.597465s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22323307696418437
0.2376208100692443
Classification report
              precision    recall  f1-score   support

           0       0.93      1.00      0.96     12933
           1       0.43      0.05      0.08      1084

    accuracy                           0.92     14017
   macro avg       0.68      0.52      0.52     14017
weighted avg       0.89      0.92      0.89     14017

Confusion_matrix
[[12869    64]
 [ 1035    49]]
done in 0.603894s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19944
           1       0.45      0.00      0.01      1954

    accuracy                           0.91     21898
   macro avg       0.68      0.50      0.48     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19938     6]
 [ 1949     5]]
done in 25.424137s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19955

threshold:0.30000000000000004, J-value:0.14
threshold:0.4, J-value:0.078
threshold:0.5, J-value:0.037000000000000005
threshold:0.6000000000000001, J-value:0.015000000000000001
threshold:0.7000000000000001, J-value:0.003
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.696010138520919
Balanced accuracy score of test is  0.6978586534060875
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.353
threshold:0.2, J-value:0.22599999999999998
threshold:0.30000000000000004, J-value:0.118
threshold:0.4, J-value:0.064
threshold:0.5, J-value:0.019000000000000003
threshold:0.6000000000000001, J-value:0.009000000000000001
threshold:0.7000000000000001, J-value:0.002
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6762533550698244
Balanced accuracy score of test is  0.7058718499550225
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.41500

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(42398,)
(42398,)
(84796, 88)
X train 84796
Y train 84796
21898 7795 14103
21898 7795 14103
21898 7762 14136
21898 7762 14136


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2191308581807407
0.2650543524397864
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19893
           1       0.41      0.04      0.07      2005

    accuracy                           0.91     21898
   macro avg       0.66      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19775   118]
 [ 1924    81]]
done in 0.612661s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2191308581807407
0.2648123231051037
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907
           1       0.48      0.04      0.08      1991

    accuracy                           0.91     21898
   macro avg       0.70      0.52      0.51     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19816    91]
 [ 1907    84]]
done in 0.619513s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2191308581807407
0.3097026877692941
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      6922
           1       0.33      0.03      0.05       873

    accuracy                           0.88      7795
   macro avg       0.61      0.51      0.49      7795
weighted avg       0.83      0.88      0.84      7795

Confusion_matrix
[[6875   47]
 [ 850   23]]
done in 0.634855s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2191308581807407
0.31063635087435204
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6882
           1       0.49      0.04      0.07       880

    accuracy                           0.89      7762
   macro avg       0.69      0.52      0.50      7762
weighted avg       0.84      0.89      0.84      7762

Confusion_matrix
[[6850   32]
 [ 849   31]]
done in 0.633575s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2191308581807407
0.2403763567017511
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96     12971
           1       0.45      0.05      0.09      1132

    accuracy                           0.92     14103
   macro avg       0.69      0.52      0.52     14103
weighted avg       0.89      0.92      0.89     14103

Confusion_matrix
[[12900    71]
 [ 1074    58]]
done in 0.633599s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2191308581807407
0.23965060100939736
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13025
           1       0.47      0.05      0.09      1111

    accuracy                           0.92     14136
   macro avg       0.70      0.52      0.52     14136
weighted avg       0.89      0.92      0.89     14136

Confusion_matrix
[[12966    59]
 [ 1058    53]]
done in 0.643224s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19893
           1       0.25      0.00      0.00      2005

    accuracy                           0.91     21898
   macro avg       0.58      0.50      0.48     21898
weighted avg       0.85      0.91      0.86     21898

Confusion_matrix
[[19890     3]
 [ 2004     1]]
done in 27.558079s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19907

  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6882
           1       1.00      0.00      0.00       880

    accuracy                           0.89      7762
   macro avg       0.94      0.50      0.47      7762
weighted avg       0.90      0.89      0.83      7762

Confusion_matrix
[[6882    0]
 [ 879    1]]
done in 25.972691s
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12971
           1       0.60      0.01      0.01      1132

    accuracy                           0.92     14103
   macro avg       0.76      0.50      0.48     14103
weighted avg       0.89      0.92      0.88     14103

Confusion_matrix
[[12967     4]
 [ 1126     6]]
done in 25.320403s
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13025
           1       0.33      0.00      0.

threshold:0.4, J-value:0.025
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.7007452464199677
Balanced accuracy score of test is  0.6916638007114801
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.366
threshold:0.2, J-value:0.266
threshold:0.30000000000000004, J-value:0.092
threshold:0.4, J-value:0.017
threshold:0.5, J-value:0.0
threshold:0.6000000000000001, J-value:0.0
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6829551874545128
Balanced accuracy score of test is  0.6895068822488177
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.425
threshold:0.2, J-value:0.29300000000000004
threshold:0.30000000000000004, J-value:0.095
threshold:0.4, J-value:0.029
threshold:0.5, J-

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(42494,)
(42494,)
(84988, 88)
X train 84988
Y train 84988
21898 7804 14094
21898 7804 14094
21898 7849 14049
21898 7849 14049


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22152156177631513
0.26436219660532567
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19931
           1       0.45      0.05      0.08      1967

    accuracy                           0.91     21898
   macro avg       0.68      0.52      0.52     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19821   110]
 [ 1878    89]]
done in 0.642579s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22152156177631513
0.2609633191398929
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19956
           1       0.46      0.05      0.08      1942

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.52     21898
weighted avg       0.87      0.91      0.88     21898

Confusion_matrix
[[19851   105]
 [ 1853    89]]
done in 0.636858s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22152156177631513
0.31133357600357214
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      6937
           1       0.46      0.05      0.08       867

    accuracy                           0.89      7804
   macro avg       0.68      0.52      0.51      7804
weighted avg       0.84      0.89      0.85      7804

Confusion_matrix
[[6890   47]
 [ 827   40]]
done in 0.616670s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22152156177631513
0.3002687222736268
Classification report
              precision    recall  f1-score   support

           0       0.90      0.99      0.94      7008
           1       0.47      0.04      0.07       841

    accuracy                           0.89      7849
   macro avg       0.68      0.52      0.51      7849
weighted avg       0.85      0.89      0.85      7849

Confusion_matrix
[[6969   39]
 [ 807   34]]
done in 0.612222s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22152156177631513
0.2383536365922764
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12994
           1       0.44      0.04      0.08      1100

    accuracy                           0.92     14094
   macro avg       0.68      0.52      0.52     14094
weighted avg       0.89      0.92      0.89     14094

Confusion_matrix
[[12931    63]
 [ 1051    49]]
done in 0.627790s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.22152156177631513
0.2390038836500589
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.96     12948
           1       0.45      0.05      0.09      1101

    accuracy                           0.92     14049
   macro avg       0.69      0.52      0.52     14049
weighted avg       0.89      0.92      0.89     14049

Confusion_matrix
[[12882    66]
 [ 1046    55]]
done in 0.643748s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.17      0.00      0.00      1967

    accuracy                           0.91     21898
   macro avg       0.54      0.50      0.48     21898
weighted avg       0.84      0.91      0.87     21898

Confusion_matrix
[[19921    10]
 [ 1965     2]]
done in 25.310008s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956

  _warn_prf(average, modifier, msg_start, len(result))


0.22720929335372964
0.26937285642468783
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
           1       0.33      0.00      0.00      1942

    accuracy                           0.91     21898
   macro avg       0.62      0.50      0.48     21898
weighted avg       0.86      0.91      0.87     21898

Confusion_matrix
[[19954     2]
 [ 1941     1]]
done in 0.863272s
0.22720929335372964
0.3186263890614792
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6937
           1       0.00      0.00      0.00       867

    accuracy                           0.89      7804
   macro avg       0.44      0.50      0.47      7804
weighted avg       0.79      0.89      0.84      7804

Confusion_matrix
[[6937    0]
 [ 867    0]]
done in 0.859683s


  _warn_prf(average, modifier, msg_start, len(result))


0.22720929335372964
0.30860165613685986
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      7008
           1       0.33      0.00      0.00       841

    accuracy                           0.89      7849
   macro avg       0.61      0.50      0.47      7849
weighted avg       0.83      0.89      0.84      7849

Confusion_matrix
[[7006    2]
 [ 840    1]]
done in 0.847022s
0.22720929335372964
0.24784858230659682
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12994
           1       0.00      0.00      0.00      1100

    accuracy                           0.92     14094
   macro avg       0.46      0.50      0.48     14094
weighted avg       0.85      0.92      0.88     14094

Confusion_matrix
[[12994     0]
 [ 1100     0]]
done in 0.867600s


  _warn_prf(average, modifier, msg_start, len(result))


0.22720929335372964
0.2474562183051891
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12948
           1       0.00      0.00      0.00      1101

    accuracy                           0.92     14049
   macro avg       0.46      0.50      0.48     14049
weighted avg       0.85      0.92      0.88     14049

Confusion_matrix
[[12948     0]
 [ 1101     0]]
done in 0.864146s


  _warn_prf(average, modifier, msg_start, len(result))


Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19931
           1       0.48      0.03      0.05      1967

    accuracy                           0.91     21898
   macro avg       0.70      0.51      0.50     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19871    60]
 [ 1911    56]]
done in 47.201855s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19956
           1       0.51      0.03      0.06      1942

    accuracy                           0.91     21898
   macro avg       0.71      0.51      0.50     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19902    54]
 [ 1885    57]]
done in 47.294504s
Classification report
              precision    recall  f1-score   support

           0       0.89      1.00      0.94      6937
           1       0.47      0.03    

threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.385
threshold:0.2, J-value:0.245
threshold:0.30000000000000004, J-value:0.127
threshold:0.4, J-value:0.056
threshold:0.5, J-value:0.026000000000000002
threshold:0.6000000000000001, J-value:0.006
threshold:0.7000000000000001, J-value:0.0
threshold:0.8, J-value:0.0
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6928711853023073
Balanced accuracy score of test is  0.6998048787057684
True positive rate of class 1 is  0.653
True positive rate of class 2 is  0.613
Positive prediction rate of class 1 is  0.302
Positive prediction rate of class 2 is  0.245


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train ['Class'] = y_train


(42377,)
(42377,)
(84754, 88)
X train 84754
Y train 84754
21898 7696 14202
21898 7696 14202
21898 7840 14058
21898 7840 14058


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2215993150756223
0.2554574796305997
Classification report
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     19973
           1       0.47      0.05      0.08      1925

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.52     21898
weighted avg       0.88      0.91      0.88     21898

Confusion_matrix
[[19872   101]
 [ 1836    89]]
done in 0.599941s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2215993150756223
0.26678065921956223
Classification report
              precision    recall  f1-score   support

           0       0.91      0.99      0.95     19866
           1       0.46      0.04      0.08      2032

    accuracy                           0.91     21898
   macro avg       0.69      0.52      0.52     21898
weighted avg       0.87      0.91      0.87     21898

Confusion_matrix
[[19761   105]
 [ 1942    90]]
done in 0.596011s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2215993150756223
0.28501972051063745
Classification report
              precision    recall  f1-score   support

           0       0.90      0.99      0.95      6911
           1       0.46      0.04      0.07       785

    accuracy                           0.90      7696
   macro avg       0.68      0.52      0.51      7696
weighted avg       0.86      0.90      0.86      7696

Confusion_matrix
[[6874   37]
 [ 754   31]]
done in 0.579568s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2215993150756223
0.30879214166983937
Classification report
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      6966
           1       0.41      0.04      0.07       874

    accuracy                           0.89      7840
   macro avg       0.65      0.52      0.51      7840
weighted avg       0.84      0.89      0.84      7840

Confusion_matrix
[[6917   49]
 [ 840   34]]
done in 0.595944s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2215993150756223
0.23943783410090175
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     13062
           1       0.48      0.05      0.09      1140

    accuracy                           0.92     14202
   macro avg       0.70      0.52      0.52     14202
weighted avg       0.89      0.92      0.89     14202

Confusion_matrix
[[12998    64]
 [ 1082    58]]
done in 0.591454s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.2215993150756223
0.243351293562273
Classification report
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     12900
           1       0.50      0.05      0.09      1158

    accuracy                           0.92     14058
   macro avg       0.71      0.52      0.52     14058
weighted avg       0.89      0.92      0.89     14058

Confusion_matrix
[[12844    56]
 [ 1102    56]]
done in 0.588954s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19973
           1       0.57      0.00      0.00      1925

    accuracy                           0.91     21898
   macro avg       0.74      0.50      0.48     21898
weighted avg       0.88      0.91      0.87     21898

Confusion_matrix
[[19970     3]
 [ 1921     4]]
done in 25.609205s
Classification report
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     19866
 

threshold:0.30000000000000004, J-value:0.155
threshold:0.4, J-value:0.08600000000000001
threshold:0.5, J-value:0.041
threshold:0.6000000000000001, J-value:0.016999999999999998
threshold:0.7000000000000001, J-value:0.006
threshold:0.8, J-value:0.002
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6922920358067808
Balanced accuracy score of test is  0.6967449381327334
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.384
threshold:0.2, J-value:0.246
threshold:0.30000000000000004, J-value:0.152
threshold:0.4, J-value:0.089
threshold:0.5, J-value:0.034
threshold:0.6000000000000001, J-value:0.015000000000000001
threshold:0.7000000000000001, J-value:0.005
threshold:0.8, J-value:0.001
threshold:0.9, J-value:0.0
Optimal threshold by J value is  0.1
Balanced accuracy score of val is  0.6919907062220572
Balanced accuracy score of test is  0.6905403887203685
threshold:0.0, J-value:0.0
threshold:0.1, J-value:0.37999999999999995
threshold:0.2, 

In [9]:
def add_mean_sd(records, result_table, overall_records, type):
    """Append mean / standard-deviation summary rows for one classifier.

    Parameters
    ----------
    records : list
        Rows (dicts) collected so far. Mutated in place: first a row holding
        the ``.mean()`` of every metric column, then a row holding the
        ``.std()`` of every metric column, is appended.
    result_table
        Table indexed by column name (presumably a pandas DataFrame) that
        must contain every column named in ``all_metrics`` below.
    overall_records : list
        Cross-classifier summary rows. Mutated in place: one dict with the
        ``'type'`` label plus the mean of each headline metric is appended.
    type
        Label stored under the ``'type'`` key of the summary row. The name
        shadows the builtin but is kept because it is part of the signature.

    Returns
    -------
    tuple
        ``(pd.DataFrame(records), overall_records)``. The frame is built
        after the two aggregate rows were appended; the list is the very
        object that was passed in.
    """
    # Column order here fixes the key order of the appended rows.
    all_metrics = (
        'auroc',
        'overall threshold',
        'male threshold',
        'female threshold',
        'overall ba validation',
        'overall ba test',
        'male ba validation',
        'male ba test',
        'female ba validation',
        'female ba test',
        'overall precision',
        'overall recall',
        'overall tpr',
        'overall tnr',
        'overall pd',
        'male precision',
        'male recall',
        'male tpr',
        'male tnr',
        'male pd',
        'female precision',
        'female recall',
        'female tpr',
        'female tnr',
        'female pd',
        'eod',
        'di',
    )
    # Subset reported in the cross-classifier comparison table.
    headline_metrics = (
        'auroc',
        'overall threshold',
        'male threshold',
        'female threshold',
        'overall ba test',
        'male ba test',
        'female ba test',
        'overall tpr',
        'overall pd',
        'male tpr',
        'male pd',
        'female tpr',
        'female pd',
        'eod',
        'di',
    )

    mean_row = {}
    for column in all_metrics:
        mean_row[column] = result_table[column].mean()
    records.append(mean_row)

    spread_row = {}
    for column in all_metrics:
        spread_row[column] = result_table[column].std()
    records.append(spread_row)

    summary_row = {'type': type}
    for column in headline_metrics:
        summary_row[column] = result_table[column].mean()
    overall_records.append(summary_row)

    return pd.DataFrame(records), overall_records

In [10]:
# Summarise every classifier: add_mean_sd appends a mean row and a std row to
# the classifier's records and one comparison row to the shared summary list.
# NOTE: this cell is not idempotent. result_* and records_* are both inputs
# and outputs, so running it a second time aggregates over tables that already
# contain the aggregate rows. Re-run the cells that build them first.
overall_table = []
result_lr, overall_records = add_mean_sd(
    records_lr, result_lr, overall_table, 'lr')
result_rf, overall_records = add_mean_sd(
    records_rf, result_rf, overall_records, 'rf')
result_dt, overall_records = add_mean_sd(
    records_dt, result_dt, overall_records, 'dt')
result_gbt, overall_records = add_mean_sd(
    records_gbt, result_gbt, overall_records, 'gbt')

# One CSV per classifier (per-run rows followed by the mean and std rows).
result_path = '/Users/lifuchen/Desktop/research/resample_data/'
per_model_outputs = {
    'gender-lr-resample-size-result.csv': result_lr,
    'gender-rf-resample-size-result.csv': result_rf,
    'gender-dt-resample-size-result.csv': result_dt,
    'gender-gbt-resample-size-result.csv': result_gbt,
}
for file_name, summary in per_model_outputs.items():
    summary.to_csv(path.join(result_path, file_name), index=False)

# overall_records is the same list object as overall_table: add_mean_sd
# appends to the list it is given and returns it.
overall_result = pd.DataFrame(overall_records)
result_path = '/Users/lifuchen/Desktop/research/resample_result/'
overall_result.to_csv(
    path.join(result_path, 'gender-resample-size.csv'), index=False)
