In [1]:
# Importing Required Python Packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns',None)

In [2]:
# Read the reduced-feature matrix, the full-feature matrix and the labels of the training split.
X_train_red, X_train, y_train = (
    pd.read_csv(csv_path)
    for csv_path in ('X_train_final.csv', 'X_train_full_final.csv', 'y_train.final.csv')
)

In [3]:
# Read the reduced-feature matrix, the full-feature matrix and the labels of the test split.
X_test_red, X_test, y_test = (
    pd.read_csv(csv_path)
    for csv_path in ('X_test_final.csv', 'X_test_full_final.csv', 'y_test.final.csv')
)

### As per the dataset metadata, the column 'duration' might leak information: if duration = 0 then y = 0 almost surely, and after the end of the call y is more or less known. So, in order to have a realistic predictive model, the column 'duration' should be discarded.

In [4]:
# Preview the first five rows of the reduced-feature training set.
X_train_red.head(5)

Unnamed: 0,pdays,campaign,previous,cons_price_idx,emp_var_rate,cons_conf_idx,nr_employed,age,euribor3m,education_university_degree,education_illiterate,education_basic_6y,month_mar,job_blue-collar,loan_yes,job_housemaid,month_aug,loan_no,housing_yes,poutcome_success,poutcome_nonexistent,month_sep,job_student,default_no,job_admin,housing_no,job_retired,month_jul,contact_telephone,month_dec,marital_single,month_oct,job_services,marital_married,month_may,day_of_week_thu
0,0.196584,0.522981,-0.350127,-0.648967,-0.114858,-0.322269,0.39899,0.863739,0.288964,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
1,0.196584,-0.203688,1.653813,-0.648967,-0.114858,-0.322269,0.39899,-0.289722,0.288964,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0
2,0.196584,-0.567023,-0.350127,1.103451,-1.133161,0.045048,-2.420139,3.651268,-1.583296,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,0
3,0.196584,-0.203688,-0.350127,0.72189,0.648868,0.887717,0.332723,-0.385843,0.713535,1,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0
4,0.196584,-0.203688,-0.350127,-1.058152,-1.896888,-0.062987,-1.252175,1.824956,-1.357472,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0


#### Deleting column duration from the Feature space.

In [5]:
#### Deleting the 'duration' column from all the training and test datasets.
# errors='ignore' keeps this cell re-runnable: the CSV files are overwritten without
# 'duration' by the saving cell below, so on any later run the column is already
# absent and `del frame['duration']` would raise KeyError.
X_train = X_train.drop(columns=['duration'], errors='ignore')
X_train_red = X_train_red.drop(columns=['duration'], errors='ignore')
X_test = X_test.drop(columns=['duration'], errors='ignore')
X_test_red = X_test_red.drop(columns=['duration'], errors='ignore')

#### Saving the Resulting datasets back as CSV files

In [5]:
# Write each feature matrix back to the CSV file it was loaded from (row index not stored).
X_train.to_csv(path_or_buf='X_train_full_final.csv', index=False)
X_train_red.to_csv(path_or_buf='X_train_final.csv', index=False)
X_test.to_csv(path_or_buf='X_test_full_final.csv', index=False)
X_test_red.to_csv(path_or_buf='X_test_final.csv', index=False)

## Baseline Model: Dummy Classifier with Default Parameters 

In [5]:
# Importing Dummy classifier from Scikit Learn
from sklearn.dummy import DummyClassifier

In [6]:
# Importing the Sklearn's roc_auc_score module
from sklearn.metrics import roc_auc_score

In [7]:
# Instantiating the Dummy classifier object.
# The strategy is pinned to 'stratified' (random guesses that follow the class
# distribution of the training labels). The accuracy recorded below (~0.80) is lower
# than the majority-class share (~0.887), which matches this strategy; newer
# scikit-learn releases default to 'prior', which would give different numbers.
bm = DummyClassifier(strategy='stratified', random_state=42)

#### Fitting dummy classifier (baseline model) to both full_feature training & test  sets and computing resulting roc_auc_scores.

In [8]:
# Train the baseline model on the full-feature training matrix and its labels.
bm.fit(X=X_train, y=y_train)

DummyClassifier(random_state=42)

In [9]:
# Baseline accuracy on the full-feature training set.
bm.score(X=X_train, y=y_train)

0.8002427921092564

In [10]:
# Baseline accuracy on the full-feature test set.
bm.score(X=X_test, y=y_test)

0.803714493809177

In [11]:
# Class-probability predictions of the baseline for the full-feature training set
# (one column per class; column 1 is used below as the probability of y=1).
y_pred_train = bm.predict_proba(X=X_train)

In [12]:
# Report the baseline ROC AUC on the full-feature training set.
print(
    'The ROC AUC for the full_feature Training set is:',
    roc_auc_score(y_true=y_train, y_score=y_pred_train[:, 1]),
)

The ROC AUC for the full_feature Training set is: 0.5013692527998519


In [13]:
# Class-probability predictions of the baseline for the full-feature test set
# (one column per class; column 1 is used below as the probability of y=1).
y_pred = bm.predict_proba(X=X_test)

In [14]:
# Report the baseline ROC AUC on the full-feature test set.
print(
    'The ROC AUC for the full_feature test set is:',
    roc_auc_score(y_true=y_test, y_score=y_pred[:, 1]),
)

The ROC AUC for the full_feature test set is: 0.5050864722392566


#### Fitting dummy classifier (baseline model) to both Reduced_feature training & test  sets and computing resulting roc_auc_scores.

In [15]:
# Re-train the same baseline object on the reduced-feature training matrix.
bm.fit(X=X_train_red, y=y_train)

DummyClassifier(random_state=42)

In [16]:
# Baseline accuracy on the reduced-feature training set.
bm.score(X=X_train_red, y=y_train)

0.8002427921092564

In [17]:
# Baseline accuracy on the reduced-feature test set.
bm.score(X=X_test_red, y=y_test)

0.803714493809177

In [18]:
# Class-probability predictions of the baseline for the reduced-feature training set
# (one column per class; column 1 is used below as the probability of y=1).
y_pred_red_train = bm.predict_proba(X=X_train_red)

In [19]:
# Report the baseline ROC AUC on the reduced-feature training set.
print(
    'The ROC AUC for the reduced feature training set is:',
    roc_auc_score(y_true=y_train, y_score=y_pred_red_train[:, 1]),
)

The ROC AUC for the reduced feature training set is: 0.5013692527998519


In [20]:
# Class-probability predictions of the baseline for the reduced-feature test set
# (one column per class; column 1 is used below as the probability of y=1).
y_pred_red = bm.predict_proba(X=X_test_red)

In [21]:
# Report the baseline ROC AUC on the reduced-feature test set.
print(
    'The ROC AUC for the reduced_feature test set is:',
    roc_auc_score(y_true=y_test, y_score=y_pred_red[:, 1]),
)

The ROC AUC for the reduced_feature test set is: 0.5050864722392566


### Observations:
### 1) The roc_auc score for the reduced feature test set is equal to that of the full feature test set, which raises the suspicion that the full feature set contains noise features. (Caveat: a dummy classifier ignores the input features, so equal scores are expected here; the suspicion has to be confirmed with a real model.)
### 2) The training set roc_auc scores for both the full feature and reduced feature training sets are lower than those of the corresponding test sets, probably due to underfitting; note, however, that all of these scores are close to 0.5 (chance level), so the gap may simply be random variation.
### 3) The test set accuracies for both the full feature and reduced feature test sets are less than 88.734%, the share of class 0 (the majority class) in the whole dataset, which could be achieved by labeling every test set instance as y = 0. Thus the default dummy classifier is certainly underfitting the training set.

## Model_1: Logistic Regression with Tuned Hyperparameters using Optuna.

In [23]:
# Importing  hyperparamater tuning optimizer optuna
import optuna

In [24]:
# Importing required Libraries
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

In [25]:
# Defining the appropriate objective function for the Logistic regression classifier
def objective_wrappper_ls(X_tr, y_tr, cls=None, cv_strat=None, class_weights=None):
    '''
    Builds an Optuna objective that tunes Logistic Regression parameters on the
    given training set X_tr, y_tr using the cv_strat cross-validation object.

    Parameters:
        X_tr, y_tr: training features and labels, forwarded to cross_val_score.
        cls: estimator to tune; its parameters are overwritten in place through
            set_params on every trial. The default None is only a placeholder:
            the returned objective cannot run without an estimator.
        cv_strat: object passed as `cv` to cross_val_score.
        class_weights: optional sequence of candidate `class_weight` values.
            When None (default), the notebook-level list `cl_weight` is looked
            up at trial time, exactly as before this parameter existed.

    Returns:
        objective(trial): samples C, l1_ratio, class_weight and penalty from the
        trial, applies them to cls and returns the mean cross-validated roc_auc.
    '''

    def objective(trial):
        # Resolved lazily so that `cl_weight` may be defined in a later cell than
        # this function, as long as it exists before the study is run.
        weight_choices = cl_weight if class_weights is None else class_weights

        params = {
            'C': trial.suggest_loguniform('C', 1e-5, 1e2),
            'l1_ratio': trial.suggest_uniform('l1_ratio', 0, 1),
            'class_weight': trial.suggest_categorical('class_weight', weight_choices),
            'penalty': trial.suggest_categorical('penalty', ['none', 'elasticnet'])
        }

        cls.set_params(**params)  # Initializing the model with the parameters

        # Score of a trial = mean roc_auc over the folds produced by cv_strat.
        return np.mean(cross_val_score(cls, X_tr, y_tr, cv=cv_strat, n_jobs=5, scoring='roc_auc'))
    return objective

In [26]:
# Defining the evaluation function for study's best parameters
def train_test_roc_auc(X_tr, y_tr, cls, obj_func, cv_strat, n_trials=100):
    '''Runs an Optuna study that maximises the objective built by obj_func.

    obj_func is called as obj_func(X_tr, y_tr, cls, cv_strat) and must return
    the objective callable; the study is then optimised for n_trials trials.

    Returns a tuple (best objective value of the study, best parameters of the study).
    '''
    tuning_study = optuna.create_study(direction='maximize')
    trial_objective = obj_func(X_tr, y_tr, cls, cv_strat)
    tuning_study.optimize(trial_objective, n_trials=n_trials)
    return (tuning_study.best_value, tuning_study.best_params)


In [27]:
# Instantiating the logistic Regression classifier.
# LogisticRegression is not imported by any earlier cell of this notebook, so it is
# imported here; without it this cell raises NameError on a fresh kernel.
from sklearn.linear_model import LogisticRegression

lr_s = LogisticRegression(random_state=42, solver='saga', n_jobs=5)

In [28]:
# Instantiating the Stratified K fold object (10 folds).
# No shuffling is requested, so the folds are deterministic and a random_state would
# have no effect; current scikit-learn releases raise ValueError when random_state
# is given without shuffle=True, hence it is omitted.
cv_strat = StratifiedKFold(n_splits=10)

In [29]:
# Defining the candidate class weights: no weighting, 'balanced', and explicit
# weights for class 1 (9.0, 10, 11, 12) against a weight of 1.0 for class 0.
cl_weight = [None, 'balanced'] + [{0: 1.0, 1: positive_weight} for positive_weight in (9.0, 10, 11, 12)]

#### Computing the best hyperparameters for the  logistic Regression using full feature Training Set.

In [30]:
# Run a 200-trial study on the full-feature training set and keep its best score and parameters.
best_study_score, best_study_params = train_test_roc_auc(
    X_tr=X_train,
    y_tr=y_train,
    cls=lr_s,
    obj_func=objective_wrappper_ls,
    cv_strat=cv_strat,
    n_trials=200,
)

[I 2020-10-01 15:23:17,213] A new study created in memory with name: no-name-a10d68ac-400a-4a24-8074-ec3ffe9d6357
[I 2020-10-01 15:23:19,666] Trial 0 finished with value: 0.7492845854664483 and parameters: {'C': 0.00013154292928952658, 'l1_ratio': 0.6503459972495506, 'class_weight': 'balanced', 'penalty': 'elasticnet'}. Best is trial 0 with value: 0.7492845854664483.
[I 2020-10-01 15:23:24,371] Trial 1 finished with value: 0.7901937777169737 and parameters: {'C': 1.8195015053814443, 'l1_ratio': 0.817875878754326, 'class_weight': {0: 1.0, 1: 11}, 'penalty': 'none'}. Best is trial 1 with value: 0.7901937777169737.
[I 2020-10-01 15:23:25,609] Trial 2 finished with value: 0.7492804583675193 and parameters: {'C': 0.0006441255577934125, 'l1_ratio': 0.5169023380178759, 'class_weight': None, 'penalty': 'elasticnet'}. Best is trial 1 with value: 0.7901937777169737.
[I 2020-10-01 15:23:27,150] Trial 3 finished with value: 0.7899304064875101 and parameters: {'C': 0.012336671299333676, 'l1_ratio':

[I 2020-10-01 15:26:53,998] Trial 62 finished with value: 0.7909338809823218 and parameters: {'C': 0.061189753086054045, 'l1_ratio': 0.6478009067431644, 'class_weight': {0: 1.0, 1: 9.0}, 'penalty': 'elasticnet'}. Best is trial 52 with value: 0.7910201646331781.
[I 2020-10-01 15:26:55,416] Trial 63 finished with value: 0.790358477590177 and parameters: {'C': 0.014701741648850828, 'l1_ratio': 0.7220763555013198, 'class_weight': {0: 1.0, 1: 9.0}, 'penalty': 'elasticnet'}. Best is trial 52 with value: 0.7910201646331781.
[I 2020-10-01 15:26:59,700] Trial 64 finished with value: 0.7908161458557955 and parameters: {'C': 0.19687961597331166, 'l1_ratio': 0.9005445410467796, 'class_weight': {0: 1.0, 1: 9.0}, 'penalty': 'elasticnet'}. Best is trial 52 with value: 0.7910201646331781.
[I 2020-10-01 15:27:01,885] Trial 65 finished with value: 0.7908892080433939 and parameters: {'C': 0.022626674598518217, 'l1_ratio': 0.969005652801274, 'class_weight': {0: 1.0, 1: 9.0}, 'penalty': 'elasticnet'}. Best

[I 2020-10-01 15:30:31,935] Trial 125 finished with value: 0.790819626075943 and parameters: {'C': 0.23298480348450012, 'l1_ratio': 0.8885439551166441, 'class_weight': 'balanced', 'penalty': 'elasticnet'}. Best is trial 121 with value: 0.7910228732831464.
[I 2020-10-01 15:30:38,304] Trial 126 finished with value: 0.7907646480460196 and parameters: {'C': 0.4378841024467422, 'l1_ratio': 0.8363364036142433, 'class_weight': 'balanced', 'penalty': 'elasticnet'}. Best is trial 121 with value: 0.7910228732831464.
[I 2020-10-01 15:30:43,730] Trial 127 finished with value: 0.790980855996994 and parameters: {'C': 0.10947949729509962, 'l1_ratio': 0.779249835507399, 'class_weight': 'balanced', 'penalty': 'elasticnet'}. Best is trial 121 with value: 0.7910228732831464.
[I 2020-10-01 15:30:48,935] Trial 128 finished with value: 0.7903863197884661 and parameters: {'C': 0.02283055526043597, 'l1_ratio': 0.980711683666912, 'class_weight': 'balanced', 'penalty': 'none'}. Best is trial 121 with value: 0.7

[I 2020-10-01 15:34:02,198] Trial 187 finished with value: 0.7899327752986959 and parameters: {'C': 0.025662137642552922, 'l1_ratio': 0.9326164941834261, 'class_weight': 'balanced', 'penalty': 'elasticnet'}. Best is trial 171 with value: 0.7910375404858618.
[I 2020-10-01 15:34:04,948] Trial 188 finished with value: 0.7906247499174992 and parameters: {'C': 0.0346269451491401, 'l1_ratio': 0.8490190984345148, 'class_weight': 'balanced', 'penalty': 'elasticnet'}. Best is trial 171 with value: 0.7910375404858618.
[I 2020-10-01 15:34:07,463] Trial 189 finished with value: 0.7910402726357865 and parameters: {'C': 0.06306939745135184, 'l1_ratio': 0.9023268474526251, 'class_weight': 'balanced', 'penalty': 'elasticnet'}. Best is trial 189 with value: 0.7910402726357865.
[I 2020-10-01 15:34:10,185] Trial 190 finished with value: 0.7910411160256406 and parameters: {'C': 0.05322734576627777, 'l1_ratio': 0.9047278006913283, 'class_weight': 'balanced', 'penalty': 'elasticnet'}. Best is trial 190 with

In [31]:
# Show the best cross-validated roc_auc found by the study.
print('The best roc_auc_score for the study is: ', str(best_study_score))

The best roc_auc_score for the study is:  0.7910644693741309


In [32]:
# Show the best hyperparameters found by the study.
# The message and the parameters are passed as two arguments; wrapping them in an
# extra pair of parentheses printed the repr of a tuple instead of a readable line.
print('The best study parameters for the classifier are: ', best_study_params)

('The best study parameters for the classifier are: ', {'C': 0.05118333113785145, 'l1_ratio': 0.9748195107278903, 'class_weight': 'balanced', 'penalty': 'elasticnet'})


#### Computing the  full feature roc_auc score for the test data using the best study Parameters

In [33]:
# Obtaining the best full feature LR model by setting best study parameters.
# set_params returns the estimator it was called on, so assigning its result directly
# would make lr_f just another name for lr_s, and any later tuning or fitting through
# lr_s would silently change lr_f. Cloning first gives lr_f its own estimator object.
from sklearn.base import clone

lr_f = clone(lr_s).set_params(**best_study_params)

In [34]:
# Train the tuned full-feature Logistic Regression model on the full-feature training set.
lr_f.fit(X=X_train, y=y_train)

LogisticRegression(C=0.05118333113785145, class_weight='balanced',
                   l1_ratio=0.9748195107278903, n_jobs=5, penalty='elasticnet',
                   random_state=42, solver='saga')

In [35]:
# Defining the function to calculate the roc_auc score for the feature sets
def cal_roc_auc(X, y, cls, f_set, t_set, model_name):
    '''Prints the roc_auc score of a fitted classifier on the given data.

    X, y: features and true labels to evaluate on.
    cls: fitted classifier exposing predict_proba; column 1 of its output is
        used as the score passed to roc_auc_score.
    f_set: String: feature set label, e.g. 'full feature', 'Reduced feature'
    t_set: String: data split label, e.g. 'training', 'test'
    model_name: String: name of the model shown in the message

    Nothing is returned; the score is only printed.
    '''
    class_one_proba = cls.predict_proba(X)[:, 1]
    headline = 'The roc_auc_score for the {} {} set using the best {} is '.format(f_set, t_set, model_name)
    print(headline, roc_auc_score(y, class_one_proba))

In [36]:
# roc_auc of the tuned full-feature model on the full-feature training set.
cal_roc_auc(
    X=X_train,
    y=y_train,
    cls=lr_f,
    f_set='full feature',
    t_set='training',
    model_name='Logistic Regression',
)

The roc_auc_score for the full feature training set using the best Logistic Regression is  0.7941151549648426


In [37]:
# roc_auc of the tuned full-feature model on the full-feature test set.
cal_roc_auc(
    X=X_test,
    y=y_test,
    cls=lr_f,
    f_set='full feature',
    t_set='test',
    model_name='Logistic Regression',
)

The roc_auc_score for the full feature test set using the best Logistic Regression is  0.8009337852021322


In [38]:
# Accuracy of the tuned full-feature model on the full-feature test set.
print(
    'The accuracy for the full feature test set is: ',
    lr_f.score(X=X_test, y=y_test),
)

The accuracy for the full feature test set is:  0.8319980577810148


In [39]:
# Persist the fitted full-feature Logistic Regression model to disk.
import joblib
joblib.dump(value=lr_f, filename='Log_Reg_Full.joblib')

['Log_Reg_Full.joblib']

#### Computing the best hyperparameters for the  logistic Regression using Reduced feature Training Set.

In [40]:
# Run a 200-trial study on the reduced-feature training set and keep its best score
# and parameters (this overwrites the values stored for the full-feature study).
best_study_score, best_study_params = train_test_roc_auc(
    X_tr=X_train_red,
    y_tr=y_train,
    cls=lr_s,
    obj_func=objective_wrappper_ls,
    cv_strat=cv_strat,
    n_trials=200,
)

[I 2020-10-01 15:37:28,119] A new study created in memory with name: no-name-1163dd5c-cf52-4d97-ba1b-8a7d514cd49d
[I 2020-10-01 15:37:31,230] Trial 0 finished with value: 0.7906171954484427 and parameters: {'C': 1.5795031120665964e-05, 'l1_ratio': 0.9480790916763634, 'class_weight': {0: 1.0, 1: 9.0}, 'penalty': 'none'}. Best is trial 0 with value: 0.7906171954484427.
[I 2020-10-01 15:37:34,497] Trial 1 finished with value: 0.7905944109974822 and parameters: {'C': 8.879665162925674, 'l1_ratio': 0.24151511036275042, 'class_weight': {0: 1.0, 1: 10}, 'penalty': 'none'}. Best is trial 0 with value: 0.7906171954484427.
[I 2020-10-01 15:37:37,557] Trial 2 finished with value: 0.7905623264936935 and parameters: {'C': 0.0002791084362307681, 'l1_ratio': 0.45295636883338586, 'class_weight': {0: 1.0, 1: 11}, 'penalty': 'none'}. Best is trial 0 with value: 0.7906171954484427.
[I 2020-10-01 15:37:40,497] Trial 3 finished with value: 0.7905623264936935 and parameters: {'C': 9.466631701568094e-05, 'l1

[I 2020-10-01 15:40:19,338] Trial 62 finished with value: 0.7908105108209174 and parameters: {'C': 0.270452274866648, 'l1_ratio': 0.5785606989725617, 'class_weight': {0: 1.0, 1: 12}, 'penalty': 'elasticnet'}. Best is trial 46 with value: 0.7910022575755734.
[I 2020-10-01 15:40:20,636] Trial 63 finished with value: 0.7907190155803268 and parameters: {'C': 0.033884785847968064, 'l1_ratio': 0.6233813380496074, 'class_weight': {0: 1.0, 1: 12}, 'penalty': 'elasticnet'}. Best is trial 46 with value: 0.7910022575755734.
[I 2020-10-01 15:40:23,572] Trial 64 finished with value: 0.79094288779393 and parameters: {'C': 0.12354132160357051, 'l1_ratio': 0.48771513406952566, 'class_weight': {0: 1.0, 1: 12}, 'penalty': 'elasticnet'}. Best is trial 46 with value: 0.7910022575755734.
[I 2020-10-01 15:40:27,682] Trial 65 finished with value: 0.7906710196820268 and parameters: {'C': 0.5653537455144129, 'l1_ratio': 0.5338694529633254, 'class_weight': {0: 1.0, 1: 12}, 'penalty': 'elasticnet'}. Best is tria

[I 2020-10-01 15:43:10,869] Trial 124 finished with value: 0.791002919395586 and parameters: {'C': 0.22248837599693988, 'l1_ratio': 0.938258709311163, 'class_weight': {0: 1.0, 1: 9.0}, 'penalty': 'elasticnet'}. Best is trial 120 with value: 0.7910481680557272.
[I 2020-10-01 15:43:14,792] Trial 125 finished with value: 0.7908943241230884 and parameters: {'C': 0.3230750080640674, 'l1_ratio': 0.9994194950977051, 'class_weight': {0: 1.0, 1: 9.0}, 'penalty': 'elasticnet'}. Best is trial 120 with value: 0.7910481680557272.
[I 2020-10-01 15:43:17,826] Trial 126 finished with value: 0.7910414458690701 and parameters: {'C': 0.1524145160030075, 'l1_ratio': 0.8897774904380614, 'class_weight': {0: 1.0, 1: 9.0}, 'penalty': 'elasticnet'}. Best is trial 120 with value: 0.7910481680557272.
[I 2020-10-01 15:43:21,434] Trial 127 finished with value: 0.7910222615050078 and parameters: {'C': 0.18671562935504565, 'l1_ratio': 0.9123348656616098, 'class_weight': {0: 1.0, 1: 9.0}, 'penalty': 'elasticnet'}. Be

[I 2020-10-01 15:46:01,432] Trial 186 finished with value: 0.7910492543421047 and parameters: {'C': 0.13433365925283067, 'l1_ratio': 0.9674637621487514, 'class_weight': {0: 1.0, 1: 9.0}, 'penalty': 'elasticnet'}. Best is trial 145 with value: 0.7910530987490983.
[I 2020-10-01 15:46:05,463] Trial 187 finished with value: 0.7908406779890929 and parameters: {'C': 0.4425131708940126, 'l1_ratio': 0.9719474524989278, 'class_weight': {0: 1.0, 1: 9.0}, 'penalty': 'elasticnet'}. Best is trial 145 with value: 0.7910530987490983.
[I 2020-10-01 15:46:09,494] Trial 188 finished with value: 0.7909335785289167 and parameters: {'C': 0.25228692921245016, 'l1_ratio': 0.8523302449578716, 'class_weight': {0: 1.0, 1: 9.0}, 'penalty': 'elasticnet'}. Best is trial 145 with value: 0.7910530987490983.
[I 2020-10-01 15:46:12,620] Trial 189 finished with value: 0.7906171954484427 and parameters: {'C': 0.07138433776855237, 'l1_ratio': 0.9975720704870695, 'class_weight': {0: 1.0, 1: 9.0}, 'penalty': 'none'}. Best 

In [41]:
# Show the best cross-validated roc_auc found by the reduced-feature study.
print('The best roc_auc_score for the study is: ', str(best_study_score))

The best roc_auc_score for the study is:  0.7910531061050109


In [42]:
# Show the best hyperparameters found by the reduced-feature study.
# The message and the parameters are passed as two arguments; wrapping them in an
# extra pair of parentheses printed the repr of a tuple instead of a readable line.
print('The best study parameters for the classifier are: ', best_study_params)

('The best study parameters for the classifier are: ', {'C': 0.12725888493400458, 'l1_ratio': 0.9851193622801032, 'class_weight': {0: 1.0, 1: 9.0}, 'penalty': 'elasticnet'})


#### Computing the  Reduced feature roc_auc score for the test data using the best study Parameters

In [43]:
# Obtaining the best reduced feature LR model by setting best study parameters.
# set_params returns the estimator it was called on, so assigning its result directly
# would make lr_R just another name for lr_s (and for anything else bound to that
# object), and fitting lr_R would overwrite that shared estimator. Cloning first
# gives lr_R its own estimator object.
from sklearn.base import clone

lr_R = clone(lr_s).set_params(**best_study_params)

In [44]:
# Train the tuned reduced-feature Logistic Regression model on the reduced-feature training set.
lr_R.fit(X=X_train_red, y=y_train)

LogisticRegression(C=0.12725888493400458, class_weight={0: 1.0, 1: 9.0},
                   l1_ratio=0.9851193622801032, n_jobs=5, penalty='elasticnet',
                   random_state=42, solver='saga')

In [45]:
# roc_auc of the tuned reduced-feature model on the reduced-feature training set.
cal_roc_auc(
    X=X_train_red,
    y=y_train,
    cls=lr_R,
    f_set='reduced feature',
    t_set='training',
    model_name='Logistic Regression',
)

The roc_auc_score for the reduced feature training set using the best Logistic Regression is  0.7939385748220312


In [46]:
# roc_auc of the tuned reduced-feature model on the reduced-feature test set.
cal_roc_auc(
    X=X_test_red,
    y=y_test,
    cls=lr_R,
    f_set='reduced feature',
    t_set='test',
    model_name='Logistic Regression',
)

The roc_auc_score for the reduced feature test set using the best Logistic Regression is  0.7986454254917684


In [49]:
# Accuracy of the tuned reduced-feature model on the reduced-feature test set.
print(
    'The accuracy for the reduced feature test is: ',
    lr_R.score(X=X_test_red, y=y_test),
)

The accuracy for the reduced feature test is:  0.809905316824472


In [48]:
# Persist the fitted reduced-feature Logistic Regression model to disk.
joblib.dump(value=lr_R, filename='Log_Reg_Reduced.joblib')

['Log_Reg_Reduced.joblib']

## Observations:
### 1) From the above analysis we can clearly see that the roc_auc score of the full feature test set using the tuned Logistic Regression model is almost equal to that of the corresponding reduced feature test set, confirming the earlier suspicion that there are a lot of noise features in the original dataset.
### 2) The roc_auc test set scores for the tuned Logistic Regression models are much higher than the scores from the corresponding baseline models, which was expected.
### 3) The roc_auc test scores are higher than their training counterparts for both the full feature and reduced feature datasets, perhaps due to underfitting. Maybe a more flexible model will yield better results.

## Defining  Reward Risk Ratio for a Family of Machine Learning Models:

## R_R Ratio = Mean of CV K Fold score / Std. Dev of K Fold score

### R_R ratio may be helpful in choosing among models having same computational complexity

### Calculating R_R ratio for best Logistic Regression Model 

In [4]:
# Reload the saved reduced-feature Logistic Regression model from disk.
# NOTE(review): joblib.load unpickles the file, so only load artefacts produced by this project.
import joblib
lr_R = joblib.load(filename='Log_Reg_Reduced.joblib')

In [5]:
# Importing required Libraries
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

In [6]:
# Instantiating the Stratified K fold object (10 folds).
# No shuffling is requested, so the folds are deterministic and a random_state would
# have no effect; current scikit-learn releases raise ValueError when random_state
# is given without shuffle=True, hence it is omitted.
cv_strat = StratifiedKFold(n_splits=10)

In [7]:
# Per-fold roc_auc scores of the best Logistic Regression model on the reduced-feature
# training set; their mean is the "reward" and their standard deviation the "risk".
score_Log_Reg = cross_val_score(
    estimator=lr_R,
    X=X_train_red,
    y=y_train,
    scoring='roc_auc',
    cv=cv_strat,
    n_jobs=5,
)

In [10]:
# Reward = mean of the per-fold roc_auc scores.
print(
    'The reward for the best Logistics Regression Model using roc_auc metric is: ',
    np.mean(a=score_Log_Reg),
)

The reward for the best Logistics Regression Model using roc_auc metric is:  0.7910531061050109


In [11]:
# Risk = standard deviation of the per-fold roc_auc scores.
print(
    'The risk associated with the best Logistics Regression Model using roc_auc metric is: ',
    np.std(a=score_Log_Reg),
)

The risk associated with the best Logistics Regression Model using roc_auc metric is:  0.016087407569279358


In [8]:
# Reward/risk ratio of the best Logistic Regression model:
# mean of the per-fold scores divided by their standard deviation.
R_R_Ratio_Log_Reg = np.divide(np.mean(score_Log_Reg), np.std(score_Log_Reg))

In [12]:
# Show the reward/risk ratio computed above.
print(
    'The reward risk  ratio for the best Logistics Regression Model using roc_auc metric is: ',
    str(R_R_Ratio_Log_Reg),
)

The reward risk  ratio for the best Logistics Regression Model using roc_auc metric is:  49.17219276619884


### R_R Ratio for the best Logistic Regression using reduced feature set is: 49.17219276619884