In [1]:
# Importing Required Python Packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns',None)

In [2]:
# Loading Reduced feature Training set
X_train_red = pd.read_csv('X_train_final.csv')
y_train = pd.read_csv('y_train.final.csv')

In [3]:
# Loading Reduced feature Test set
X_test_red = pd.read_csv('X_test_final.csv')
y_test = pd.read_csv('y_test.final.csv')

## Model_16: Weighted Aggregating Classifier 

####  Loading  the best Voting Classifier model & Neural Network (with Equal Nodes in all the Layers)

In [4]:
# Importing Joblib module
import joblib

In [5]:
import tensorflow
from tensorflow import keras

In [6]:
# Importing best Voting Classifier
voting_clf = joblib.load('Voting_Red.joblib')

In [7]:
# Importing best Neural Net Classifier
neural = keras.models.load_model('Best_model_Selu_eq_Learn.h5')

### Performing 10 fold class validations to get best weights for Voting and Neural Calssifiers 

In [8]:
from sklearn.model_selection import StratifiedKFold

In [9]:
# Initializing the 10 Fold object
cv_strat = StratifiedKFold(10,random_state=42)

In [10]:
# Defining the prediction matrices for the voting & Neural classifiers
y_preds_vote = np.empty((10, int(len(X_train_red)/10)), dtype=np.float32)
y_preds_neural = np.empty((10, int(len(X_train_red)/10)), dtype=np.float32)
y_true = np.empty((10, int(len(X_train_red)/10)), dtype=np.float32)

### Getting predictions of the Voting classifier for each fold of Training set.

In [11]:
# Computing  10 fold  CV predictions on the training set for the Voting Classifier
i = 0
for train_index, test_index in cv_strat.split(X_train_red, y_train):
    # Creating  Folds
    X_tr, X_tst = X_train_red.loc[train_index], X_train_red.loc[test_index]
    y_tr, y_tst = y_train.loc[train_index], y_train.loc[test_index]
    # Fitting the best Voting Classifier
    voting_clf.fit(X_tr, y_tr)
    y_pred = voting_clf.predict_proba(X_tst)[:,1]
    # Storing the fold predictions in a numpy array
    y_preds_vote[i,:] = y_pred
    # Storing the true fold values in a numpy array
    y_true[i,:] = y_tst.values.flatten()
    i = i+1  

In [14]:
#y_preds_vote.shape
y_true.shape


(10, 3295)

In [15]:
# Storing the copies of the Voting classifier Predictions & True Class labels for each instance of each test fold.
y_preds_vote_copy = y_preds_vote.copy()
y_true_copy = y_true.copy()

In [16]:
# Turning predictions & corresponding class labels into column vectors.
y_preds_vote = y_preds_vote.reshape(-1,1)
y_true = y_true.reshape(-1,1)

In [18]:
y_true_copy.shape

(10, 3295)

In [19]:
# Saving the Voting Predictions & True class labels for each test fold
pd.DataFrame(y_preds_vote_copy).to_csv('Voting_Preds_Folds.csv',index=False)
pd.DataFrame(y_true_copy).to_csv('True_Labels_Folds.csv',index=False)

### Getting predictions for the Neural classifier for each fold

In [20]:
# # Importing train test split from Sklearn to produce validation set
from sklearn.model_selection import train_test_split

In [21]:
# Defining the exponential decay learning rate.
def exponential_decay_fn(epoch):
            return 0.01 * 0.1**(epoch /4 )

In [22]:
# Computing  10 fold  CV predictions on the training set for the Neural Network
i = 0
for train_index, test_index in cv_strat.split(X_train_red, y_train):
    # Creating  Folds
    X_tr, X_tst = X_train_red.loc[train_index], X_train_red.loc[test_index]
    y_tr, y_tst = y_train.loc[train_index], y_train.loc[test_index]
    
    # Splitting the Training set further into training & validation set.
    X_tr_r, X_val, y_tr_r, y_val = train_test_split(X_tr, y_tr, test_size=0.1, random_state=42, stratify=y_tr)
    
    # Loading fresh keras model on every iteration
    neural = keras.models.load_model('Best_model_Selu_eq_Learn.h5')
    
    # comiling & Training the Neural Net
    neural.compile(loss="binary_crossentropy", optimizer=keras.optimizers.Nadam(beta_1=0.9, beta_2=0.999), metrics=["accuracy"])
    
    # defining Checkpoints
    checkpoint_cb = keras.callbacks.ModelCheckpoint('best_model.h5',save_best_only=True) # 1st Callback
    early_stopping_cb = keras.callbacks.EarlyStopping(patience=10)# 2nd Callback
    lr_scheduler_cb = keras.callbacks.LearningRateScheduler(exponential_decay_fn)# 3rd Callback
    
    # Fitting The model
    neural.fit(X_tr_r, y_tr_r, epochs=50, validation_data=(X_val,y_val),batch_size=32,
    class_weight={0: 1.0, 1: 10.0},callbacks=[checkpoint_cb,early_stopping_cb,lr_scheduler_cb])
    
    # Loading the best Neural model after training & before making predictions
    neural_best = keras.models.load_model('best_model.h5')    
    
    # Saving the predictions for every fold in a numpy array
    y_pred = neural_best.predict_proba(X_tst)
    y_preds_neural[i,:] = y_pred.flatten()
    i = i+1  

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train on 26689 samples, validate on 2966 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train on 26689 samples, validate on 2966 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train on 26689 samples, validate on 2966 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train on 26689 samples, validate on 2966 samples
Epoch 1

Epoch 18/50
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train on 26689 samples, validate on 2966 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train on 26689 samples, validate on 2966 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train on 26689 samples, validate on 2966 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
  ...
    

In [23]:
# storing a copy of the Neural predictions for each fold in a numpy array & finally into a csv file
y_preds_neural_copy = y_preds_neural.copy()
pd.DataFrame(y_preds_neural_copy).to_csv('Neural_Preds_Folds.csv',index=False)

In [24]:
y_preds_neural_copy.shape

(10, 3295)

In [25]:
# Turning  neural predictions  into column vector.
y_preds_neural = y_preds_neural.reshape(-1,1)

In [26]:
# Confirming that voting classifier predictions , Neural predictions & True class labels have same shape
assert y_preds_neural.shape == y_preds_vote.shape == y_true.shape

In [27]:
# importing optuna
import optuna

In [28]:
# Importing the Sklearn's roc_auc_score module
from sklearn.metrics import roc_auc_score

In [29]:
# Declaring objective function for Weighted Aggregation Classifier
def objective_wrappper_auc(y_pred_cls_1, y_pred_cls_2, y_true):
    '''
    Optimizes weights for predictions from 2 classifiers to achieve maximum roc_auc score
      
    '''
    
    def objective(trial):
        w1 = trial.suggest_uniform('w1',-1,1)
        w2 = trial.suggest_uniform('w2',-1,1)
        
        y_final_pred = y_pred_cls_1 * w1 + y_pred_cls_2 * w2
        
        roc_auc_1 = roc_auc_score(y_true,y_final_pred)
        
                   
        return roc_auc_1
    
    return objective

In [30]:
# Defining the evaluation function for study's best parameters
def roc_auc_weights(y_pred_cls_1, y_pred_cls_2, y_true, obj_func, n_trials=200):
    ''' Computes the best hyper parameters of the classsifier on the Training set and returns 
    Optuna's study's best score & clasifier parameters'''
    study = optuna.create_study(direction='maximize')
    study.optimize(obj_func(y_pred_cls_1, y_pred_cls_2, y_true),n_trials)
    best_score = study.best_value
    best_params = study.best_params
    return (best_score,best_params)


In [31]:
best_study_score,best_study_params = roc_auc_weights(y_preds_vote, y_preds_neural, y_true, objective_wrappper_auc,1000)

[32m[I 2020-10-22 20:50:39,728][0m A new study created in memory with name: no-name-71c0ebaf-46fa-4c9e-8128-78407d00dc52[0m
[32m[I 2020-10-22 20:50:39,742][0m Trial 0 finished with value: 0.7712874965945355 and parameters: {'w1': -0.38504861498674847, 'w2': 0.7245313701325928}. Best is trial 0 with value: 0.7712874965945355.[0m
[32m[I 2020-10-22 20:50:39,753][0m Trial 1 finished with value: 0.1999509340407264 and parameters: {'w1': -0.7600424066570979, 'w2': -0.22465585974897562}. Best is trial 0 with value: 0.7712874965945355.[0m
[32m[I 2020-10-22 20:50:39,764][0m Trial 2 finished with value: 0.764979670962859 and parameters: {'w1': -0.40084637154122915, 'w2': 0.6740586199414385}. Best is trial 0 with value: 0.7712874965945355.[0m
[32m[I 2020-10-22 20:50:39,776][0m Trial 3 finished with value: 0.8002498326383827 and parameters: {'w1': 0.3536713490623571, 'w2': -0.021919771475948258}. Best is trial 3 with value: 0.8002498326383827.[0m
[32m[I 2020-10-22 20:50:39,785][0m

[32m[I 2020-10-22 20:50:40,319][0m Trial 39 finished with value: 0.8003232675695422 and parameters: {'w1': 0.6134788446235964, 'w2': -0.02281434956403712}. Best is trial 21 with value: 0.8004135455438836.[0m
[32m[I 2020-10-22 20:50:40,335][0m Trial 40 finished with value: 0.7964980263417824 and parameters: {'w1': 0.13014865218094174, 'w2': 0.24400167968946015}. Best is trial 21 with value: 0.8004135455438836.[0m
[32m[I 2020-10-22 20:50:40,351][0m Trial 41 finished with value: 0.8003261699538979 and parameters: {'w1': 0.615519469406572, 'w2': -0.022120411632526878}. Best is trial 21 with value: 0.8004135455438836.[0m
[32m[I 2020-10-22 20:50:40,365][0m Trial 42 finished with value: 0.8001721086281197 and parameters: {'w1': 0.41299952659882944, 'w2': 0.09896369150957596}. Best is trial 21 with value: 0.8004135455438836.[0m
[32m[I 2020-10-22 20:50:40,380][0m Trial 43 finished with value: 0.8002435856015789 and parameters: {'w1': 0.8247166934765944, 'w2': -0.05266388463368245}

[32m[I 2020-10-22 20:50:40,892][0m Trial 78 finished with value: 0.7994554546471763 and parameters: {'w1': 0.6844963278401524, 'w2': 0.3464271863722617}. Best is trial 47 with value: 0.8004242475103256.[0m
[32m[I 2020-10-22 20:50:40,908][0m Trial 79 finished with value: 0.7995341092632168 and parameters: {'w1': 0.5731177560996594, 'w2': 0.2744328811152519}. Best is trial 47 with value: 0.8004242475103256.[0m
[32m[I 2020-10-22 20:50:40,925][0m Trial 80 finished with value: 0.7993485547636991 and parameters: {'w1': 0.6523100536409039, 'w2': -0.12888059681249311}. Best is trial 47 with value: 0.8004242475103256.[0m
[32m[I 2020-10-22 20:50:40,939][0m Trial 81 finished with value: 0.8004212115241502 and parameters: {'w1': 0.4958673456604902, 'w2': 0.029848151657616666}. Best is trial 47 with value: 0.8004242475103256.[0m
[32m[I 2020-10-22 20:50:40,953][0m Trial 82 finished with value: 0.8003178589993302 and parameters: {'w1': 0.555254726154999, 'w2': 0.09274402735520043}. Best

[32m[I 2020-10-22 20:50:41,499][0m Trial 117 finished with value: 0.8003925562373363 and parameters: {'w1': 0.9525644519613478, 'w2': -0.002948036335391431}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:41,514][0m Trial 118 finished with value: 0.8003725297852818 and parameters: {'w1': 0.7742257438143308, 'w2': 0.10612144359343975}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:41,528][0m Trial 119 finished with value: 0.8001729609155892 and parameters: {'w1': 0.8147417208870933, 'w2': -0.06745942440300216}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:41,543][0m Trial 120 finished with value: 0.1998159132777137 and parameters: {'w1': -0.44159248688359276, 'w2': 0.03538513951275478}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:41,559][0m Trial 121 finished with value: 0.8002068589220805 and parameters: {'w1': 0.6978888768387492, 'w2': 0.1557537804294

[32m[I 2020-10-22 20:50:42,075][0m Trial 156 finished with value: 0.800421441872115 and parameters: {'w1': 0.8894042632482476, 'w2': 0.06145048162532479}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:42,089][0m Trial 157 finished with value: 0.8001289967030388 and parameters: {'w1': 0.8573069414521634, 'w2': -0.07898097913922114}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:42,103][0m Trial 158 finished with value: 0.8003997108451212 and parameters: {'w1': 0.985248776118595, 'w2': 0.10771073769861887}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:42,119][0m Trial 159 finished with value: 0.8004129005695824 and parameters: {'w1': 0.758672350530721, 'w2': 0.01142006242323703}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:42,134][0m Trial 160 finished with value: 0.8001746700974877 and parameters: {'w1': 0.6665318102210641, 'w2': 0.15876216836407564}. 

[32m[I 2020-10-22 20:50:42,668][0m Trial 195 finished with value: 0.7970599049182571 and parameters: {'w1': 0.06698457703111277, 'w2': 0.10158310624660148}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:42,683][0m Trial 196 finished with value: 0.8004180235083183 and parameters: {'w1': 0.7808412717557942, 'w2': 0.01867221993559317}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:42,699][0m Trial 197 finished with value: 0.8004042026304337 and parameters: {'w1': 0.6251463428768471, 'w2': 0.06633689066609108}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:42,715][0m Trial 198 finished with value: 0.8002286774813009 and parameters: {'w1': 0.816502615790992, 'w2': -0.05517633403653405}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:42,730][0m Trial 199 finished with value: 0.8003265569384788 and parameters: {'w1': 0.7348809469644534, 'w2': 0.11942862819405864

[32m[I 2020-10-22 20:50:43,263][0m Trial 234 finished with value: 0.8004239941275644 and parameters: {'w1': 0.782692436140711, 'w2': 0.024411922519186363}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:43,278][0m Trial 235 finished with value: 0.8003791822345036 and parameters: {'w1': 0.777823478643615, 'w2': -0.008162531614747344}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:43,294][0m Trial 236 finished with value: 0.8004142319808184 and parameters: {'w1': 0.8371867146717406, 'w2': 0.07078159501178106}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:43,310][0m Trial 237 finished with value: 0.8003521992739137 and parameters: {'w1': 0.9059179046201555, 'w2': 0.1352280337820846}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:43,326][0m Trial 238 finished with value: 0.8002252775453413 and parameters: {'w1': 0.996322752535537, 'w2': -0.06867663980089603}

[32m[I 2020-10-22 20:50:43,884][0m Trial 273 finished with value: 0.8002919724950524 and parameters: {'w1': 0.8292817234100452, 'w2': 0.149707002392217}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:43,899][0m Trial 274 finished with value: 0.8003856550123127 and parameters: {'w1': 0.7679681296519959, 'w2': -0.005759536757162793}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:43,915][0m Trial 275 finished with value: 0.8002001235475916 and parameters: {'w1': 0.8001673088282123, 'w2': -0.06078784879124144}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:43,931][0m Trial 276 finished with value: 0.8003567048801041 and parameters: {'w1': 0.7108663287788073, 'w2': 0.10391839790410053}. Best is trial 84 with value: 0.8004263759255196.[0m
[32m[I 2020-10-22 20:50:43,947][0m Trial 277 finished with value: 0.7995921339155351 and parameters: {'w1': 0.6738951595512146, 'w2': -0.1165681881493329

[32m[I 2020-10-22 20:50:44,537][0m Trial 312 finished with value: 0.8004086944157461 and parameters: {'w1': 0.6520953951223207, 'w2': 0.06453916664833874}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:44,555][0m Trial 313 finished with value: 0.8004243718982265 and parameters: {'w1': 0.8011239051388368, 'w2': 0.02576862406655561}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:44,572][0m Trial 314 finished with value: 0.8004209304996331 and parameters: {'w1': 0.8688101464824429, 'w2': 0.02535201319534727}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:44,618][0m Trial 315 finished with value: 0.8004008026944741 and parameters: {'w1': 0.8001842771656353, 'w2': 0.08671197130442587}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:44,636][0m Trial 316 finished with value: 0.8002651415641194 and parameters: {'w1': 0.8448701085668685, 'w2': 0.16337620627879257}

[32m[I 2020-10-22 20:50:45,218][0m Trial 351 finished with value: 0.8004225890049793 and parameters: {'w1': 0.8045515033152245, 'w2': 0.044528317933180725}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:45,236][0m Trial 352 finished with value: 0.8002430834430158 and parameters: {'w1': 0.7029726145008439, 'w2': -0.04494928457328109}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:45,253][0m Trial 353 finished with value: 0.8004033733777606 and parameters: {'w1': 0.6167260031971981, 'w2': 0.003074795775600176}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:45,269][0m Trial 354 finished with value: 0.8003668125487969 and parameters: {'w1': 0.7577286355805399, 'w2': 0.10646890803359577}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:45,285][0m Trial 355 finished with value: 0.800418474990329 and parameters: {'w1': 0.8026538335557515, 'w2': 0.0598799686755227

[32m[I 2020-10-22 20:50:45,890][0m Trial 390 finished with value: 0.8004061744090119 and parameters: {'w1': 0.9650247632978581, 'w2': 0.009233606409155895}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:45,907][0m Trial 391 finished with value: 0.8001001571378532 and parameters: {'w1': 0.9130557408951984, 'w2': -0.08916585688578138}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:45,922][0m Trial 392 finished with value: 0.8003307630923149 and parameters: {'w1': 0.8050275738083168, 'w2': -0.028137244351063913}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:45,939][0m Trial 393 finished with value: 0.8004214695138707 and parameters: {'w1': 0.8862532628648321, 'w2': 0.054562455461843774}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:45,955][0m Trial 394 finished with value: 0.8003440864185956 and parameters: {'w1': 0.8491073371904793, 'w2': 0.1300335502838

[32m[I 2020-10-22 20:50:46,516][0m Trial 429 finished with value: 0.8004244087539009 and parameters: {'w1': 0.9137092584262921, 'w2': 0.0330290616997842}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:46,533][0m Trial 430 finished with value: 0.8003833515326653 and parameters: {'w1': 0.9369691245210867, 'w2': 0.11856146610555268}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:46,549][0m Trial 431 finished with value: 0.8004189356862587 and parameters: {'w1': 0.9081813587978068, 'w2': 0.06628647042724553}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:46,565][0m Trial 432 finished with value: 0.8004250859769171 and parameters: {'w1': 0.956839891109127, 'w2': 0.040081641076619864}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:46,581][0m Trial 433 finished with value: 0.8004258875878345 and parameters: {'w1': 0.9011165320995006, 'w2': 0.03551453354167723}.

[32m[I 2020-10-22 20:50:47,166][0m Trial 468 finished with value: 0.8003777863258372 and parameters: {'w1': 0.8268407837016378, 'w2': -0.008954323544064488}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:47,182][0m Trial 469 finished with value: 0.8004243672912671 and parameters: {'w1': 0.7848037135965307, 'w2': 0.027844027182290065}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:47,198][0m Trial 470 finished with value: 0.8002462530310106 and parameters: {'w1': 0.912141971872503, 'w2': 0.1850561692095599}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:47,215][0m Trial 471 finished with value: 0.8004171573999708 and parameters: {'w1': 0.8804045623465726, 'w2': 0.0685606221964126}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:47,231][0m Trial 472 finished with value: 0.7995285302355107 and parameters: {'w1': 0.9995952800889507, 'w2': -0.18005327249588832

[32m[I 2020-10-22 20:50:47,807][0m Trial 507 finished with value: 0.8003687290438636 and parameters: {'w1': 0.6531606171768412, 'w2': 0.09106222293808502}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:47,824][0m Trial 508 finished with value: 0.8002864487508581 and parameters: {'w1': 0.7182420602099054, 'w2': -0.036271549715563796}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:47,840][0m Trial 509 finished with value: 0.8004071971539752 and parameters: {'w1': 0.8607042900382426, 'w2': 0.008551320787562246}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:47,857][0m Trial 510 finished with value: 0.8000258422774684 and parameters: {'w1': 0.7729634246590107, 'w2': -0.08635600045852282}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:47,873][0m Trial 511 finished with value: 0.8004220914533755 and parameters: {'w1': 0.8284570244842394, 'w2': 0.05505723947992

[32m[I 2020-10-22 20:50:48,481][0m Trial 546 finished with value: 0.800322935868473 and parameters: {'w1': 0.609485854684508, 'w2': 0.10043891978652908}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:48,498][0m Trial 547 finished with value: 0.8003314725640464 and parameters: {'w1': 0.6472461994713423, 'w2': -0.02250308353137999}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:48,516][0m Trial 548 finished with value: 0.8004081185458343 and parameters: {'w1': 0.6439858947261723, 'w2': 0.06127951587546981}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:48,532][0m Trial 549 finished with value: 0.7997844191825824 and parameters: {'w1': 0.49492962238994653, 'w2': -0.07362864779449986}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:48,550][0m Trial 550 finished with value: 0.8004139417423829 and parameters: {'w1': 0.5499778162488221, 'w2': 0.01082598571255530

[32m[I 2020-10-22 20:50:49,157][0m Trial 585 finished with value: 0.8003766852625657 and parameters: {'w1': 0.8519903878162083, 'w2': -0.010809200718996236}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:49,175][0m Trial 586 finished with value: 0.8001637930665926 and parameters: {'w1': 0.5860283982377716, 'w2': 0.14231256870811157}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:49,193][0m Trial 587 finished with value: 0.7998705048239656 and parameters: {'w1': 0.22689827444364058, 'w2': 0.08171435183519349}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:49,210][0m Trial 588 finished with value: 0.8002608018084637 and parameters: {'w1': 0.7662145319253494, 'w2': -0.04500231433933598}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:49,229][0m Trial 589 finished with value: 0.8004141628764291 and parameters: {'w1': 0.8110665353910488, 'w2': 0.01381601180141

[32m[I 2020-10-22 20:50:49,870][0m Trial 624 finished with value: 0.8001188429647531 and parameters: {'w1': 0.5103132747321876, 'w2': -0.04795960668212816}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:49,888][0m Trial 625 finished with value: 0.8003834620996884 and parameters: {'w1': 0.618597068493761, 'w2': 0.07826237437323408}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:49,907][0m Trial 626 finished with value: 0.8004140753442026 and parameters: {'w1': 0.5114868104929522, 'w2': 0.010165672432983604}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:49,926][0m Trial 627 finished with value: 0.7998840584982108 and parameters: {'w1': 0.5346562531898365, 'w2': -0.07166355152745243}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:49,945][0m Trial 628 finished with value: 0.8002565910476682 and parameters: {'w1': 0.626695150322586, 'w2': 0.12385236066107065

[32m[I 2020-10-22 20:50:50,638][0m Trial 663 finished with value: 0.8003041717232652 and parameters: {'w1': 0.6388529212526538, 'w2': -0.02815729272066626}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:50,656][0m Trial 664 finished with value: 0.8004141997321035 and parameters: {'w1': 0.7198658141156355, 'w2': 0.014887063404326856}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:50,675][0m Trial 665 finished with value: 0.800411771864555 and parameters: {'w1': 0.925913769552538, 'w2': 0.0830159019252526}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:50,692][0m Trial 666 finished with value: 0.8001693214177463 and parameters: {'w1': 0.7602070350752075, 'w2': -0.06340672110151968}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:50,711][0m Trial 667 finished with value: 0.8004214372651557 and parameters: {'w1': 0.6592210949987524, 'w2': 0.03854277345607037}

[32m[I 2020-10-22 20:50:51,386][0m Trial 702 finished with value: 0.8004247588828072 and parameters: {'w1': 0.9968263768044437, 'w2': 0.032224814999135196}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:51,404][0m Trial 703 finished with value: 0.8004132000219366 and parameters: {'w1': 0.87213330153753, 'w2': 0.07658013166583169}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:51,422][0m Trial 704 finished with value: 0.8001141208314757 and parameters: {'w1': 0.6636996064646711, 'w2': -0.06283849774250583}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:51,441][0m Trial 705 finished with value: 0.8004153468649678 and parameters: {'w1': 0.8955620117661007, 'w2': 0.01906578286730512}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:51,458][0m Trial 706 finished with value: 0.8003656009185025 and parameters: {'w1': 0.9256679888466988, 'w2': 0.1309971922746535}.

[32m[I 2020-10-22 20:50:52,124][0m Trial 741 finished with value: 0.800421911781963 and parameters: {'w1': 0.9976552463346626, 'w2': 0.04974918321430513}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:52,144][0m Trial 742 finished with value: 0.7998923279901452 and parameters: {'w1': 0.7587178113485257, 'w2': -0.10094528116695847}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:52,165][0m Trial 743 finished with value: 0.19962586238592434 and parameters: {'w1': -0.20117246748266182, 'w2': 0.00285927825081167}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:52,186][0m Trial 744 finished with value: 0.8004064185778544 and parameters: {'w1': 0.7929858396528353, 'w2': 0.07962037199370917}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:52,207][0m Trial 745 finished with value: 0.800175158435173 and parameters: {'w1': 0.8531274184650275, 'w2': 0.2030993177198397

[32m[I 2020-10-22 20:50:52,920][0m Trial 780 finished with value: 0.8004239803066864 and parameters: {'w1': 0.8848435019985067, 'w2': 0.041224824615534644}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:52,940][0m Trial 781 finished with value: 0.8002046245468225 and parameters: {'w1': 0.5264980996926668, 'w2': -0.03939733554896504}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:52,958][0m Trial 782 finished with value: 0.8003907779510487 and parameters: {'w1': 0.6957247591570359, 'w2': 0.08357920718129752}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:52,977][0m Trial 783 finished with value: 0.8003849916101743 and parameters: {'w1': 0.7901437165801968, 'w2': -0.006131678445032415}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:52,995][0m Trial 784 finished with value: 0.8004259106226309 and parameters: {'w1': 0.9320612532903997, 'w2': 0.03630574559515

[32m[I 2020-10-22 20:50:53,667][0m Trial 819 finished with value: 0.8003838951538621 and parameters: {'w1': 0.39025280414839464, 'w2': -0.0031771500458466014}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:53,687][0m Trial 820 finished with value: 0.8003538485653413 and parameters: {'w1': 0.9359590357502975, 'w2': 0.13813423134021607}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:53,706][0m Trial 821 finished with value: 0.7977756559351789 and parameters: {'w1': 0.8342131748344285, 'w2': 0.9610020324439121}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:53,726][0m Trial 822 finished with value: 0.2426086590048142 and parameters: {'w1': 0.03795783399352508, 'w2': -0.05818375728985343}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:53,747][0m Trial 823 finished with value: 0.8004059947375992 and parameters: {'w1': 0.8797995968273413, 'w2': 0.0904688435499

[32m[I 2020-10-22 20:50:54,444][0m Trial 858 finished with value: 0.8003075624453062 and parameters: {'w1': 0.9644549334462054, 'w2': -0.041166245535252236}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:54,464][0m Trial 859 finished with value: 0.8004230450939495 and parameters: {'w1': 0.9068759524318228, 'w2': 0.0426197838085395}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:54,484][0m Trial 860 finished with value: 0.8003945280159146 and parameters: {'w1': 0.830048463853231, 'w2': -0.0014148949605126485}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:54,503][0m Trial 861 finished with value: 0.8004137528570519 and parameters: {'w1': 0.7701436764889812, 'w2': 0.0646557450830457}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:54,524][0m Trial 862 finished with value: 0.8003961220238306 and parameters: {'w1': 0.87375253691711, 'w2': 0.09985249194976656}

[32m[I 2020-10-22 20:50:55,242][0m Trial 897 finished with value: 0.800204942427014 and parameters: {'w1': 0.8676317010528847, 'w2': 0.19487928006808616}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:55,261][0m Trial 898 finished with value: 0.8004258829808751 and parameters: {'w1': 0.9996596044955853, 'w2': 0.039389990280184514}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:55,281][0m Trial 899 finished with value: 0.8003313435691861 and parameters: {'w1': 0.9677452228696299, 'w2': 0.15422364525645474}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:55,300][0m Trial 900 finished with value: 0.8003919158699944 and parameters: {'w1': 0.9749308862249617, 'w2': 0.11574594800073836}. Best is trial 284 with value: 0.800426615487403.[0m
[32m[I 2020-10-22 20:50:55,319][0m Trial 901 finished with value: 0.8004201150678381 and parameters: {'w1': 0.9985347563034538, 'w2': 0.06980289994480188}

[32m[I 2020-10-22 20:50:56,021][0m Trial 936 finished with value: 0.8004144807566205 and parameters: {'w1': 0.9072577884996794, 'w2': 0.07883074692989345}. Best is trial 902 with value: 0.8004266845917924.[0m
[32m[I 2020-10-22 20:50:56,040][0m Trial 937 finished with value: 0.8004227871042291 and parameters: {'w1': 0.8583111807318956, 'w2': 0.04068705742411795}. Best is trial 902 with value: 0.8004266845917924.[0m
[32m[I 2020-10-22 20:50:56,059][0m Trial 938 finished with value: 0.8001807098211232 and parameters: {'w1': 0.9554873951990849, 'w2': -0.0772224083005129}. Best is trial 902 with value: 0.8004266845917924.[0m
[32m[I 2020-10-22 20:50:56,077][0m Trial 939 finished with value: 0.8003751465381613 and parameters: {'w1': 0.9968646247243587, 'w2': -0.013407499484993278}. Best is trial 902 with value: 0.8004266845917924.[0m
[32m[I 2020-10-22 20:50:56,097][0m Trial 940 finished with value: 0.8004087635201356 and parameters: {'w1': 0.9992237709357853, 'w2': 0.098498955795

[32m[I 2020-10-22 20:50:56,800][0m Trial 975 finished with value: 0.8003921001483661 and parameters: {'w1': 0.7463557716690925, 'w2': -0.003548149977825616}. Best is trial 902 with value: 0.8004266845917924.[0m
[32m[I 2020-10-22 20:50:56,819][0m Trial 976 finished with value: 0.800306309352378 and parameters: {'w1': 0.8009744397026577, 'w2': 0.13977032382998134}. Best is trial 902 with value: 0.8004266845917924.[0m
[32m[I 2020-10-22 20:50:56,838][0m Trial 977 finished with value: 0.800141813263797 and parameters: {'w1': 0.9234294304832779, 'w2': -0.08240047769936143}. Best is trial 902 with value: 0.8004266845917924.[0m
[32m[I 2020-10-22 20:50:56,856][0m Trial 978 finished with value: 0.8004216998618354 and parameters: {'w1': 0.8908473872355797, 'w2': 0.044820481324846284}. Best is trial 902 with value: 0.8004266845917924.[0m
[32m[I 2020-10-22 20:50:56,875][0m Trial 979 finished with value: 0.8003373372232286 and parameters: {'w1': 0.7725215715001911, 'w2': -0.02525672011

In [32]:
print('The best roc_auc_score for the study is: ',best_study_score)

The best roc_auc_score for the study is:  0.8004266845917924


In [33]:
print(('The best study parameters for the classifier are: ',best_study_params))

('The best study parameters for the classifier are: ', {'w1': 0.9397785734580625, 'w2': 0.03493904313818527})


### Training the Tuned Weighted Aggregating Classifier on the whole Training set & making the predictions on the Test set.

In [34]:
# Training the voting classifier on whole Traing set
voting_clf.fit(X_train_red, y_train)

VotingClassifier(estimators=[('logistic_Reg',
                              LogisticRegression(C=0.12725888493400458,
                                                 class_weight={0: 1.0, 1: 9.0},
                                                 l1_ratio=0.9851193622801032,
                                                 n_jobs=5, penalty='elasticnet',
                                                 random_state=42,
                                                 solver='saga')),
                             ('Random_Forest',
                              RandomForestClassifier(class_weight='balanced',
                                                     max_depth=11,
                                                     min_samples_leaf=0.000264150675671259,
                                                     n_estimators=1560,
                                                     n_jobs=5,
                                                     rando...
                              

In [35]:
# Making Predictions on the Test Set using voting classifier
y_preds_vote_final = voting_clf.predict_proba(X_test_red)[:,1]

In [36]:
# Reloading best Neural Net Classifier 
neural = keras.models.load_model('Best_model_Selu_eq_Learn.h5')

In [37]:
# Making Predictions on the Test Set using Neural classifier
y_preds_neural_final = neural.predict_proba(X_test_red)

In [38]:
# Converting all test set  voting classifier  predictions into a column vector 
y_preds_vote_final = y_preds_vote_final.reshape(-1,1)

In [39]:
# combining the predictions from the Neural  & Voting classifier with the study's best votes.
y_combined_final = y_preds_vote_final * best_study_params['w1'] + y_preds_neural_final * best_study_params['w2']

In [40]:
# Computing the the final roc_auc score of the combined model 
print('The final test set roc_auc score of the combined voting & neural classifier using the best weights is: ',
     roc_auc_score(y_test,y_combined_final))

The final test set roc_auc score of the combined voting & neural classifier using the best weights is:  0.8173387453417614


### Calculating R_R ratio of the tuned Weighted Aggregating classifier using study's best weights.

In [41]:
# Asserting that all the Predictions & True Call label arrays have same shape
assert y_preds_vote_copy.shape == y_true_copy.shape == y_preds_neural_copy.shape

In [51]:
roc_auc_agg = []
for i in range(10):
    y_pred_combined = y_preds_vote_copy[i,:] * best_study_params['w1'] + y_preds_neural_copy[i,:] * best_study_params['w2']
    roc_auc_agg.append(roc_auc_score(y_true_copy[i,:],y_pred_combined))
    

In [52]:
print('The reward associated with the Aggregating classifier using roc_auc metric is: ',np.mean(roc_auc_agg))

The reward associated with the Aggregating classifier using roc_auc metric is:  0.8014073062606618


In [53]:
print('The risk associated with the Aggregating Classifier using roc_auc metric is: ',np.std(roc_auc_agg))

The risk associated with the Aggregating Classifier using roc_auc metric is:  0.016844751862311692


In [54]:
R_R_Ratio_Aggregator = np.mean(roc_auc_agg)/np.std(roc_auc_agg)

In [55]:
print('The reward risk ratio for the Aggregating Classifier using roc_auc metric is: ',R_R_Ratio_Aggregator)

The reward risk ratio for the Aggregating Classifier using roc_auc metric is:  47.57608261678961


## Observations: 
### 1) The test set roc_auc score for the tuned Weighted Aggregating classifier is nearly equal to that of best tuned Voting Classifier, owing to more weight being assigned to the voting classifier.
### 2) Similarly the R_R ratio of the Weighted Aggregating classifier is approx. equal (although more) to that of the best tuned Voting Classifier. So with added complexity the former doesn't offer any advantage over the latter.

### Best R_R Ratio for the tuned Weighted  Classifier using reduced feature set is:  47.57608261678961