In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.feature_selection import RFE
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score
from sklearn.metrics import matthews_corrcoef
from sklearn.model_selection import KFold, RepeatedStratifiedKFold, StratifiedKFold
from collections import Counter

In [None]:
def evaluate_model_train(model, X_train, y_train, cv=None):
    """Estimate training-set performance of ``model`` by stratified cross-validation.

    Each fold is fitted on a fresh clone of ``model``, so the estimator passed in
    is left exactly as the caller fitted it (fitting ``model`` itself inside the
    loop would leave it trained on the last fold's subset only). Every metric is
    computed on the same fold partition, with one fit per fold.

    Parameters
    ----------
    model : estimator
        Classifier exposing ``fit``, ``predict`` and ``predict_proba`` that can be
        copied with ``sklearn.base.clone``.
    X_train : numpy.ndarray
        Feature matrix; rows are selected with integer index arrays.
    y_train : numpy.ndarray
        Binary class labels aligned with ``X_train``. The larger label is the
        positive class for sensitivity; precision/recall/F1 use scikit-learn's
        default positive label (1).
    cv : splitter, optional
        Object with a ``split(X, y)`` method. Defaults to
        ``RepeatedStratifiedKFold(n_splits=10, n_repeats=1)`` (unseeded).

    Returns
    -------
    dict
        ``prec_train``, ``recall_train``, ``f1_train``, ``mcc``, ``AUC``: fold means.
        ``Acc`` and ``acc``: mean fold accuracy (both keys kept for existing callers).
        ``lst_accu``: list of per-fold accuracies.
        ``cm``: element-wise mean of the per-fold confusion matrices.
        ``sen`` / ``spec``: sensitivity and specificity derived from ``cm``.
    """
    from sklearn import metrics
    from sklearn.base import clone

    if cv is None:
        cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=1)

    # Draw the partition once: an unseeded splitter yields a different
    # partition on every split() call.
    folds = list(cv.split(X_train, y_train))
    # Fixing the label set keeps every per-fold confusion matrix the same shape.
    class_labels = np.unique(y_train)

    lst_accu = []
    precision_folds, recall_folds, f1_folds = [], [], []
    conf_matrix_list_of_arrays = []
    mcc_array = []
    AUC_list = []
    for train_index, test_index in folds:
        X_train_fold, X_test_fold = X_train[train_index], X_train[test_index]
        y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

        # Fit a copy so the caller's fitted estimator is not overwritten.
        fold_model = clone(model)
        fold_model.fit(X_train_fold, y_train_fold)
        y_pred_fold = fold_model.predict(X_test_fold)

        lst_accu.append(metrics.accuracy_score(y_test_fold, y_pred_fold))
        precision_folds.append(metrics.precision_score(y_test_fold, y_pred_fold))
        recall_folds.append(metrics.recall_score(y_test_fold, y_pred_fold))
        f1_folds.append(metrics.f1_score(y_test_fold, y_pred_fold))
        conf_matrix_list_of_arrays.append(
            metrics.confusion_matrix(y_test_fold, y_pred_fold, labels=class_labels)
        )
        mcc_array.append(metrics.matthews_corrcoef(y_test_fold, y_pred_fold))
        # Column 1 of predict_proba is used as the positive-class score.
        AUC_list.append(
            metrics.roc_auc_score(y_test_fold, fold_model.predict_proba(X_test_fold)[:, 1])
        )

    acc = np.mean(lst_accu)
    Acc = acc
    print(Acc)

    cm = np.mean(conf_matrix_list_of_arrays, axis=0)
    # Row 0 holds the true negatives' row, row 1 the true positives' row.
    specificity = cm[0, 0] / (cm[0, 1] + cm[0, 0])
    sensitivity = cm[1, 1] / (cm[1, 0] + cm[1, 1])

    return {'prec_train': np.mean(precision_folds), 'recall_train': np.mean(recall_folds),
            'f1_train': np.mean(f1_folds), 'cm': cm, 'mcc': np.mean(mcc_array), 'Acc': Acc,
            'sen': sensitivity, 'spec': specificity, 'acc': acc, 'lst_accu': lst_accu,
            'AUC': np.mean(AUC_list)}

In [None]:
def evaluate_model_test(model, X_test, y_test):
    """Score an already-fitted binary classifier on a held-out set.

    The positive-class probability (column 1 of ``predict_proba``) is computed
    once; hard labels are obtained by thresholding it at 0.5 (strictly greater
    than 0.5 -> 1, otherwise 0).

    Parameters
    ----------
    model : fitted estimator
        Must expose ``predict_proba``.
    X_test : array-like
        Feature matrix of the evaluation set.
    y_test : array-like
        True labels, encoded as 0 (negative) / 1 (positive).

    Returns
    -------
    dict
        ``acc``, ``prec``, ``rec``, ``f1``, ``mcc``: metrics on the thresholded labels.
        ``fpr``, ``tpr``, ``auc``: ROC curve points and area from the probabilities.
        ``cm``: confusion matrix; ``sen`` / ``spec``: sensitivity and specificity
        read from it.
    """
    from sklearn import metrics

    # Positive-class probabilities, reused for both the hard labels and the ROC.
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    y_pred = (y_pred_proba > 0.5).astype(float)

    # Threshold-dependent metrics
    acc = metrics.accuracy_score(y_test, y_pred)
    prec = metrics.precision_score(y_test, y_pred)
    rec = metrics.recall_score(y_test, y_pred)
    f1 = metrics.f1_score(y_test, y_pred)
    mcc = metrics.matthews_corrcoef(y_test, y_pred)

    # Threshold-free metrics
    fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
    auc = metrics.roc_auc_score(y_test, y_pred_proba)

    # Confusion matrix: row = true class, column = predicted class
    cm = metrics.confusion_matrix(y_test, y_pred)
    spec = cm[0, 0] / (cm[0, 1] + cm[0, 0])
    sen = cm[1, 1] / (cm[1, 0] + cm[1, 1])

    return {'acc': acc, 'prec': prec, 'rec': rec, 'f1': f1, 'mcc': mcc,
            'fpr': fpr, 'tpr': tpr, 'auc': auc, 'cm': cm, 'sen': sen, 'spec': spec}

In [None]:
# Read feature descriptors of training data
# NOTE(review): template cell. The " /path......" strings are placeholders for
# the real CSV locations, and the `.` lines / bare `df6` presumably stand for the
# remaining frames (df3 .. df6) — fill them in before running; as written this
# cell is not valid Python.
df1 = pd.read_csv(" /path......")
df2 = pd.read_csv(" /path......")
.
.
.
df6

In [None]:
# Join the training descriptor frames side by side (axis=1).
# NOTE(review): `....` is a placeholder for the remaining frames.
data1=pd.concat([df1, ....],axis = 1)
# Every column except the first becomes the feature matrix (NumPy array).
# NOTE(review): presumably column 0 is an identifier — confirm. Only the first
# column of the concatenated frame is dropped, so an identifier column in any
# later frame would be kept as a feature.
X_train = data1.iloc[:,1:].values

In [None]:
X_train=np.array(X_train)
# Labels are assigned purely by row position: first half -> 1, second half -> 0.
# NOTE(review): this assumes the rows are ordered positives-then-negatives with
# equal counts — confirm against the descriptor files.
if len(X_train) % 2 != 0:
    # With an odd count the two halves would not cover every row and y_train
    # would end up one element shorter than X_train.
    raise ValueError(
        "X_train has an odd number of rows (%d); cannot split it into equal "
        "positive/negative halves" % len(X_train)
    )
lab=len(X_train)//2
pos_labels = np.ones(lab)
neg_labels = np.zeros(lab)
y_train = np.concatenate((pos_labels,neg_labels),axis=0)


In [None]:
# Read feature descriptors of independent data
# NOTE(review): template cell. The " /path......" strings, the `.` lines and the
# bare `df6` are placeholders and are not valid Python as written.
# The names df1, df2, ... are reused here, so the training frames loaded
# earlier are overwritten once this cell runs.
df1 = pd.read_csv(" /path......")
df2 = pd.read_csv(" /path......")
.
.
.
df6

In [None]:
# Concatenate Independent feature descriptors 
data2=pd.concat([df1, ....],axis = 1)
X_test_ind = data2.iloc[:,1:].values
X_test_ind=np.array(X_test_ind)
lab=len(X_test_ind)/2
pos_labels = np.ones(int(lab))
neg_labels = np.zeros(int(lab))
y_test_ind = np.concatenate((pos_labels,neg_labels),axis=0)

In [None]:
# Seeded stratified 10-fold splitter (one repeat).
# NOTE(review): in the cells shown here `cv` is only referenced by commented-out
# cross_val_score calls; evaluate_model_train is called without it.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=1,random_state=92)

# LGBM Testing

In [None]:
 # Fit the model

# NOTE(review): `os` is not used in the cells shown here.
import os
import lightgbm as lgbm

# NOTE(review): " ** best_params" is a template placeholder for the tuned
# hyper-parameters; replace it with real keyword arguments before running.
lgbm_model = lgbm.LGBMClassifier(" ** best_params")
lgbm_model.fit(X_train, y_train)
#score = cross_val_score(lgbm_model, X_train, y_train, cv=cv, scoring="accuracy")
#accuracy_mean = score.mean()


# Accuracy of the fitted model on the independent set.
y_pred = lgbm_model.predict(X_test_ind)
accuracy = accuracy_score(y_test_ind, y_pred)

#print('Mean_Accuracy is', accuracy_mean)
print('Ind Accuracy is', accuracy)



In [None]:
%%time
# Evaluate Model on Training data
train_eval = evaluate_model_train(lgbm_model, X_train, y_train)
print("Confusion Matrix is: ", train_eval['cm'])
print ('Accuracy : ', train_eval['acc'])
print('Sensitivity : ', train_eval['sen'])
print('Specificity : ', train_eval['spec'])
print("Mean of Matthews Correlation Coefficient is: ", train_eval['mcc'])
print("The Precision value is: ", train_eval['prec_train'])
print("The Recall value is: ", train_eval['recall_train'])
print("The F1 score is: ", train_eval['f1_train'])
print('The area under curve is:', train_eval['AUC'])

In [None]:
# Independent-set metrics for the LightGBM model, one line per metric.
dtc_eval = evaluate_model_test(lgbm_model, X_test_ind, y_test_ind)
for _caption, _field in (
    ('Accuracy:', 'acc'),
    ('Precision:', 'prec'),
    ('Recall:', 'rec'),
    ('F1 Score:', 'f1'),
    ('Area Under Curve:', 'auc'),
    ('Sensitivity : ', 'sen'),
    ('Specificity : ', 'spec'),
    ('MCC Score : ', 'mcc'),
    ('Confusion Matrix:\n', 'cm'),
):
    print(_caption, dtc_eval[_field])

# MLP Classifier Testing

In [None]:
from sklearn.neural_network import MLPClassifier
# NOTE(review): "** Best_params" is a template placeholder for the tuned
# hyper-parameters; replace it with real keyword arguments before running.
MLP= MLPClassifier("** Best_params")
MLP.fit(X_train, y_train)
#scores = cross_val_score(MLP, X_train, y_train, cv=cv,  scoring='accuracy')
# Accuracy of the fitted model on the independent set.
y_pred =MLP.predict(X_test_ind)
accuracy = accuracy_score(y_test_ind, y_pred)

#print(scores.mean())
print(accuracy)


In [None]:
%%time
# Evaluate Model on Training data
train_eval = evaluate_model_train(MLP, X_train, y_train)
print("Confusion Matrix is: ", train_eval['cm'])
print ('Accuracy : ', train_eval['acc'])
print('Sensitivity : ', train_eval['sen'])
print('Specificity : ', train_eval['spec'])
print("Mean of Matthews Correlation Coefficient is: ", train_eval['mcc'])
print("The Precision value is: ", train_eval['prec_train'])
print("The Recall value is: ", train_eval['recall_train'])
print("The F1 score is: ", train_eval['f1_train'])
print('The area under curve is:', train_eval['AUC'])

In [None]:
# Independent-set metrics for the MLP model, one line per metric.
dtc_eval = evaluate_model_test(MLP, X_test_ind, y_test_ind)
for _caption, _field in (
    ('Accuracy:', 'acc'),
    ('Precision:', 'prec'),
    ('Recall:', 'rec'),
    ('F1 Score:', 'f1'),
    ('Area Under Curve:', 'auc'),
    ('Sensitivity : ', 'sen'),
    ('Specificity : ', 'spec'),
    ('MCC Score : ', 'mcc'),
    ('Confusion Matrix:\n', 'cm'),
):
    print(_caption, dtc_eval[_field])

# ETC Testing

In [None]:
# Seeded stratified 10-fold splitter (one repeat); same definition as earlier cells.
# NOTE(review): only referenced by commented-out cross_val_score calls in the cells shown here.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=1, random_state=92)

In [None]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import ExtraTreesClassifier

In [None]:
%%time
from sklearn.ensemble import ExtraTreesClassifier
# Fit the model
# NOTE(review): " ** Best_params ** " is a template placeholder for the tuned
# hyper-parameters; replace it with real keyword arguments before running.
etc_model = ExtraTreesClassifier(" ** Best_params ** ")

etc_model.fit(X_train, y_train)
#score = cross_val_score(etc_model, X_train, y_train, cv=cv, scoring="accuracy")
#accuracy_mean = score.mean()


# Accuracy of the fitted model on the independent set.
y_pred = etc_model.predict(X_test_ind)
accuracy = accuracy_score(y_test_ind, y_pred)

#print('Mean_Accuracy is', accuracy_mean)
print('Ind Accuracy is', accuracy)





In [None]:
# Cross-validated training metrics for the ExtraTrees model, one line per metric.
train_eval = evaluate_model_train(etc_model, X_train, y_train)
for _caption, _field in (
    ("Confusion Matrix is:\n", 'cm'),
    ('Accuracy : ', 'Acc'),
    ('Sensitivity : ', 'sen'),
    ('Specificity : ', 'spec'),
    ("Matthews Correlation Coefficient is: ", 'mcc'),
    ("Precision value is: ", 'prec_train'),
    ("Recall value is: ", 'recall_train'),
    ('The area under curve is:', 'AUC'),
    ("F1 score is: ", 'f1_train'),
):
    print(_caption, train_eval[_field])

In [None]:
# Independent-set metrics for the ExtraTrees model, one line per metric.
dtc_eval = evaluate_model_test(etc_model, X_test_ind, y_test_ind)
for _caption, _field in (
    ('Accuracy:', 'acc'),
    ('Precision:', 'prec'),
    ('Recall:', 'rec'),
    ('F1 Score:', 'f1'),
    ('Area Under Curve:', 'auc'),
    ('Sensitivity : ', 'sen'),
    ('Specificity : ', 'spec'),
    ('MCC Score : ', 'mcc'),
    ('Confusion Matrix:\n', 'cm'),
):
    print(_caption, dtc_eval[_field])

# XGB Testing

In [None]:
# Seeded stratified 10-fold splitter (one repeat); same definition as earlier cells.
# NOTE(review): only referenced by commented-out cross_val_score calls in the cells shown here.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=1, random_state=92)

In [None]:
from xgboost import XGBClassifier

# Fit the model
# NOTE(review): " ** Best_params** " is a template placeholder for the tuned
# hyper-parameters; replace it with real keyword arguments before running.
xgb_model = XGBClassifier(" ** Best_params** " )
xgb_model.fit(X_train, y_train)

#score = cross_val_score(xgb_model, X_train, y_train, cv=cv, scoring="accuracy")
#accuracy_mean = score.mean()


# Accuracy of the fitted model on the independent set.
y_pred = xgb_model.predict(X_test_ind)
accuracy = accuracy_score(y_test_ind, y_pred)

#print('Mean_Accuracy is', accuracy_mean)
print('Ind Accuracy is', accuracy)


In [None]:
# Cross-validated training metrics for the XGBoost model, one line per metric.
train_eval = evaluate_model_train(xgb_model, X_train, y_train)

for _caption, _field in (
    ("Confusion Matrix is:\n", 'cm'),
    ('Accuracy : ', 'acc'),
    ('Sensitivity : ', 'sen'),
    ('Specificity : ', 'spec'),
    ("Matthews Correlation Coefficient is: ", 'mcc'),
    ("Precision value is: ", 'prec_train'),
    ("Recall value is: ", 'recall_train'),
    ("F1 score is: ", 'f1_train'),
    ('The area under curve is:', 'AUC'),
):
    print(_caption, train_eval[_field])

In [None]:
# Independent-set metrics for the XGBoost model, one line per metric.
dtc_eval = evaluate_model_test(xgb_model, X_test_ind, y_test_ind)

for _caption, _field in (
    ('Accuracy:', 'acc'),
    ('Precision:', 'prec'),
    ('Recall:', 'rec'),
    ('F1 Score:', 'f1'),
    ('Area Under Curve:', 'auc'),
    ('Sensitivity : ', 'sen'),
    ('Specificity : ', 'spec'),
    ('MCC Score : ', 'mcc'),
    ('Confusion Matrix:\n', 'cm'),
):
    print(_caption, dtc_eval[_field])

# Random Forest Testing

In [None]:
# Seeded stratified 10-fold splitter (one repeat); same definition as earlier cells.
# NOTE(review): only referenced by commented-out cross_val_score calls in the cells shown here.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=1, random_state=92)

In [None]:
%%time
#import optuna
from sklearn.ensemble import RandomForestClassifier

# NOTE(review): " ** Best_Params" is a template placeholder for the tuned
# hyper-parameters; replace it with real keyword arguments before running.
Rf_model = RandomForestClassifier(" ** Best_Params")
Rf_model.fit(X_train,y_train)
#score = cross_val_score(Rf_model, X_train, y_train, cv=cv, scoring="accuracy")
#accuracy_mean = score.mean()


# Accuracy of the fitted model on the independent set.
y_pred = Rf_model.predict(X_test_ind)
accuracy = accuracy_score(y_test_ind, y_pred)

#print('Mean_Accuracy is', accuracy_mean)
print('Ind Accuracy is', accuracy)





In [None]:
# Evaluate Model on Training data
train_eval = evaluate_model_train(Rf_model, X_train, y_train)
print("Confusion Matrix is: ", train_eval['cm'])
print ('Accuracy : ', train_eval['Acc'])
print('Sensitivity : ', train_eval['sen'])
print('Specificity : ', train_eval['spec'])
print("Mean of Matthews Correlation Coefficient is: ", train_eval['mcc'])
# Report precision like the other training reports. The previous line printed
# train_eval['acc'] (mean fold accuracy, already shown above) under a label
# claiming it was derived from the confusion matrix.
print("The Precision value is: ", train_eval['prec_train'])
print("The Recall value is: ", train_eval['recall_train'])
print("The F1 score is: ", train_eval['f1_train'])
print('The area under curve is:', train_eval['AUC'])

In [None]:
# Evaluate Model on Testing data

test_eval = evaluate_model_test(Rf_model, X_test_ind, y_test_ind)
# Print result
# Read from `test_eval`, the Random Forest result computed just above; printing
# `dtc_eval` here would repeat whatever model was evaluated in an earlier cell.
print('Accuracy:', test_eval['acc'])
print('Precision:', test_eval['prec'])
print('Recall:', test_eval['rec'])
print('F1 Score:', test_eval['f1'])
print('Area Under Curve:', test_eval['auc'])
print('Sensitivity : ', test_eval['sen'])
print('Specificity : ', test_eval['spec'])
print('MCC Score : ', test_eval['mcc'])
print('Confusion Matrix:\n', test_eval['cm'])

# CatBoost Testing

In [None]:
# Seeded stratified 10-fold splitter (one repeat); same definition as earlier cells.
# NOTE(review): only referenced by commented-out cross_val_score calls in the cells shown here.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=1,random_state=92)

In [None]:
import catboost
from catboost import CatBoostClassifier

# Fit the model
# NOTE(review): " ** Best_params" is a template placeholder for the tuned
# hyper-parameters; replace it with real keyword arguments before running.
CB_model = catboost.CatBoostClassifier(" ** Best_params")
CB_model.fit(X_train, y_train)
#score = cross_val_score(CB_model, X_train, y_train, cv=cv, scoring="accuracy")
#accuracy_mean = score.mean()


# Accuracy of the fitted model on the independent set.
y_pred = CB_model.predict(X_test_ind)
accuracy = accuracy_score(y_test_ind, y_pred)

#print('Mean_Accuracy is', accuracy_mean)
print('Ind Accuracy is', accuracy)


In [None]:
# Cross-validated training metrics for the CatBoost model, one line per metric.
train_eval = evaluate_model_train(CB_model, X_train, y_train)
for _caption, _field in (
    ("Confusion Matrix is:\n", 'cm'),
    ('Accuracy : ', 'acc'),
    ('Sensitivity : ', 'sen'),
    ('Specificity : ', 'spec'),
    ("Matthews Correlation Coefficient is: ", 'mcc'),
    ("Precision value is: ", 'prec_train'),
    ("Recall value is: ", 'recall_train'),
    ('The area under curve is:', 'AUC'),
    ("F1 score is: ", 'f1_train'),
):
    print(_caption, train_eval[_field])

In [None]:
# Evaluate Model on Testing data
test_eval = evaluate_model_test(CB_model, X_test_ind, y_test_ind)
# Print result
# Read from `test_eval`, the CatBoost result computed just above; printing
# `dtc_eval` here would repeat whatever model was evaluated in an earlier cell.
print('Accuracy:', test_eval['acc'])
print('Precision:', test_eval['prec'])
print('Recall:', test_eval['rec'])
print('F1 Score:', test_eval['f1'])
print('Area Under Curve:', test_eval['auc'])
print('Sensitivity : ', test_eval['sen'])
print('Specificity : ', test_eval['spec'])
print('MCC Score : ', test_eval['mcc'])
print('Confusion Matrix:\n', test_eval['cm'])

# Voting Classifier

In [None]:
# Seeded stratified 10-fold splitter (one repeat); same definition as earlier cells.
# NOTE(review): only referenced by commented-out cross_val_score calls in the cells shown here.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=1,random_state=92)

In [None]:
# Sanity check: display the shapes of the training and independent arrays.
X_train.shape, y_train.shape, X_test_ind.shape, y_test_ind.shape

In [None]:
from sklearn.ensemble import VotingClassifier
# Soft-voting ensemble over the six base models defined in earlier cells.
vclf2 = VotingClassifier(estimators=[ ('RF', Rf_model),  ('XGB', xgb_model),('LGBM', lgbm_model),('ETC', etc_model),
                                     ('MLP', MLP),('Catboost', CB_model)], voting='soft')
vclf2.fit(X_train, y_train)
#score = cross_val_score(vclf2, X_train, y_train, cv=cv, scoring="accuracy")
#accuracy_mean = score.mean()


# Accuracy of the fitted ensemble on the independent set.
y_pred = vclf2.predict(X_test_ind)
accuracy = accuracy_score(y_test_ind, y_pred)

#print('Mean_Accuracy is', accuracy_mean)
print('Ind Accuracy is', accuracy)



In [None]:
# Cross-validated training metrics for the voting ensemble, one line per metric.
train_eval = evaluate_model_train(vclf2, X_train, y_train)

for _caption, _field in (
    ("Confusion Matrix is:\n", 'cm'),
    ('Accuracy : ', 'acc'),
    ('Sensitivity : ', 'sen'),
    ('Specificity : ', 'spec'),
    ("Matthews Correlation Coefficient is: ", 'mcc'),
    ("Precision value is: ", 'prec_train'),
    ("Recall value is: ", 'recall_train'),
    ("F1 score is: ", 'f1_train'),
    ('The area under curve is:', 'AUC'),
):
    print(_caption, train_eval[_field])

In [None]:
# Independent-set metrics for the voting ensemble, one line per metric.
dtc_eval = evaluate_model_test(vclf2, X_test_ind, y_test_ind)

for _caption, _field in (
    ('Accuracy:', 'acc'),
    ('Precision:', 'prec'),
    ('Recall:', 'rec'),
    ('F1 Score:', 'f1'),
    ('Area Under Curve:', 'auc'),
    ('Sensitivity : ', 'sen'),
    ('Specificity : ', 'spec'),
    ('MCC Score : ', 'mcc'),
    ('Confusion Matrix:\n', 'cm'),
):
    print(_caption, dtc_eval[_field])

# Meta-Classifier

In [None]:
# Sanity check: display the shapes of the training and independent arrays.
X_train.shape, y_train.shape, X_test_ind.shape, y_test_ind.shape

In [None]:
# defining meta-classifier
# NOTE(review): KerasClassifier and `metrics` are not used in this cell.
from keras.wrappers.scikit_learn import KerasClassifier
from mlxtend.classifier import StackingClassifier
from sklearn import metrics
# Stack five base models; CatBoost is the meta-classifier and receives the base
# models' probabilities together with the original features.
clf_stack = StackingClassifier(classifiers =[ ( lgbm_model ), ( Rf_model), (MLP),(xgb_model), (etc_model)], 
                               meta_classifier = CB_model, use_probas = True, use_features_in_secondary = True)
clf_stack.fit(X_train, y_train)
#score = cross_val_score(clf_stack, X_train, y_train, cv=cv, scoring="accuracy")
#accuracy_mean = score.mean()
# Accuracy on the independent set. The arguments are passed as
# (predictions, truth), the reverse of the other cells; accuracy is symmetric,
# so the value is unaffected.
y=clf_stack.predict(X_test_ind)
score=accuracy_score(y,y_test_ind)

#print(accuracy_mean)
print(score)

In [None]:
%%time
# Evaluate Model on Training data
train_eval = evaluate_model_train(clf_stack, X_train, y_train)

print("Confusion Matrix is:\n", train_eval['cm'])
print ('Accuracy : ', train_eval['acc'])
print('Sensitivity : ', train_eval['sen'])
print('Specificity : ', train_eval['spec'])
print("Matthews Correlation Coefficient is: ", train_eval['mcc'])
print("Precision value is: ", train_eval['prec_train'])
print("Recall value is: ", train_eval['recall_train'])
print("F1 score is: ", train_eval['f1_train'])
print('The area under curve is:', train_eval['AUC'])

In [None]:
# Independent-set metrics for the stacking ensemble, one line per metric.
dtc_eval = evaluate_model_test(clf_stack, X_test_ind, y_test_ind)

for _caption, _field in (
    ('Accuracy:', 'acc'),
    ('Precision:', 'prec'),
    ('Recall:', 'rec'),
    ('F1 Score:', 'f1'),
    ('Area Under Curve:', 'auc'),
    ('Sensitivity : ', 'sen'),
    ('Specificity : ', 'spec'),
    ('MCC Score : ', 'mcc'),
    ('Confusion Matrix:\n', 'cm'),
):
    print(_caption, dtc_eval[_field])

# Ensemble Classifier

In [None]:
# NOTE(review): `model_selection` and `metrics` are not used in this cell.
from sklearn import model_selection
from mlxtend.classifier import EnsembleVoteClassifier
from sklearn import metrics

# Soft-voting ensemble (mlxtend) over the six base models; no weights are set.
eclf = EnsembleVoteClassifier(clfs=[  (Rf_model), ( xgb_model),(etc_model),(lgbm_model),(CB_model),(MLP)], voting='soft')#, weights=[0.2,0.1,0.3,0.3, 0.1])
eclf.fit(X_train, y_train)
#score = cross_val_score(eclf, X_train, y_train, cv=cv, scoring="accuracy")
#accuracy_mean = score.mean()
y_predd=eclf.predict(X_test_ind)

# Accuracy on the independent set. The arguments are passed as
# (predictions, truth); accuracy is symmetric, so the value is unaffected.
score=accuracy_score(y_predd,y_test_ind)

#print(accuracy_mean)
print(score)



In [None]:
# Cross-validated training metrics for the mlxtend voting ensemble, one line per metric.
train_eval = evaluate_model_train(eclf, X_train, y_train)

for _caption, _field in (
    ("Confusion Matrix is:\n", 'cm'),
    ('Accuracy : ', 'acc'),
    ('Sensitivity : ', 'sen'),
    ('Specificity : ', 'spec'),
    ("Matthews Correlation Coefficient is: ", 'mcc'),
    ("Precision value is: ", 'prec_train'),
    ("Recall value is: ", 'recall_train'),
    ("F1 score is: ", 'f1_train'),
):
    print(_caption, train_eval[_field])

In [None]:
# Independent-set metrics for the mlxtend voting ensemble, one line per metric.
dtc_eval = evaluate_model_test(eclf, X_test_ind, y_test_ind)

for _caption, _field in (
    ('Accuracy:', 'acc'),
    ('Precision:', 'prec'),
    ('Recall:', 'rec'),
    ('F1 Score:', 'f1'),
    ('Area Under Curve:', 'auc'),
    ('Sensitivity : ', 'sen'),
    ('Specificity : ', 'spec'),
    ('MCC Score : ', 'mcc'),
    ('Confusion Matrix:\n', 'cm'),
):
    print(_caption, dtc_eval[_field])

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
%matplotlib inline
import seaborn as sns
from sklearn.model_selection import KFold, RepeatedStratifiedKFold, StratifiedKFold
from sklearn.metrics import f1_score
from mlens.ensemble import SuperLearner
from mlens.metrics.metrics import rmse
from sklearn.datasets import load_boston
from sklearn.linear_model import Lasso
from sklearn.svm import SVR

In [None]:
# Build a SuperLearner ensemble (mlens) from the six base models, with a
# default-configured CatBoost classifier as the meta learner.

model = SuperLearner(
    folds=10,
    random_state=42
)

model.add(
    [
        xgb_model,
        lgbm_model,
        Rf_model,
        CB_model,
        etc_model,
        MLP,
    ]
)

model.add_meta(
   CatBoostClassifier()
)

model.fit(X_train, y_train)

preds = model.predict(X_test_ind)

# Both numbers are computed on the independent set: the first is accuracy, the
# second is the F1 score. The labels now say so (they previously read
# "Train accuracy" and "ind-accuracy").
print('SuperLearner ind-accuracy: ', accuracy_score(y_test_ind, preds))
print('SuperLearner ind F1-score: ', f1_score(y_test_ind, preds))

In [None]:
from sklearn.linear_model import SGDClassifier

In [None]:
# Registry of candidate estimators, looked up by short name when SuperLearner
# ensembles are assembled. Plain keys hold estimators configured only with a
# random_state.
# NOTE(review): the "O..." keys are built from "**params" template placeholders,
# presumably standing for tuned hyper-parameters — replace before running.
mdict = {
    'RF': RandomForestClassifier(random_state=40),
    'XGB': XGBClassifier(random_state=25),
    'LGBM': lgbm.LGBMClassifier(random_state=72),
    'CABT': CatBoostClassifier(random_state=90),
    'OARF': RandomForestClassifier(" **params"),
    'OAXGB': XGBClassifier( "**params"),
    'OALGBM': lgbm.LGBMClassifier("**params"),
    'OCAT': CatBoostClassifier("**params"),

    'OAET': ExtraTreesClassifier("**params"),
    'ET': ExtraTreesClassifier(random_state=32),
    'OMLP': MLPClassifier("**params"),
    'MLP': MLPClassifier(random_state= 72),
#     'GB': GradientBoostingClassifier(random_state=42),
#     'RDG': RidgeClassifier(random_state=42),
#     'PCP': Perceptron(random_state=42),
#     'PAC': PassiveAggressiveClassifier(random_state=42)
}


In [None]:
import optuna
from optuna.samplers import TPESampler


In [None]:
def create_model(trial):
    """Assemble a SuperLearner whose composition is chosen by an Optuna trial.

    The trial picks how many base-learner slots to fill (2-6), one ``mdict`` key
    per slot (repeated picks are collapsed, so the layer may hold fewer models
    than slots), the number of folds (5-10) and the meta learner.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Supplies the ``n_models``, ``model_<i>``, ``folds`` and ``head`` suggestions.

    Returns
    -------
    SuperLearner
        Unfitted ensemble with one base layer and a meta learner.
    """
    from sklearn.base import clone

    # Candidate base learners (keys of `mdict`); each key is listed exactly once
    # so no candidate is over-represented among the categorical choices.
    models_list = [
        'XGB', 'LGBM', 'RF', 'CABT', 'ET', 'MLP',
        'OARF', 'OAXGB', 'OALGBM', 'OCAT', 'OAET', 'OMLP',
    ]

    # Candidate meta learners.
    head_list = [
        'RF',
        'XGB',
        'LGBM',
        'CABT',
        'ET',
        'MLP',
    ]

    n_models = trial.suggest_int("n_models", 2, 6)
    model_names = list()
    for i in range(n_models):
        model_item = trial.suggest_categorical('model_{}'.format(i), models_list)
        # The same key may be drawn for several slots; keep the first occurrence.
        if model_item not in model_names:
            model_names.append(model_item)

    folds = trial.suggest_int("folds", 5, 10)

    model = SuperLearner(
        folds=folds,
        random_state=42
    )

    # Hand the ensemble fresh copies, so the registry instances are never shared
    # between trials or between the base layer and the meta learner, regardless
    # of whether SuperLearner copies its inputs itself.
    models = [
        clone(mdict[item]) for item in model_names
    ]
    model.add(models)
    head = trial.suggest_categorical('head', head_list)
    model.add_meta(
        clone(mdict[head])
    )

    return model
        

def objective(trial):
    """Optuna objective: validation accuracy of the ensemble proposed by ``trial``.

    The score is measured on a stratified 20% hold-out carved from the training
    data, not on the independent set. Selecting the ensemble on
    ``X_test_ind`` / ``y_test_ind`` would make any accuracy later reported on
    that set optimistically biased.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial passed through to ``create_model``.

    Returns
    -------
    float
        Accuracy on the validation hold-out (maximised by the study).
    """
    from sklearn.model_selection import train_test_split

    # Fixed seed: every trial is scored on the same hold-out, so scores are comparable.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train, y_train, test_size=0.2, stratify=y_train, random_state=42
    )

    model = create_model(trial)
    model.fit(X_fit, y_fit)
    preds = model.predict(X_val)
    score = accuracy_score(y_val, preds)
    return score

# Seed the sampler so the sequence of proposed ensembles is repeatable across
# runs (full repeatability also depends on the estimators being seeded).
study = optuna.create_study(
    direction="maximize",
    sampler=TPESampler(seed=42),
)

# Run 50 trials of the ensemble-composition search.
study.optimize(
    objective,
    n_trials=50
)

In [None]:
# Pull the winning configuration apart: meta learner, fold count, and the
# distinct base-learner names in the order their slots were sampled.
params = study.best_params

head = params.pop('head')
folds = params.pop('folds')
params.pop('n_models')

# Only the model_<i> entries remain; dict.fromkeys drops repeats and keeps order.
result = list(dict.fromkeys(params.values()))

result

In [None]:
# Rebuild the best ensemble found by the study and fit it on the full training set.
# NOTE(review): random_state is 72 here but 42 inside create_model, so this
# refit may not reproduce the score the study recorded — confirm which is intended.
model = SuperLearner(
    folds=folds, 
    random_state=72
)

# Base layer: the distinct model names kept in `result`.
models = [
    mdict[item] for item in result
]
model.add(models)
model.add_meta(mdict[head])

model.fit(X_train, y_train)

preds = model.predict(X_test_ind)

# Accuracy on the independent set.
print('Optimized SuperLearner accuracy: ', accuracy_score(y_test_ind, preds))
#print('Optimized SuperLearner f1-score: ', f1_score(y_test, preds))
#print('Optimized SuperLearner f1-score: ', f1_score(y_test, preds))

In [None]:
# Sanity check: display the shapes of the training and independent arrays.
X_train.shape, y_train.shape, X_test_ind.shape, y_test_ind.shape

In [None]:
#X_test = np.array([X_test_ind])
#y_test = np.array([y_test_ind])

In [None]:
# Cross-validated training metrics for the optimized SuperLearner, one line per metric.
train_eval = evaluate_model_train(model, X_train, y_train)

for _caption, _field in (
    ("Confusion Matrix is:\n", 'cm'),
    ('Accuracy : ', 'acc'),
    ('Sensitivity : ', 'sen'),
    ('Specificity : ', 'spec'),
    ("Matthews Correlation Coefficient is: ", 'mcc'),
    ("Precision value is: ", 'prec_train'),
    ("Recall value is: ", 'recall_train'),
    ("F1 score is: ", 'f1_train'),
):
    print(_caption, train_eval[_field])

In [None]:
# Independent-set metrics for the optimized SuperLearner, one line per metric.
dtc_eval = evaluate_model_test(model, X_test_ind, y_test_ind)
for _caption, _field in (
    ('Accuracy:', 'acc'),
    ('Precision:', 'prec'),
    ('Recall:', 'rec'),
    ('F1 Score:', 'f1'),
    ('Area Under Curve:', 'auc'),
    ('Sensitivity : ', 'sen'),
    ('Specificity : ', 'spec'),
    ('MCC Score : ', 'mcc'),
    ('Confusion Matrix:\n', 'cm'),
):
    print(_caption, dtc_eval[_field])