In [85]:
import sys; sys.path.append('../../')
import numpy as np
import pandas as pd
from DataPreparation.DataPreparation import read_data
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_predict
from mlpath import mlquest as mlq
from ModelAnalysis import cross_validation
from utils import load_hyperparameters, save_model, get_metrics, load_model
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
# Remaining scikit-learn estimators used by the ensemble.
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Names passed to load_hyperparameters() to fetch each learner's saved parameters.
rf_model, lr_model, svm_model = "RandomForest", "LogisticRegression", "SVM"

# Name of this experiment; reused when starting the quest, saving the model
# and showing the logs.
model_name = "StackingEnsemble"

# Open the mlquest run for this notebook.
mlq.start_quest(
    model_name,
    table_dest="../../",
    log_defs=False,
)




# <font color="aqua">Stacking Ensemble</font> Model

### Read the data

In [86]:
# Load features and labels through mlq's logging wrapper around read_data.
x_data_d, y_data_d = mlq.l(read_data)(
    kind="Numerical",
    standardize=True,
    split="all",
)

# Fetch the saved hyperparameters of each learner (random forest, SVM,
# logistic regression), in that order.
rf_opt_params, svm_opt_params, lr_opt_params = map(
    load_hyperparameters,
    (rf_model, svm_model, lr_model),
)

### Initialize the Model

In [87]:
# TODO Edit candidate_opt_params
# TODO Could tune clf_final

# Base learners, each built from its loaded hyperparameters.
clf_svm = SVC(**svm_opt_params)
clf_log = LogisticRegression(**lr_opt_params)
clf_rf = RandomForestClassifier(**rf_opt_params)

# The meta-learner reuses the SVM hyperparameters (see the TODO above).
clf_final = SVC(**svm_opt_params)

# Stack the base learners under the meta-learner; mlq.l wraps the constructor
# call so it goes through mlquest.
base_estimators = [("svm", clf_svm), ("log", clf_log), ("rf", clf_rf)]
clf = mlq.l(StackingClassifier)(
    estimators=base_estimators,
    final_estimator=clf_final,
)

### Evaluate Model Bias (Training-Set Performance)

In [88]:
# Fit on the full dataset and predict on that same data: the resulting
# report measures training performance, not generalization.
clf.fit(x_data_d, y_data_d)
y_pred = clf.predict(x_data_d)

# Text report with three decimals; get_metrics extracts the two values
# that are logged later.
train_metrics = classification_report(
    y_true=y_data_d,
    y_pred=y_pred,
    digits=3,
)
train_acc, train_wf1 = get_metrics(train_metrics)
print(train_metrics)

              precision    recall  f1-score   support

           0      1.000     1.000     1.000       190
           1      0.995     1.000     0.998       201
           2      1.000     0.998     0.999       406
           3      1.000     1.000     1.000       680

    accuracy                          0.999      1477
   macro avg      0.999     0.999     0.999      1477
weighted avg      0.999     0.999     0.999      1477



### Evaluate Model Generalization

In [89]:
# Run the project's cross_validation helper with k=10 and 10 repeats,
# seeded for reproducibility.
kfold = cross_validation(
    clf,
    x_data_d,
    y_data_d,
    k=[10],
    n_repeats=[10],
    random_state=1,
)

# Keep the first entry stored under the repeated 10-fold key for logging.
repeated_10fold_wf1 = kfold.get('10-Repeated 10-fold')[0]

0
10-Repeated 10-fold
0.9912


### Save the Model

In [90]:
# Hand the ensemble to the project's save_model helper under `model_name`
# (presumably serializes it; the storage location is not visible here).
# NOTE(review): `clf` was fit on the full dataset in the bias-evaluation cell;
# this assumes cross_validation() did not refit or alter it in place — TODO confirm.
save_model(model_name, clf)

### Tracking

In [91]:
# Log the two values parsed from the training report and the value taken from
# the '10-Repeated 10-fold' cross-validation entry.
# NOTE(review): mlq may derive column names from these variable names, so keep
# the bare-name call form — TODO confirm against the mlpath docs.
mlq.log_metrics(train_acc, train_wf1, repeated_10fold_wf1)
# Close the quest opened by mlq.start_quest at the top of the notebook.
mlq.end_quest()
# Show the log table for this model (last_k=6; presumably the six most recent runs).
mlq.show_logs(model_name, table_dest="../../", last_k=6)

info,info.1,info.2,info.3,read_data,metrics,metrics.1,metrics.2,metrics.3,cross_val_predict,cross_val_predict.1
15:52:17,05/15/23,2.34 min,22,True,0.989,0.989,0.9838,,,
15:55:33,05/15/23,1.68 min,23,True,0.989,0.989,0.9831,,,
17:44:30,05/16/23,14.94 min,24,True,0.999,0.999,0.9905,,,
18:05:27,05/16/23,1.59 min,25,True,0.99,0.99,0.9824,,,
18:09:46,05/16/23,43.86 min,26,True,0.998,0.998,0.9892,,,
18:54:39,05/16/23,9.83 min,27,True,0.999,0.999,0.9912,,,
