In [75]:
# Make '../../' importable before any project-local import runs
# (presumably the repository root — confirm against the project layout).
import sys
sys.path.append('../../')

# Third-party packages.
import numpy as np
import pandas as pd
from mlpath import mlquest as mlq
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_predict
from sklearn.svm import SVC

# Project-local helpers (presumably resolved through the sys.path entry added above).
from DataPreparation.DataPreparation import read_data
from ModelAnalysis import cross_validation
from utils import load_hyperparameters, save_model, get_metrics

# Single source of truth for this notebook's model name: used for the mlquest
# run below and when saving the model.
model_name = "StackingEnsemble"

# Names handed to load_hyperparameters for each candidate estimator.
rf_model = 'RandomForest'
lr_model = 'LogisticRegression'
svm_model = 'SVM'

# Open the tracking run; reuse model_name instead of repeating the literal so the
# quest name and the saved-model name cannot drift apart.
mlq.start_quest(model_name, table_dest="../../", log_defs=False)




# <font color="aqua">Stacking Ensemble</font> Model

### Read the data

In [76]:
# Read the numerical, standardized dataset. The call goes through mlq.l so that
# mlquest can track it (presumably it records the call's arguments — see mlpath).
x_data_d, y_data_d = mlq.l(read_data)(kind='Numerical', standardize=True)

# Load the stored hyperparameters for each candidate estimator, in the same
# order as before: random forest, SVM, logistic regression.
rf_opt_params, svm_opt_params, lr_opt_params = map(
    load_hyperparameters, (rf_model, svm_model, lr_model)
)

### Initialize the model

In [77]:
# TODO Edit candidate_opt_params
# TODO Could tune clf_final

# Candidate base learners, each configured with the parameters returned by
# load_hyperparameters for that model.
# NOTE(review): clf_rf is constructed but never passed to the stack below, while
# earlier tracked runs list an 'rf' estimator — confirm whether dropping it is intentional.
clf_rf = RandomForestClassifier(**rf_opt_params)
clf_svm = SVC(**svm_opt_params)
clf_log = LogisticRegression(**lr_opt_params)

# Seed the meta-learner. An unseeded random forest is re-randomized on every fit,
# which makes the reported metrics change between otherwise identical runs.
# The seed value matches the random_state passed to cross_validation in this notebook.
clf_final = RandomForestClassifier(random_state=1)

# Stack the SVM and logistic-regression learners under the forest meta-learner;
# the constructor is called through mlq.l so mlquest can track it.
clf = mlq.l(StackingClassifier)(estimators=[('svm', clf_svm), ('log', clf_log)], final_estimator=clf_final)

### Evaluate Model Bias

In [78]:
# Fit the stack on the full dataset and predict on that same data, so the
# report below measures training-set fit (bias), not generalization.
clf.fit(x_data_d, y_data_d)
y_pred = clf.predict(x_data_d)
# Text classification report with 3 decimal places.
train_metrics = classification_report(y_data_d, y_pred, digits=3)
# get_metrics returns two values from the report; they are bound here as
# accuracy and weighted F1 (presumably parsed from the report text — verify in utils).
train_acc, train_wf1 = get_metrics(train_metrics)
print(train_metrics)

              precision    recall  f1-score   support

           0      0.993     0.993     0.993       152
           1      0.969     0.988     0.978       160
           2      0.994     0.988     0.991       324
           3      1.000     0.998     0.999       544

    accuracy                          0.993      1180
   macro avg      0.989     0.992     0.990      1180
weighted avg      0.993     0.993     0.993      1180



### Evaluate Model Generalization

In [79]:
# Evaluate the stack with the project's cross_validation helper, requesting
# k=10 and n_repeats=10 with a fixed random_state.
kfold = cross_validation(
    clf,
    x_data_d,
    y_data_d,
    k=[10],
    n_repeats=[10],
    random_state=1,
)

# Keep the first entry stored under the '10-Repeated 10-fold' key
# (presumably the weighted F1 score, going by the variable name — verify in ModelAnalysis).
repeated_10fold_wf1 = kfold.get('10-Repeated 10-fold')[0]

0
10-Repeated 10-fold
0.9814


### Save Model

In [80]:
# Persist the stacking classifier under `model_name` using the project's
# save_model helper (storage location and format are defined in utils — not visible here).
save_model(model_name, clf)

### Tracking

In [81]:
# Record this run's three scores: training accuracy, training weighted F1 and
# the repeated 10-fold score computed earlier in the notebook.
mlq.log_metrics(train_acc, train_wf1, repeated_10fold_wf1)
# Close the quest that was opened in the setup cell.
mlq.end_quest()
# Display the tracking table for this quest, limited by last_k=6
# (presumably the six most recent runs — see mlpath's show_logs).
mlq.show_logs("StackingEnsemble", table_dest="../../", last_k=6)

info,info.1,info.2,info.3,read_data,metrics,metrics.1,metrics.2,metrics.3,cross_val_predict,cross_val_predict.1
16:09:31,05/11/23,1.13 min,15,True,,,,0.9771186440677966,"StackingClassifier(estimators=[('rf', RandomForestClassifier...",5.0
16:11:27,05/11/23,19.37 s,16,True,,,,0.9813559322033898,"StackingClassifier(estimators=[('rf', RandomForestClassifier...",5.0
16:24:34,05/11/23,19.83 s,17,True,,,,0.9805084745762712,"StackingClassifier(estimators=[('rf', RandomForestClassifier...",5.0
15:28:15,05/14/23,3.81 min,18,True,0.991,0.991,0.9813,,,
15:32:42,05/14/23,31.52 s,19,True,0.991,0.991,0.9813,,,
15:33:41,05/14/23,53.63 s,20,True,0.993,0.993,0.9814,,,
