In [1]:
import sys; sys.path.append('../../')
import numpy as np
import pandas as pd
from DataPreparation.DataPreparation import read_data
from sklearn.metrics import classification_report
from mlpath import mlquest as mlq
from sklearn.model_selection import cross_val_predict
from utils import load_hyperparameters, save_model, get_metrics
from ModelAnalysis import cross_validation
from sklearn.ensemble import RandomForestClassifier
# Experiment identifier; later cells pass it to load_hyperparameters and
# save_model, so it is defined once here and reused for the quest name.
model_name = 'RandomForest'

# Open the mlquest tracking run for this notebook. The log table lives two
# directories up (table_dest); log_defs=True presumably records default
# argument values as well -- confirm against the mlpath docs.
mlq.start_quest(model_name, table_dest="../../", log_defs=True)

# <font color="aqua">Random Forest</font> Model

### Read the data

In [2]:
# Hyperparameters stored for this model name (project helper).
opt_params = load_hyperparameters(model_name)

# Only these two columns are fed to the classifier.
FEATURE_COLUMNS = ['Weight', 'Height']

# read_data is wrapped with mlq.l so the call is recorded in the quest log.
x_data_d, y_data_d = mlq.l(read_data)(kind='Numerical', select=True)
x_data_d = x_data_d[FEATURE_COLUMNS]

### Initialize the model

In [3]:
# Override the loaded tree count with a fixed, larger forest.
opt_params['n_estimators'] = 1000

# Seed the forest so the reported metrics can be reproduced on a fresh
# Restart & Run All. setdefault keeps any random_state that the loaded
# hyperparameters already specify; the value 1 mirrors the random_state
# passed to cross_validation in the generalization cell.
opt_params.setdefault('random_state', 1)

# Build the classifier through mlq.l so its arguments are recorded in the log.
clf = mlq.l(RandomForestClassifier)(**opt_params)

### Evaluate Model Bias

In [4]:
# Fit on the whole dataset and predict the very same rows: these are
# training-set scores, which is why the heading calls them a bias check.
y_pred = clf.fit(x_data_d, y_data_d).predict(x_data_d)

# Text report with three decimals; get_metrics (project helper) extracts the
# two summary values that are logged at the end of the notebook.
train_metrics = classification_report(y_true=y_data_d, y_pred=y_pred, digits=3)
train_acc, train_wf1 = get_metrics(train_metrics)
print(train_metrics)

              precision    recall  f1-score   support

           0      0.961     0.980     0.971       152
           1      0.975     0.956     0.965       160
           2      0.991     0.997     0.994       324
           3      1.000     0.996     0.998       544

    accuracy                          0.989      1180
   macro avg      0.982     0.982     0.982      1180
weighted avg      0.989     0.989     0.989      1180



### Evaluate Model Generalization

In [5]:
# Project helper for cross-validation: 10 folds, 10 repeats, fixed seed,
# leave-one-out disabled.
kfold = cross_validation(
    clf, x_data_d, y_data_d,
    k=[10], n_repeats=[10], random_state=1, loo=False,
)

# Label under which the helper reports this configuration (it is the same
# label the helper prints). Plain string: there is nothing to interpolate.
cv_key = '10-Repeated 10-fold'
cv_scores = kfold.get(cv_key)
if cv_scores is None:
    # Fail with a clear message instead of "'NoneType' object is not
    # subscriptable" if the helper's labelling ever changes.
    raise KeyError(f"cross_validation returned no entry for {cv_key!r}")

# First element of the entry; presumably the weighted F1 given the variable
# name -- confirm against cross_validation's return format.
repeated_10fold_wf1 = cv_scores[0]

0
10-Repeated 10-fold
0.9624


### Save Model

In [6]:
# Hand the classifier to the project's save_model helper under the experiment
# name. clf was fitted on the full dataset in the bias-evaluation cell.
# NOTE(review): whether cross_validation refits or clones clf is not visible
# here -- confirm which fitted state ends up being saved.
save_model(model_name, clf)

### Tracking

In [7]:
# Record the training accuracy, training weighted-F1 and the cross-validated
# score for this run. The call is kept exactly as written in case mlquest
# derives its column names from the argument expressions.
mlq.log_metrics(train_acc, train_wf1, repeated_10fold_wf1)

# Close the run, then display the six most recent rows of this quest's log.
mlq.end_quest()
mlq.show_logs(model_name, table_dest="../../", last_k=6)

info,info.1,info.2,info.3,read_data,read_data.1,read_data.2,read_data.3,read_data.4,RandomForestClassifier,RandomForestClassifier.1,RandomForestClassifier.2,RandomForestClassifier.3,RandomForestClassifier.4,RandomForestClassifier.5,RandomForestClassifier.6,RandomForestClassifier.7,RandomForestClassifier.8,RandomForestClassifier.9,metrics,metrics.1,metrics.2,metrics.3,metrics.4,metrics.5,metrics.6,cross_val_predict
15:58:51,05/13/23,2.34 s,5,Numerical,,train,True,,0.0,sqrt,0.0,True,False,0,False,0.0,1.0,,,,,,,,0.9466101694915254,5.0
15:59:42,05/13/23,2.50 s,7,Numerical,,train,True,,0.0,sqrt,0.0,True,False,0,False,0.0,1.0,,,,,,,,0.9491525423728814,5.0
16:07:44,05/13/23,8.80 min,9,Numerical,,train,True,,0.0,sqrt,0.0,True,False,0,False,0.0,1.0,,0.99,0.99,0.9528,0.9559,0.9554,0.9534,,
15:13:03,05/14/23,29.49 s,10,Numerical,,train,True,,0.0,sqrt,0.0,True,False,0,False,0.0,,,0.986,0.986,0.9563,,,,,
15:15:01,05/14/23,5.17 min,11,Numerical,,train,True,,0.0,sqrt,0.0,True,False,0,False,0.0,,,0.991,0.991,0.959,,,,,
15:24:21,05/14/23,2.46 min,12,Numerical,True,train,True,,0.0,sqrt,0.0,True,False,0,False,0.0,,,0.989,0.989,0.9624,,,,,
