# MLflow: Random Forest

In [1]:
import pandas as pd
import numpy as np
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

## Load Data

In [2]:
data_clean = '../../data/clean/data_clean.csv'

df = pd.read_csv(data_clean, low_memory=False)
df = df.sample(frac=0.10, random_state=44)

df_train, df_test = train_test_split(df, 
                                     train_size=0.7, 
                                     random_state=44)

In [3]:
X_train = df_train.drop(['Default'], axis=1)
y_train = df_train['Default']

X_test = df_test.drop(['Default'], axis=1)
y_test = df_test['Default']

print(f'Train: {X_train.shape}, {y_train.shape}')
print(f'Test:  {X_test.shape}, {y_test.shape}')

Train: (62622, 18), (62622,)
Test:  (26839, 18), (26839,)


## Model Training

In [4]:
criter = 'entropy' 
depth = 20
rand_rfc = 44

In [5]:
rfc_model = RandomForestClassifier(criterion=criter,
                                   max_depth=depth,
                                   random_state=rand_rfc)

rfc_model.fit(X_train, y_train)
y_pred = rfc_model.predict(X_test)

In [6]:
acc = round(accuracy_score(y_test, y_pred),4)
pre = round(precision_score(y_test, y_pred, average='macro'),4)
rec = round(recall_score(y_test, y_pred, average='macro'),4)
f1s = round(f1_score(y_test, y_pred, average='macro'),4)

print(f'Accuracy:  {acc}')
print(f'Precision: {pre}')
print(f'Recall:    {rec}')
print(f'F1-score:  {f1s}')

Accuracy:  0.9409
Precision: 0.917
Recall:    0.8744
F1-score:  0.8937


## MLflow Registry

In [7]:
with mlflow.start_run():
    mlflow.log_param('criterion', criter)
    mlflow.log_param('max_depth', depth)
    mlflow.log_param('random_state', rand_rfc)

    mlflow.log_metric('accuracy', acc)
    mlflow.log_metric('precision', pre)
    mlflow.log_metric('recall', rec)
    mlflow.log_metric('f1-score', f1s)

    mlflow.sklearn.log_model(rfc_model, 'rfc-model', 
                             registered_model_name="RFC-Model")

Registered model 'RFC-Model' already exists. Creating a new version of this model...
Created version '4' of model 'RFC-Model'.


In [8]:
# localhost:5000
# !mlflow ui 

mlflow.end_run()

## Load Model

In [9]:
loaded_model = mlflow.sklearn.load_model('runs:/73d3cb660e044b7eb2a4c560ffd1106f/rfc-model')

y_new = loaded_model.predict(X_test)

In [10]:
acc = round(accuracy_score(y_test, y_new),4)
pre = round(precision_score(y_test, y_new, average='macro'),4)
rec = round(recall_score(y_test, y_new, average='macro'),4)
f1s = round(f1_score(y_test, y_new, average='macro'),4)

print(f'Accuracy:  {acc}')
print(f'Precision: {pre}')
print(f'Recall:    {rec}')
print(f'F1-score:  {f1s}')

Accuracy:  0.9416
Precision: 0.9174
Recall:    0.8767
F1-score:  0.8952
