In [1]:
import os
import sys
import yaml
from urllib.parse import urlparse

# Init

In [2]:
import logging
import warnings

In [3]:
warnings.filterwarnings("ignore")

logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)

In [4]:
import mlflow
import mlflow.sklearn

In [5]:
from src.preprocessing import dataset
from src.training import modelfactory

## Setup paths

In [6]:
# Registry of filesystem locations used by the notebook, keyed by role.
# Both entries are relative to the current working directory.
_PATHS = dict(project=os.path.join('.'))
_PATHS.update(configs=os.path.join(_PATHS['project'], 'training_config.yaml'))

## Setup Flags

In [7]:
# Registry of notebook-wide flags; starts out empty.
_FLAGS = dict()

## Get config

In [8]:
# Reset to an empty mapping first so a failed (re-)run of this cell never
# leaves a stale configuration from an earlier execution in the kernel.
training_config = {}

# Parse the YAML training configuration registered under _PATHS['configs'].
# `yaml.safe_load` returns None for an empty document; fall back to an empty
# dict so `training_config` is always a mapping and later lookups fail with a
# clear KeyError rather than "'NoneType' object is not subscriptable".
with open(_PATHS['configs'], "r", encoding="utf-8") as f_r:
    training_config = yaml.safe_load(f_r) or {}

# Show the parsed configuration as the cell output.
training_config

{'hyperparameters': {'alpha': 0.5, 'l1_ratio': 0.1, 'seed': 42},
 'data_file': 'data/wine-quality.csv',
 'run': {'random_seed': 40, 'experiment': {'id': 'boilerplate_mlflow_name'}}}

# Training

## Set MLflow experiment

In [9]:
# Read the experiment identifier from the `run.experiment.id` entry of the
# configuration and make that experiment the active one in MLflow.
run_config = training_config["run"]
experiment_id = run_config["experiment"]["id"]
mlflow.set_experiment(experiment_id)

<Experiment: artifact_location='file:///Users/alan_spark/Projects/boilerplates/boilerplate_mlflow/mlruns/3', experiment_id='3', lifecycle_stage='active', name='boilerplate_mlflow_name', tags={}>

## Start run

In [10]:
# Make this cell safe to re-execute: `mlflow.start_run()` raises if a run is
# already active (e.g. a previous execution of the notebook stopped before
# reaching the "End run" cell). Close such a leftover run first, marking it
# KILLED so it is not mistaken for a successfully finished run.
if mlflow.active_run() is not None:
    mlflow.end_run(status="KILLED")

# Open the run that all following log_* calls will record into.
mlflow.start_run()

<ActiveRun: >

Track parameters. This is not limited to hyperparameters: you can track whatever you think is important for a run.

In [11]:
# Record the "hyperparameters" section of the configuration on the active run.
hyperparameters = training_config["hyperparameters"]
mlflow.log_params(hyperparameters)

## Get dataset

In [12]:
# Build the train/test features and targets from the data file named in the
# configuration.
data_file = training_config["data_file"]
train_xs, train_ys, test_xs, test_ys = dataset.prepare(data_file)

## Get model 

In [13]:
# Fit a model on the training split using the configured hyperparameters.
# `modelfactory.fit` yields the model together with the object whose
# `log_model` is used later to log it (`model_flavor`).
fit_result = modelfactory.fit(training_config["hyperparameters"], train_xs, train_ys)
model, model_flavor = fit_result

## Evaluate model 

In [14]:
# Score the fitted model on the held-out split.
rmse, mae, r_2 = modelfactory.eval_metrics(model, test_xs, test_ys)

# Print one metric per line.
print(
    f"  RMSE: {rmse}",
    f"  MAE: {mae}",
    f"  R2: {r_2}",
    sep="\n",
)

  RMSE: 0.7950988669306662
  MAE: 0.6241872040904787
  R2: 0.19979112548205147


Track the evaluation metrics for this run.

In [15]:
# Record each evaluation metric on the active run, in the same order as before.
for metric_name, metric_value in (("rmse", rmse), ("r2", r_2), ("mae", mae)):
    mlflow.log_metric(metric_name, metric_value)

## Log model in MLflow

In [16]:
# Scheme of the configured tracking URI (e.g. "file" for a local directory).
tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

# The model is additionally registered (under the experiment identifier) only
# when the tracking store is not the local "file" store; otherwise it is just
# logged as a run artifact.
log_model_kwargs = {}
if tracking_url_type_store != "file":
    log_model_kwargs["registered_model_name"] = experiment_id

# Trailing semicolon keeps the cell output empty, as in the if/else form.
model_flavor.log_model(model, "model", **log_model_kwargs);

In [17]:
## End run

In [18]:
# Close the active run; "FINISHED" is the status MLflow applies by default.
mlflow.end_run(status="FINISHED")

_END_