In [1]:
import os
import mlflow
from mlflow.tracking import MlflowClient

## Q1. Install the package

In [2]:
mlflow.__version__

'2.3.2'

## Q2. Download and preprocess the data

In [3]:
def sizeof_fmt(num, suffix="B"):
    """Format a byte count as a human-readable string with binary prefixes.

    The value is repeatedly divided by 1024 until it fits the current unit,
    then rendered with one decimal place.

    Args:
        num: Size to format (int or float); negative values are accepted.
        suffix: Text appended after the binary prefix, ``"B"`` by default.

    Returns:
        A string such as ``"150.1KiB"``; values beyond the ``Zi`` range are
        reported in ``Yi``.
    """
    for unit in ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"):
        # Compare the value as it will be *displayed* (one decimal place).
        # Checking the raw value lets e.g. 1023.999 through, which then
        # renders as "1024.0Ki" instead of rolling over to "1.0Mi".
        if abs(round(num, 1)) < 1024.0:
            return f"{num:.1f}{unit}{suffix}"
        num /= 1024.0
    return f"{num:.1f}Yi{suffix}"

In [4]:
sizeof_fmt(os.path.getsize("./output/dv.pkl"))

'150.1KiB'

## Q3. Train a model with autolog

Code: cohorts/2023/02-experiment-tracking/homework/train.py

In [5]:

# Tracking server endpoint. Defaults to the original EC2-hosted server but can
# be overridden through MLflow's MLFLOW_TRACKING_URI environment variable, so
# the notebook can target another server without editing this cell.
TRACKING_URL = os.environ.get(
    "MLFLOW_TRACKING_URI",
    "http://ec2-18-142-183-214.ap-southeast-1.compute.amazonaws.com:5050",
)
EXPERIMENT_NAME = "green-taxi-tip-amount-experiment"
# The URI is handed to MlflowClient explicitly rather than being set globally
# with mlflow.set_tracking_uri(TRACKING_URL).

In [6]:
# Client bound to the tracking server at TRACKING_URL; the cells below use it
# for every experiment, run and registry lookup.
client = MlflowClient(tracking_uri=TRACKING_URL)

In [7]:
# Look up the autolog experiment by name.
# NOTE(review): per the MLflow docs this returns None when no experiment has
# that name, in which case the following cell fails on `.experiment_id` — confirm.
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)

In [10]:
# Take the first run returned for the experiment. This relies on search_runs'
# default ordering (newest first according to the MLflow docs).
# `experiment_ids` is documented as a list, and only one run is needed, so the
# server is asked for a single result instead of the whole run history.
run = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    max_results=1,
)[0]

In [12]:
run.data.params['max_depth']

'10'

## Q4. Tune model hyperparameters

In [13]:
# Switch to the hyperparameter-tuning experiment for Q4.
# Note: this rebinds the name set earlier for Q3.
EXPERIMENT_NAME = "random-forest-hyperopt"

In [14]:
# Look up the hyperopt experiment by name (rebinds `experiment`).
# NOTE(review): per the MLflow docs this returns None when no experiment has
# that name, in which case the following cell fails on `.experiment_id` — confirm.
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)

In [19]:
# Best tuning run: sort ascending by validation RMSE and keep only the top
# result. `experiment_ids` is passed as a list, matching the documented
# signature of MlflowClient.search_runs.
best_run = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    order_by=["metrics.rmse ASC"],
    max_results=1,
)[0]

In [22]:
best_run.data.metrics['rmse']

2.449827329704216


## Q5. Promote the best model to the model registry

In [23]:
# Switch to the experiment holding the retrained best models for Q5.
# Note: this rebinds the name used in the earlier questions.
EXPERIMENT_NAME = "random-forest-best-models"

In [24]:
# Look up the best-models experiment by name (rebinds `experiment`).
# NOTE(review): per the MLflow docs this returns None when no experiment has
# that name, in which case the following cell fails on `.experiment_id` — confirm.
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)

In [25]:
# Best candidate model: sort ascending by test-set RMSE and keep only the top
# result. `experiment_ids` is passed as a list, matching the documented
# signature of MlflowClient.search_runs.
best_run = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    order_by=["metrics.test_rmse ASC"],
    max_results=1,
)[0]

In [28]:
best_run.data.metrics['test_rmse']

2.2854691906481364

In [29]:
## Q6. Model metadata

In [33]:
# Fetch the registry entry for the promoted model by name.
# NOTE(review): this presumes the model was already registered elsewhere
# (no registration call is executed in this notebook) — confirm.
registered_model = client.get_registered_model("green-taxi-tip-amount-best-model")

In [35]:
print(registered_model)

<RegisteredModel: aliases={}, creation_timestamp=1685091023613, description='', last_updated_timestamp=1685107577630, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1685107577630, current_stage='None', description='', last_updated_timestamp=1685107577630, name='green-taxi-tip-amount-best-model', run_id='e109c9381af847a68d754f7bd0c4b45e', run_link='', source='s3://s3-mlflow-artifacts-storage/mlflow/7/e109c9381af847a68d754f7bd0c4b45e/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='5'>], name='green-taxi-tip-amount-best-model', tags={}>
