In [4]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, median_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_california_housing
from typing import Any, Dict, Literal

from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import pickle
import json
import logging
import io

import mlflow
from mlflow.tracking import MlflowClient
from mlflow.models import infer_signature
from mlflow.store.artifact.artifact_repository_registry import get_artifact_repository
import os

In [5]:
# Show the tracking URI taken from the environment ("No env" when the variable is unset).
os.environ.get("MLFLOW_TRACKING_URI", "No env")

'http://mlflow-service:5000'

In [6]:
# Display the model-registry URI that MLflow reports for this session.
mlflow.get_registry_uri()

'http://mlflow-service:5000'

In [8]:
# Name of the MLflow experiment used throughout this notebook.
exp_name = "Lyubov_Guzhvina"

# create_experiment raises when an experiment with this name already exists,
# which breaks every re-run of the notebook. Look the experiment up first and
# only create it (with its S3 artifact location) when it is missing.
existing_experiment = mlflow.get_experiment_by_name(exp_name)
if existing_experiment is None:
    my_experiment = mlflow.create_experiment(
        name=exp_name,
        artifact_location="s3://lubguzh/mlflow",
    )
else:
    # Same value create_experiment would have returned: the experiment id string.
    my_experiment = existing_experiment.experiment_id

In [17]:
# Display the experiment id obtained when the experiment was created.
my_experiment

'556672555723861289'

In [9]:
# Look the experiment up by name. The filter is built from exp_name so it
# cannot drift from the name used when the experiment was created.
mlflow.search_experiments(
    filter_string=f"name = '{exp_name}'"
)

[<Experiment: artifact_location='s3://lubguzh/mlflow', creation_time=1729017813557, experiment_id='556672555723861289', last_update_time=1729017813557, lifecycle_stage='active', name='Lyubov_Guzhvina', tags={}>]

In [10]:
# Make the experiment named by exp_name the active one for subsequent runs.
mlflow.set_experiment(experiment_name=exp_name)

<Experiment: artifact_location='s3://lubguzh/mlflow', creation_time=1729017813557, experiment_id='556672555723861289', last_update_time=1729017813557, lifecycle_stage='active', name='Lyubov_Guzhvina', tags={}>

In [11]:
# List the experiments returned by an unfiltered search.
mlflow.search_experiments()

[<Experiment: artifact_location='s3://lubguzh/mlflow', creation_time=1729017813557, experiment_id='556672555723861289', last_update_time=1729017813557, lifecycle_stage='active', name='Lyubov_Guzhvina', tags={}>,
 <Experiment: artifact_location='s3://lubguzh/mlflow/0', creation_time=1729016202921, experiment_id='0', last_update_time=1729016202921, lifecycle_stage='active', name='Default', tags={}>]

In [55]:
# Fixed seed so the data splits and the tree-based models are reproducible
# across kernel restarts.
RANDOM_STATE = 42

# Model name -> unfitted estimator.
# NOTE: "desicion_tree" is misspelled, but the spelling is kept on purpose:
# the name is used to build the registered model name in the training cell,
# so changing it would start a new entry in the model registry.
models = {
    "random_forest": RandomForestRegressor(random_state=RANDOM_STATE),
    "linear_regression": LinearRegression(),
    "desicion_tree": DecisionTreeRegressor(random_state=RANDOM_STATE),
}
model_names = list(models)

housing = fetch_california_housing(as_frame=True)

# First split: train vs. holdout (train_test_split's default holdout share).
# Second split: the holdout is halved into validation and test sets.
# A separate *_holdout name avoids reusing X_test/y_test for two different things.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    housing['data'], housing['target'], random_state=RANDOM_STATE
)
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, random_state=RANDOM_STATE
)

# Feature scaling is currently disabled; the models are fit on the raw features.
# scaler = StandardScaler()
# X_train_fitted = pd.DataFrame(scaler.fit_transform(X_train))
# X_test_fitted = pd.DataFrame(scaler.transform(X_test))
# X_val_fitted = pd.DataFrame(scaler.transform(X_val))

In [53]:
# Display the scaled training features.
# NOTE(review): in the visible cells X_train_fitted is only assigned inside the
# commented-out scaler code, so this presumably fails with NameError on a fresh
# kernel — confirm, then re-enable the scaler or drop this cell.
X_train_fitted

Unnamed: 0,0,1,2,3,4,5,6,7
0,-0.042425,0.899984,-0.306601,-0.407787,-0.141719,-0.044996,0.987689,-1.465845
1,-0.555584,-0.532579,-0.049642,-0.241712,-0.245776,-0.066390,1.931851,-1.031488
2,-0.081068,0.183703,-0.461441,-0.073274,-0.202343,-0.123703,-0.863243,0.715924
3,0.130341,-1.089686,0.053957,0.030122,2.146630,0.026168,-0.783783,1.045436
4,-1.316016,-0.452992,-0.590352,0.238076,0.287177,-0.039828,1.342918,-0.956599
...,...,...,...,...,...,...,...,...
15475,0.064066,-1.010099,-0.117971,-0.187995,-0.362501,-0.091927,-0.919331,0.880680
15476,0.117705,1.297918,0.236153,-0.050096,-0.490988,-0.043884,-0.839872,0.695954
15477,-0.350310,-1.089686,3.046775,3.050960,-1.163287,-0.040352,-0.844546,2.288595
15478,0.692420,0.581637,0.051476,-0.205804,-0.433983,0.006020,-0.839872,0.601094


In [54]:
# Validation frame handed to mlflow.evaluate: the features plus a "target"
# column holding the true values. It is identical for every model, so it is
# built once here instead of on each loop iteration.
eval_df = X_val.copy()
eval_df["target"] = y_val

# One parent run groups a nested child run per model.
with mlflow.start_run(run_name="@nokeryy", experiment_id=my_experiment, description="parent") as parent_run:
    for model_name, model in models.items():
        with mlflow.start_run(run_name=model_name, experiment_id=my_experiment, nested=True) as child_run:
            # X_train is already a DataFrame (the dataset is loaded with
            # as_frame=True), so it needs no extra pd.DataFrame wrapper.
            model.fit(X_train, y_train)
            prediction = model.predict(X_val)

            # Infer the signature from the same frame the predictions were
            # made on (previously X_test was paired with X_val predictions).
            signature = infer_signature(X_val, prediction)

            # Store each model under its own name instead of the copy-pasted
            # "linreg" path that was used for every estimator.
            model_info = mlflow.sklearn.log_model(
                model,
                model_name,
                signature=signature,
                registered_model_name=f"sk-learn-{model_name}-reg-model",
            )

            # Compute and log the default regression metrics on the validation set.
            mlflow.evaluate(
                model=model_info.model_uri,
                data=eval_df,
                targets="target",
                model_type="regressor",
                evaluators=["default"],
            )

Registered model 'sk-learn-random_forest-reg-model' already exists. Creating a new version of this model...
2024/10/15 19:49:15 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: sk-learn-random_forest-reg-model, version 11
Created version '11' of model 'sk-learn-random_forest-reg-model'.


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2024/10/15 19:49:29 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/10/15 19:49:29 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/10/15 19:49:29 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest at: http://mlflow-service:5000/#/experiments/556672555723861289/runs/f7486b91eaab42cf953ec4c2b07ad83d.
2024/10/15 19:49:29 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-service:5000/#/experiments/556672555723861289.
Registered model 'sk-learn-linear_regression-reg-model' already exists. Creating a new version of this model...
2024/10/15 19:49:31 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: sk-learn-linear_regression-reg-model, version 3
Created version '3' of model 'sk-learn-linear_regression-reg-model'.


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2024/10/15 19:49:31 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/10/15 19:49:31 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/10/15 19:49:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run linear_regression at: http://mlflow-service:5000/#/experiments/556672555723861289/runs/26572ed6155643808e79ecb39e0c45ae.
2024/10/15 19:49:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-service:5000/#/experiments/556672555723861289.
Registered model 'sk-learn-desicion_tree-reg-model' already exists. Creating a new version of this model...
2024/10/15 19:49:34 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: sk-learn-desicion_tree-reg-model, version 3
Created version '3' of model 'sk-learn-desicion_tree-reg-model'.


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2024/10/15 19:49:35 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/10/15 19:49:35 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/10/15 19:49:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run desicion_tree at: http://mlflow-service:5000/#/experiments/556672555723861289/runs/87b44adafdf04e5e8bb1874c396f52e1.
2024/10/15 19:49:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-service:5000/#/experiments/556672555723861289.
2024/10/15 19:49:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run @nokeryy at: http://mlflow-service:5000/#/experiments/556672555723861289/runs/d1f1dcf9dae14e4b947aa218dec373b1.
2024/10/15 19:49:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-service:5000/#/experiments/556672555723861289.
