In [1]:
import os
import argparse
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn

In [2]:
#mlflow server --backend-store-uri sqlite:///mlflow.db
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("Red-Wine-Quality")

2024/04/12 18:30:10 INFO mlflow.tracking.fluent: Experiment with name 'Red-Wine-Quality' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///c:/Users/demon/Desktop/projects/MLflow/mlruns/1', creation_time=1712926810483, experiment_id='1', last_update_time=1712926810483, lifecycle_stage='active', name='Red-Wine-Quality', tags={}>

In [3]:
def get_data():
    path = r"./data/winequality-red.csv"

    try:
        data = pd.read_csv(path)
        return data 
    except Exception as e:
        raise e

In [4]:
def evaluate(y,pred):
    rmse = np.sqrt(mean_squared_error(y,pred))
    mae = mean_absolute_error(y,pred)
    r2 = r2_score(y,pred)

    return rmse, mae, r2

In [5]:
data = get_data()
train,test = train_test_split(data,random_state=42)
train_x = train.drop(["quality"],axis=1)
test_x = test.drop(["quality"],axis=1)

train_y = train[["quality"]]
test_y = test[["quality"]]

In [6]:
alpha=0.6
l1_ratio = 0.9

with mlflow.start_run():
    # specify parameters manually
    mlflow.set_tag("developer","Alpha_leporis")
    mlflow.set_tag("model","elastic-net")
    
    mlflow.log_param("alpha",alpha)
    mlflow.log_param("l1_ratio",l1_ratio)
    
    lr = ElasticNet(alpha=alpha, l1_ratio=alpha)
    lr.fit(train_x,train_y)

    pred = lr.predict(test_x)

    rmse,mae,r2 = evaluate(test_y,pred)
    
    mlflow.log_metric("rmse",rmse)
    mlflow.log_metric("mae",mae)
    mlflow.log_metric("r2",r2)
    
    mlflow.sklearn.log_model(lr,"elastic-net-lr")


    print(f"Elastic net Params: alpha: {alpha}, l1_ratio: {l1_ratio}")
    print(f"Elastic net metric: rmse:{rmse}, mae:{mae},r2:{r2}")



Elastic net Params: alpha: 0.6, l1_ratio: 0.9
Elastic net metric: rmse:0.7772257709002306, mae:0.6365170497744915,r2:0.023462653879936957


In [7]:
# enable autologging
mlflow.sklearn.autolog()

with mlflow.start_run() as run:
    lr = ElasticNet(alpha=alpha, l1_ratio=alpha)
    lr.fit(train_x,train_y)

    pred = lr.predict(test_x)

    rmse,mae,r2 = evaluate(test_y,pred)

    print(f"Elastic net Params: alpha: {alpha}, l1_ratio: {l1_ratio}")
    print(f"Elastic net metric: rmse:{rmse}, mae:{mae},r2:{r2}")

Elastic net Params: alpha: 0.6, l1_ratio: 0.9
Elastic net metric: rmse:0.7772257709002306, mae:0.6365170497744915,r2:0.023462653879936957


# HYPERPARAMETER TUNING AND TRACKING USING MLFLOW

In [8]:
!pip install hyperopt



In [9]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [10]:
def objective(params):
    with mlflow.start_run():
        mlflow.set_tag("developer","Alpha_leporis")
        mlflow.set_tag("model", "Elasticnet")
        mlflow.log_params(params)
        
        lr = ElasticNet(**params)
        lr.fit(train_x,train_y)

        pred = lr.predict(test_x)

        rmse,mae,r2 = evaluate(test_y,pred)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mae",mae)
        mlflow.log_metric("r2",r2)

    return {'loss': rmse, 'status': STATUS_OK}

In [11]:
search_space = { "alpha": hp.loguniform('alpha',0.01,1),
                  "l1_ratio": hp.uniform('l1_ratio',0,1)}

In [12]:
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=4,
    trials=Trials()
)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:52<00:00, 13.11s/trial, best loss: 0.7198553201306017]


In [13]:
params= {"alpha":1.43098798581676,
"copy_X":True,
"fit_intercept":True,
"l1_ratio":0.003137093485394149,
"max_iter":1000,
"normalize":"deprecated",
"positive":False,
"precompute":False,
"random_state":None,
"selection":"cyclic",
"tol":0.0001,
"warm_start":False}

In [16]:
# enable autologging
mlflow.sklearn.autolog()
with mlflow.start_run() :
    lr = ElasticNet(**params)
    lr.fit(train_x,train_y)

    pred = lr.predict(test_x)

    rmse,mae,r2 = evaluate(test_y,pred)

    print(f"Elastic net Params: alpha: {alpha}, l1_ratio: {l1_ratio}")
    print(f"Elastic net metric: rmse:{rmse}, mae:{mae},r2:{r2}")

TypeError: ElasticNet.__init__() got an unexpected keyword argument 'normalize'

In [15]:
!pip install threadpoolctl==3.1.0



# Model Registry

In [19]:
from mlflow.tracking import MlflowClient
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

## Interacting with MLflow Tracking Server to extract run ids with higher accuracy

In [23]:
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

client.search_experiments()

[<Experiment: artifact_location='file:///c:/Users/demon/Desktop/projects/MLflow/mlruns/1', creation_time=1712926810483, experiment_id='1', last_update_time=1712926810483, lifecycle_stage='active', name='Red-Wine-Quality', tags={}>,
 <Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1712926260353, experiment_id='0', last_update_time=1712926260353, lifecycle_stage='active', name='Default', tags={}>]

In [24]:
from mlflow.entities import ViewType

runs = client.search_runs(
    experiment_ids='1',
    filter_string="metrics.rmse >0.77",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse DESC"]
)

In [25]:
for run in runs:
    print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

run id: 38c39e914fd34fab809d887265a941c4, rmse: 0.7848
run id: 11ff1a28960f42a0a7430a6d64e3374b, rmse: 0.7843
run id: aff9a98bf67143f3ae9e4d35b858d8ee, rmse: 0.7827
run id: 058712122be0459ebfc13ee70cbc2b0b, rmse: 0.7772


## Interacting with the Model Registry

In [26]:
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

run_id = "38c39e914fd34fab809d887265a941c4"
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri=model_uri, name="wine_quality")

Registered model 'wine_quality' already exists. Creating a new version of this model...
Created version '3' of model 'wine_quality'.


<ModelVersion: aliases=[], creation_timestamp=1712930125793, current_stage='None', description=None, last_updated_timestamp=1712930125793, name='wine_quality', run_id='38c39e914fd34fab809d887265a941c4', run_link=None, source='file:///c:/Users/demon/Desktop/projects/MLflow/mlruns/1/38c39e914fd34fab809d887265a941c4/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [28]:
model_name = "wine_quality"
latest_versions = client.get_latest_versions(name=model_name)

for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")


version: 1, stage: Production
version: 2, stage: Staging
version: 3, stage: None


  latest_versions = client.get_latest_versions(name=model_name)


In [29]:
model_version = 3
new_stage = "Production"
client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=True
)

  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1712930125793, current_stage='Production', description=None, last_updated_timestamp=1712931810169, name='wine_quality', run_id='38c39e914fd34fab809d887265a941c4', run_link=None, source='file:///c:/Users/demon/Desktop/projects/MLflow/mlruns/1/38c39e914fd34fab809d887265a941c4/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [30]:
model_name = "wine_quality"
latest_versions = client.get_latest_versions(name=model_name)

for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 1, stage: Archived
version: 2, stage: Staging
version: 3, stage: Production


  latest_versions = client.get_latest_versions(name=model_name)


## TEST THE MODEL IN PRODUCTION

In [31]:
df = get_data()
train,test = train_test_split(df,random_state=42)
train_x = train.drop(["quality"],axis=1)
test_x = test.drop(["quality"],axis=1)

train_y = train[["quality"]]
test_y = test[["quality"]]

In [32]:
def test_model(name, stage, X_test, y_test):
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    return {"rmse": mean_squared_error(y_test, y_pred, squared=False)}

In [33]:
test_model(name=model_name, stage="Production", X_test=test_x, y_test=test_y)

  latest = client.get_latest_versions(name, None if stage is None else [stage])


{'rmse': 0.7847866179645888}

In [34]:
test_model(name=model_name, stage="Archived", X_test=test_x, y_test=test_y)

  latest = client.get_latest_versions(name, None if stage is None else [stage])


{'rmse': 0.7198553201306017}

In [35]:
test_model(name=model_name, stage="Staging", X_test=test_x, y_test=test_y)

  latest = client.get_latest_versions(name, None if stage is None else [stage])


{'rmse': 0.7827079766841751}