In [1]:
import os
import argparse
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn

# Importing the dataset
from ucimlrepo import fetch_ucirepo 

In [2]:
# fetch dataset 
wine_quality = fetch_ucirepo(id=186) 
  
# data (as pandas dataframes) 
X = wine_quality.data.features 
y = wine_quality.data.targets 
  
# # metadata 
# print(wine_quality.metadata) 
  
# # variable information 
# print(wine_quality.variables) 

In [3]:
#mlflow server --backend-store-uri sqlite:///mlflow.db
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("Wine-Quality")

2023/12/17 11:44:48 INFO mlflow.tracking.fluent: Experiment with name 'Wine-Quality' does not exist. Creating a new experiment.


<Experiment: artifact_location='/Users/mowlanicab/Desktop/Data Science/Projects/MLflow/mlruns/1', creation_time=1702793688703, experiment_id='1', last_update_time=1702793688703, lifecycle_stage='active', name='Wine-Quality', tags={}>

In [4]:
X

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol
0,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4
1,7.8,0.88,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8
4,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4
...,...,...,...,...,...,...,...,...,...,...,...
6492,6.2,0.21,0.29,1.6,0.039,24.0,92.0,0.99114,3.27,0.50,11.2
6493,6.6,0.32,0.36,8.0,0.047,57.0,168.0,0.99490,3.15,0.46,9.6
6494,6.5,0.24,0.19,1.2,0.041,30.0,111.0,0.99254,2.99,0.46,9.4
6495,5.5,0.29,0.30,1.1,0.022,20.0,110.0,0.98869,3.34,0.38,12.8


In [5]:
     
def evaluate(y,pred):
    rmse = np.sqrt(mean_squared_error(y,pred))
    mae = mean_absolute_error(y,pred)
    r2 = r2_score(y,pred)

    return rmse, mae, r2

In [6]:
train_X, test_X, train_y, test_y = train_test_split(X, y)

In [9]:
alpha=0.9
l1_ratio = 0.9

with mlflow.start_run():
    
    mlflow.set_tag("delevoper","mowlanica")
    mlflow.set_tag("model","elastic-net")
    
    mlflow.log_param("alpha",alpha)
    mlflow.log_param("l1_ratio",l1_ratio)
    
    lr = ElasticNet(alpha=alpha, l1_ratio=alpha)
    lr.fit(train_X,train_y)

    pred = lr.predict(test_X)

    rmse,mae,r2 = evaluate(test_y,pred)
    
    mlflow.log_metric("rmse",rmse)
    mlflow.log_metric("mae",mae)
    mlflow.log_metric("r2",r2)
    
    mlflow.sklearn.log_model(lr,"elastic-net-lr")


    print(f"Elastic net Params: alpha: {alpha}, l1_ratio: {l1_ratio}")
    print(f"Elastic net metric: rmse:{rmse}, mae:{mae},r2:{r2}")

Elastic net Params: alpha: 0.9, l1_ratio: 0.9
Elastic net metric: rmse:0.8731463718994478, mae:0.6823939269880464,r2:0.006019757724433195


In [10]:
mlflow.sklearn.autolog()
with mlflow.start_run() :
    lr = ElasticNet(alpha=alpha, l1_ratio=alpha)
    lr.fit(train_X,train_y)

    pred = lr.predict(test_X)

    rmse,mae,r2 = evaluate(test_y,pred)

    print(f"Elastic net Params: alpha: {alpha}, l1_ratio: {l1_ratio}")
    print(f"Elastic net metric: rmse:{rmse}, mae:{mae},r2:{r2}")



Elastic net Params: alpha: 0.9, l1_ratio: 0.9
Elastic net metric: rmse:0.8731463718994478, mae:0.6823939269880464,r2:0.006019757724433195


In [11]:
!pip3 install hyperopt

Collecting hyperopt
  Downloading hyperopt-0.2.7-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m300.5 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting networkx>=2.2 (from hyperopt)
  Downloading networkx-3.2.1-py3-none-any.whl.metadata (5.2 kB)
Collecting future (from hyperopt)
  Downloading future-0.18.3.tar.gz (840 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m840.9/840.9 kB[0m [31m154.5 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting tqdm (from hyperopt)
  Using cached tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)
Collecting py4j (from hyperopt)
  Downloading py4j-0.10.9.7-py2.py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.5/200.5 kB[0m [31m185.0 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading networkx-3.2.1-py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━

In [12]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [13]:
def objective(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "Elasticnet")
        mlflow.log_params(params)
        
        lr = ElasticNet(**params)
        lr.fit(train_X,train_y)

        pred = lr.predict(test_X)

        rmse,mae,r2 = evaluate(test_y,pred)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mae",mae)
        mlflow.log_metric("r2",r2)

    return {'loss': rmse, 'status': STATUS_OK}

In [14]:
search_space = { "alpha": hp.loguniform('alpha',0.01,1),
                  "l1_ratio": hp.uniform('l1_ratio',0,1)}

In [15]:
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=10,
    trials=Trials()
)

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]





 10%|█         | 1/10 [00:03<00:34,  3.79s/trial, best loss: 0.8472737984627975]





 20%|██        | 2/10 [00:06<00:25,  3.23s/trial, best loss: 0.8472737984627975]





 30%|███       | 3/10 [00:08<00:19,  2.76s/trial, best loss: 0.8472737984627975]





 40%|████      | 4/10 [00:11<00:15,  2.62s/trial, best loss: 0.8472737984627975]





 50%|█████     | 5/10 [00:13<00:11,  2.39s/trial, best loss: 0.8203351727728293]





 60%|██████    | 6/10 [00:15<00:09,  2.27s/trial, best loss: 0.8203351727728293]





 70%|███████   | 7/10 [00:17<00:06,  2.11s/trial, best loss: 0.8203351727728293]





 80%|████████  | 8/10 [00:19<00:04,  2.10s/trial, best loss: 0.8203351727728293]





 90%|█████████ | 9/10 [00:21<00:02,  2.10s/trial, best loss: 0.8203351727728293]





100%|██████████| 10/10 [00:23<00:00,  2.33s/trial, best loss: 0.8203351727728293]


* Check the results in the UI

In [16]:
best_result

{'alpha': 1.631130433090343, 'l1_ratio': 0.061143063370751705}

In [17]:
!pwd

/Users/mowlanicab/Desktop/Data Science/Projects/MLflow


* Lets use all the parameters in UI for our best result

![best_params](best_params.png)

In [20]:
params= {"alpha":1.631130433090343,
"copy_X":True,
"fit_intercept":True,
"l1_ratio":0.061143063370751705,
"max_iter":1000,
"positive":False,
"precompute":False,
"random_state":None,
"selection":"cyclic",
"tol":0.0001,
"warm_start":False}

In [21]:
mlflow.sklearn.autolog()
with mlflow.start_run() :
    lr = ElasticNet(**params)
    lr.fit(train_X,train_y)

    pred = lr.predict(test_X)

    rmse,mae,r2 = evaluate(test_y,pred)

    print(f"Elastic net Params: alpha: {alpha}, l1_ratio: {l1_ratio}")
    print(f"Elastic net metric: rmse:{rmse}, mae:{mae},r2:{r2}")



Elastic net Params: alpha: 0.9, l1_ratio: 0.9
Elastic net metric: rmse:0.8203351727728293, mae:0.6377055604837236,r2:0.12262286964751701


## Model registry

In [22]:
from mlflow.tracking import MlflowClient
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

#### Interacting with MLflow Tracking Server to extract run ids with higher accuracy

In [25]:
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

# client.list_experiment() - parameter has been removed

In [26]:
from mlflow.entities import ViewType

runs = client.search_runs(
    experiment_ids='1',
    filter_string="metrics.rmse >0.7",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse DESC"]
)

In [27]:
for run in runs:
    print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

run id: 8b89ab313bc4460d89874476e3316cff, rmse: 0.8757
run id: 069b370a693a4f1183a7132403b2552a, rmse: 0.8757
run id: 9362221184ae46459ef9c896d0a202e5, rmse: 0.8757
run id: 056c883f8d1c4429ac62df3d11011bc6, rmse: 0.8757
run id: df5a70d9fab2482e9bae289ec18816fb, rmse: 0.8757


### Interacting with the Model Registry


In [28]:
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [29]:
run_id = "069b370a693a4f1183a7132403b2552a"
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri=model_uri, name="wine_quality")

Registered model 'wine_quality' already exists. Creating a new version of this model...
Created version '3' of model 'wine_quality'.


<ModelVersion: aliases=[], creation_timestamp=1702980686541, current_stage='None', description=None, last_updated_timestamp=1702980686541, name='wine_quality', run_id='069b370a693a4f1183a7132403b2552a', run_link=None, source=('/Users/mowlanicab/Desktop/Data '
 'Science/Projects/MLflow/mlruns/1/069b370a693a4f1183a7132403b2552a/artifacts/model'), status='READY', status_message=None, tags={}, user_id=None, version=3>

In [30]:
model_name = "wine_quality"
latest_versions = client.get_latest_versions(name=model_name)

for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 3, stage: None


  latest_versions = client.get_latest_versions(name=model_name)


In [31]:
model_version = 3
new_stage = "Production"
client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=True
)

  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1702980686541, current_stage='Production', description=None, last_updated_timestamp=1702980769782, name='wine_quality', run_id='069b370a693a4f1183a7132403b2552a', run_link=None, source=('/Users/mowlanicab/Desktop/Data '
 'Science/Projects/MLflow/mlruns/1/069b370a693a4f1183a7132403b2552a/artifacts/model'), status='READY', status_message=None, tags={}, user_id=None, version=3>

In [32]:
model_name = "wine_quality"
latest_versions = client.get_latest_versions(name=model_name)

for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 2, stage: None
version: 3, stage: Production


  latest_versions = client.get_latest_versions(name=model_name)


### Testing the model in production

In [34]:
train_x, test_x, train_y, test_y = train_test_split(X, y,random_state=42)

In [35]:
def test_model(name, stage, X_test, y_test):
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    return {"rmse": mean_squared_error(y_test, y_pred, squared=False)}

In [36]:
test_model(name=model_name, stage="Production", X_test=test_x, y_test=test_y)

  latest = client.get_latest_versions(name, None if stage is None else [stage])


{'rmse': 0.8616945738772849}