# hyperparamter tuning and tracking unsing mlflow 
- fmin - help minimize loss function
- tpe - tree based estimator help optimize parameters 
- hp - lot of functions and methods 
- STATUS_OK - every trails gets ok status 
- Trails - best estimate on how many times can we run the optimization algorithm
- score - custom logging possible 

# reference 
https://github.com/karndeepsingh/Mlflow-Tutorial/blob/main/MLFlow_Implementation.ipynb 

In [1]:
import os 
import pandas as pd
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,mean_squared_error,mean_absolute_error,r2_score
from sklearn.linear_model import ElasticNet
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn

In [2]:
df=pd.read_csv('winequality-red.csv')
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [3]:
def evaluate_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2

In [4]:
train,test=train_test_split(df,random_state=42)
xtrain=train.drop(["quality"],axis=1)
xtest=test.drop(["quality"],axis=1)
ytrain=train[["quality"]]
ytest=test[["quality"]]

In [5]:
alpha=0.6
l1_ratio=0.9
with mlflow.start_run():
    mlflow.set_tag('developer', 'amruth')
    mlflow.set_tag('model','ElsticNet')
    mlflow.log_param('alpha',alpha)
    mlflow.log_param('l1_ratio',l1_ratio)

    lr=ElasticNet(alpha=alpha,l1_ratio=l1_ratio,random_state=42)
    lr.fit(xtrain,ytrain)
    pred=lr.predict(xtest)

    rmse, mae, r2 = evaluate_metrics(ytest, pred)
    mlflow.log_metric('rmse',rmse)
    mlflow.log_metric('mae',mae)
    mlflow.log_metric('r2',r2)

    mlflow.sklearn.log_model(lr,'elastic=net-lr')

    print(f'ElasticNet net Params: {lr.get_params()}')
    print(f'RMSE: {rmse} MAE: {mae} R2: {r2}')



ElasticNet net Params: {'alpha': 0.6, 'copy_X': True, 'fit_intercept': True, 'l1_ratio': 0.9, 'max_iter': 1000, 'positive': False, 'precompute': False, 'random_state': 42, 'selection': 'cyclic', 'tol': 0.0001, 'warm_start': False}
RMSE: 0.7852398645108017 MAE: 0.6446785626602032 R2: 0.0032203771587697716


In [6]:
mlflow.sklearn.autolog()
with mlflow.start_run():
    lr=ElasticNet(alpha=0.6,l1_ratio=0.9,random_state=42)
    lr.fit(xtrain,ytrain)
    pred=lr.predict(xtest)

    rmse, mae, r2 = evaluate_metrics(ytest, pred)

    print(f'ElasticNet net Params: {lr.get_params()}')
    print(f'RMSE: {rmse} MAE: {mae} R2: {r2}')



ElasticNet net Params: {'alpha': 0.6, 'copy_X': True, 'fit_intercept': True, 'l1_ratio': 0.9, 'max_iter': 1000, 'positive': False, 'precompute': False, 'random_state': 42, 'selection': 'cyclic', 'tol': 0.0001, 'warm_start': False}
RMSE: 0.7852398645108017 MAE: 0.6446785626602032 R2: 0.0032203771587697716


In [8]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
import numpy as np
from hyperopt.pyll import scope
import mlflow
from sklearn.linear_model import ElasticNet
#from sklearn.metrics import ev

In [9]:
def objective(params):
    """ 
    :params params: dict of hyperparameters
    """
    with mlflow.start_run():
        mlflow.set_tag("model", "ElasticNet")
        mlflow.log_params(params)

        lr=ElasticNet(**params)
        lr.fit(xtrain,ytrain)
        ypred=lr.predict(xtest)
        rmse, mse, r2 = evaluate_metrics(ytest,ypred)
        mlflow.log_metric("rmse",rmse)
        mlflow.log_metric("mse",mse)    
        mlflow.log_metric("r2",r2)

    return {'loss': rmse, 'status': STATUS_OK}   

In [10]:
search_space = {
    'alpha': hp.uniform('alpha', 0.01, 1),
    'l1_ratio': hp.uniform('l1_ratio', 0, 1),
}

In [33]:
mlflow.set_experiment('hyperopt_testing')
best_result = fmin(
    fn=objective, 
    space=search_space,
    algo=tpe.suggest,
    max_evals=10,
    trials=Trials()
)

100%|██████████| 10/10 [01:10<00:00,  7.07s/trial, best loss: 0.6926364835460415]


In [12]:
best_result

{'alpha': np.float64(0.09538133520940972),
 'l1_ratio': np.float64(0.6132104939349722)}

In [13]:
params= {"alpha":1.43098798581676,
"copy_X":True,
"fit_intercept":True,
"l1_ratio":0.003137093485394149,
"max_iter":1000,
"positive":False,
"precompute":False,
"random_state":None,
"selection":"cyclic",
"tol":0.0001,
"warm_start":False}

In [30]:
mlflow.sklearn.autolog()
with mlflow.start_run(run_name='hyperopt'):
    lr=ElasticNet(**params)
    lr.fit(xtrain,ytrain)
    pred=lr.predict(xtest)

    rmse, mae, r2 = evaluate_metrics(ytest, pred)

    print(f'ElasticNet net Params: {lr.get_params()}')
    print(f'RMSE: {rmse} MAE: {mae} R2: {r2}')



ElasticNet net Params: {'alpha': 1.43098798581676, 'copy_X': True, 'fit_intercept': True, 'l1_ratio': 0.003137093485394149, 'max_iter': 1000, 'positive': False, 'precompute': False, 'random_state': None, 'selection': 'cyclic', 'tol': 0.0001, 'warm_start': False}
RMSE: 0.715046298073532 MAE: 0.5735208349754253 R2: 0.17346205261746928


# model registry

In [39]:
from mlflow import MlflowClient
import mlflow 
MLFLOW_TRACKING_URI='http://127.0.0.1:5000'

# Interacting with mlflow tracking server to extract run id's with higher accuracy

In [40]:
client=MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)
client.search_experiments()

[<Experiment: artifact_location='file:///d:/pythonProjects/MLOps_Krish_Naik/3.MLflow/MLproject/mlruns/720724581667782363', creation_time=1735307787258, experiment_id='720724581667782363', last_update_time=1735307787258, lifecycle_stage='active', name='hyperopt_testing', tags={}>,
 <Experiment: artifact_location='file:///d:/pythonProjects/MLOps_Krish_Naik/3.MLflow/MLproject/mlruns/0', creation_time=1735273908936, experiment_id='0', last_update_time=1735273908936, lifecycle_stage='active', name='Default', tags={}>]

In [43]:
from mlflow.entities import ViewType

runs=client.search_runs(experiment_ids='720724581667782363',
                        filter_string='metrics.rmse>0.7',
                        run_view_type=ViewType.ACTIVE_ONLY,
                        max_results=5,
                        order_by=['metrics.rmse DESC']
                        )


In [45]:
for run in runs:
    print(f'runid: {run.info.run_id} rmse: {run.data.metrics['rmse']:.4f}')

runid: 7e9009fb15f54a0bb424ab524257be33 rmse: 0.7852
runid: 6fe606808485415598fd19ec0d4b7534 rmse: 0.7850
runid: 6b39154a6c444e48aaa62607f58bed55 rmse: 0.7694
runid: f33cb2d344d14b76ade07a35e2ce1ff1 rmse: 0.7648
runid: 6657b3e8e99940768cf45bd2651a251a rmse: 0.7488


# Interacting with the model registry

In [60]:
mlflow.set_tracking_uri('http://127.0.0.1:5000')
client=MlflowClient()
run_id='b71cf385d7574636ab9d87a18ef6da3b'
model_uri=f'runs:/{run_id}/model'
mlflow.register_model(model_uri=model_uri,name='wine_quality')

Registered model 'wine_quality' already exists. Creating a new version of this model...
2024/12/27 09:40:19 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: wine_quality, version 7
Created version '7' of model 'wine_quality'.


<ModelVersion: aliases=[], creation_timestamp=1735314019859, current_stage='None', description='', last_updated_timestamp=1735314019859, name='wine_quality', run_id='b71cf385d7574636ab9d87a18ef6da3b', run_link='', source='file:///d:/pythonProjects/MLOps_Krish_Naik/3.MLflow/MLproject/mlruns/720724581667782363/b71cf385d7574636ab9d87a18ef6da3b/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='7'>

In [55]:
model_name='wine_quality'
latest_versions=client.get_latest_versions(name=model_name)

for version in latest_versions:
    print(f'Version: {version.version} tags: {version.tags}')

Version: 2 tags: {}


  latest_versions=client.get_latest_versions(name=model_name)


In [64]:
client.set_model_version_tag(name='wine_quality',version='6',key='model_stage',value='testing')
client.set_model_version_tag(name='wine_quality',version='7',key='model_stage',value='production')

# test the model in production

In [66]:
def test_model(name,version,xtest,ytest):
    model_uri=f'models:/{name}/{version}'
    model=mlflow.sklearn.load_model(model_uri)
    pred=model.predict(xtest)
    rmse, mae, r2 = evaluate_metrics(ytest, pred)
    print(f'RMSE: {rmse} MAE: {mae} R2: {r2}')

In [69]:
test_model(name=model_name, version="7", xtest=xtest, ytest=ytest)

RMSE: 0.6926364835460415 MAE: 0.5527260739258862 R2: 0.22445821940647515
