In [1]:
import os
# Display the kernel's current working directory before any directory change.
os.getcwd()

'/home/kousik/ALL_OTHERS/practice_projects/mlops-with-mlflow/notebooks'

In [2]:
# Step up from notebooks/ to the project root so that relative paths such as
# "params.yaml" resolve. The guard keeps this cell idempotent: re-running it
# after the move must not climb yet another directory level.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir('../')

In [3]:
# Display the working directory again to confirm the effect of the chdir cell.
os.getcwd()

'/home/kousik/ALL_OTHERS/practice_projects/mlops-with-mlflow'

In [4]:
import os
import warnings
import sys
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from src.get_data import read_params
import argparse
import joblib
import json
import mlflow
from urllib.parse import urlparse

In [5]:
def eval_metrics(actual, pred):
    """Score predictions against the observed targets.

    Args:
        actual: Observed target values.
        pred: Predicted target values, aligned with ``actual``.

    Returns:
        A tuple ``(rmse, mae, r2)``: root mean squared error, mean absolute
        error and R^2 score, in that order.
    """
    mse = mean_squared_error(actual, pred)
    scores = (
        np.sqrt(mse),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )
    return scores


In [6]:
# Display the whole configuration returned by read_params for params.yaml.
read_params(config_path="params.yaml")

{'base': {'project': 'winequality-project',
  'random_state': 42,
  'target_col': 'TARGET'},
 'data_source': {'s3_source': 'data_given/winequality.csv'},
 'load_data': {'raw_dataset_csv': 'data/raw/winequality.csv'},
 'split_data': {'train_path': 'data/processed/train_winequality.csv',
  'test_path': 'data/processed/test_winequality.csv',
  'test_size': 0.2},
 'estimators': {'ElasticNet': {'params': {'alpha': 0.88, 'l1_ratio': 0.89}}},
 'model_dir': 'saved_models',
 'reports': {'params': 'report/params.json', 'scores': 'report/scores.json'},
 'webapp_model_dir': 'prediction_service/model/model.joblib',
 'mlflow_config': {'artifacts_dir': 'artifacts',
  'experiment_name': 'ElasticNet regression',
  'run_name': 'mlops',
  'registered_model_name': 'ElasticNetWineModel',
  'remote_server_uri': 'http://0.0.0.0:5000'}}

In [30]:
# Parse the pipeline configuration and pull out everything the training cell needs.
config = read_params(config_path="params.yaml")

# ElasticNet hyper-parameters, the seed, and the model output directory.
alpha = config["estimators"]["ElasticNet"]["params"]["alpha"]
l1_ratio = config["estimators"]["ElasticNet"]["params"]["l1_ratio"]
random_state = config["base"]["random_state"]
model_dir = config["model_dir"]

# Locations of the already-split train / test CSV files.
train_data_path = config["split_data"]["train_path"]
test_data_path = config["split_data"]["test_path"]

# Kept as a one-element list, so indexing a frame with it yields a DataFrame.
target = [config["base"]["target_col"]]

train = pd.read_csv(train_data_path, sep=",")
test = pd.read_csv(test_data_path, sep=",")

# Separate each frame into its feature columns and its target column.
train_x, train_y = train.drop(columns=target), train[target]
test_x, test_y = test.drop(columns=target), test[target]

In [31]:
# List the feature columns left in train_x after the target column was dropped.
train_x.columns

Index(['fixed_acidity', 'volatile_acidity', 'citric_acid', 'residual_sugar',
       'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density',
       'pH', 'sulphates', 'alcohol'],
      dtype='object')

## MLflow: experiment tracking


In [32]:
# Pull the MLflow section out of the loaded config and display it.
mlflow_config = config["mlflow_config"]
mlflow_config

{'artifacts_dir': 'artifacts',
 'experiment_name': 'ElasticNet regression',
 'run_name': 'mlops',
 'registered_model_name': 'ElasticNetWineModel',
 'remote_server_uri': 'http://0.0.0.0:5000'}

In [33]:
# Read the MLflow server address from the config; it is used as the tracking URI below.
mlflow_config = config["mlflow_config"]
remote_server_uri = mlflow_config["remote_server_uri"]

In [34]:
# ?mlflow.set_tracking_uri

In [35]:
# Point MLflow at the configured server, then read the URI back to confirm it took effect.
mlflow.set_tracking_uri(remote_server_uri)
tracking_uri = mlflow.get_tracking_uri()

print("Current tracking uri: {}".format(tracking_uri))


Current tracking uri: http://0.0.0.0:5000


In [36]:
# NOTE(review): no run appears to be active at this point, so this call seems
# to start one implicitly -- the run id embedded in the URI shown below also
# shows up in the later search_runs output as an auto-named run without any
# metrics. Confirm, and prefer calling this inside an explicit start_run block.
mlflow.get_artifact_uri()

'mlflow-artifacts:/546033799678305098/7bde9a6978474518b69aad96c4846871/artifacts'

In [37]:
# Split the artifact URI into its components; the training cell inspects the scheme part.
urlparse(mlflow.get_artifact_uri())

ParseResult(scheme='mlflow-artifacts', netloc='', path='/546033799678305098/7bde9a6978474518b69aad96c4846871/artifacts', params='', query='', fragment='')

In [38]:
 # End the currently active run, if any, before the training cell opens its own run.
 mlflow.end_run()

In [39]:
# Train an ElasticNet model inside an MLflow run and record its
# hyper-parameters, evaluation metrics and the fitted model.
mlflow_config = config["mlflow_config"]
remote_server_uri = mlflow_config["remote_server_uri"]

mlflow.set_tracking_uri(remote_server_uri)
mlflow.set_experiment(mlflow_config["experiment_name"])

with mlflow.start_run(run_name=mlflow_config["run_name"]) as mlops_run:
    lr = ElasticNet(
        alpha=alpha,
        l1_ratio=l1_ratio,
        random_state=random_state)
    lr.fit(train_x, train_y)

    predicted_qualities = lr.predict(test_x)

    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

    mlflow.log_param("alpha", alpha)
    mlflow.log_param("l1_ratio", l1_ratio)

    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("mae", mae)
    mlflow.log_metric("r2", r2)

    # Whether the model can be registered depends on the tracking backend,
    # not on where artifacts are stored, so inspect the tracking URI scheme.
    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

    if tracking_url_type_store != "file":
        # Server-backed store: log the model and register it as a new version.
        mlflow.sklearn.log_model(
            lr,
            "model",
            registered_model_name=mlflow_config["registered_model_name"])
    else:
        # Plain file store: still log the model, just without registering it.
        # (This branch previously called load_model, which expects a model URI
        # and would have failed instead of saving anything.)
        mlflow.sklearn.log_model(lr, "model")

Registered model 'ElasticNetWineModel' already exists. Creating a new version of this model...
2023/10/16 19:54:40 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: ElasticNetWineModel, version 2
Created version '2' of model 'ElasticNetWineModel'.


### MLflow: promoting the best model to Production

In [40]:
from src.get_data import read_params
import argparse
import mlflow
from mlflow.tracking import MlflowClient
from pprint import pprint
import joblib
import os

In [41]:
# Read the registered model name from the MLflow config section and display it.
mlflow_config = config["mlflow_config"] 
model_name = mlflow_config["registered_model_name"]
model_name

'ElasticNetWineModel'

In [42]:
# Point MLflow at the server address from the config before querying runs and model versions.
remote_server_uri = mlflow_config["remote_server_uri"]
mlflow.set_tracking_uri(remote_server_uri)

In [43]:
# Display the tracking URI currently in effect.
mlflow.get_tracking_uri()

'http://0.0.0.0:5000'

In [44]:
# Show the runs recorded under experiment id 0 (presumably MLflow's default experiment -- confirm).
mlflow.search_runs(experiment_ids=[0]) 

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,tags.mlflow.runName,tags.mlflow.source.name,tags.mlflow.user,tags.mlflow.source.type
0,6fa91945a68f4e5eab2ea870db021fbf,0,FINISHED,mlflow-artifacts:/0/6fa91945a68f4e5eab2ea870db...,2023-10-16 14:12:06.925000+00:00,2023-10-16 14:13:57.321000+00:00,ambitious-kit-377,/home/kousik/anaconda3/lib/python3.11/site-pac...,kousik,LOCAL


In [45]:
# Resolve the configured experiment name to its experiment id and display it.
mlflow.get_experiment_by_name(name=mlflow_config["experiment_name"]).experiment_id

'546033799678305098'

In [46]:
# Look up the configured experiment and show all of its runs.
experiment = mlflow.get_experiment_by_name(name=mlflow_config["experiment_name"])
if experiment is None:
    # get_experiment_by_name returns None for an unknown name; fail with a
    # clear message instead of an AttributeError on None.
    raise ValueError(
        f"MLflow experiment '{mlflow_config['experiment_name']}' does not exist"
    )
experiment_id = experiment.experiment_id

# `experiment_ids` is documented as a list of ids, so wrap the single id.
mlflow.search_runs(experiment_ids=[experiment_id])

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.r2,metrics.mae,metrics.rmse,params.l1_ratio,params.alpha,tags.mlflow.runName,tags.mlflow.log-model.history,tags.mlflow.user,tags.mlflow.source.type,tags.mlflow.source.name
0,8859d9c293b54236b190b7aa83d8cc2a,546033799678305098,FINISHED,mlflow-artifacts:/546033799678305098/8859d9c29...,2023-10-16 14:24:38.116000+00:00,2023-10-16 14:24:40.807000+00:00,0.10964,0.618863,0.762795,0.5,0.5,mlops,"[{""run_id"": ""8859d9c293b54236b190b7aa83d8cc2a""...",kousik,LOCAL,/home/kousik/anaconda3/lib/python3.11/site-pac...
1,7bde9a6978474518b69aad96c4846871,546033799678305098,FINISHED,mlflow-artifacts:/546033799678305098/7bde9a697...,2023-10-16 14:24:36.816000+00:00,2023-10-16 14:24:37.596000+00:00,,,,,,capable-zebra-79,,kousik,LOCAL,/home/kousik/anaconda3/lib/python3.11/site-pac...
2,1526e10983084bd1b46503e54c538980,546033799678305098,FINISHED,mlflow-artifacts:/546033799678305098/1526e1098...,2023-10-16 14:13:57.948000+00:00,2023-10-16 14:14:00.873000+00:00,0.008382,0.659818,0.805002,0.89,0.88,mlops,"[{""run_id"": ""1526e10983084bd1b46503e54c538980""...",kousik,LOCAL,/home/kousik/anaconda3/lib/python3.11/site-pac...


In [47]:
# Keep the experiment's runs for the model-selection cells that follow.
# `experiment_ids` is documented as a list of ids, so wrap the single id.
runs = mlflow.search_runs(experiment_ids=[experiment_id])

In [48]:
# Smallest recorded MAE across the runs (runs without an MAE sort to the end).
lowest = runs.sort_values(by="metrics.mae")["metrics.mae"].iloc[0]
lowest

0.6188630472018415

In [49]:
# Id of the first run whose MAE equals `lowest`.
lowest_run_id = runs.loc[runs["metrics.mae"] == lowest, "run_id"].iloc[0]
lowest_run_id

'8859d9c293b54236b190b7aa83d8cc2a'

In [50]:
# Print every registered version of the model as a plain dict.
client = MlflowClient()
for mv in map(dict, client.search_model_versions(f"name='{model_name}'")):
    print(mv)

{'aliases': [], 'creation_timestamp': 1697466280797, 'current_stage': 'None', 'description': '', 'last_updated_timestamp': 1697466280797, 'name': 'ElasticNetWineModel', 'run_id': '8859d9c293b54236b190b7aa83d8cc2a', 'run_link': '', 'source': 'mlflow-artifacts:/546033799678305098/8859d9c293b54236b190b7aa83d8cc2a/artifacts/model', 'status': 'READY', 'status_message': '', 'tags': {}, 'user_id': '', 'version': '2'}
{'aliases': [], 'creation_timestamp': 1697465640866, 'current_stage': 'Production', 'description': '', 'last_updated_timestamp': 1697465674849, 'name': 'ElasticNetWineModel', 'run_id': '1526e10983084bd1b46503e54c538980', 'run_link': '', 'source': 'mlflow-artifacts:/546033799678305098/1526e10983084bd1b46503e54c538980/artifacts/model', 'status': 'READY', 'status_message': '', 'tags': {}, 'user_id': '', 'version': '1'}


In [51]:
# Promote the registered version produced by the best run (lowest MAE) to
# "Production", move every other version to "Staging", then export the
# promoted model for the web app.
client = MlflowClient()
model_versions = [
    dict(mv) for mv in client.search_model_versions(f"name='{model_name}'")
]

# Validate before touching any stage: without a version for the best run the
# loop below would demote every version, and `logged_model` would be undefined
# (or a stale value left in the kernel by an earlier execution of this cell).
if not any(mv["run_id"] == lowest_run_id for mv in model_versions):
    raise ValueError(
        f"No registered version of '{model_name}' was created by run '{lowest_run_id}'"
    )

for mv in model_versions:
    current_version = mv["version"]

    if mv["run_id"] == lowest_run_id:
        # Remember where the promoted model's artifacts live for loading below.
        logged_model = mv["source"]
        pprint(mv, indent=4)
        client.transition_model_version_stage(
            name=model_name,
            version=current_version,
            stage="Production"
        )
    else:
        client.transition_model_version_stage(
            name=model_name,
            version=current_version,
            stage="Staging"
        )

######### Load the Production model #################
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Full path of the exported file, e.g. "prediction_service/model/model.joblib".
model_path = config["webapp_model_dir"]

# joblib.dump does not create missing parent directories.
os.makedirs(os.path.dirname(model_path) or ".", exist_ok=True)

joblib.dump(loaded_model, model_path)

{   'aliases': [],
    'creation_timestamp': 1697466280797,
    'current_stage': 'None',
    'description': '',
    'last_updated_timestamp': 1697466280797,
    'name': 'ElasticNetWineModel',
    'run_id': '8859d9c293b54236b190b7aa83d8cc2a',
    'run_link': '',
    'source': 'mlflow-artifacts:/546033799678305098/8859d9c293b54236b190b7aa83d8cc2a/artifacts/model',
    'status': 'READY',
    'status_message': '',
    'tags': {},
    'user_id': '',
    'version': '2'}


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

['prediction_service/model/model.joblib']