In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns


In [2]:
# importing data for testing any one of the csv files content
df = pd.read_csv('./data/diamonds/diamonds.csv', index_col=0)
print(df.head())
df.shape

   carat      cut color clarity  depth  table  price     x     y     z
1   0.23    Ideal     E     SI2   61.5   55.0    326  3.95  3.98  2.43
2   0.21  Premium     E     SI1   59.8   61.0    326  3.89  3.84  2.31
3   0.23     Good     E     VS1   56.9   65.0    327  4.05  4.07  2.31
4   0.29  Premium     I     VS2   62.4   58.0    334  4.20  4.23  2.63
5   0.31     Good     J     SI2   63.3   58.0    335  4.34  4.35  2.75


(53940, 10)

In [3]:
df.describe()

Unnamed: 0,carat,depth,table,price,x,y,z
count,53940.0,53940.0,53940.0,53940.0,53940.0,53940.0,53940.0
mean,0.79794,61.749405,57.457184,3932.799722,5.731157,5.734526,3.538734
std,0.474011,1.432621,2.234491,3989.439738,1.121761,1.142135,0.705699
min,0.2,43.0,43.0,326.0,0.0,0.0,0.0
25%,0.4,61.0,56.0,950.0,4.71,4.72,2.91
50%,0.7,61.8,57.0,2401.0,5.7,5.71,3.53
75%,1.04,62.5,59.0,5324.25,6.54,6.54,4.04
max,5.01,79.0,95.0,18823.0,10.74,58.9,31.8


In [4]:
df.dtypes

carat      float64
cut         object
color       object
clarity     object
depth      float64
table      float64
price        int64
x          float64
y          float64
z          float64
dtype: object

In [5]:
diamonds = df.copy()
print("Unique values in Cut Columns: ", diamonds['cut'].unique())
print("Unique values in Color Columns: ", diamonds['color'].unique())
print("Unique values in Clarity Columns: ", diamonds['clarity'].unique())

Unique values in Cut Columns:  ['Ideal' 'Premium' 'Good' 'Very Good' 'Fair']
Unique values in Color Columns:  ['E' 'I' 'J' 'H' 'F' 'G' 'D']
Unique values in Clarity Columns:  ['SI2' 'SI1' 'VS1' 'VS2' 'VVS2' 'VVS1' 'I1' 'IF']


In [6]:
# create dictionaries for categorical variables
cut_dict = {'Fair':1, 'Good':2, 'Very Good':3, 'Premium':4, 'Ideal':5}
color_dict = {'J':1, 'I':2, 'H':3, 'G':4, 'F':5, 'E':6, 'D':7}
clarity_dict = {'I1':1, 'SI2':2, 'SI1':3, 'VS2':4, 'VS1':5, 'VVS2':6, 'VVS1':7, 'IF':8}

In [7]:
# map the dictionaries to the categorical variables
diamonds['cut'] = diamonds['cut'].map(cut_dict)
diamonds['color'] = diamonds['color'].map(color_dict)
diamonds['clarity'] = diamonds['clarity'].map(clarity_dict)
diamonds.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
1,0.23,5,6,2,61.5,55.0,326,3.95,3.98,2.43
2,0.21,4,6,3,59.8,61.0,326,3.89,3.84,2.31
3,0.23,2,6,5,56.9,65.0,327,4.05,4.07,2.31
4,0.29,4,2,4,62.4,58.0,334,4.2,4.23,2.63
5,0.31,2,1,2,63.3,58.0,335,4.34,4.35,2.75


In [8]:
# checking null values
diamonds.isnull().sum()

carat      0
cut        0
color      0
clarity    0
depth      0
table      0
price      0
x          0
y          0
z          0
dtype: int64

In [9]:
from sklearn.utils import shuffle
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [10]:
# shuffle the dataset
diamonds = shuffle(diamonds)

In [11]:
# split the dataset into train and test
X_train_diamonds, X_test_diamonds, y_train_diamonds, y_test_diamonds = train_test_split(diamonds.drop('price', axis=1),
                                                                                        diamonds['price'], test_size=0.2,
                                                                                        random_state=42)

In [12]:
# scale the data
X_train_diamonds_scaled = preprocessing.scale(X_train_diamonds)
X_test_diamonds_scaled = preprocessing.scale(X_test_diamonds)

In [13]:
train_x = X_train_diamonds_scaled
train_y = y_train_diamonds
validation_x = X_test_diamonds_scaled
validation_y = y_test_diamonds

print(type(train_x), type(validation_x))
# train_y = np.array(train_y)
# validation_y = np.array(validation_y)
print(type(train_y), type(validation_y))
print(train_x.shape)
print(train_y.shape)

<class 'numpy.ndarray'> <class 'numpy.ndarray'>
<class 'pandas.core.series.Series'> <class 'pandas.core.series.Series'>
(43152, 9)
(43152,)


### XGBoost Regression

In [14]:
import xgboost as xgb
import mlflow

MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"
MLFLOW_EXPERIMENT_NAME = "diamonds-price-experiment"

mlflow.set_tracking_uri(uri=MLFLOW_TRACKING_URI)
mlflow.set_experiment(experiment_name=MLFLOW_EXPERIMENT_NAME)

<Experiment: artifact_location='./mlruns/2', creation_time=1691677228447, experiment_id='2', last_update_time=1691677228447, lifecycle_stage='active', name='diamonds-price-experiment', tags={}>

In [15]:
# Create regression matrices
dtrain_reg = xgb.DMatrix(train_x, label=train_y)
dtest_reg = xgb.DMatrix(validation_x, label=validation_y)

In [16]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import KFold, cross_val_score

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
from mlflow import xgboost, sklearn

In [17]:
def objective(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)

        booster = xgb.train(
            params=params,
            dtrain=dtrain_reg,
            num_boost_round=10000,
            evals=[(dtest_reg, "validation")],
            early_stopping_rounds=15
        )
        y_pred = booster.predict(dtest_reg)
        mae = mean_absolute_error(validation_y, y_pred)
        rmse = mean_squared_error(validation_y, y_pred, squared=False)
        score = booster.best_score
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("accuracy", score)

    return {'loss': rmse, 'status': STATUS_OK}

In [18]:
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 2, 25, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    'objective': hp.choice('objective', ['reg:squarederror','reg:linear']),              # 'reg:squarederror',
    'tree_method': 'gpu_hist',
    'seed': 42
}

best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=20,
    trials=Trials()
)

[0]	validation-rmse:4938.02477                      
[1]	validation-rmse:4374.16254                      
[2]	validation-rmse:3899.93807                      
[3]	validation-rmse:3513.46774                      
[4]	validation-rmse:3186.21681                      
[5]	validation-rmse:2917.76819                      
[6]	validation-rmse:2692.15282                      
[7]	validation-rmse:2509.51179                      
[8]	validation-rmse:2361.32079                      
[9]	validation-rmse:2238.12708                      
[10]	validation-rmse:2139.44473                     
[11]	validation-rmse:2046.08032                     
[12]	validation-rmse:1974.53540                     
[13]	validation-rmse:1917.24568                     
[14]	validation-rmse:1874.95469                     
[15]	validation-rmse:1840.85431                     
[16]	validation-rmse:1803.13823                     
[17]	validation-rmse:1780.27380                     
[18]	validation-rmse:1765.25708               

### ElasticNet Regression

In [19]:
from sklearn.linear_model import ElasticNet

In [20]:
def evaluate(y, pred):
    rmse = mean_squared_error(y, pred, squared=False)
    mae = mean_absolute_error(y, pred)
    r2 = r2_score(y, pred)
    return rmse, mae, r2

In [33]:
def objective_EN(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "ElasticNet")
        mlflow.log_params(params)

        lr = ElasticNet(**params)
        lr.fit(train_x,train_y)

        pred = lr.predict(validation_x)

        rmse,mae,r2 = evaluate(validation_y, pred)
        score = lr.score(validation_x, validation_y)
        mlflow.log_metric("accuracy", score)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mae",mae)
        mlflow.log_metric("r2",r2)

    return {'loss': rmse, 'status': STATUS_OK}

In [34]:
search_space_EN = {
    'alpha' : hp.loguniform('alpha', -3, 1),
    'l1_ratio' : hp.uniform('l1_ratio', 0, 1)
}

In [35]:
best_result_EN = fmin(
    fn=objective_EN,
    space=search_space_EN,
    algo=tpe.suggest,
    max_evals=20,
    trials=Trials()
)

100%|██████████| 20/20 [00:31<00:00,  1.59s/it, best loss: 1211.5176534516222]


### SGD Regressor

In [36]:
from sklearn.linear_model import SGDRegressor

In [38]:
def objective_SGD(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "SGDRegressor")
        mlflow.log_params(params)

        lr = SGDRegressor(**params)
        lr.fit(train_x, train_y)

        pred = lr.predict(validation_x)

        rmse,mae,r2 = evaluate(validation_y, pred)
        score = lr.score(validation_x, validation_y)
        mlflow.log_metric("accuracy", score)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mae",mae)
        mlflow.log_metric("r2",r2)

    return {'loss': rmse, 'status': STATUS_OK}

In [39]:
search_space_SGD = {
    "penalty": hp.choice('penalty', ['l1', 'l2']),
    "alpha": hp.loguniform('alpha', -3, 1),
    "learning_rate": hp.choice('learning_rate', ["optimal", "adaptive"])
}

In [40]:
best_result_SGD = fmin(
    fn=objective_SGD,
    space=search_space_SGD,
    algo=tpe.suggest,
    max_evals=20,
    trials=Trials()
)

  0%|          | 0/20 [00:00<?, ?it/s, best loss: ?]




  5%|▌         | 1/20 [00:05<01:49,  5.77s/it, best loss: 1436.9637988958848]




 10%|█         | 2/20 [00:10<01:37,  5.41s/it, best loss: 1436.9637988958848]




 70%|███████   | 14/20 [00:31<00:08,  1.45s/it, best loss: 1208.3368870924082]




 75%|███████▌  | 15/20 [00:36<00:11,  2.29s/it, best loss: 1208.3368870924082]




 95%|█████████▌| 19/20 [00:44<00:01,  1.89s/it, best loss: 1208.3368870924082]




100%|██████████| 20/20 [00:49<00:00,  2.49s/it, best loss: 1208.3368870924082]


### Support Vector Regression

In [41]:
from sklearn.svm import SVR

In [42]:
def objective_SVR(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "SVR")
        mlflow.log_params(params)

        lr = SVR(**params)
        lr.fit(train_x, train_y)

        pred = lr.predict(validation_x)

        rmse,mae,r2 = evaluate(validation_y, pred)
        score = lr.score(validation_x, validation_y)
        mlflow.log_metric("accuracy", score)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mae",mae)
        mlflow.log_metric("r2",r2)

    return {'loss': rmse, 'status': STATUS_OK}

In [45]:
search_space_SVR = {
    "kernel": hp.choice('kernel', ["linear", "rbf"]),
    "C": hp.choice('svm_C', [10, 100, 1000]),
    "gamma": hp.choice('gamma', ["scale", "auto"])
}

In [46]:
best_result_SVR = fmin(
    fn=objective_SVR,
    space=search_space_SVR,
    algo=tpe.suggest,
    max_evals=20,
    trials=Trials()
)

100%|██████████| 20/20 [48:55<00:00, 146.78s/it, best loss: 647.0305585468889] 


## Model selection and registry

### Model tracking

In [47]:
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType

In [48]:
ml_flow_client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)
ml_flow_client.search_experiments()

[<Experiment: artifact_location='./mlruns/2', creation_time=1691677228447, experiment_id='2', last_update_time=1691677228447, lifecycle_stage='active', name='diamonds-price-experiment', tags={}>,
 <Experiment: artifact_location='./mlruns/1', creation_time=1691571810964, experiment_id='1', last_update_time=1691571810964, lifecycle_stage='active', name='MSFT-stock-experiment', tags={}>,
 <Experiment: artifact_location='./mlruns/0', creation_time=1691571810833, experiment_id='0', last_update_time=1691571810833, lifecycle_stage='active', name='Default', tags={}>]

In [50]:
runs = ml_flow_client.search_runs(
    experiment_ids='2',
    filter_string="metrics.accuracy > 0.9075",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=15,
    order_by=["metrics.rmse ASC"]
)
print(f"Number of runs where accuracy is less than 0.9075 is: {len(runs)}\n-----------------")
runs[0:3]

Number of runs where RSME is less than 0.475 is: 15
-----------------


[<Run: data=<RunData: metrics={'accuracy': 0.9737062301668077,
  'mae': 357.8287960118412,
  'r2': 0.9737062301668077,
  'rmse': 647.0305585468889}, params={'C': '1000', 'gamma': 'auto', 'kernel': 'rbf'}, tags={'mlflow.runName': 'casual-mare-969',
  'mlflow.source.name': 'D:\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py',
  'mlflow.source.type': 'LOCAL',
  'mlflow.user': 'Alienware',
  'model': 'SVR'}>, info=<RunInfo: artifact_uri='./mlruns/2/c5cfa3fd18a04e85a2a28c03147ded4e/artifacts', end_time=1691688317913, experiment_id='2', lifecycle_stage='active', run_id='c5cfa3fd18a04e85a2a28c03147ded4e', run_name='casual-mare-969', run_uuid='c5cfa3fd18a04e85a2a28c03147ded4e', start_time=1691688118252, status='FINISHED', user_id='Alienware'>>,
 <Run: data=<RunData: metrics={'accuracy': 0.9737062279702726,
  'mae': 357.8289109229976,
  'r2': 0.9737062279702726,
  'rmse': 647.0305855727838}, params={'C': '1000', 'gamma': 'scale', 'kernel': 'rbf'}, tags={'mlflow.runName': 'merciful-cod-391

In [51]:
for run in runs:
    print(f"model name: {run.data.tags['model']}, run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

model name: SVR, run id: c5cfa3fd18a04e85a2a28c03147ded4e, rmse: 647.0306
model name: SVR, run id: d44eeedfec87408f97e685e0d1298a90, rmse: 647.0306
model name: SVR, run id: 6e4bf6a9addd4c8088f38b0ea1eb964c, rmse: 647.0306
model name: SVR, run id: a1c9ebceb33649d9903e4a0c9d1c0858, rmse: 767.6516
model name: SVR, run id: 75979e6de79342fb85f56b2eee0e5b8a, rmse: 767.6516
model name: SVR, run id: 6346b6f245ff44508148e9855c0ce06e, rmse: 767.6516
model name: SVR, run id: 178d44e2f4ef41d9b8e2771415dc67cd, rmse: 767.6516
model name: SGDRegressor, run id: eea096ac3c0f4ab39d861688c98bc945, rmse: 1208.3369
model name: SGDRegressor, run id: fcb45f74c1384abbbaba78a8991cf027, rmse: 1208.8362
model name: SGDRegressor, run id: ec72ad16500d4025b7f6519a3703cbc9, rmse: 1208.9492
model name: SGDRegressor, run id: 7c60112dfac14aa6bb55ef9400081b93, rmse: 1210.4206
model name: SGDRegressor, run id: a6734efedd9a40ef825026d32cad7e9b, rmse: 1210.9508
model name: ElasticNet, run id: 53ef8901f8bb4002b53d8a7422a834

- We get the top 15 models above based on top 12 low RMSE values of those models
- For our MSFT stocks data, we found out that SGDRegressor, SVR and ElasticNet models yields low RMSE values.
- Below we will re-train the respective models using the best params found in the above runs for them.

In [52]:
# acquiring best parameters for each model
interested_run_indices = [0,        # SVR
                          7,        # SGDRegressor
                          12]       # ElasticNet

params_dict = {}
models_dict = {}

In [53]:
for i in interested_run_indices:
    model_name_extracted = runs[i].data.tags['model']
    params_dict = runs[i].data.params
    run_id = runs[i].info.run_id
    key = model_name_extracted + f'_r{i}'
    models_dict[key] = {
        'run_id': run_id,
        'params': params_dict
    }

models_dict

{'SVR_r0': {'run_id': 'c5cfa3fd18a04e85a2a28c03147ded4e',
  'params': {'C': '1000', 'gamma': 'auto', 'kernel': 'rbf'}},
 'SGDRegressor_r7': {'run_id': 'eea096ac3c0f4ab39d861688c98bc945',
  'params': {'alpha': '0.9261551556523054',
   'learning_rate': 'adaptive',
   'penalty': 'l1'}},
 'ElasticNet_r12': {'run_id': '53ef8901f8bb4002b53d8a7422a83497',
  'params': {'alpha': '0.05069452355618102',
   'l1_ratio': '0.9964817442518504'}}}

In [54]:
# training the models with best parameters and saving these newly trained models as artifacts in MLFlow
def formatting_dictionary_values(dict):
    for k, v in dict.items():
        try:
            dict[k] = float(v)
        except ValueError:
            dict[k] = v
    return dict

In [55]:
def training_model_with_best_params(model_name: str, best_params: dict):
    if model_name == 'SGDRegressor':
        reg = SGDRegressor(**best_params)
    elif model_name == 'SVR':
        reg = SVR(**best_params)
    elif model_name.lower() == 'elasticnet':
        reg = ElasticNet(**best_params)
    else:
        print("Not a valid model name")
        raise
    return reg

In [57]:
# all the best models are from sklearn library so will be using mlflow.sklearn.log_model() for saving the artifacts here
for key, value in models_dict.items():
    # print(key, '\n', value)
    model_name_to_train = key.split('_')[0]
    best_params_for_model = formatting_dictionary_values(dict=value['params'])
    with mlflow.start_run():
        mlflow.set_tag("model", f"{model_name_to_train}_best")
        mlflow.log_params(best_params_for_model)

        reg = training_model_with_best_params(model_name=model_name_to_train,
                                              best_params=best_params_for_model)
        print(reg)
        reg.fit(train_x, train_y)

        pred = reg.predict(validation_x)
        sklearn.log_model(sk_model=reg, artifact_path='model_artifact')

        rmse, mae, r2 = evaluate(validation_y, pred)
        score = reg.score(validation_x, validation_y)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("accuracy", score)


SVR(C=1000.0, gamma='auto')
SGDRegressor(alpha=0.9261551556523054, learning_rate='adaptive', penalty='l1')
ElasticNet(alpha=0.05069452355618102, l1_ratio=0.9964817442518504)


In [65]:
# acquiring the run ids for the models to register
runs = ml_flow_client.search_runs(
    experiment_ids='2',
    filter_string="metrics.accuracy > 0.9075",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=20,
    order_by=["metrics.rmse ASC"]
)
print(f"Number of runs where accuracy is less than 0.9075 is: {len(runs)}\n-----------------")

Number of runs where accuracy is less than 0.9075 is: 20
-----------------


In [66]:
for run in runs:
    print(f"model name: {run.data.tags['model']}, run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']:.4f}")

model name: SVR_best, run id: 5dd101d8d808403dac580024d8936533, rmse: 647.0306
model name: SVR_best, run id: fb6cadc9593a483c99be2edad2e3b947, rmse: 647.0306
model name: SVR, run id: c5cfa3fd18a04e85a2a28c03147ded4e, rmse: 647.0306
model name: SVR, run id: d44eeedfec87408f97e685e0d1298a90, rmse: 647.0306
model name: SVR, run id: 6e4bf6a9addd4c8088f38b0ea1eb964c, rmse: 647.0306
model name: SVR, run id: a1c9ebceb33649d9903e4a0c9d1c0858, rmse: 767.6516
model name: SVR, run id: 75979e6de79342fb85f56b2eee0e5b8a, rmse: 767.6516
model name: SVR, run id: 6346b6f245ff44508148e9855c0ce06e, rmse: 767.6516
model name: SVR, run id: 178d44e2f4ef41d9b8e2771415dc67cd, rmse: 767.6516
model name: SGDRegressor, run id: eea096ac3c0f4ab39d861688c98bc945, rmse: 1208.3369
model name: SGDRegressor, run id: fcb45f74c1384abbbaba78a8991cf027, rmse: 1208.8362
model name: SGDRegressor, run id: ec72ad16500d4025b7f6519a3703cbc9, rmse: 1208.9492
model name: SGDRegressor_best, run id: 550c00c1b5e547029e4d46f1623ec14f,

In [67]:
# acquiring best parameters for each model
interested_run_indices = [0,        # SVR
                          12,        # SGDRegressor
                          16]
models_for_registering = {}

In [68]:
for i in interested_run_indices:
    model_name_extracted = runs[i].data.tags['model'].split('_')[0]
    run_id = runs[i].info.run_id
    key = model_name_extracted
    models_for_registering[key] = run_id

models_for_registering

{'SVR': '5dd101d8d808403dac580024d8936533',
 'SGDRegressor': '550c00c1b5e547029e4d46f1623ec14f',
 'ElasticNet': 'de190e779259475f9df1db87ba56c721'}

Above are the best models selected by us for different Algorithms

### Model Registry

In [69]:
# Creating the model-name and registering ElasticNet as first version model to it
en_run_id = models_for_registering['ElasticNet']
artifact_name = 'model_artifact'
model_uri = f'runs:/{en_run_id}/{artifact_name}'
model_name_mlflow = 'diamonds_price_predictor'

mlflow.register_model(model_uri=model_uri, name=model_name_mlflow)

Successfully registered model 'diamonds_price_predictor'.
2023/08/11 08:58:19 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: diamonds_price_predictor, version 1
Created version '1' of model 'diamonds_price_predictor'.


<ModelVersion: creation_timestamp=1691724498971, current_stage='None', description=None, last_updated_timestamp=1691724498971, name='diamonds_price_predictor', run_id='de190e779259475f9df1db87ba56c721', run_link=None, source='./mlruns/2/de190e779259475f9df1db87ba56c721/artifacts/model_artifact', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [70]:
# Assigning stage to this newly created model version
model_version = 1
new_stage = 'Staging'

ml_flow_client.transition_model_version_stage(
    name=model_name_mlflow,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=False
)

<ModelVersion: creation_timestamp=1691724498971, current_stage='Staging', description=None, last_updated_timestamp=1691725284442, name='diamonds_price_predictor', run_id='de190e779259475f9df1db87ba56c721', run_link=None, source='./mlruns/2/de190e779259475f9df1db87ba56c721/artifacts/model_artifact', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [71]:
# display latest version stages update of the model
def get_status_of_latest_model_version(mlflow_client, model_name):
    latest_versions = mlflow_client.get_latest_versions(name=model_name)
    for version in latest_versions:
        print(f"version: {version.version}, stage: {version.current_stage}")

get_status_of_latest_model_version(mlflow_client=ml_flow_client,
                                   model_name=model_name_mlflow)

version: 1, stage: Staging


In [79]:
from datetime import datetime

def update_latest_change_in_model_version_description(mlflow_client, model_name, model_version, new_stage):
    date = datetime.today().date()
    mlflow_client.update_model_version(
        name=model_name,
        version=model_version,
        description=f"The model version {model_version} was transitioned to {new_stage} on {date}"
    )
    mlflow_client.set_model_version_tag(
        name=model_name,
        version=model_version,
        key='run_id',
        value=en_run_id
    )

In [80]:
update_latest_change_in_model_version_description(mlflow_client=ml_flow_client,
                                                  model_name=model_name_mlflow,
                                                  model_version=model_version,
                                                  new_stage=new_stage)