### Quickstart: Compare runs, choose a model, and deploy it to a REST API

In this quickstart, you will:

- Run a hyperparameter sweep on a training script

- Compare the results of the runs in the MLflow UI

- Choose the best run and register it as a model

- Deploy the model to a REST API

- Build a container image suitable for deployment to a cloud platform

When we register a model in the model registry, we can then turn it into an API (ready for deployment), which is what we deploy.
Basically, we are deploying a model as a REST API.
hyperopt -> a library that allows us to do hyperparameter tuning for the ANN (artificial neural network)

In [2]:
import keras
import numpy as np
import pandas as pd
from hyperopt import STATUS_OK,Trials,fmin,hp,tpe
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

import mlflow
from mlflow.models import infer_signature

In [None]:
## Fetch the white-wine quality dataset (a semicolon-delimited CSV) from the MLflow repository
wine_csv_url = "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-white.csv"
data = pd.read_csv(wine_csv_url, sep=";")
data  ## display the frame; "quality" is the output (target) feature

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.00100,3.00,0.45,8.8,6
1,6.3,0.30,0.34,1.6,0.049,14.0,132.0,0.99400,3.30,0.49,9.5,6
2,8.1,0.28,0.40,6.9,0.050,30.0,97.0,0.99510,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...
4893,6.2,0.21,0.29,1.6,0.039,24.0,92.0,0.99114,3.27,0.50,11.2,6
4894,6.6,0.32,0.36,8.0,0.047,57.0,168.0,0.99490,3.15,0.46,9.6,5
4895,6.5,0.24,0.19,1.2,0.041,30.0,111.0,0.99254,2.99,0.46,9.4,6
4896,5.5,0.29,0.30,1.1,0.022,20.0,110.0,0.98869,3.34,0.38,12.8,7


In [None]:
## Hold out 25% of the rows as the test set; the remaining 75% form the training frame.
## (The validation set is carved out of the training data in a later cell.)
train, test = train_test_split(
    data,
    test_size=0.25,
    random_state=42,  ## fixed seed so the split is reproducible
)
train  ## display the training frame; "quality" is the dependent feature, the rest are the independent features

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
2835,6.3,0.25,0.22,3.30,0.048,41.0,161.0,0.99256,3.16,0.50,10.5,6
1157,7.8,0.30,0.29,16.85,0.054,23.0,135.0,0.99980,3.16,0.38,9.0,6
744,7.4,0.38,0.27,7.50,0.041,24.0,160.0,0.99535,3.17,0.43,10.0,5
1448,7.4,0.16,0.49,1.20,0.055,18.0,150.0,0.99170,3.23,0.47,11.2,6
3338,7.2,0.27,0.28,15.20,0.046,6.0,41.0,0.99665,3.17,0.39,10.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...
4426,6.2,0.21,0.52,6.50,0.047,28.0,123.0,0.99418,3.22,0.49,9.9,6
466,7.0,0.14,0.32,9.00,0.039,54.0,141.0,0.99560,3.22,0.43,9.4,6
3092,7.6,0.27,0.52,3.20,0.043,28.0,152.0,0.99129,3.02,0.53,11.4,6
3772,6.3,0.24,0.29,13.70,0.035,53.0,134.0,0.99567,3.17,0.38,10.6,6


In [5]:
## Separate the independent features (every column except "quality") from the dependent
## target, converting each to a NumPy array.
train_x = train.drop(columns=["quality"]).to_numpy()
train_y = train["quality"].to_numpy()  ## selecting a single column already yields a 1D array

## Test dataset (treated as new, unseen data for prediction)
test_x = test.drop(columns=["quality"]).to_numpy()  ## independent features
test_y = test["quality"].to_numpy()  ## dependent feature

## Validation data: split the training arrays further into train (80%) and validation (20%)
train_x, valid_x, train_y, valid_y = train_test_split(
    train_x,
    train_y,
    test_size=0.20,
    random_state=42,
)

## Model signature: the input schema is inferred from train_x and the output schema from train_y
signature = infer_signature(train_x, train_y)

In [None]:
### ANN Model

def train_model(params, epochs, train_x, train_y, valid_x, valid_y, test_x, test_y):
    """Train a small feed-forward regressor and log the trial to MLflow.

    The network is: input -> Normalization (standardises every feature with the
    training mean/variance) -> Dense(64, relu) -> Dense(1). It is optimised with
    SGD on mean squared error, and RMSE is tracked as a metric.

    Args:
        params: Mapping with the keys "lr" (learning rate) and "momentum"
            used to configure the SGD optimizer. The whole mapping is logged
            to MLflow as run parameters.
        epochs: Number of training epochs.
        train_x: Training features (2D array, one column per feature).
        train_y: Training targets.
        valid_x: Validation features.
        valid_y: Validation targets.
        test_x: Unused here; kept so existing callers keep working.
        test_y: Unused here; kept so existing callers keep working.

    Returns:
        A dict in the shape Hyperopt expects from an objective:
        {"loss": <validation RMSE>, "status": STATUS_OK, "model": <trained model>}.

    Note:
        Reads the module-level ``signature`` when logging the model, so that
        variable must be defined before this function is called.
    """
    ## Column-wise (axis=0) mean and variance of the training features. They are
    ## handed to the Normalization layer so inputs are standardised per feature.
    ## (This is feature normalisation on the input, not "layer normalisation".)
    mean = np.mean(train_x, axis=0)
    var = np.var(train_x, axis=0)

    ## Define model architecture
    model = keras.Sequential(
        [
            keras.Input([train_x.shape[1]]),  ## input width = number of feature columns
            keras.layers.Normalization(mean=mean, variance=var),
            keras.layers.Dense(64, activation="relu"),
            keras.layers.Dense(1),  ## single output node (regression)
        ]
    )

    ## Compile the model. The learning rate and momentum come from `params`, so
    ## every Hyperopt trial trains with a different optimizer configuration.
    model.compile(
        optimizer=keras.optimizers.SGD(
            learning_rate=params["lr"],
            momentum=params["momentum"],
        ),
        loss="mean_squared_error",
        metrics=[keras.metrics.RootMeanSquaredError()],
    )

    ## nested=True makes this run a child of the currently active run, so each
    ## trial is tracked separately underneath the parent search run.
    with mlflow.start_run(nested=True):
        model.fit(
            train_x,
            train_y,
            ## Keras documents validation_data as a tuple (x_val, y_val), not a list.
            validation_data=(valid_x, valid_y),
            epochs=epochs,
            batch_size=64,
        )

        ## Evaluate on the validation set; the result is [loss, rmse] because a
        ## single metric (RootMeanSquaredError) was compiled in above.
        eval_result = model.evaluate(valid_x, valid_y, batch_size=64)
        eval_rmse = eval_result[1]

        ## Log the parameters and results
        mlflow.log_params(params)
        mlflow.log_metric("eval_rmse", eval_rmse)

        ## Log the model under the artifact name "model" together with its schema
        mlflow.tensorflow.log_model(model, "model", signature=signature)

        return {"loss": eval_rmse, "status": STATUS_OK, "model": model}



In [7]:
## For Hyperopt we will create a objective function
def objective(params):
    """Hyperopt objective: train one 3-epoch trial with ``params`` and return its result.

    The data splits are read from the notebook-level variables. MLflow tracking of
    the parameters and results happens inside ``train_model``.
    """
    data_splits = {
        "train_x": train_x,
        "train_y": train_y,
        "valid_x": valid_x,
        "valid_y": valid_y,
        "test_x": test_x,
        "test_y": test_y,
    }
    return train_model(params, epochs=3, **data_splits)

In [9]:
## Search space that Hyperopt samples candidate parameters from
space = dict(
    lr=hp.loguniform("lr", np.log(1e-5), np.log(1e-1)),  ## learning rate, log-uniform between 10^-5 and 10^-1
    momentum=hp.uniform("momentum", 0.0, 1.0),  ## momentum, uniform between 0 and 1
)

In [11]:
## Everything is tracked under the "wine-quality" experiment. The run opened here is the
## parent; train_model opens one nested run per trial underneath it.
mlflow.set_experiment("wine-quality")
with mlflow.start_run():
    ## Trials collects the result dict returned by the objective for every evaluation
    trials = Trials()
    best = fmin(
        fn=objective,  ## called once per evaluation; it delegates to train_model
        space=space,  ## distributions the candidate parameters are sampled from
        algo=tpe.suggest,  ## Tree-structured Parzen Estimator search algorithm
        max_evals=4,  ## number of parameter combinations to try
        trials=trials,
    )

    ## Rank the recorded trial results by loss (ascending) and keep the lowest one
    ranked_results = sorted(trials.results, key=lambda trial_result: trial_result["loss"])
    best_run = ranked_results[0]

    ## Record the best parameters, loss and model on the parent run (artifact name "model")
    mlflow.log_params(best)
    mlflow.log_metric("eval_rmse", best_run["loss"])
    mlflow.tensorflow.log_model(best_run["model"], "model", signature=signature)

    ## Print out the best parameters and the corresponding loss
    print(f"Best parameters: {best}")
    print(f"Best eval rmse: {best_run['loss']}")

Epoch 1/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m28s[0m 629ms/step - loss: 36.4066 - root_mean_squared_error: 6.0338
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 34.3392 - root_mean_squared_error: 5.8597 - val_loss: 31.0453 - val_root_mean_squared_error: 5.5718

Epoch 2/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 25ms/step - loss: 30.4777 - root_mean_squared_error: 5.5207
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 29.4758 - root_mean_squared_error: 5.4289 - val_loss: 26.6874 - val_root_mean_squared_error: 5.1660

Epoch 3/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 28ms/step - loss: 27.0704 - root_mean_squared_error: 5.2029
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 25.5925 - root_mean_squared_error: 5.0585 -




Epoch 1/3                                                                      

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m13s[0m 297ms/step - loss: 36.4898 - root_mean_squared_error: 6.0407
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 35.0951 - root_mean_squared_error: 5.9240 - val_loss: 33.4476 - val_root_mean_squared_error: 5.7834

Epoch 2/3                                                                      

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 25ms/step - loss: 34.6898 - root_mean_squared_error: 5.8898
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 32.7283 - root_mean_squared_error: 5.7208 - val_loss: 31.2572 - val_root_mean_squared_error: 5.5908

Epoch 3/3                                                                      

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 20ms/step - loss: 31.3575 - root_mean_squared_error: 5.5998
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3




Epoch 1/3                                                                      

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m14s[0m 316ms/step - loss: 33.9337 - root_mean_squared_error: 5.8253
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 31.1812 - root_mean_squared_error: 5.5829   
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 31.1328 - root_mean_squared_error: 5.5785 - val_loss: 23.5547 - val_root_mean_squared_error: 4.8533

Epoch 2/3                                                                      

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 28ms/step - loss: 22.5081 - root_mean_squared_error: 4.7443
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 20.4334 - root_mean_squared_error: 4.5174 - val_loss: 14.0678 - val_root_mean_squared_error: 3.7507

Epoch 3/3                                                                      

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━




Epoch 1/3                                                                      

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m15s[0m 343ms/step - loss: 48.1449 - root_mean_squared_error: 6.9387
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 27.9467 - root_mean_squared_error: 5.2065 - val_loss: 8.2158 - val_root_mean_squared_error: 2.8663

Epoch 2/3                                                                      

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 21ms/step - loss: 7.7711 - root_mean_squared_error: 2.7877
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 3.8520 - root_mean_squared_error: 1.9517 - val_loss: 4.2577 - val_root_mean_squared_error: 2.0634

Epoch 3/3                                                                      

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 20ms/step - loss: 4.5615 - root_mean_squared_error: 2.1358
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0




100%|██████████| 4/4 [01:09<00:00, 17.49s/trial, best loss: 1.4679157733917236]




Best parameters: {'lr': 0.00044434548739297376, 'momentum': 0.9884497503434634}
Best eval rmse: 1.4679157733917236


A good practice is to create the mlruns directory inside the folder where we are creating our project.
cd into that folder and then run `mlflow ui` in the terminal.

Why so many experiments?
Because each one uses a different learning rate value and a different momentum value.

After you execute the cell, open the wine-quality experiment in the MLflow UI; a (+) icon will be there. Click it and all the experiments/instances that were created will be listed.
There are 4 different sub-experiments. Why?
Because 4 different combinations of learning rate and momentum were tried (max_evals = 4).

To compare, select all the runs and click Compare.
The parameters giving the lowest eval_rmse are the best parameters.
The model with the best parameters has to be registered.
Go to that model and click Register Model -> register the model with a name of your choice.
This model will then be available in the Models section, where we can add tags and aliases.
Once we have the model, we can load it using pyfunc and test it on our new test data (run predictions).

In [None]:
## Inferencing: check that the logged model accepts a serving payload built from the test features

from mlflow.models import convert_input_example_to_serving_input, validate_serving_input

## Model URI copied from the MLflow server for the best run, in the form 'runs:/<run_id>/model'
model_uri = 'runs:/cca2584b26884c8bba50931ec981a418/model'

## Convert the test features into a serving payload, then validate it against the model
serving_payload = convert_input_example_to_serving_input(test_x)
validate_serving_input(model_uri, serving_payload)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


array([[4.5525556],
       [5.3258586],
       [4.8752823],
       ...,
       [4.625868 ],
       [4.8580575],
       [4.5574207]], dtype=float32)

The output we get here is the predicted quality of the wine.

In [13]:
## Alternative to the cell above: load the logged model as a generic PyFuncModel and predict
import pandas as pd

model_uri = 'runs:/cca2584b26884c8bba50931ec981a418/model'
loaded_model = mlflow.pyfunc.load_model(model_uri=model_uri)

## Wrap the test features in a Pandas DataFrame and predict on it
test_frame = pd.DataFrame(test_x)
loaded_model.predict(test_frame)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


array([[4.5525556],
       [5.3258586],
       [4.8752823],
       ...,
       [4.625868 ],
       [4.8580575],
       [4.5574207]], dtype=float32)

In [14]:
## Register the model in the model registry from code (instead of through the UI);
## the registered model is named "wine-quality"
mlflow.register_model(model_uri=model_uri, name="wine-quality")

Successfully registered model 'wine-quality'.
Created version '1' of model 'wine-quality'.


<ModelVersion: aliases=[], creation_timestamp=1757528686243, current_stage='None', deployment_job_state=None, description=None, last_updated_timestamp=1757528686243, metrics=[<Metric: dataset_digest=None, dataset_name=None, key='eval_rmse', model_id='m-e648477b62cb4ce8b62db57c1ac47bb4', run_id='cca2584b26884c8bba50931ec981a418', step=0, timestamp=1757527073425, value=1.4679157733917236>], model_id='m-e648477b62cb4ce8b62db57c1ac47bb4', name='wine-quality', params={'lr': '0.00044434548739297376', 'momentum': '0.9884497503434634'}, run_id='cca2584b26884c8bba50931ec981a418', run_link=None, source='models:/m-e648477b62cb4ce8b62db57c1ac47bb4', status='READY', status_message=None, tags={}, user_id=None, version=1>

The code above creates version 1 of wine-quality -> it will be accessible in the Models section of MLflow because it is
a registered model.