#### Overview

In [36]:
### Run a hyperparameter sweep on a training script
### Compare the results of the runs in the MLflow UI
### Choose the best model and register it
### Deploy the model to a REST API
### Build a container image suitable for deployment to a cloud platform

#### Importing necessary libraries and dataset preparation

In [37]:
import keras
import numpy as np
import pandas as pd
from hyperopt import STATUS_OK, fmin, hp, tpe, Trials
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

import mlflow
from mlflow.models import infer_signature


In [38]:
# Wine-quality dataset, read straight from the mlflow-example repository.
df = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/mlflow/mlflow-example/master/wine-quality.csv"
)

In [39]:
# Show the loaded frame; the recorded output below elides the middle rows.
df

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.00100,3.00,0.45,8.8,6
1,6.3,0.30,0.34,1.6,0.049,14.0,132.0,0.99400,3.30,0.49,9.5,6
2,8.1,0.28,0.40,6.9,0.050,30.0,97.0,0.99510,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...
4893,6.2,0.21,0.29,1.6,0.039,24.0,92.0,0.99114,3.27,0.50,11.2,6
4894,6.6,0.32,0.36,8.0,0.047,57.0,168.0,0.99490,3.15,0.46,9.6,5
4895,6.5,0.24,0.19,1.2,0.041,30.0,111.0,0.99254,2.99,0.46,9.4,6
4896,5.5,0.29,0.30,1.1,0.022,20.0,110.0,0.98869,3.34,0.38,12.8,7


In [40]:
## Hold out 25% of the rows as the test set; the fixed seed keeps the split repeatable
train, test = train_test_split(
    df,
    test_size=0.25,
    random_state=40,
)

In [41]:
# Show the training partition produced by the split above.
train

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
2213,7.4,0.17,0.29,1.4,0.047,23.0,107.0,0.99390,3.52,0.65,10.4,6
3375,7.8,0.22,0.26,9.0,0.047,38.0,132.0,0.99700,3.25,0.53,10.2,6
4596,6.7,0.53,0.29,4.3,0.069,20.0,114.0,0.99014,3.22,0.59,13.4,6
4360,7.3,0.28,0.35,1.6,0.054,31.0,148.0,0.99178,3.18,0.47,10.7,5
269,5.4,0.59,0.07,7.0,0.045,36.0,147.0,0.99440,3.34,0.57,9.7,6
...,...,...,...,...,...,...,...,...,...,...,...,...
3603,7.0,0.32,0.27,7.1,0.027,37.0,122.0,0.99165,3.15,0.60,12.6,7
4722,6.0,0.25,0.28,7.7,0.053,37.0,132.0,0.99489,3.06,0.50,9.4,6
3340,6.2,0.30,0.31,1.2,0.048,19.0,125.0,0.98999,3.32,0.54,12.6,6
3064,7.4,0.21,0.80,12.3,0.038,77.0,183.0,0.99778,2.95,0.48,9.0,5


In [42]:
# Show the held-out test partition produced by the split above.
test

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
1876,8.0,0.190,0.30,2.00,0.053,48.0,140.0,0.99400,3.18,0.49,9.60,6
147,6.4,0.595,0.14,5.20,0.058,15.0,97.0,0.99510,3.38,0.36,9.00,4
3121,7.6,0.230,0.34,1.60,0.043,24.0,129.0,0.99305,3.12,0.70,10.40,5
4778,5.8,0.315,0.19,19.40,0.031,28.0,106.0,0.99704,2.97,0.40,10.55,6
4207,6.1,0.250,0.30,1.20,0.036,42.0,107.0,0.99100,3.34,0.56,10.80,7
...,...,...,...,...,...,...,...,...,...,...,...,...
2530,5.8,0.300,0.12,1.60,0.036,57.0,163.0,0.99239,3.38,0.59,10.50,6
2335,8.3,0.270,0.34,10.20,0.048,50.0,118.0,0.99716,3.18,0.51,10.30,5
3653,6.3,0.320,0.17,17.75,0.060,51.0,190.0,0.99916,3.13,0.48,8.80,6
4232,6.6,0.390,0.28,9.20,0.036,10.0,92.0,0.99206,3.07,0.35,12.10,6


In [43]:
# Training partition: every column except the target becomes a feature.
X_train = train.drop(columns=["quality"]).to_numpy()
## Selecting the single column as a Series yields the target as a 1D array
y_train = train["quality"].to_numpy()

# Test partition, prepared the same way.
X_test = test.drop(columns=["quality"]).to_numpy()
y_test = test["quality"].to_numpy()

In [44]:
## Splitting the training partition into train and validation sets.
## The inputs are taken from `train` rather than from X_train/y_train, so this
## cell no longer consumes its own output: re-running it gives the same split
## instead of shrinking the training set by another 20% each time.
X_train,X_valid,y_train,y_valid=train_test_split(
    train.drop(['quality'],axis=1).values,
    train[['quality']].values.ravel(),
    test_size=0.2,
    random_state=40,
)

In [45]:
# Model signature inferred from the training features (input) and training targets (output).
signature = infer_signature(model_input=X_train, model_output=y_train)

#### Training model

In [46]:
import keras.layers
import keras.metrics
import keras.optimizers


def train_model(
    params,
    epochs,
    X_train,
    y_train,
    X_test,
    y_test,
    X_valid,
    y_valid,
):
    """Fit a small regression network and record it as a nested MLflow run.

    Args:
        params: Mapping providing "lr" and "momentum" for the SGD optimizer.
        epochs: Number of epochs passed to ``model.fit``.
        X_train, y_train: Data the network is fitted on. ``X_train`` also
            supplies the column-wise mean and variance for the Normalization layer.
        X_test, y_test: Accepted but not used inside this function.
        X_valid, y_valid: Validation data used during fitting and for the
            final evaluation.

    Returns:
        dict with "loss" (RMSE on the validation data), "status"
        (``STATUS_OK``) and "model" (the fitted Keras model).

    Note:
        Reads the module-level ``signature`` when logging the model.
    """
    # Column-wise statistics that feed the Normalization layer.
    feature_mean = np.mean(X_train, axis=0)
    feature_var = np.var(X_train, axis=0)
    n_features = X_train.shape[1]

    ## Model architecture: normalize -> 64-unit ReLU hidden layer -> single output
    layer_stack = [
        keras.Input([n_features]),
        keras.layers.Normalization(mean=feature_mean, variance=feature_var),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(1),
    ]
    network = keras.Sequential(layer_stack)

    ## Compile with SGD configured from the sampled hyperparameters
    sgd = keras.optimizers.SGD(
        learning_rate=params["lr"],
        momentum=params["momentum"],
    )
    rmse_metric = keras.metrics.RootMeanSquaredError()
    network.compile(
        optimizer=sgd,
        loss="mean_squared_error",
        metrics=[rmse_metric],
    )

    ## Train and track this trial as a child run of the active MLflow run
    with mlflow.start_run(nested=True):
        network.fit(
            X_train,
            y_train,
            validation_data=(X_valid, y_valid),
            epochs=epochs,
            batch_size=64,
        )

        # evaluate() returns [loss, rmse]; index 1 is the RMSE metric.
        eval_rmse = network.evaluate(X_valid, y_valid, batch_size=64)[1]

        ## Log the parameters and the result
        mlflow.log_params(params)
        mlflow.log_metric("eval_rmse", eval_rmse)

        ## Log the model
        mlflow.tensorflow.log_model(network, "model_1", signature=signature)

        outcome = {
            "loss": eval_rmse,
            "status": STATUS_OK,
            "model": network,
        }
        return outcome


In [47]:
def objective(params):
    """Hyperopt objective: train one model for the sampled hyperparameters.

    Forwards ``params`` to ``train_model`` with a fixed three-epoch budget and
    the notebook-level train/test/validation arrays.

    Returns:
        The result dictionary produced by ``train_model``.
    """
    datasets = dict(
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
        X_valid=X_valid,
        y_valid=y_valid,
    )
    return train_model(params, epochs=3, **datasets)

In [48]:
# Search space: learning rate sampled log-uniformly between 1e-5 and 1e-1,
# momentum sampled uniformly between 0 and 1.
space = dict(
    lr=hp.loguniform("lr", np.log(1e-5), np.log(1e-1)),
    momentum=hp.uniform("momentum", 0, 1),
)

In [49]:
mlflow.set_experiment("/02_DL_MLFLOW")

with mlflow.start_run():
    ## Run the hyperparameter search; each evaluation's result dict is kept in `trials`
    trials = Trials()
    best = fmin(
        fn=objective,
        space=space,
        algo=tpe.suggest,
        max_evals=3,
        trials=trials,
    )

    ## Pick the recorded result with the lowest loss
    best_run = sorted(
        trials.results,
        key=lambda trial_result: trial_result["loss"],
    )[0]

    ## Log the best parameters, loss and model on the parent run
    mlflow.log_params(best)
    mlflow.log_metric("eval_rmse", best_run["loss"])
    mlflow.tensorflow.log_model(
        best_run["model"],
        "model",
        signature=signature,
    )

    print(f"Best parameters: {best}")
    print(f"Best eval rmse: {best_run['loss']}")


Epoch 1/3                                            

 1/46 [..............................] - ETA: 6s - loss: 38.2860 - root_mean_squared_error: 6.1876
  0%|          | 0/3 [00:00<?, ?trial/s, best loss=?]

2024-11-04 16:37:28.602015: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz



Epoch 2/3                                            

 1/46 [..............................] - ETA: 0s - loss: 2.1218 - root_mean_squared_error: 1.4567

Epoch 3/3                                            

 1/46 [..............................] - ETA: 0s - loss: 1.3569 - root_mean_squared_error: 1.1648

 1/12 [=>............................] - ETA: 0s - loss: 0.7141 - root_mean_squared_error: 0.8450

  0%|          | 0/3 [00:00<?, ?trial/s, best loss=?]INFO:tensorflow:Assets written to: /var/folders/yp/x_6z7w4s4pn9l27krxyysdnh0000gn/T/tmpy4aat5iv/model/data/model/assets
Epoch 1/3                                                                      

 1/46 [..............................] - ETA: 4s - loss: 38.5433 - root_mean_squared_error: 6.2083

Epoch 2/3                                                                      

 1/46 [..............................] - ETA: 0s - loss: 30.4343 - root_mean_squared_error: 5.5167

Epoch 3/3                                                