# compare runs, choose a model, and deploy using REST API

In [1]:
import keras
import numpy as np
import pandas as pd
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

import mlflow
from mlflow.models import infer_signature

2025-01-20 10:11:29.949239: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-20 10:11:29.955254: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-20 10:11:29.969634: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1737346289.992656 2276909 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737346290.000178 2276909 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-20 10:11:30.024231: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

In [2]:
# Load the dataset
data = pd.read_csv(
    "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-white.csv",
    sep=";",
)
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [3]:
# split the data
X, y = data.iloc[:, :-1], data.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

signature = infer_signature(X_train, y_train)



In [4]:
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization
from keras.optimizers import SGD, Adam
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input

In [5]:
def train_model(X_train, y_train, X_test, y_test, params):
    # Define the model
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(params['units'], activation=params['activation']),
        Dense(1, activation='linear')
    ])
    model.compile(optimizer=params['optimizer'], loss='mse', metrics=['mse'])

    # Train the model
    model.fit(X_train, y_train, epochs=params['epochs'], verbose=0)

    # Evaluate the model
    mse = model.evaluate(X_test, y_test, verbose=0)[1]

    # Log parameters and metrics with MLflow
    with mlflow.start_run(nested=True):  # Set nested=True if multiple runs are needed
        mlflow.log_params(params)
        mlflow.log_metric("mse", mse)
        
        # Define input signature for TensorFlow model
        signature = tf.TensorSpec(shape=(None, X_train.shape[1]), dtype=tf.float32)
        mlflow.tensorflow.log_model(model, "model", signature=signature)

    return {"loss": mse, "status": STATUS_OK, "model": model}

In [6]:
def objective(params):
    params["epochs"] = 3
    result = train_model(
        X_train, 
        y_train, 
        X_test, 
        y_test, 
        params
    )
    return result

In [7]:
space = {
    "units": hp.choice("units", [32, 64, 128, 256, 512]),
    "activation": hp.choice("activation", ["relu", "tanh", "sigmoid"]),
    "output_activation": hp.choice("output_activation", ["relu", "tanh", "sigmoid"]),
    "optimizer": hp.choice("optimizer", ["SGD", "adam"]),
    "learning_rate": hp.uniform("learning_rate", 0.0001, 0.1),
    "momentum": hp.uniform("momentum", 0.1, 0.9),
    "loss": hp.choice("loss", ["mean_squared_error"]),
    "batch_size": hp.choice("batch_size", [32, 64, 128]),
    "epochs": 3
}

In [8]:
mlflow.set_experiment("/wine-quality")
with mlflow.start_run():
    trials = Trials()
    best = fmin(fn=objective,
                space = space,
                algo = tpe.suggest,
                max_evals = 3,
                trials=trials
        )

    # fetch the details of the best run
    best_run = sorted(trials.results, key=lambda x: x["loss"])[0]

    # log the best params, loss and model
    mlflow.log_params(best)
    mlflow.log_metric("mse", best_run["loss"])
    mlflow.tensorflow.log_model(best_run["model"], "model", signature=signature)

    print(f"Best params: {best}")
    print(f"Best MSE: {best_run['loss']}")

2025/01/20 10:11:36 INFO mlflow.tracking.fluent: Experiment with name '/wine-quality' does not exist. Creating a new experiment.


  0%|          | 0/3 [00:00<?, ?trial/s, best loss=?]

W0000 00:00:1737346296.516429 2276909 gpu_device.cc:2344] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
job exception: 'TensorSpec' object has no attribute 'inputs'



  0%|          | 0/3 [00:01<?, ?trial/s, best loss=?]


AttributeError: 'TensorSpec' object has no attribute 'inputs'

In [12]:
# End the run after everything is done
# if mlflow.active_run():
#     mlflow.end_run()