In [1]:
import keras
import numpy as np 
import pandas as pd
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from sklearn.model_selection import train_test_split

import mlflow
from mlflow.models import infer_signature

2025-12-28 13:46:52.826125: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  import pkg_resources


In [2]:
# load the data
data = pd.read_csv("https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-red.csv", sep=";")
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [3]:
# Separate features (X) and target (y)
X = data.drop(columns=["quality"], axis=1).values  # All columns except 'quality'
y = data["quality"].values.ravel()  # Target column 'quality' as 1D array

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # 20% test, 80% remaining for training/validation
)

# Split training data further into training and validation sets
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=0.25,
    random_state=42,  # 25% of 80% training → 20% of total for validation
)

# Print shapes to verify splits
print("Training set shape:", X_train.shape)  # Shape of training set
print("Validation set shape:", X_valid.shape)  # Shape of validation set
print("Test set shape:", X_test.shape)  # Shape of test set

Training set shape: (959, 11)
Validation set shape: (320, 11)
Test set shape: (320, 11)


In [4]:
def train_model(params, epochs, X_train, y_train, X_valid, y_valid):
    ## Define the model architecture
    mean = np.mean(X_train, axis=0)
    var = np.var(X_train, axis=0) 
    model = keras.Sequential([
        keras.Input([X_train.shape[1]]),
        keras.layers.Normalization(mean=mean, variance=var),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dense(1)
    ])
    ## Compile the model
    model.compile(
        optimizer=keras.optimizers.SGD(
            learning_rate=params['lr'], 
            momentum=params['momentum']), 
        loss='mean_squared_error', 
        metrics=[keras.metrics.RootMeanSquaredError()]
    )
    ## Train the ANN model with the given parameters with MLflow tracking
    with mlflow.start_run(nested=True):
        model.fit(
            X_train, 
            y_train, 
            validation_data=(X_valid, y_valid), 
            epochs=epochs, 
            batch_size=32, 
            verbose=0
        )
        # Evaluate the model on validation data
        val_loss, val_rmse = model.evaluate(X_valid, y_valid,batch_size=32)
        # Log parameters and metrics to MLflow
        mlflow.log_params(params)
        mlflow.log_metric("val_loss", val_loss)
        mlflow.log_metric("val_RMSE", val_rmse)
        
        # Log the model
        signature = infer_signature(X_train, model.predict(X_train))
        mlflow.keras.log_model(model, name="model", signature=signature)
        
        return {'loss': val_rmse, 'status': STATUS_OK, 'model': model}

In [5]:
def objective(params):
    epochs = 10
    result = train_model(
        params=params, 
        epochs=epochs, 
        X_train=X_train, 
        y_train=y_train, 
        X_valid=X_valid, 
        y_valid=y_valid
    )
    return result

In [6]:
space = {
    'lr': hp.loguniform('lr', np.log(0.0001), np.log(0.1)),
    'momentum': hp.uniform('momentum', 0.0, 0.9)
}

In [7]:
## Set the MLflow experiment 
mlflow.set_experiment("DL-Wine-Quality-Prediction-Hyperopt")
## Start the MLflow run
with mlflow.start_run(run_name="DL-Hyperopt-Optimization"):
    ## Conduct hyperparameter optimization using Hyperopt
    trials = Trials()
    best_result = fmin(
        fn=objective,
        space=space,
        algo=tpe.suggest,
        max_evals=5,
        trials=trials
    )
    ## Fetch details of the best run
    best_run = sorted(trials.results, key=lambda x: x['loss'])[0]
    ## Log the best hyperparameters
    mlflow.log_params(best_result)
    ## Log the best loss and model
    mlflow.log_metric("best_val_RMSE", best_run['loss'])
    signature = infer_signature(X_train, best_run['model'].predict(X_train))
    mlflow.keras.log_model(best_run['model'], name="best_model", signature=signature)
    ## Print the best hyperparameters
    print("Best hyperparameters:", best_result)
    print("Best validation RMSE:", best_run['loss'])

2025/12/28 13:46:56 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/12/28 13:46:56 INFO mlflow.store.db.utils: Updating database tables
2025/12/28 13:46:56 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2025/12/28 13:46:56 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2025/12/28 13:46:56 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2025/12/28 13:46:56 INFO alembic.runtime.migration: Will assume non-transactional DDL.


  0%|          | 0/5 [00:00<?, ?trial/s, best loss=?]

W0000 00:00:1766908917.067066  285980 gpu_device.cc:2342] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


[1m 1/10[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 0.6008 - root_mean_squared_error: 0.7751
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.9560 - root_mean_squared_error: 0.9778 

[1m 1/30[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 32ms/step
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 

[1m 1/10[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 0.5170 - root_mean_squared_error: 0.7191
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.7175 - root_mean_squared_error: 0.8471 

[1m 1/30[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 32ms/step               
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step       

[1m 1/10[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 13ms/step - loss: 0.4202 - root_mean_squared_error: 0.6482
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0

In [10]:
import mlflow.keras

model_name = (
    "final-model"  # The name you used for model registry 'registered_model_name'
)
model_version = "1"  # The best version of your registered model

model_uri = f"models:/{model_name}/{model_version}"
model = mlflow.keras.load_model(model_uri)

y_pred = model.predict(X_test)
print(y_pred)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[[5.4740524]
 [5.2378716]
 [5.728071 ]
 [5.4299517]
 [5.730378 ]
 [5.2789664]
 [5.3519955]
 [4.798749 ]
 [6.049017 ]
 [5.637619 ]
 [6.170869 ]
 [5.027384 ]
 [5.74936  ]
 [5.323105 ]
 [5.5683303]
 [6.1771736]
 [5.3666763]
 [5.6996927]
 [6.773551 ]
 [5.1585517]
 [4.627742 ]
 [5.4207835]
 [5.520452 ]
 [6.2966776]
 [5.3533   ]
 [5.721052 ]
 [6.3340917]
 [5.231279 ]
 [5.4158807]
 [6.471649 ]
 [5.3615427]
 [5.283028 ]
 [6.007335 ]
 [4.9404984]
 [5.675933 ]
 [5.09798  ]
 [6.1822734]
 [5.804502 ]
 [5.4885106]
 [5.8571854]
 [5.536747 ]
 [5.5070853]
 [6.3448954]
 [5.3130145]
 [5.9635377]
 [5.8135386]
 [6.5308776]
 [5.7776403]
 [4.944296 ]
 [5.448472 ]
 [5.1540437]
 [5.4437866]
 [5.4360867]
 [6.579914 ]
 [5.085454 ]
 [5.3479176]
 [6.25021  ]
 [5.5750866]
 [5.82306  ]
 [5.143217 ]
 [5.8515005]
 [6.1049967]
 [5.2070255]
 [5.149163 ]
 [6.3652115]
 [5.494742 ]
 [6.285998 ]
 [5.622909 ]
 [6.314387 ]
 [5.299115 ]
 [6.0299387]
 [5