## Deep Learning Models with MLflow

In [1]:
import keras
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from mlflow.models import infer_signature
import mlflow
from hyperopt import hp, tpe, Trials, STATUS_OK, fmin

2025-11-26 03:50:20.319145: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-11-26 03:50:20.379697: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-26 03:50:22.335313: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


In [2]:
# Load the red-wine quality CSV into a DataFrame. The path is relative, so the
# notebook must be run from the directory that contains the DATA/ folder.
data = pd.read_csv("DATA/winequality-red.csv")

In [3]:
# Preview the loaded frame as the cell's rich output; the "quality" column is
# the regression target used by the cells below.
data

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


##### Splitting the data into train, test, and validation sets

In [4]:
# Hold out 25% of the rows as the final test set.
train, test = train_test_split(data, test_size=0.25, random_state=42)

# Separate the feature matrix from the "quality" target for each split.
train_x = train.drop(columns=["quality"]).to_numpy()
train_y = train["quality"].to_numpy()

test_x = test.drop(columns=["quality"]).to_numpy()
test_y = test["quality"].to_numpy()

# Carve a validation set (20%) out of the remaining training rows.
train_x, val_x, train_y, val_y = train_test_split(
    train_x,
    train_y,
    test_size=0.20,
    random_state=42,
)

# Model signature (input/output schema) inferred from the final training arrays.
signature = infer_signature(train_x, train_y)

In [5]:
# Number of feature columns; the same value sizes the network's input layer.
train_x.shape[1]

11

In [6]:
# Build the ANN:
def train_model(train_x, train_y, test_x, test_y, val_x, val_y, epochs, params):
    """Train a small regression network and record it in a nested MLflow run.

    Args:
        train_x: Training feature matrix (2-D array, one row per sample).
        train_y: Training targets.
        test_x: Test features. Accepted for interface compatibility; currently
            unused by this function.
        test_y: Test targets. Accepted for interface compatibility; currently
            unused by this function.
        val_x: Validation features, used for per-epoch monitoring and for the
            final score.
        val_y: Validation targets.
        epochs: Number of training epochs.
        params: Mapping with the SGD hyperparameters "lr" and "momentum".

    Returns:
        A hyperopt result dict: "loss" is the validation RMSE, "status" is
        STATUS_OK and "model" is the fitted Keras model.
    """
    # Normalisation statistics are computed from the training split only.
    mean = np.mean(train_x, axis=0)
    var = np.var(train_x, axis=0)

    model = keras.Sequential(
        [
            keras.Input([train_x.shape[1]]),
            keras.layers.Normalization(mean=mean, variance=var),
            keras.layers.Dense(64, activation="relu"),
            keras.layers.Dense(1),
        ]
    )

    model.compile(
        optimizer=keras.optimizers.SGD(
            learning_rate=params["lr"],
            momentum=params["momentum"],
        ),
        loss=keras.losses.mean_squared_error,
        metrics=[keras.metrics.RootMeanSquaredError()],
    )

    # Infer the signature from the arrays actually passed in rather than reading
    # a notebook-level global: the function then works regardless of which cells
    # were run, and the logged schema always matches the training data.
    model_signature = infer_signature(train_x, train_y)

    # nested=True attaches this trial's run to whichever run is currently active.
    with mlflow.start_run(nested=True):
        model.fit(
            train_x,
            train_y,
            validation_data=(val_x, val_y),
            batch_size=64,
            epochs=epochs,
        )

        # evaluate() returns [loss, *metrics]; index 1 is the single compiled
        # metric, RootMeanSquaredError.
        eval_result = model.evaluate(val_x, val_y, batch_size=64)
        eval_rmse = eval_result[1]

        mlflow.log_params(params=params)
        mlflow.log_metric("rmse", eval_rmse)

        mlflow.tensorflow.log_model(
            model=model,
            artifact_path="model_tf-SGD",
            signature=model_signature,
        )

        return {"loss": eval_rmse, "status": STATUS_OK, "model": model}

In [7]:
def objective(params):
    """Hyperopt objective: train on the notebook's splits for 10 epochs.

    Args:
        params: Hyperparameter sample drawn from the search space.

    Returns:
        The result dict produced by ``train_model``.
    """
    return train_model(
        train_x,
        train_y,
        test_x,
        test_y,
        val_x,
        val_y,
        epochs=10,
        params=params,
    )

In [8]:
# Hyperopt search space: learning rate sampled log-uniformly between 1e-5 and
# 1e-1, momentum sampled uniformly between 0 and 1.
spaces = dict(
    lr=hp.loguniform("lr", np.log(1e-5), np.log(1e-1)),
    momentum=hp.uniform("momentum", 0.0, 1.0),
)

In [9]:
mlflow.set_experiment("wine-quality-SGD")

# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling repeat across Restart & Run All (as far as the backend allows).
keras.utils.set_random_seed(42)

# Parent run: each hyperopt trial is logged as a nested child run by the
# objective, and the best trial is summarised on the parent afterwards.
with mlflow.start_run():
    trial = Trials()
    best = fmin(
        fn=objective,
        algo=tpe.suggest,
        space=spaces,
        max_evals=4,
        trials=trial,
        # A fixed generator makes TPE propose the same hyperparameters each run.
        rstate=np.random.default_rng(42),
    )

    # Pick the trial with the lowest loss (first one wins on ties).
    best_run = min(trial.results, key=lambda result: result["loss"])

    mlflow.log_params(best)
    mlflow.log_metric("rmse", best_run["loss"])
    mlflow.tensorflow.log_model(best_run["model"], "model_tf", signature=signature)
    

  return FileStore(store_uri, store_uri)
2025/11/26 03:50:25 INFO mlflow.tracking.fluent: Experiment with name 'wine-quality-SGD' does not exist. Creating a new experiment.


  0%|          | 0/4 [00:00<?, ?trial/s, best loss=?]

2025-11-26 03:50:25.505575: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Epoch 1/10                                           

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m5s[0m 379ms/step - loss: 32.0523 - root_mean_squared_error: 5.6615
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 10.2728 - root_mean_squared_error: 3.2051 - val_loss: 2.5018 - val_root_mean_squared_error: 1.5817

Epoch 2/10                                           

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 17ms/step - loss: 2.1404 - root_mean_squared_error: 1.4630
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.7807 - root_mean_squared_error: 1.3344 - val_loss: 1.4681 - val_root_mean_squared_error: 1.2116

Epoch 3/10                                           

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 21ms/step - loss: 1.2472 - root_mean_squared_error: 1.1168
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.1873 - root_mean_sq




Epoch 1/10                                                                     

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 283ms/step - loss: 27.7969 - root_mean_squared_error: 5.2723
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 29.5032 - root_mean_squared_error: 5.4317 - val_loss: 27.9556 - val_root_mean_squared_error: 5.2873

Epoch 2/10                                                                     

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 16ms/step - loss: 28.0814 - root_mean_squared_error: 5.2992
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 28.4979 - root_mean_squared_error: 5.3383 - val_loss: 26.9609 - val_root_mean_squared_error: 5.1924

Epoch 3/10                                                                     

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 16ms/step - loss: 29.5073 - root_mean_squared_error: 5.4321
[1m15/15[0m [32m




Epoch 1/10                                                                     

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 278ms/step - loss: 38.1439 - root_mean_squared_error: 6.1761
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 20.2462 - root_mean_squared_error: 4.4996 - val_loss: 7.3197 - val_root_mean_squared_error: 2.7055

Epoch 2/10                                                                     

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 16ms/step - loss: 7.3491 - root_mean_squared_error: 2.7109
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4.5148 - root_mean_squared_error: 2.1248 - val_loss: 2.7650 - val_root_mean_squared_error: 1.6628

Epoch 3/10                                                                     

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 15ms/step - loss: 2.8198 - root_mean_squared_error: 1.6792
[1m15/15[0m [32m━━━━━




Epoch 1/10                                                                     

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 276ms/step - loss: 35.0344 - root_mean_squared_error: 5.9190
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 29.9144 - root_mean_squared_error: 5.4694 - val_loss: 22.2116 - val_root_mean_squared_error: 4.7129

Epoch 2/10                                                                     

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 16ms/step - loss: 23.1674 - root_mean_squared_error: 4.8133
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 17.9902 - root_mean_squared_error: 4.2415 - val_loss: 13.2513 - val_root_mean_squared_error: 3.6402

Epoch 3/10                                                                     

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 15ms/step - loss: 14.2923 - root_mean_squared_error: 3.7805
[1m15/15[0m [32m




100%|██████████| 4/4 [00:23<00:00,  5.75s/trial, best loss: 0.8448653817176819]




