## Lets Build an ANN Model to predict the Quality of wine:
We will see in this project:
1) Hyperparameter sweep on training set
2) compare the results of the run in Mlflow Ui
3) Choose the best run and register it as a model
4) Deploy the model to Rest API
5) Build a Container Image suitable for deployment to cloud platform

## Before we move ahead lets just import some important libraries


In [1]:
import pandas as pd
import numpy as np 
import mlflow
from mlflow.models import infer_signature


In [6]:
import tensorflow
import keras
import hyperopt
from hyperopt import STATUS_OK,Trials,fmin,hp,tpe
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split



In [10]:
import zipfile

zip_ref=zipfile.ZipFile(r"C:\Users\Saurabh Guru\Downloads\archive.zip")
zip_ref.extractall()
zip_ref.close()

In [14]:
# Now lets import a data
data=pd.read_csv(r"C:\Users\Saurabh Guru\OneDrive\Desktop\Last\Wine_Quality\WineQT.csv")
data[:5]

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,Id
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,2
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,3
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,4


In [16]:
# Lets go to the data and check all the things
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1143 entries, 0 to 1142
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed acidity         1143 non-null   float64
 1   volatile acidity      1143 non-null   float64
 2   citric acid           1143 non-null   float64
 3   residual sugar        1143 non-null   float64
 4   chlorides             1143 non-null   float64
 5   free sulfur dioxide   1143 non-null   float64
 6   total sulfur dioxide  1143 non-null   float64
 7   density               1143 non-null   float64
 8   pH                    1143 non-null   float64
 9   sulphates             1143 non-null   float64
 10  alcohol               1143 non-null   float64
 11  quality               1143 non-null   int64  
 12  Id                    1143 non-null   int64  
dtypes: float64(11), int64(2)
memory usage: 116.2 KB


In [17]:
len(data)

1143

In [19]:
data.isnull().sum()

fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
Id                      0
dtype: int64

In [95]:
X=data.drop("quality",axis=1).values
y=data["quality"].values.ravel()

In [96]:
X[:5]

array([[7.400e+00, 7.000e-01, 0.000e+00, 1.900e+00, 7.600e-02, 1.100e+01,
        3.400e+01, 9.978e-01, 3.510e+00, 5.600e-01, 9.400e+00, 0.000e+00],
       [7.800e+00, 8.800e-01, 0.000e+00, 2.600e+00, 9.800e-02, 2.500e+01,
        6.700e+01, 9.968e-01, 3.200e+00, 6.800e-01, 9.800e+00, 1.000e+00],
       [7.800e+00, 7.600e-01, 4.000e-02, 2.300e+00, 9.200e-02, 1.500e+01,
        5.400e+01, 9.970e-01, 3.260e+00, 6.500e-01, 9.800e+00, 2.000e+00],
       [1.120e+01, 2.800e-01, 5.600e-01, 1.900e+00, 7.500e-02, 1.700e+01,
        6.000e+01, 9.980e-01, 3.160e+00, 5.800e-01, 9.800e+00, 3.000e+00],
       [7.400e+00, 7.000e-01, 0.000e+00, 1.900e+00, 7.600e-02, 1.100e+01,
        3.400e+01, 9.978e-01, 3.510e+00, 5.600e-01, 9.400e+00, 4.000e+00]])

In [97]:
y[:5]

array([5, 5, 5, 6, 5])

In [98]:
# now lets just split the train and test data

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [99]:
# Lets go through the trrain and test data
len(X_train),len(X_test)

(914, 229)

In [100]:
# lets set the signature so we can understand what is our input and output
signature=infer_signature(X_train,y_train)


In [101]:
X_train.shape

(914, 12)

In [102]:
X_train[:5]

array([[8.5000e+00, 2.8000e-01, 5.6000e-01, 1.8000e+00, 9.2000e-02,
        3.5000e+01, 1.0300e+02, 9.9690e-01, 3.3000e+00, 7.5000e-01,
        1.0500e+01, 1.6000e+01],
       [9.9000e+00, 3.2000e-01, 5.6000e-01, 2.0000e+00, 7.3000e-02,
        3.0000e+00, 8.0000e+00, 9.9534e-01, 3.1500e+00, 7.3000e-01,
        1.1400e+01, 1.0760e+03],
       [8.9000e+00, 3.1000e-01, 3.6000e-01, 2.6000e+00, 5.6000e-02,
        1.0000e+01, 3.9000e+01, 9.9562e-01, 3.4000e+00, 6.9000e-01,
        1.1800e+01, 9.0000e+02],
       [6.6000e+00, 8.8000e-01, 4.0000e-02, 2.2000e+00, 6.6000e-02,
        1.2000e+01, 2.0000e+01, 9.9636e-01, 3.5300e+00, 5.6000e-01,
        9.9000e+00, 1.5560e+03],
       [7.6000e+00, 4.2000e-01, 2.5000e-01, 3.9000e+00, 1.0400e-01,
        2.8000e+01, 9.0000e+01, 9.9784e-01, 3.1500e+00, 5.7000e-01,
        9.1000e+00, 1.0570e+03]])

In [103]:
# Lets take the mean
mean=np.mean(X_train,axis=0)
mean

array([8.25809628e+00, 5.31017505e-01, 2.65798687e-01, 2.51909190e+00,
       8.64759300e-02, 1.57067834e+01, 4.58386214e+01, 9.96682724e-01,
       3.31423414e+00, 6.55645514e-01, 1.04391867e+01, 8.15590810e+02])

In [104]:
var=np.var(X_train,axis=0)
var

array([2.87446946e+00, 3.20228159e-02, 3.79762221e-02, 1.70305180e+00,
       2.24776364e-03, 1.04691376e+02, 1.01837139e+03, 3.66090057e-06,
       2.33517657e-02, 2.75742551e-02, 1.15387924e+00, 2.15851380e+05])

In [114]:
import tensorflow as tf
normalizer = tf.keras.layers.Normalization()
normalizer.adapt(X_train)  # X_train is your training data

In [120]:
# lets create a function to build Ann Model
def train_model(params,epochs,X_train,y_train,X_test,y_test):
    # Define model Architecture
    ANN_mod=keras.Sequential([
        # keras.Input([X_train.shape[1]]),
        normalizer,
        keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        keras.layers.Dense(1,activation="sigmoid")
    ])

    # Compile a model
    ANN_mod.compile(loss='mean_squared_error',
                    optimizer=keras.optimizers.SGD(learning_rate=params["lr"],momentum=params["momentum"]),
                    metrics=[keras.metrics.RootMeanSquaredError()])
    
    # train model with lr and momentumn param with MLflow tracking:
    with mlflow.start_run(nested=True):
        ANN_mod.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=epochs,batch_size=64)

        #Evaluate the model
        eval_result=ANN_mod.evaluate(X_test,y_test,batch_size=64)
        eval_rmse=eval_result[1]

        #log the params and result
        mlflow.log_params(params)
        mlflow.log_metric("eval_rmse",eval_rmse)

        #log the model 
        mlflow.tensorflow.log_model(model=ANN_mod, signature=signature)
        return {"loss":eval_rmse,"status":STATUS_OK,"model":ANN_mod}

In [121]:
## Now lets just Create a Objective Function(using hyperopt so we can choose best params for our model)

def objective(params):
    #mlflow will track the parameters and results for each run
    result=train_model(params,epochs=5,X_train=X_train,y_train=y_train,X_test=X_test,y_test=y_test)

    return result

In [122]:
# Lets create a Space so we can decide which will be the best lr and momentum
space={
    "lr":hp.loguniform("lr",np.log(1e-5),np.log(1e-1)),
    "momentum": hp.uniform("momentum",0.0,1.0)
}

In [132]:
# print(best_run)
print(best_run)

{'state': 2, 'tid': 1, 'spec': None, 'result': {'loss': 4.726466655731201, 'status': 'ok', 'model': <Sequential name=sequential_34, built=True>}, 'misc': {'tid': 1, 'cmd': ('domain_attachment', 'FMinIter_Domain'), 'workdir': None, 'idxs': {'lr': [np.int64(1)], 'momentum': [np.int64(1)]}, 'vals': {'lr': [np.float64(0.006320554971193353)], 'momentum': [np.float64(0.7950781948837115)]}}, 'exp_key': None, 'owner': None, 'version': 0, 'book_time': datetime.datetime(2025, 10, 26, 14, 56, 17, 857000), 'refresh_time': datetime.datetime(2025, 10, 26, 14, 56, 24, 509000)}


In [136]:
# Now lets jus set a experiment using mlflow
mlflow.set_experiment("/Wine_Quality")
with mlflow.start_run():

    # Conduct the hyperparamenter research using Hyperopt
    trials=Trials()
    best= fmin(
        fn=objective,
        space=space,
        algo=tpe.suggest,
        max_evals=6,
        trials=trials)

    # lets see the output of our best run
    best_run=sorted(trials.results,key=lambda x:x["loss"])[0]

    #now finally lets log the best parameter ,loss and the model
    mlflow.log_param("best",best)
    mlflow.log_metric("eval_rmse", best_run["loss"])
    mlflow.tensorflow.log_model(best_run["model"],
                                signature=signature)
    
    # print out the best parameters and coressponding loss
    print(f"Best Parameters:{best}")
    # print(f"Best eval_rmse: {best_loss}")



Epoch 1/5                                            

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 304ms/step - loss: 26.3973 - root_mean_squared_error: 5.1378
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 25.3390 - root_mean_squared_error: 5.0338 - val_loss: 23.6462 - val_root_mean_squared_error: 4.8627

Epoch 2/5                                            

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 43ms/step - loss: 23.4260 - root_mean_squared_error: 4.8400
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 23.3591 - root_mean_squared_error: 4.8331 - val_loss: 22.9117 - val_root_mean_squared_error: 4.7866

Epoch 3/5                                            

[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - loss: 23.3231 - root_mean_squared_error: 4.8294
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 22.8859 - root_m

In [None]:
## Lets register this model
mlflow.register_model(model_uri="http://127.0.0.1:5000/#/experiments/199088966918682078/runs/cad5bd03950c41e4848c5321d1b4ce98",name="BEst_ANN")

Successfully registered model 'BEst_ANN'.
Created version '1' of model 'BEst_ANN'.


<ModelVersion: aliases=[], creation_timestamp=1761492221251, current_stage='None', deployment_job_state=None, description=None, last_updated_timestamp=1761492221251, metrics=None, model_id=None, name='BEst_ANN', params=None, run_id=None, run_link=None, source='http://127.0.0.1:5000/#/experiments/199088966918682078/runs/cad5bd03950c41e4848c5321d1b4ce98', status='READY', status_message=None, tags={}, user_id=None, version=1>

: 