# CIFAR-10 with TensorFlow, MLflow, and Azure Machine Learning

## 1. Environment and prerequisites

In [1]:
import os
import itertools

import mlflow
import mlflow.tensorflow

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical

from azureml.core import Workspace

# from azure.ai.ml import MLClient
# from azure.ai.ml.entities import Model as AMLModel
# from azure.identity import DefaultAzureCredential

## 2. Connect MLflow to Azure Machine Learning

In [2]:
ws = Workspace.from_config()
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())

In [3]:
ws.get_mlflow_tracking_uri()

'azureml://spaincentral.api.azureml.ms/mlflow/v2.0/subscriptions/90a31f8a-4dc0-4c70-934c-603798f37582/resourceGroups/M311-Ilyas/providers/Microsoft.MachineLearningServices/workspaces/cloud-native-ds?'

In [4]:
# Create or select the experiment that will appear under Jobs in Azure ML Studio
experiment_name = "cifar10_tensorflow_cnn_azureml"
mlflow.set_experiment(experiment_name)
print("Using MLflow experiment: ", experiment_name)

mlflow.tensorflow.autolog(True)

Using MLflow experiment:  cifar10_tensorflow_cnn_azureml


## 3. Load and preprocess CIFAR-10 data

We load the CIFAR-10 dataset from Keras, normalize images to [0, 1],
convert labels to one-hot encoding, and create a validation split from the training set.

In [5]:
# Load and preprocess CIFAR-10
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Flatten labels to 1D vectors
y_train = y_train.flatten()
y_test = y_test.flatten()

# Normalize images
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

num_classes = 10
y_train_cat = to_categorical(y_train, num_classes)
y_test_cat = to_categorical(y_test, num_classes)

# Use a validation split from the training set
val_split = 0.2
num_val = int(len(x_train) * val_split)
x_val, y_val_cat = x_train[:num_val], y_train_cat[:num_val]
x_train_sub, y_train_sub = x_train[num_val:], y_train_cat[num_val:]
y_val = y_train[:num_val]

print("Train subset: ", x_train_sub.shape, y_train_sub.shape)
print("Validation: ", x_val.shape, y_val_cat.shape)
print("Test: ", x_test.shape, y_test_cat.shape)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Train subset:  (40000, 32, 32, 3) (40000, 10)
Validation:  (10000, 32, 32, 3) (10000, 10)
Test:  (10000, 32, 32, 3) (10000, 10)


## 4. Define the CNN model and utility functions

We define a simple CNN model factory and a helper to plot the confusion matrix,
which will be saved and logged as an MLflow artifact.

In [6]:
# Define a simple CNN model factory
def create_model(dropout_rate: float = 0.5) -> tf.keras.Model:
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation="relu", input_shape=(32, 32, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation="relu"),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation="relu"),
        layers.Flatten(),
        layers.Dropout(dropout_rate),
        layers.Dense(64, activation="relu"),
        layers.Dense(num_classes, activation="softmax"),
    ])
    return model

In [7]:
# Utility: plot and save confusion matrix
def plot_confusion_matrix(cm, classes, normalize: bool = False,
                          title: str = "Confusion matrix",
                          cmap=plt.cm.Blues,
                          filename: str = "confusion_matrix.png"):
    """Plot confusion matrix and save to a PNG file."""
    if normalize:
        cm = cm.astype("float") / cm.sum(axis=1, keepdims=True)

    plt.figure(figsize=(6, 6))
    plt.imshow(cm, interpolation="nearest", cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = ".2f" if normalize else "d"
    thresh = cm.max() / 2.0
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(
            j,
            i,
            format(cm[i, j], fmt),
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black",
        )

    plt.ylabel("True label")
    plt.xlabel("Predicted label")
    plt.tight_layout()
    plt.savefig(filename, bbox_inches="tight")
    plt.close()
    return filename

## 5. Hyperparameter loop with MLflow tracking on Azure ML

In [8]:
dropout_rates = [0.3, 0.5]
epochs = 10
batch_size = 64

class_names = [str(i) for i in range(num_classes)]

for dropout_rate in dropout_rates:
    print("=== Starting run for dropout_rate=", dropout_rate)
    # Autologging will automatically create a run when model.fit() is called.
    # We use start_run() explicitly to ensure our custom artifact (the plot)
    # is logged to the same run.
    with mlflow.start_run():
        
        mlflow.log_param("dropout_rate", dropout_rate)

        model = create_model(dropout_rate=dropout_rate)
        model.compile(
            optimizer=tf.keras.optimizers.Adam(),
            loss="categorical_crossentropy",
            metrics=["accuracy"],
        )

        history = model.fit(
            x_train_sub,
            y_train_sub,
            validation_data=(x_val, y_val_cat),
            epochs=epochs,
            batch_size=batch_size,
            verbose=2,
        )

        val_accuracy = float(history.history["val_accuracy"][-1])

        y_val_pred_prob = model.predict(x_val, verbose=0)
        y_val_pred = np.argmax(y_val_pred_prob, axis=1)
        cm = confusion_matrix(y_val, y_val_pred)

        cm_path = plot_confusion_matrix(
            cm,
            classes=class_names,
            title="Validation Confusion Matrix (dropout={})".format(dropout_rate),
            filename=f"confusion_matrix_dropout_{dropout_rate}.png",
        )
        mlflow.log_artifact(cm_path)

        # Evaluate test set (autologging does not do this by default)
        test_loss, test_accuracy = model.evaluate(x_test, y_test_cat, verbose=0)
        mlflow.log_metric("test_loss", float(test_loss))
        mlflow.log_metric("test_accuracy", float(test_accuracy))

        print(
            f"Run finished. val_acc={val_accuracy:.4f}, test_acc={test_accuracy:.4f}")

=== Starting run for dropout_rate= 0.3




Epoch 1/10
625/625 - 24s - loss: 1.6726 - accuracy: 0.3814 - val_loss: 1.4051 - val_accuracy: 0.4796 - 24s/epoch - 38ms/step
Epoch 2/10




625/625 - 21s - loss: 1.3089 - accuracy: 0.5292 - val_loss: 1.2313 - val_accuracy: 0.5532 - 21s/epoch - 34ms/step
Epoch 3/10




625/625 - 18s - loss: 1.1530 - accuracy: 0.5913 - val_loss: 1.0392 - val_accuracy: 0.6438 - 18s/epoch - 28ms/step
Epoch 4/10
625/625 - 18s - loss: 1.0598 - accuracy: 0.6245 - val_loss: 1.0553 - val_accuracy: 0.6289 - 18s/epoch - 28ms/step
Epoch 5/10




625/625 - 19s - loss: 0.9833 - accuracy: 0.6550 - val_loss: 0.9898 - val_accuracy: 0.6563 - 19s/epoch - 31ms/step
Epoch 6/10




625/625 - 20s - loss: 0.9265 - accuracy: 0.6750 - val_loss: 0.8992 - val_accuracy: 0.6873 - 20s/epoch - 31ms/step
Epoch 7/10




625/625 - 20s - loss: 0.8734 - accuracy: 0.6929 - val_loss: 0.8639 - val_accuracy: 0.6978 - 20s/epoch - 32ms/step
Epoch 8/10




625/625 - 21s - loss: 0.8440 - accuracy: 0.7028 - val_loss: 0.8479 - val_accuracy: 0.7032 - 21s/epoch - 33ms/step
Epoch 9/10




625/625 - 21s - loss: 0.8051 - accuracy: 0.7153 - val_loss: 0.8219 - val_accuracy: 0.7157 - 21s/epoch - 33ms/step
Epoch 10/10
625/625 - 20s - loss: 0.7766 - accuracy: 0.7274 - val_loss: 0.8378 - val_accuracy: 0.7083 - 20s/epoch - 32ms/step




INFO:tensorflow:Assets written to: C:\Users\Ilyas\AppData\Local\Temp\tmpsipdhvba\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\Ilyas\AppData\Local\Temp\tmpsipdhvba\model\data\model\assets


Run finished. val_acc=0.7083, test_acc=0.7088


2025/11/16 11:51:20 INFO mlflow.tracking._tracking_service.client: üèÉ View run strong_wing_yn35wyst at: https://spaincentral.api.azureml.ms/mlflow/v2.0/subscriptions/90a31f8a-4dc0-4c70-934c-603798f37582/resourceGroups/M311-Ilyas/providers/Microsoft.MachineLearningServices/workspaces/cloud-native-ds/#/experiments/7faaab65-bc56-45af-b039-46ea3845b6d8/runs/4d86556b-a6ef-45cf-b59e-eafda6e4f8ed.
2025/11/16 11:51:20 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: https://spaincentral.api.azureml.ms/mlflow/v2.0/subscriptions/90a31f8a-4dc0-4c70-934c-603798f37582/resourceGroups/M311-Ilyas/providers/Microsoft.MachineLearningServices/workspaces/cloud-native-ds/#/experiments/7faaab65-bc56-45af-b039-46ea3845b6d8.


=== Starting run for dropout_rate= 0.5




Epoch 1/10
625/625 - 24s - loss: 1.7491 - accuracy: 0.3500 - val_loss: 1.3737 - val_accuracy: 0.4964 - 24s/epoch - 38ms/step
Epoch 2/10




625/625 - 23s - loss: 1.3689 - accuracy: 0.5072 - val_loss: 1.2203 - val_accuracy: 0.5726 - 23s/epoch - 37ms/step
Epoch 3/10




625/625 - 20s - loss: 1.2384 - accuracy: 0.5571 - val_loss: 1.0888 - val_accuracy: 0.6158 - 20s/epoch - 32ms/step
Epoch 4/10
625/625 - 20s - loss: 1.1539 - accuracy: 0.5913 - val_loss: 1.0985 - val_accuracy: 0.6087 - 20s/epoch - 33ms/step
Epoch 5/10




625/625 - 21s - loss: 1.0906 - accuracy: 0.6141 - val_loss: 1.0005 - val_accuracy: 0.6461 - 21s/epoch - 33ms/step
Epoch 6/10




625/625 - 21s - loss: 1.0331 - accuracy: 0.6369 - val_loss: 0.9708 - val_accuracy: 0.6636 - 21s/epoch - 34ms/step
Epoch 7/10




625/625 - 20s - loss: 0.9877 - accuracy: 0.6513 - val_loss: 0.9020 - val_accuracy: 0.6871 - 20s/epoch - 32ms/step
Epoch 8/10




625/625 - 20s - loss: 0.9479 - accuracy: 0.6640 - val_loss: 0.8979 - val_accuracy: 0.6899 - 20s/epoch - 32ms/step
Epoch 9/10




625/625 - 19s - loss: 0.9254 - accuracy: 0.6740 - val_loss: 0.8723 - val_accuracy: 0.6977 - 19s/epoch - 30ms/step
Epoch 10/10
625/625 - 17s - loss: 0.8913 - accuracy: 0.6855 - val_loss: 0.9200 - val_accuracy: 0.6765 - 17s/epoch - 28ms/step




INFO:tensorflow:Assets written to: C:\Users\Ilyas\AppData\Local\Temp\tmppw_401ct\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\Ilyas\AppData\Local\Temp\tmppw_401ct\model\data\model\assets


Run finished. val_acc=0.6765, test_acc=0.6685


2025/11/16 11:55:11 INFO mlflow.tracking._tracking_service.client: üèÉ View run gray_nut_t5scwdr6 at: https://spaincentral.api.azureml.ms/mlflow/v2.0/subscriptions/90a31f8a-4dc0-4c70-934c-603798f37582/resourceGroups/M311-Ilyas/providers/Microsoft.MachineLearningServices/workspaces/cloud-native-ds/#/experiments/7faaab65-bc56-45af-b039-46ea3845b6d8/runs/adf2229d-f56f-4e27-a292-b8d74528c2fc.
2025/11/16 11:55:11 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: https://spaincentral.api.azureml.ms/mlflow/v2.0/subscriptions/90a31f8a-4dc0-4c70-934c-603798f37582/resourceGroups/M311-Ilyas/providers/Microsoft.MachineLearningServices/workspaces/cloud-native-ds/#/experiments/7faaab65-bc56-45af-b039-46ea3845b6d8.


## 6. Load a model from MLflow and evaluate it

In [9]:
best_run_id = "4d86556b-a6ef-45cf-b59e-eafda6e4f8ed"

model_uri = f"runs:/{best_run_id}/model"
print("Loading model from URI: ", model_uri)
loaded_model = mlflow.tensorflow.load_model(model_uri)

test_loss, test_accuracy = loaded_model.evaluate(x_test, y_test_cat, verbose=0)
print(f"Loaded model - test loss: {test_loss:.4f}")
print(f"Loaded model - test accuracy: {test_accuracy:.4f}")

Loading model from URI:  runs:/4d86556b-a6ef-45cf-b59e-eafda6e4f8ed/model


  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 12/12 [00:00<00:00, 63.50it/s] 


Loaded model - test loss: 0.8428
Loaded model - test accuracy: 0.7088


## Register the best CIFAR-10 model in Azure ML

In [10]:
from azureml.core import Experiment
experiment = Experiment(ws, experiment_name)

In [11]:
run_id = best_run_id
run = [r for r in experiment.get_runs() if r.id == run_id][0]

In [14]:
model = run.register_model(model_name = 'cifar10_cnn', model_path = 'model')

## Deploying locally as a webservice

In [17]:
from azureml.core.webservice import LocalWebservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core.model import Model

model = Model(ws, "cifar10_cnn")

env = Environment.from_conda_specification(
    name="mlflow-env",
    file_path="./conda.yaml"
)
env.register(ws)

{
    "assetId": "azureml://locations/spaincentral/workspaces/5bdc0051-63da-45cc-866f-3ab5f4910583/environments/mlflow-env/versions/2",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04:20250505.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "mlflow-env",
    "pyt

In [18]:
inference_config = InferenceConfig(
    source_directory=".",
    entry_script="score.py",
    environment=env
)

deployment_config = LocalWebservice.deploy_configuration(port=6789)

service = Model.deploy(
    workspace = ws,
    name = 'cifar10-prediction-service',
    models = [model],
    inference_config = inference_config,
    deployment_config = deployment_config,
    overwrite=True,
)

To leverage new model deployment capabilities, AzureML recommends using CLI/SDK v2 to deploy models as online endpoint, 
please refer to respective documentations 
https://docs.microsoft.com/azure/machine-learning/how-to-deploy-managed-online-endpoints /
https://docs.microsoft.com/azure/machine-learning/how-to-attach-kubernetes-anywhere 
For more information on migration, see https://aka.ms/acimoemigration 
  service = Model.deploy(


Downloading model cifar10_cnn:1 to C:\Users\Ilyas\AppData\Local\Temp\azureml_87komfow\cifar10_cnn\1
Generating Docker build context.
The run ID for the image build on compute is imgbldrun_3bfcd7a
Additional logs for the run: https://ml.azure.com/experiments/id/prepare_image/runs/imgbldrun_3bfcd7a?wsid=/subscriptions/90a31f8a-4dc0-4c70-934c-603798f37582/resourcegroups/M311-Ilyas/workspaces/cloud-native-ds&tid=39626157-a047-4689-87a2-6fa645cb5cb7
2025-11-16T11:21:40: Logging into Docker registry: 5bdc005163da45cc866f3ab5f4910583.azurecr.io

2025-11-16T11:21:41: https://docs.docker.com/go/credential-store/

2025-11-16T11:21:41: Login Succeeded


2025-11-16T11:21:41: Running: ['docker', 'build', '-f', 'azureml-environment-setup/Dockerfile', '.', '-t', '5bdc005163da45cc866f3ab5f4910583.azurecr.io/azureml/azureml_bfbf439b0b7136108b9b4865a82572a3', '-t', '5bdc005163da45cc866f3ab5f4910583.azurecr.io/azureml/azureml_bfbf439b0b7136108b9b4865a82572a3:1']
2025-11-16T11:21:41: #0 building with "def

In [19]:
service.wait_for_deployment(show_output=True)
print(f"Scoring URI is : {service.scoring_uri}")

Checking container health...
Local webservice is running at http://localhost:6789
Scoring URI is : http://localhost:6789/score


### Testing the local web service

In [20]:
import json
import numpy as np
import requests
from tensorflow.keras.datasets import cifar10

(_, _), (x_test_sample, y_test_sample) = cifar10.load_data()
x_sample = x_test_sample[0:1].astype("float32") / 255.0

payload = {"input": x_sample.tolist()}

print("Sending request to:", service.scoring_uri)
response = requests.post(service.scoring_uri, json=payload)
print("Status code:", response.status_code)
print("Raw response:", response.text)

try:
    result = response.json()
    print("Parsed JSON response:")
    print(json.dumps(result, indent=2))
except Exception as e:
    print("Could not parse JSON response:", e)

Sending request to: http://localhost:6789/score
Status code: 200
Raw response: "{\"predicted_classes\": [5], \"probabilities\": [[0.006330316420644522, 0.004652707371860743, 0.0034306782763451338, 0.2694395184516907, 0.00023940345272421837, 0.6903281211853027, 0.007022896781563759, 0.0017177676782011986, 0.015083890408277512, 0.0017545808805152774]]}"
Parsed JSON response:
"{\"predicted_classes\": [5], \"probabilities\": [[0.006330316420644522, 0.004652707371860743, 0.0034306782763451338, 0.2694395184516907, 0.00023940345272421837, 0.6903281211853027, 0.007022896781563759, 0.0017177676782011986, 0.015083890408277512, 0.0017545808805152774]]}"
Status code: 200
Raw response: "{\"predicted_classes\": [5], \"probabilities\": [[0.006330316420644522, 0.004652707371860743, 0.0034306782763451338, 0.2694395184516907, 0.00023940345272421837, 0.6903281211853027, 0.007022896781563759, 0.0017177676782011986, 0.015083890408277512, 0.0017545808805152774]]}"
Parsed JSON response:
"{\"predicted_classes

## Deploying on ACI

In [38]:
from azureml.core import Workspace
ws = Workspace.from_config()

from azureml.core.model import Model
model = Model(ws, "cifar10_cnn")

from azureml.core import Environment
env = Environment.get(workspace = ws, name = 'mlflow-env')

In [39]:
from azureml.core.model import InferenceConfig
inference_config = InferenceConfig(
    environment=env,
    source_directory=".",
    entry_script="./score.py")

In [None]:
from azureml.core.webservice import AciWebservice
deployment_config = AciWebservice.deploy_configuration(cpu_cores=0.1, memory_gb=0.5, auth_enabled=False)

In [None]:
service = Model.deploy(
    workspace = ws,
    name = 'cifar10-prediction-service-aci',
    models = [model],
    inference_config = inference_config,
    deployment_config = deployment_config,
    overwrite=True)
    
service.wait_for_deployment(show_output=True)

To leverage new model deployment capabilities, AzureML recommends using CLI/SDK v2 to deploy models as online endpoint, 
please refer to respective documentations 
https://docs.microsoft.com/azure/machine-learning/how-to-deploy-managed-online-endpoints /
https://docs.microsoft.com/azure/machine-learning/how-to-attach-kubernetes-anywhere 
For more information on migration, see https://aka.ms/acimoemigration 
  service = Model.deploy(


Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
RunningRunning
2025-11-16 13:26:36+01:00 Registering the environment.
2025-11-16 13:26:36+01:00 Use the existing image.
2025-11-16 13:26:37+01:00 Generating deployment configuration.
2025-11-16 13:26:42+01:00 Submitting deployment to compute.
2025-11-16 13:26:49+01:00 Checking the status of deployment cifar10-prediction-service-aci.
2025-11-16 13:26:36+01:00 Registering the environment.
2025-11-16 13:26:36+01:00 Use the existing image.
2025-11-16 13:26:37+01:00 Generating deployment configuration.
2025-11-16 13:26:42+01:00 Submitting deployment to compute.
2025-11-16 13:26:49+01:00 Checking the status of deployment cifar10-prediction-service-aci..
2025-11-16 13:28:48+01:00 Checking the status of inference endpoint cifar10-prediction-service-aci..
2025-11-16 13:28:48+01:00 Checking the status of inference endp

In [42]:
print(f"ACI Scoring URI is : {service.scoring_uri}")

ACI Scoring URI is : http://1d9f7c07-d8ae-40c0-ba42-2ccb8f6e2e8b.spaincentral.azurecontainer.io/score


### I created a Flask app to consume this API. Run `python app.py`