In [1]:
# Handle to the workspace
from azure.ai.ml import MLClient

# Authentication package
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential, AzureCliCredential

# Prefer the login cached by the Azure CLI. Requesting a management-plane token
# right away surfaces an expired or missing CLI login in this cell.
credential = AzureCliCredential()
try:
    credential.get_token("https://management.azure.com/.default")
except Exception:
    # The CLI credential could not produce a token: fall back to a browser sign-in.
    credential = InteractiveBrowserCredential()

AzureCliCredential.get_token failed: ERROR: AADSTS70043: The refresh token has expired or is invalid due to sign-in frequency checks by conditional access. The token was issued on 2025-04-01T06:22:55.4629892Z and the maximum allowed lifetime for this request is 14400. Trace ID: 45106e2e-d82f-4110-82b1-0205d53c1700 Correlation ID: 53c5813f-6bd2-4ee0-be52-146ae5291245 Timestamp: 2025-04-03 06:57:52Z
Interactive authentication is needed. Please run:
az login --scope https://management.azure.com/.default



In [2]:
# Build the workspace client from a config file discovered by MLClient.from_config,
# authenticating with the credential chosen above.
ml_client = MLClient.from_config(credential)

Found the config file in: /home/daniel/repos/aml_demo/config.json


In [3]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

In [23]:
# Describe the local cats-and-dogs image folder as a URI-folder data asset.
# This cell only builds the definition; registering it is a separate
# ml_client.data.create_or_update call.
cats_and_dogs_data = Data(
    path="../data/catsanddogs_flat",
    type=AssetTypes.URI_FOLDER,
    name="catsanddogs_flat",
    description="A dataset containing images of cats and dogs",
)

In [24]:
# cats_and_dogs_data = ml_client.data.create_or_update(cats_and_dogs_data)

In [25]:
# Replace the local definition with version 1 of the asset as stored in the workspace.
# NOTE(review): this relies on "catsanddogs_flat" having been registered before;
# the create_or_update call in the preceding cell is commented out.
cats_and_dogs_data = ml_client.data.get("catsanddogs_flat", version=1)

In [8]:
%%writefile ../environments/catsandogsenv.yaml
# Conda specification for the environment referenced by the pipeline steps.
name: catsanddogsenv
channels:
  - conda-forge
dependencies:
  # Pinned conda packages.
  - python=3.8
  - numpy=1.21.2
  - pip=21.2.4
  - scikit-learn=0.24.2
  - scipy=1.7.1
  - pandas>=1.1,<1.2
  - pip:
    # NOTE(review): apart from azureml-mlflow, the pip packages below are unpinned,
    # so the resolved versions can differ between environment builds.
    - tensorflow 
    - keras
    - azureml-mlflow==1.42.0
    - azureml-core
    - azure-core
    - azure-ai-ml
    - pillow

Writing ../environments/catsandogsenv.yaml


In [6]:
from azure.ai.ml.entities import Environment

# Environment definition: the conda file written to ../environments/ layered on
# top of the Azure ML OpenMPI / Ubuntu 20.04 base image.
custom_env_name = "catsanddogsenv"

catsanddogsenv = Environment(
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    conda_file="../environments/catsandogsenv.yaml",
    name=custom_env_name,
    tags={"scikit-learn": "0.24.2"},
    description="Custom environment for Cats and Dogs",
)

In [7]:
# ml_client.environments.create_or_update(catsanddogsenv)


In [8]:
import os
# Make sure the source folders for the prep and train components exist
# before the %%writefile cells write scripts into them.
for component_dir in ("../components/catsanddogsprep", "../components/catsanddogstrain"):
    os.makedirs(component_dir, exist_ok=True)

In [9]:
%%writefile ../components/catsanddogsprep/castanddogsprep.py 
#components for inspecting the cats and dogs dataset

import os
import argparse
import glob 
import mlflow
import numpy as np
import shutil

def main():
    """Split the cats-and-dogs images into per-species training and test folders.

    Command-line arguments:
        --datafolder: folder that is searched recursively for ``.jpg`` images.
        --test_train_ratio: fraction (0..1) of each species' images that goes
            to the test set; the rest goes to the training set. Default 0.25.
        --train_data: folder that receives ``<species>/<image>`` training copies.
        --test_data: folder that receives ``<species>/<image>`` test copies.

    An image belongs to a species when "cat" or "dog" occurs in its path
    relative to ``--datafolder``. The data folder, the number of entries in it
    and the per-species split sizes are logged to MLflow.
    """
    parser = argparse.ArgumentParser(description="Inspect the Cats and Dogs dataset, split it out into test and training sets")
    # The three paths are mandatory: a missing one would otherwise reach the
    # os/glob calls below as None.
    parser.add_argument("--datafolder", type=str, required=True, help="Path to the folder containing the Cats and Dogs dataset")
    parser.add_argument("--test_train_ratio", type=float, required=False, default=0.25)
    parser.add_argument("--train_data", type=str, required=True, help="path to train data")
    parser.add_argument("--test_data", type=str, required=True, help="path to test data")
    args = parser.parse_args()

    if not 0.0 <= args.test_train_ratio <= 1.0:
        parser.error("--test_train_ratio must be between 0 and 1")

    datafolder = args.datafolder
    training_path = args.train_data
    test_path = args.test_data

    # The context manager ends the run even when the split fails part-way through.
    with mlflow.start_run():
        print(f"Data folder: {datafolder}")
        mlflow.log_param("datafolder", datafolder)

        # Counts every directory entry (files and folders alike); the metric
        # keeps its original name "num_files".
        num_entries = len(os.listdir(datafolder))
        print(f"Number of entries in the data folder: {num_entries}")
        mlflow.log_metric("num_files", num_entries)

        images = glob.glob(datafolder + "/**/*.jpg", recursive=True)
        print(f"Number of images in the data folder: {len(images)}")

        print(f"Writing training data to: {training_path}")
        print(f"Writing test data to: {test_path}")

        for species in ["cat", "dog"]:
            # Match on the path relative to the data folder so that "cat"/"dog"
            # in a parent or mount directory name cannot pull in every image.
            species_images = [
                img for img in images
                if species in os.path.relpath(img, datafolder)
            ]
            train_dir = os.path.join(training_path, species)
            test_dir = os.path.join(test_path, species)
            os.makedirs(train_dir, exist_ok=True)
            os.makedirs(test_dir, exist_ok=True)

            n_training = int(len(species_images) * (1 - args.test_train_ratio))
            print(f"Species: {species}, Total images: {len(species_images)}, Training images: {n_training}")

            # One random permutation of the indices gives a disjoint
            # train/test partition without per-image membership scans.
            order = np.random.permutation(len(species_images))
            training_images = [species_images[i] for i in order[:n_training]]
            test_images = [species_images[i] for i in order[n_training:]]

            for img in training_images:
                shutil.copy(img, os.path.join(train_dir, os.path.basename(img)))
            mlflow.log_metric(f"training_{species}", len(training_images))

            for img in test_images:
                shutil.copy(img, os.path.join(test_dir, os.path.basename(img)))
            mlflow.log_metric(f"test_{species}", len(test_images))

        # Report what actually ended up on disk in each output folder.
        print(os.listdir(training_path))
        for species in os.listdir(training_path):
            print(f"Number of {species} images in the training set: {len(os.listdir(os.path.join(training_path, species)))}")
        print(os.listdir(test_path))
        for species in os.listdir(test_path):
            print(f"Number of {species} images in the test set: {len(os.listdir(os.path.join(test_path, species)))}")

if __name__ == "__main__":
    main()

Overwriting ../components/catsanddogsprep/castanddogsprep.py


In [10]:
%%writefile ../components/catsanddogstrain/catsanddogstrain.py
import os
import argparse
import glob 
import mlflow

import keras
from keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from keras.models import Sequential
from keras import layers
from tensorflow.keras.optimizers import Adam

os.makedirs("./outputs", exist_ok=True)

def main():
    """Train a small convolutional classifier on the prepared images and save it.

    Command-line arguments:
        --train_data: folder read with ``flow_from_directory`` for training.
        --test_data: folder read the same way and used as validation data.
        --epochs: number of training epochs. Default 10.
        --model_name: file or folder name of the saved model.
            Default "catsanddogs_model.keras".
        --model_output: folder the model is written into. Default "outputs",
            which matches the previously hard-coded location.
            NOTE(review): presumably a pipeline should pass its model output
            folder here so later steps receive the model — confirm.
    """
    parser = argparse.ArgumentParser(description="Train a classifier on the Cats and Dogs dataset")
    parser.add_argument("--train_data", type=str, required=True, help="path to train data")
    parser.add_argument("--test_data", type=str, required=True, help="path to test data")
    parser.add_argument("--epochs", type=int, required=False, default=10)
    parser.add_argument("--model_name", type=str, required=False, default="catsanddogs_model.keras")
    parser.add_argument("--model_output", type=str, required=False, default="outputs", help="folder to write the trained model to")

    args = parser.parse_args()

    train_data = args.train_data
    test_data = args.test_data

    # The context manager ends the run even when training or saving fails.
    with mlflow.start_run():
        # These count the top-level entries of each folder.
        num_train_files = len(os.listdir(train_data))
        num_test_files = len(os.listdir(test_data))
        print(f"Number of files in train data: {num_train_files}")
        print(f"Number of files in test data: {num_test_files}")

        # Build the data generators.
        image_size = (200, 200)
        batch_size = 32

        datagen = ImageDataGenerator()
        train_generator = datagen.flow_from_directory(train_data, target_size=image_size, batch_size=batch_size, class_mode='binary')
        test_generator = datagen.flow_from_directory(test_data, target_size=image_size, batch_size=batch_size, class_mode='binary')

        # Three conv/pool stages followed by a dense head with a single
        # sigmoid unit. The input shape is derived from image_size so the two
        # cannot drift apart.
        model = Sequential([
            layers.InputLayer(input_shape=image_size + (3,)),
            layers.Conv2D(16, 3, padding='same', activation='relu'),
            layers.MaxPooling2D(),
            layers.Conv2D(32, 3, padding='same', activation='relu'),
            layers.MaxPooling2D(),
            layers.Conv2D(64, 3, padding='same', activation='relu'),
            layers.MaxPooling2D(),
            layers.Flatten(),
            layers.Dense(128, activation='relu'),
            layers.Dense(1, activation='sigmoid')
        ])

        model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

        model.fit(train_generator, validation_data=test_generator, epochs=args.epochs)

        os.makedirs(args.model_output, exist_ok=True)
        model_path = os.path.join(args.model_output, args.model_name)
        model.save(model_path)
        # Report the path that was actually written, not just the bare name.
        print(f"Model saved to {model_path}")

if __name__ == "__main__":
    main()


Overwriting ../components/catsanddogstrain/catsanddogstrain.py


In [11]:
from azure.ai.ml import command, Input, Output

# Data-preparation step: one folder in, a train folder and a test folder out.
inputs = dict(
    datafolder=Input(type="uri_folder"),
    test_train_ratio=Input(type="number", default=0.25),
)
outputs = dict(
    train_data=Output(type="uri_folder", mode="rw_mount"),
    test_data=Output(type="uri_folder", mode="rw_mount"),
)

# The script name below is spelled "castanddogsprep.py" to match the file
# written by the %%writefile cell.
catsanddogsprep = command(
    name="catsanddogsprep",
    environment="catsanddogsenv:1",
    code="../components/catsanddogsprep/",
    command="""python castanddogsprep.py --datafolder ${{inputs.datafolder}} --test_train_ratio ${{inputs.test_train_ratio}} \
            --train_data ${{outputs.train_data}} --test_data ${{outputs.test_data}} \
            """,
    inputs=inputs,
    outputs=outputs,
)

In [12]:
# Training step: consumes the train/test folders and declares a `model` folder output.
inputs = dict(
    train_data=Input(type="uri_folder"),
    test_data=Input(type="uri_folder"),
    epochs=Input(type="number", default=10),
    model_name=Input(type="string", default="catsanddogs_model"),
)
outputs = dict(model=Output(type="uri_folder", mode="rw_mount"))

# NOTE(review): the command line below never references ${{outputs.model}}, and
# catsanddogstrain.py saves under its local outputs/ folder, so the declared
# `model` output may stay empty for downstream steps — confirm.
catsanddogstrain = command(
    name="catsanddogstrain",
    environment="catsanddogsenv:1",
    code="../components/catsanddogstrain/",
    command="""python catsanddogstrain.py --train_data ${{inputs.train_data}} --test_data ${{inputs.test_data}} --epochs ${{inputs.epochs}} --model_name ${{inputs.model_name}}""",
    inputs=inputs,
    outputs=outputs,
)

In [13]:
#import dsl
from azure.ai.ml import dsl

In [15]:
# Two-step pipeline: prepare the data, then train on the prepared folders.
# Runs on the "defaultcompute" compute target.
@dsl.pipeline(
    compute="defaultcompute",
    description="catsanddogs pipeline test name arg",
    name="catsanddogs_pipeline_register_direct",
)
def catsanddogs_pipeline(catsanddogs_data_input, test_train_ratio=0.25, epochs=1):
    # Step 1: split the input folder into train and test folders.
    data_prep_job = catsanddogsprep(datafolder = catsanddogs_data_input, test_train_ratio=test_train_ratio)
    # Step 2: train on the two folders produced by step 1; model_name is not
    # passed, so the component's default applies.
    train_job = catsanddogstrain(train_data=data_prep_job.outputs.train_data, test_data=data_prep_job.outputs.test_data, epochs=epochs)
    # Expose the training step's `model` output as the pipeline output.
    return {"train_job_model": train_job.outputs.model}

In [16]:
# Instantiate the pipeline on the registered data asset with a single training
# epoch; test_train_ratio keeps its default.
pipeline = catsanddogs_pipeline(catsanddogs_data_input=cats_and_dogs_data, epochs =1)


In [137]:
# Submit the pipeline job to the workspace; the returned job is shown as the cell output.
ml_client.jobs.create_or_update(pipeline)

Uploading catsanddogstrain (0.0 MBs): 100%|██████████| 2471/2471 [00:00<00:00, 74340.63it/s]


pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored
pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored
pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored


Experiment,Name,Type,Status,Details Page
notebooks,strong_cup_wmf496hgv3,pipeline,NotStarted,Link to Azure Machine Learning studio


In [17]:
# Add a registration step to the pipeline: create the folder that will hold
# its script before the %%writefile cell writes into it.
os.makedirs("../components/catsanddogsregister", exist_ok=True)

In [19]:
%%writefile ../components/catsanddogsregister/catsanddogsregister.py
import os
import argparse
import mlflow
from keras.models import load_model
import shutil


def main():
    """Load the trained Keras model and save it in MLflow format under the output folder.

    Command-line arguments:
        --model: path the trained model is loaded from with ``load_model``.
        --model_name: name of the model (only printed here).
        --model_description: description of the model (only printed here).
        --output_path: folder that receives the model; it is written to the
            ``model`` subfolder so that the target directory never pre-exists.
    """
    parser = argparse.ArgumentParser(description="Register the Cats and Dogs model")
    parser.add_argument("--model", type=str, required=True, help="path to the model")
    parser.add_argument("--model_name", type=str, help="name of the model")
    parser.add_argument("--model_description", type=str, help="description of the model")
    parser.add_argument("--output_path", type=str, required=True, help="path to save the model")
    # Parse before starting the run so a bad command line does not leave an open run.
    args = parser.parse_args()
    model_path = args.model
    model_name = args.model_name
    model_description = args.model_description

    print(f"Model path: {model_path}")
    print(f"These are the files found in the model path: {os.listdir(model_path)}")

    print(f"Model name: {model_name}")
    print(f"Model description: {model_description}")
    print(f"Output path: {args.output_path}")

    # The context manager ends the run even when loading or saving fails.
    with mlflow.start_run():
        keras_model = load_model(model_path)
        # --output_path is a filesystem location, so the model is saved there
        # directly; log_model's second argument is a run-relative artifact
        # path and would not populate the output folder.
        saved_model_dir = os.path.join(args.output_path, "model")
        mlflow.keras.save_model(keras_model, saved_model_dir)
        print(f"Model written to: {saved_model_dir}")

    

if __name__ == "__main__":
    main()

Overwriting ../components/catsanddogsregister/catsanddogsregister.py


In [26]:
# Registration step: takes the trained model folder plus a name and description,
# and declares a custom_model output.
inputs = dict(
    model=Input(type="uri_folder"),
    model_name=Input(type="string"),
    model_description=Input(type="string"),
)
outputs = dict(output_model=Output(type="custom_model"))

# Uses the same catsanddogsenv:1 environment reference as the other two steps.
# The string inputs are substituted into the command line without quoting, so a
# value containing spaces has to bring its own quotes.
catsanddogsregister = command(
    name="catsanddogsregister",
    environment="catsanddogsenv:1",
    code="../components/catsanddogsregister/",
    command="""python catsanddogsregister.py --model ${{inputs.model}} --model_name ${{inputs.model_name}} --model_description ${{inputs.model_description}} --output_path ${{outputs.output_model}}""",
    inputs=inputs,
    outputs=outputs,
)

In [27]:
# Three-step pipeline: prepare the data, train, then hand the trained model to
# the registration step. Runs on the "defaultcompute" compute target.
@dsl.pipeline(
    compute="defaultcompute",
    description="catsanddogs pipeline correct split with model registration",
    name="catsanddogs_pipeline_with_model_registration"
)
# The default model_description carries its own single quotes because the
# register command substitutes it into the command line unquoted.
def catsanddogs_pipeline_with_model_registration(catsanddogs_data_input, test_train_ratio=0.25, epochs=1, model_name="catsanddogsmodel", model_description="'A model to classify cats and dogs'"):
    # Step 1: split the input folder into train and test folders.
    data_prep_job = catsanddogsprep(datafolder = catsanddogs_data_input, test_train_ratio=test_train_ratio)
    # Step 2: train on the prepared folders; model_name is not forwarded to this
    # step, so the train component's own default applies.
    train_job = catsanddogstrain(train_data=data_prep_job.outputs.train_data, test_data=data_prep_job.outputs.test_data, epochs=epochs)
    # Step 3: pass the training step's `model` output to the registration step.
    register_job = catsanddogsregister(model=train_job.outputs.model, model_name=model_name, model_description=model_description)
    # return {"data_prep_job_train_data": data_prep_job.outputs.train_data, "data_prep_job_test_data": data_prep_job.outputs.test_data, "registered_model": register_job.outputs.output_model}
    # Only the registration step's output is exposed as a pipeline output.
    return {"registered_model": register_job.outputs.output_model}


In [28]:
# Instantiate the three-step pipeline with all defaults except the data input,
# then submit it to the workspace.
catsanddogs_pipeline_with_model_registration_instance = catsanddogs_pipeline_with_model_registration(catsanddogs_data_input=cats_and_dogs_data)
ml_client.jobs.create_or_update(catsanddogs_pipeline_with_model_registration_instance)

pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored
pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored
pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored
pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.CustomModelJobOutput'> and will be ignored


Experiment,Name,Type,Status,Details Page
notebooks,busy_cup_90y9ntb449,pipeline,NotStarted,Link to Azure Machine Learning studio


In [461]:
# Exploratory lookup of the azureml.core Model.register signature.
# NOTE(review): `workspace` is imported but not used in this cell, and nothing
# else in the visible notebook uses azureml.core — candidate for removal.
from azureml.core import Model, workspace
help(Model.register)

Help on function register in module azureml.core.model:

register(workspace, model_path, model_name, tags=None, properties=None, description=None, datasets=None, model_framework=None, model_framework_version=None, child_paths=None, sample_input_dataset=None, sample_output_dataset=None, resource_configuration=None)
    Register a model with the provided workspace.
    
    .. remarks::
    
        In addition to the content of the model file itself, a registered model also stores model metadata,
        including model description, tags, and framework information, that is useful when managing and
        deploying the model in your workspace. For example, with tags you can categorize your models and
        apply filters when listing models in your workspace.
    
        The following sample shows how to register a model specifying tags and a description.
    
        .. code-block:: python
    
            from azureml.core.model import Model
    
            model = Model.register(m