In [1]:
# Handle to the workspace
from azure.ai.ml import MLClient

# Authentication package
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential, AzureCliCredential

# Prefer the Azure CLI login and probe it once, so a missing or expired login
# is detected here rather than on the first workspace call.
try:
    credential = AzureCliCredential()
    # Requesting a management-plane token verifies the credential really works.
    credential.get_token("https://management.azure.com/.default")
except Exception:
    # Fall back to an interactive browser login when the Azure CLI credential
    # cannot produce a token.
    credential = InteractiveBrowserCredential()

In [2]:
# Build the workspace client from the config.json that the SDK discovers on disk.
ml_client = MLClient.from_config(credential=credential)

Found the config file in: /home/daniel/repos/aml2/config.json


In [3]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

In [4]:
# Describe the local cats-and-dogs image folder as a uri_folder data asset.
cats_and_dogs_data = Data(
    name="catsanddogs_flat",
    path="../data/catsanddogs_flat",
    type=AssetTypes.URI_FOLDER,
    description="A dataset containing images of cats and dogs",
)

In [5]:
# One-time step: uncomment to upload and register the dataset defined above.
# cats_and_dogs_data = ml_client.data.create_or_update(cats_and_dogs_data)

In [6]:
# Fetch the already-registered data asset. The SDK documents `version` as a
# string, so pass "1" rather than the integer 1.
cats_and_dogs_data = ml_client.data.get(name="catsanddogs_flat", version="1")

In [8]:
%%writefile ../environments/catsandogsenv.yaml
# Conda specification for the "catsanddogsenv" environment.
# NOTE(review): the file name is spelled "catsandogsenv" (single "d") while the
# environment name below is "catsanddogsenv"; the Environment definition in this
# notebook points at the same file name, so the two must be renamed together.
name: catsanddogsenv
channels:
  - conda-forge
dependencies:
  # Conda-managed packages, pinned to exact versions (pandas to a range).
  - python=3.8
  - numpy=1.21.2
  - pip=21.2.4
  - scikit-learn=0.24.2
  - scipy=1.7.1
  - pandas>=1.1,<1.2
  - pip:
    # pip-installed packages; only azureml-mlflow carries a version pin, so the
    # others resolve to whatever is current when the image is built.
    - tensorflow 
    - keras
    - azureml-mlflow==1.42.0
    - azureml-core
    - azure-core
    - azure-ai-ml
    - pillow

Writing ../environments/catsandogsenv.yaml


In [9]:
from azure.ai.ml.entities import Environment

# Custom environment: the conda specification written to ../environments,
# layered on top of an Azure ML OpenMPI / Ubuntu 20.04 base image.
custom_env_name = "catsanddogsenv"

catsanddogsenv = Environment(
    name=custom_env_name,
    description="Custom environment for Cats and Dogs",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    conda_file="../environments/catsandogsenv.yaml",
    tags={"scikit-learn": "0.24.2"},
)

In [10]:
# Submit the environment definition to the workspace; the returned Environment
# entity is displayed as the cell output.
ml_client.environments.create_or_update(catsanddogsenv)


Environment({'arm_type': 'environment_version', 'latest_version': None, 'image': 'mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest', 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'catsanddogsenv', 'description': 'Custom environment for Cats and Dogs', 'tags': {'scikit-learn': '0.24.2'}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': False, 'id': '/subscriptions/11f51dee-57cd-4d47-b542-8e244706e163/resourceGroups/sbx-dondorp/providers/Microsoft.MachineLearningServices/workspaces/amlsbxdondorp/environments/catsanddogsenv/versions/5', 'Resource__source_path': '', 'base_path': '/home/daniel/repos/aml2/notebooks', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f5fc17e9ba0>, 'serialize': <msrest.serialization.Serializer object at 0x7f5ffc140e20>, 'version': '5', 'conda_file': {'channels': ['conda-forge'], 'dependencies': ['python=3.8', 'numpy=1.21.2', 'pip=21.2

In [11]:
import os
# Create the source folders for the prep and train components (no-op if present).
for component_name in ("catsanddogsprep", "catsanddogstrain"):
    os.makedirs(f"../components/{component_name}", exist_ok=True)

In [12]:
%%writefile ../components/catsanddogsprep/castanddogsprep.py 
#components for inspecting the cats and dogs dataset

import os
import argparse
import glob 
import mlflow
import numpy as np
import shutil

def _split_species(species_images, test_train_ratio, rng):
    """Shuffle one species' image paths and cut them into (training, test) lists."""
    n_training = int(len(species_images) * (1 - test_train_ratio))
    shuffled = [species_images[i] for i in rng.permutation(len(species_images))]
    return shuffled[:n_training], shuffled[n_training:]


def _copy_images(image_paths, destination_dir):
    """Copy every image into destination_dir, keeping its file name."""
    os.makedirs(destination_dir, exist_ok=True)
    for img in image_paths:
        shutil.copy(img, os.path.join(destination_dir, os.path.basename(img)))


def main():
    """Split the cats-and-dogs images into per-species training and test folders.

    Command-line arguments:
        --datafolder: folder holding the ``.jpg`` images (searched recursively).
        --test_train_ratio: fraction of each species held out for testing
            (default 0.25, must lie in [0, 1]).
        --train_data / --test_data: destination folders; a ``cat`` and a ``dog``
            subfolder is created in each.
        --seed: optional integer that makes the random split reproducible.

    The entry count of the data folder and the per-species split sizes are
    logged to MLflow.
    """
    parser = argparse.ArgumentParser(description="Inspect the Cats and Dogs dataset, split it out into test and training sets")
    parser.add_argument("--datafolder", type=str, required=True, help="Path to the folder containing the Cats and Dogs dataset")
    parser.add_argument("--test_train_ratio", type=float, required=False, default=0.25)
    parser.add_argument("--train_data", type=str, required=True, help="path to train data")
    parser.add_argument("--test_data", type=str, required=True, help="path to test data")
    parser.add_argument("--seed", type=int, required=False, default=None, help="optional seed for a reproducible split")
    args = parser.parse_args()

    if not 0.0 <= args.test_train_ratio <= 1.0:
        parser.error("--test_train_ratio must be between 0 and 1")

    datafolder = args.datafolder
    training_path = args.train_data
    test_path = args.test_data
    rng = np.random.default_rng(args.seed)

    # The context manager closes the MLflow run even when the split fails.
    with mlflow.start_run():
        print(f"Data folder: {datafolder}")

        # Log the data folder path
        mlflow.log_param("datafolder", datafolder)

        # Top-level entries (files or subfolders) of the data folder.
        num_entries = len(os.listdir(datafolder))
        print(f"Number of entries in the data folder: {num_entries}")
        mlflow.log_metric("num_files", num_entries)

        # Sorted so that a fixed seed always produces the same split.
        images = sorted(glob.glob(os.path.join(datafolder, "**", "*.jpg"), recursive=True))
        print(f"Number of images in the data folder: {len(images)}")

        # Write data to train_data and test_data paths
        print(f"Writing training data to: {training_path}")
        print(f"Writing test data to: {test_path}")

        # Match the species against the path *relative to the data folder*:
        # the folder itself may be named e.g. "catsanddogs_flat", which contains
        # both "cat" and "dog" and would otherwise match every image twice.
        relative_names = {img: os.path.relpath(img, datafolder).lower() for img in images}

        for species in ["cat", "dog"]:
            species_images = [img for img in images if species in relative_names[img]]
            training_images, test_images = _split_species(species_images, args.test_train_ratio, rng)
            print(f"Species: {species}, Total images: {len(species_images)}, Training images: {len(training_images)}")

            _copy_images(training_images, os.path.join(training_path, species))
            mlflow.log_metric(f"training_{species}", len(training_images))

            _copy_images(test_images, os.path.join(test_path, species))
            mlflow.log_metric(f"test_{species}", len(test_images))

        # Report what actually landed on disk.
        print(os.listdir(training_path))
        for species in os.listdir(training_path):
            print(f"Number of {species} images in the training set: {len(os.listdir(os.path.join(training_path, species)))}")
        print(os.listdir(test_path))
        for species in os.listdir(test_path):
            print(f"Number of {species} images in the test set: {len(os.listdir(os.path.join(test_path, species)))}")


if __name__ == "__main__":
    main()

Writing ../components/catsanddogsprep/castanddogsprep.py


In [13]:
%%writefile ../components/catsanddogstrain/catsanddogstrain.py
import os
import argparse
import glob 
import mlflow

import keras
from keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from keras.models import Sequential
from keras import layers
from tensorflow.keras.optimizers import Adam

# Make sure the local "outputs" folder, where main() saves the model, exists.
os.makedirs("./outputs", exist_ok=True)

def _count_files(folder):
    """Count the regular files anywhere below folder."""
    return sum(len(filenames) for _, _, filenames in os.walk(folder))


def main():
    """Train a small CNN that classifies cat and dog images.

    Command-line arguments:
        --train_data: folder with one subfolder per class, used for training.
        --test_data: folder with one subfolder per class, used for validation.
        --epochs: number of training epochs (default 10).
        --model_output: folder the trained model is saved into as
            ``catsanddogs_model.keras`` (default ``outputs``). Pass the
            component's output path here (e.g. ``${{outputs.model}}``) to make
            the model available to downstream pipeline steps.
    """
    parser = argparse.ArgumentParser(description="Train a Cats and Dogs image classifier")
    parser.add_argument("--train_data", type=str, help="path to train data")
    parser.add_argument("--test_data", type=str, help="path to test data")
    parser.add_argument("--epochs", type=int, required=False, default=10)
    parser.add_argument("--model_output", type=str, required=False, default="outputs", help="folder to save the trained model in")
    args = parser.parse_args()

    train_data = args.train_data
    test_data = args.test_data

    # Single source of truth for the image size: used by both the generators
    # and the model's input layer.
    image_size = (200, 200)
    batch_size = 32

    # The context manager closes the MLflow run even when training fails.
    with mlflow.start_run():
        # Count the images themselves, not the per-class subfolders.
        print(f"Number of files in train data: {_count_files(train_data)}")
        print(f"Number of files in test data: {_count_files(test_data)}")

        # Build the datasets; class labels come from the subfolder names.
        datagen = ImageDataGenerator()
        train_generator = datagen.flow_from_directory(train_data, target_size=image_size, batch_size=batch_size, class_mode='binary')
        test_generator = datagen.flow_from_directory(test_data, target_size=image_size, batch_size=batch_size, class_mode='binary')

        # Three conv/pool stages followed by a dense head with a sigmoid output.
        model = Sequential([
            layers.InputLayer(input_shape=image_size + (3,)),
            layers.Conv2D(16, 3, padding='same', activation='relu'),
            layers.MaxPooling2D(),
            layers.Conv2D(32, 3, padding='same', activation='relu'),
            layers.MaxPooling2D(),
            layers.Conv2D(64, 3, padding='same', activation='relu'),
            layers.MaxPooling2D(),
            layers.Flatten(),
            layers.Dense(128, activation='relu'),
            layers.Dense(1, activation='sigmoid')
        ])

        model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

        model.fit(train_generator, validation_data=test_generator, epochs=args.epochs)

        # Save into the requested folder (created on demand).
        os.makedirs(args.model_output, exist_ok=True)
        model.save(os.path.join(args.model_output, "catsanddogs_model.keras"))


if __name__ == "__main__":
    main()


Writing ../components/catsanddogstrain/catsanddogstrain.py


In [14]:
from azure.ai.ml import command, Input, Output

# Component for the data-preparation step: one folder in, train/test folders out.
inputs = {
    "datafolder": Input(type="uri_folder"),
    "test_train_ratio": Input(type="number", default=0.25),
}
outputs = {
    "train_data": Output(type="uri_folder", mode="rw_mount"),
    "test_data": Output(type="uri_folder", mode="rw_mount"),
}

# The script name is spelled "castanddogsprep.py" on purpose: it matches the file
# name written into ../components/catsanddogsprep/ earlier in this notebook.
catsanddogsprep = command(
    name="catsanddogsprep",
    code="../components/catsanddogsprep/",
    command="""python castanddogsprep.py --datafolder ${{inputs.datafolder}} --test_train_ratio ${{inputs.test_train_ratio}} \
            --train_data ${{outputs.train_data}} --test_data ${{outputs.test_data}} \
            """,
    inputs=inputs,
    outputs=outputs,
    environment="catsanddogsenv:3",
)

In [15]:
# Component for the training step: consumes the train/test folders and an epoch count.
inputs = {
    "train_data": Input(type="uri_folder"),
    "test_data": Input(type="uri_folder"),
    "epochs": Input(type="number", default=10),
}
outputs = {"model": Output(type="uri_folder", mode="rw_mount")}

# NOTE(review): the command string below never references ${{outputs.model}}, so
# nothing tells the script where the declared `model` output lives; the output
# is presumably left empty for downstream steps -- confirm.
catsanddogstrain = command(
    name="catsanddogstrain",
    code="../components/catsanddogstrain/",
    command="""python catsanddogstrain.py --train_data ${{inputs.train_data}} --test_data ${{inputs.test_data}} --epochs ${{inputs.epochs}}""",
    inputs=inputs,
    outputs=outputs,
    environment="catsanddogsenv:3",
)

In [16]:
#import dsl
from azure.ai.ml import dsl

In [17]:
@dsl.pipeline(
    description="catsanddogs pipeline correct split",
    compute="defaultclustersbxdondorp",
)
def catsanddogs_pipeline(catsanddogs_data_input, test_train_ratio=0.25, epochs=10):
    # Step 1: split the raw image folder into train and test folders.
    data_prep_job = catsanddogsprep(
        datafolder=catsanddogs_data_input,
        test_train_ratio=test_train_ratio,
    )

    # Step 2: train on the folders produced by the prep step.
    train_job = catsanddogstrain(
        train_data=data_prep_job.outputs.train_data,
        test_data=data_prep_job.outputs.test_data,
        epochs=epochs,
    )

    # Pipeline-level outputs: both data splits plus the training step's model output.
    return {
        "data_prep_job_train_data": data_prep_job.outputs.train_data,
        "data_prep_job_test_data": data_prep_job.outputs.test_data,
        "train_job_model": train_job.outputs.model,
    }

In [18]:
# Instantiate the pipeline on the registered dataset, overriding epochs to 1.
pipeline = catsanddogs_pipeline(
    catsanddogs_data_input=cats_and_dogs_data,
    epochs=1,
)


In [19]:
# Submit the pipeline job to the workspace; the returned job summary is
# displayed as the cell output.
ml_client.jobs.create_or_update(pipeline)

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored
pathOnCompute is not a known attribu

Experiment,Name,Type,Status,Details Page
notebooks,cool_nose_89wpfxfqdk,pipeline,NotStarted,Link to Azure Machine Learning studio


In [20]:
# Next, add a model-registration step to the pipeline; start by creating the
# source folder for that component (no-op if it already exists).
os.makedirs("../components/catsanddogsregister", exist_ok=True)

In [21]:
%%writefile ../components/catsanddogsregister/catsanddogsregister.py
import os
import argparse
import mlflow
from keras.models import load_model


def _resolve_model_file(model_path):
    """Return the Keras model file to load for model_path.

    When model_path is a folder containing ``.keras`` or ``.h5`` files, the
    first one (alphabetically) is returned; otherwise model_path is returned
    unchanged so a plain file path or a SavedModel folder still works.
    """
    if os.path.isdir(model_path):
        candidates = sorted(
            entry for entry in os.listdir(model_path)
            if entry.endswith((".keras", ".h5"))
        )
        if candidates:
            return os.path.join(model_path, candidates[0])
    return model_path


def main():
    """Load a trained Keras model and re-save it in MLflow format.

    Command-line arguments:
        --model: path to the trained model (a model file, or a folder holding one).
        --model_name: name of the model (only echoed to the log).
        --model_description: description of the model (only echoed to the log).
        --output_path: folder the MLflow-format model is written to.
    """
    parser = argparse.ArgumentParser(description="Register the Cats and Dogs model")
    parser.add_argument("--model", type=str, required=True, help="path to the model")
    parser.add_argument("--model_name", type=str, help="name of the model")
    parser.add_argument("--model_description", type=str, help="description of the model")
    parser.add_argument("--output_path", type=str, required=True, help="path to save the model")
    # Parse before starting the run so a bad command line does not leave a run open.
    args = parser.parse_args()

    model_path = args.model
    model_name = args.model_name
    model_description = args.model_description

    print(f"Model path: {model_path}")
    print(f"Model name: {model_name}")
    print(f"Model description: {model_description}")
    print(f"Output path: {args.output_path}")

    # The context manager closes the MLflow run even when loading or saving fails.
    with mlflow.start_run():
        # Load from the parsed --model path (resolved to the model file when a
        # folder is given) instead of an undefined name.
        keras_model = load_model(_resolve_model_file(model_path))
        mlflow.keras.save_model(keras_model, args.output_path)


if __name__ == "__main__":
    main()

Writing ../components/catsanddogsregister/catsanddogsregister.py


In [22]:
# Component for the registration step: takes the trained model folder plus a
# name and description, and emits a custom_model output.
inputs = {
    "model": Input(type="uri_folder"),
    "model_name": Input(type="string"),
    "model_description": Input(type="string"),
}
outputs = {"output_model": Output(type="custom_model")}

catsanddogsregister = command(
    name="catsanddogsregister",
    code="../components/catsanddogsregister/",
    command="""python catsanddogsregister.py --model ${{inputs.model}} --model_name ${{inputs.model_name}} --model_description ${{inputs.model_description}} --output_path ${{outputs.output_model}}""",
    inputs=inputs,
    outputs=outputs,
    # Uses the newer environment version, which installs azureml-core and azure-ai-ml.
    environment="catsanddogsenv:5",
)

In [23]:
@dsl.pipeline(
    name="catsanddogs_pipeline_with_model_registration",
    description="catsanddogs pipeline correct split with model registration",
    compute="defaultclustersbxdondorp",
)
def catsanddogs_pipeline_with_model_registration(
    catsanddogs_data_input,
    test_train_ratio=0.25,
    epochs=10,
    model_name="catsanddogsmodel",
    model_description="'A model to classify cats and dogs'",
):
    # Step 1: split the raw image folder into train and test folders.
    data_prep_job = catsanddogsprep(
        datafolder=catsanddogs_data_input,
        test_train_ratio=test_train_ratio,
    )

    # Step 2: train on the folders produced by the prep step.
    train_job = catsanddogstrain(
        train_data=data_prep_job.outputs.train_data,
        test_data=data_prep_job.outputs.test_data,
        epochs=epochs,
    )

    # Step 3: hand the training output to the registration component.
    register_job = catsanddogsregister(
        model=train_job.outputs.model,
        model_name=model_name,
        model_description=model_description,
    )

    # Pipeline-level outputs: both data splits plus the registration step's model.
    return {
        "data_prep_job_train_data": data_prep_job.outputs.train_data,
        "data_prep_job_test_data": data_prep_job.outputs.test_data,
        "registered_model": register_job.outputs.output_model,
    }


In [24]:
# Instantiate the three-step pipeline with its default ratio, epochs and model
# metadata, using the registered dataset as input.
catsanddogs_pipeline_with_model_registration_instance = catsanddogs_pipeline_with_model_registration(catsanddogs_data_input=cats_and_dogs_data)
# Submit the job; the returned job summary is displayed as the cell output.
ml_client.jobs.create_or_update(catsanddogs_pipeline_with_model_registration_instance)

pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored
pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored
pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored
pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.CustomModelJobOutput'> and will be ignored


Experiment,Name,Type,Status,Details Page
notebooks,goofy_caravan_x90vg89pph,pipeline,NotStarted,Link to Azure Machine Learning studio


In [461]:
from azureml.core import Model, workspace
# Exploratory leftover: prints the azureml.core (SDK v1) Model.register help text.
# Nothing else in the visible notebook calls Model.register -- TODO consider removing.
help(Model.register)

Help on function register in module azureml.core.model:

register(workspace, model_path, model_name, tags=None, properties=None, description=None, datasets=None, model_framework=None, model_framework_version=None, child_paths=None, sample_input_dataset=None, sample_output_dataset=None, resource_configuration=None)
    Register a model with the provided workspace.
    
    .. remarks::
    
        In addition to the content of the model file itself, a registered model also stores model metadata,
        including model description, tags, and framework information, that is useful when managing and
        deploying the model in your workspace. For example, with tags you can categorize your models and
        apply filters when listing models in your workspace.
    
        The following sample shows how to register a model specifying tags and a description.
    
        .. code-block:: python
    
            from azureml.core.model import Model
    
            model = Model.register(m