In [1]:
%cd ..

c:\Workspace\PracticeProjects\aml


# Torch on Azure ML

In [2]:
from azure.ai.ml import MLClient, command, Input
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.ai.ml.entities import AmlCompute, Data, Environment
from azure.identity import DefaultAzureCredential

from aml.settings import *

In [3]:
HYMENOPTERA_DATA_PATH = "data/hymenoptera"
HYMENOPTERA_DATA_NAME = "hymenoptera"
HYMENOPTERA_DATA_DESCRIPTION = "This dataset contains images of ants and bees intended for training a classification model. It consists of approximately 120 training images for each class (ants and bees) and 75 validation images for each class."

## Connect to Workspace

In [4]:
credential = DefaultAzureCredential()

# Get a handle to the workspace. You can find the info on the workspace tab on ml.azure.com
ml_client = MLClient(
    credential=credential,
    subscription_id=SUBSCRIPTION_ID,  # this will look like xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
    resource_group_name=RESOURCE_GROUP,
    workspace_name=WORKSPACE_NAME,
)

## Create Data asset

In [5]:
# Set the path, supported paths include:
# local: './<path>/<folder>' (this will be automatically uploaded to cloud storage)
# blob:  'wasbs://<container_name>@<account_name>.blob.core.windows.net/<path>/<folder>'
# ADLS gen2: 'abfss://<file_system>@<account_name>.dfs.core.windows.net/<path>/<folder>'
# Datastore: 'azureml://datastores/<data_store_name>/paths/<path>/<folder>'

# Define the Data asset object
my_data = Data(
    path=HYMENOPTERA_DATA_PATH,
    type=AssetTypes.URI_FOLDER,
    description=HYMENOPTERA_DATA_DESCRIPTION,
    name=HYMENOPTERA_DATA_NAME,
)

## create data asset if it doesn't already exist:
try:
    data_asset = ml_client.data.get(name=HYMENOPTERA_DATA_NAME, version=DATA_VERSION)
    print(
        f"Data asset already exists. Name: {my_data.name}, version: {my_data.version}"
    )
except:
    ml_client.data.create_or_update(my_data)
    data_asset = ml_client.data.get(name=HYMENOPTERA_DATA_NAME)
    print(f"Data asset created. Name: {my_data.name}, version: {my_data.version}")

Data asset already exists. Name: hymenoptera, version: None


## Create GPU cluster

In [6]:
create_or_update = False

if GPU_NAME in [com.name for com in ml_client.compute.list()]:
    print(
        f"You already have a cluster named {GPU_NAME}, we'll check whether its attributes match your specifications."
    )
    compute_target = ml_client.compute.get(GPU_NAME)

     # Check if the attributes of the existing compute match the specifications
    differences = []
    if compute_target.type != GPU_TYPE:
        differences.append("type")
    if compute_target.size.upper() != GPU_SIZE.upper():
        differences.append("size")
    if compute_target.min_instances != int(GPU_MIN_INSTANCES):
        differences.append("min_instances")
    if compute_target.max_instances != int(GPU_MAX_INSTANCES):
        differences.append("max_instances")
    if compute_target.idle_time_before_scale_down != float(GPU_IDLE_TIME):
        differences.append("idle_time_before_scale_down")
    if {"low_priority": "LowPriority", "dedicated": "Dedicated"}.get(compute_target.tier) != GPU_TIER:
        differences.append("tier")
        print({"low_priority": "LowPriority", "dedicated": "Dedicated"}.get(compute_target.tier), type({"low_priority": "LowPriority", "dedicated": "Dedicated"}.get(compute_target.tier)), GPU_TIER)

    # Print the differences, if any
    if differences:
        print(f"The following attributes of compute target are different from your specifications: {', '.join(differences)}")
        create_or_update = True
    else:
        print("All attributes of compute_target match the specifications.")
else:
    create_or_update = True

if create_or_update:
    user_input = input("-> Are you sure you want to create/update this Compute? [yes| no]: ")
    print(f"-> Are you sure you want to create/update this Compute? [yes| no]: {user_input.lower()}")
    
    if user_input.upper() == "YES":
        print("Creating/Updating compute target...")
        compute_target = AmlCompute(
            name=GPU_NAME,
            type=GPU_TYPE,
            size=GPU_SIZE,
            min_instances=GPU_MIN_INSTANCES,
            max_instances=GPU_MAX_INSTANCES,
            idle_time_before_scale_down=GPU_IDLE_TIME,
            tier=GPU_TIER,
        )
        compute_target = ml_client.begin_create_or_update(compute_target)
        print(f"AMLCompute with name {compute_target.name} is created/updated, the compute size is {compute_target.size}")
    else:
        print("No compute target created/updated.")

You already have a cluster named a100, we'll check whether its attributes match your specifications.
All attributes of compute_target match the specifications.


In [7]:
# configure the command job
job = command(
    inputs=dict(
        # uri_file refers to a specific file as a data asset
        data=Input(
            path=data_asset.id,
            type=AssetTypes.URI_FOLDER,
            mode=InputOutputModes.RO_MOUNT
        ),
    ),
    code="./src/",  # location of source code
    # The inputs/outputs are accessible in the command via the ${{ ... }} notation
    command="python hymenoptera.py --data ${{inputs.data}}",
    # This is the ready-made environment you are using
    environment="AzureML-ACPT-pytorch-1.13-py38-cuda11.7-gpu@latest",
    # This is the compute you created earlier. You can alternatively remove this line to use serverless compute to run the job
    compute=GPU_NAME,
    # An experiment is a container for all the iterations one does on a certain project. All the jobs submitted under the same experiment name would be listed next to each other in Azure ML studio.
    experiment_name=EXPERIMENT_NAME,
    display_name=EXPERIMENT_DISPLAY_NAME,
)

In [8]:
ml_client.create_or_update(job)

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
[32mUploading src (0.01 MBs): 100%|##

Experiment,Name,Type,Status,Details Page
hymenoptrea_classification,cool_pea_hvwl5sdpw3,command,Starting,Link to Azure Machine Learning studio
