In [1]:
# Switch the working directory to the parent folder; presumably this is the project
# root that the relative paths and the `aml.settings` import below expect (confirm).
# NOTE(review): each execution moves up one more level, so run this cell only once
# per kernel session.
%cd ..

c:\Workspace\PracticeProjects\aml


# Azure Machine Learning - Getting Started

## Imports

In [2]:
from azure.ai.ml import MLClient, command, Input
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.ai.ml.entities import AmlCompute, Data, Environment
from azure.identity import DefaultAzureCredential

from aml.settings import *

See the [Azure ML examples repository](https://github.com/Azure/azureml-examples/tree/main) for many more examples and details.

## Connect to a Workspace

In [3]:
# Credential object used to authenticate against Azure.
credential = DefaultAzureCredential()

# Handle to the workspace. The identifiers below are listed on the workspace tab
# of ml.azure.com.
ml_client = MLClient(
    workspace_name=WORKSPACE_NAME,
    resource_group_name=RESOURCE_GROUP,
    subscription_id=SUBSCRIPTION_ID,  # formatted like xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
    credential=credential,
)

## Create Compute

In [4]:
# Reuse the compute cluster when it is already provisioned; otherwise create it.
try:
    # let's see if the compute target already exists
    gpu_cluster = ml_client.compute.get(COMPUTE_NAME)
    print(
        f"You already have a cluster named {COMPUTE_NAME}, we'll reuse it as is."
    )

except Exception:
    # NOTE(review): every failure of the lookup above ends up here, not only
    # "cluster not found" (e.g. authentication or network errors). Consider
    # narrowing this to the SDK's resource-not-found exception.
    print("Creating a new gpu compute target...")

    gpu_cluster = AmlCompute(
        name=COMPUTE_NAME,
        type=COMPUTE_TYPE,
        size=COMPUTE_SIZE,
        min_instances=COMPUTE_MIN_INSTANCES,
        max_instances=COMPUTE_MAX_INSTANCES,
        idle_time_before_scale_down=COMPUTE_IDLE_TIME,
        tier=COMPUTE_TIER,
    )

    # begin_create_or_update() only starts the long-running operation and hands
    # back a poller. .result() waits for provisioning to finish and returns the
    # compute entity, so the summary below can read .name and .size.
    gpu_cluster = ml_client.begin_create_or_update(gpu_cluster).result()

print(
    f"AMLCompute with name {gpu_cluster.name} is created, the compute size is {gpu_cluster.size}"
)

You already have a cluster named a100, we'll reuse it as is.
AMLCompute with name a100 is created, the compute size is STANDARD_NC24ADS_A100_V4


## Create an Environment

More information [here](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-environments-v2?view=azureml-api-2&tabs=python)

In [5]:
# Register the custom environment (base image plus conda specification) in the
# workspace and keep the registered entity, which carries the assigned version.
pipeline_job_env = ml_client.environments.create_or_update(
    Environment(
        name=ENVIRONMENT_NAME,
        description=ENVIRONMENT_DESCRIPTION,
        image=ENVIRONMENT_IMAGE,
        conda_file=ENVIRONMENT_YAML,
        tags={"scikit-learn": "1.0.0"},
    )
)

print(
    f"Environment with name {pipeline_job_env.name} is registered to workspace, "
    f"the environment version is {pipeline_job_env.version}"
)

Environment with name aml-scikit-learn is registered to workspace, the environment version is 7


## Create a Data asset

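Register your data as a Data asset so that jobs can reference it by name and version. Choose the asset type that matches your data; this notebook registers a single file, so it uses `uri_file`.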

More information [here](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-data-assets?view=azureml-api-2&tabs=python)

In [6]:
# Local description of the data asset to register.
my_data = Data(
    name=DATA_NAME,
    description=DATA_DESCRIPTION,
    # TODO: update the version number if you want to create a new version of the data asset
    version=DATA_VERSION,
    path=DATA_PATH,
    type=AssetTypes.URI_FILE,
)

# Fetch the asset if this name/version is already registered, otherwise register it.
# Both branches bind `data_asset`, because the job cell below reads `data_asset.id`.
try:
    data_asset = ml_client.data.get(name=DATA_NAME, version=DATA_VERSION)
    print(
        f"Data asset already exists. Name: {my_data.name}, version: {my_data.version}"
    )
except Exception:
    # `except Exception` (rather than a bare `except`) lets KeyboardInterrupt and
    # SystemExit propagate instead of triggering a registration attempt.
    # NOTE(review): this still treats any lookup failure as "asset missing".
    data_asset = ml_client.data.create_or_update(my_data)
    print(f"Data asset created. Name: {my_data.name}, version: {my_data.version}")

Data asset already exists. Name: credit-card, version: 1


In [7]:
# Define the command job that runs main.py from ./src/ with the inputs below.
job = command(
    # Jobs sharing an experiment name are listed next to each other in Azure ML studio.
    experiment_name=EXPERIMENT_NAME,
    display_name=EXPERIMENT_DISPLAY_NAME,
    # Compute created earlier; dropping this argument runs the job on serverless compute.
    compute=COMPUTE_NAME,
    # Latest registered version of the environment named ENVIRONMENT_NAME
    # (alternative: "AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest").
    environment=f"{ENVIRONMENT_NAME}@latest",
    # Folder holding the source code.
    code="./src/",
    # ${{ ... }} placeholders are replaced with the matching entries of `inputs`.
    command=(
        "python main.py"
        " --data ${{inputs.data}}"
        " --test_train_ratio ${{inputs.test_train_ratio}}"
        " --learning_rate ${{inputs.learning_rate}}"
        " --registered_model_name ${{inputs.registered_model_name}}"
    ),
    inputs={
        # uri_file: the data asset points at one specific file, mounted read-only.
        "data": Input(
            path=data_asset.id,
            type=AssetTypes.URI_FILE,
            mode=InputOutputModes.RO_MOUNT,
        ),
        # The remaining entries are passed to main.py as command-line arguments.
        "test_train_ratio": JOB_TEST_TRAIN_RATIO,
        "learning_rate": JOB_LEARNING_RATE,
        "registered_model_name": MODEL_NAME,
    },
)

In [8]:
# Submit the configured job through the workspace client. As the last expression
# of the cell, the returned job object is rendered as the cell output.
ml_client.create_or_update(job)