# Remote execution on compute cluster

In [5]:
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient


# Credential object handed to the workspace client below.
credential = DefaultAzureCredential()
ml_client = None
try:
    # First choice: build the client from a config.json found in this
    # directory or one of its parents.
    ml_client = MLClient.from_config(credential)
except Exception as ex:
    # Deliberate best-effort fallback: show why the config lookup failed,
    # then build the client from explicitly supplied workspace details.
    print(ex)
    # Enter details of your AML workspace
    subscription_id = "<Subscription ID>"
    resource_group = "<ResourceGroup Name>"
    workspace = "<WorkspaceName>"
    ml_client = MLClient(credential, subscription_id, resource_group, workspace)
print(ml_client)

We could not find config.json in: . or in its parent directories. 
MLClient(credential=<azure.identity._credentials.default.DefaultAzureCredential object at 0x7f5a2d62f610>,
         subscription_id=25758354-2fe7-426a-be0e-1ad20058340d,
         resource_group_name=azureml-live,
         workspace_name=ftaliveazureml)


In [18]:
# Fetch the compute target by name. Later cells pass `compute_target.name`
# as the `compute` argument of the jobs they build.
cluster_name = 'cpu-cluster'
compute_target = ml_client.compute.get(name=cluster_name)

#### Command Job

In [11]:
# Command job with no input data parameter
from azure.ai.ml import command, Input

# Describe the training run: the local code folder, the command line to
# execute, the environment to run it in, and the compute to run it on.
diabetes_train_cmd = command(
    code="./030_scripts",  # local path where the code is stored
    command="python sklearn_vanilla_train.py --alpha ${{inputs.alpha}}",
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    compute=compute_target.name,
    display_name="sklearn-diabetes-train-vanilla",
    inputs={
        "alpha": 0.01  # referenced as ${{inputs.alpha}} in the command line
    },
    # description,
    # experiment_name
)

# submit the command job
command_job = ml_client.jobs.create_or_update(diabetes_train_cmd, experiment_name="remote-sklearn-diabetes")

#### Custom Environment

In [12]:
# Custom environment: a Docker base image combined with a Conda specification file.
from azure.ai.ml.entities import Environment, BuildContext

diabetes_env = Environment(
    name="diabetes-env",
    description="Environment created from a Docker image plus Conda environment.",
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    conda_file="030_scripts/conda_env.yml",
)

In [13]:
# Command job that runs the training script inside the custom environment object.
env_command_job = command(
    display_name="sklearn-diabetes-custom-env",
    code="./030_scripts",  # local path where the code is stored
    command="python sklearn_vanilla_train.py --alpha ${{inputs.alpha}}",
    inputs={"alpha": 0.01},
    environment=diabetes_env,
    compute=compute_target.name,
    # description,
    # experiment_name
)

# Submit the job under the "remote-sklearn-diabetes" experiment.
custom_env_cmd_job = ml_client.jobs.create_or_update(
    env_command_job,
    experiment_name="remote-sklearn-diabetes",
)

In [14]:
# Register the environment with the workspace. As the last expression in the
# cell, the returned Environment object is rendered as the cell output.
ml_client.environments.create_or_update(diabetes_env)

Environment({'is_anonymous': False, 'auto_increment_version': False, 'name': 'diabetes-env', 'description': 'Environment created from a Docker image plus Conda environment.', 'tags': {}, 'properties': {}, 'id': '/subscriptions/25758354-2fe7-426a-be0e-1ad20058340d/resourceGroups/azureml-live/providers/Microsoft.MachineLearningServices/workspaces/ftaliveazureml/environments/diabetes-env/versions/2022-06-22-08-37-41-8372981', 'base_path': './', 'creation_context': <azure.ai.ml._restclient.v2022_05_01.models._models_py3.SystemData object at 0x7f5a2cce5cd0>, 'serialize': <msrest.serialization.Serializer object at 0x7f5a2cccfaf0>, 'version': '2022-06-22-08-37-41-8372981', 'latest_version': None, 'conda_file': OrderedDict([('channels', ['conda-forge']), ('dependencies', ['python=3.8', 'pip=21.2.4', OrderedDict([('pip', ['numpy==1.21.2', 'scipy==1.7.1', 'pandas==1.3.0', 'scikit-learn==0.24.2', 'adlfs==2021.9.1', 'fsspec==2021.8.1', 'xgboost==1.4.2', 'lightgbm==3.2.1', 'mlflow', 'azureml-mlflow

#### Consuming Registered Datasets in Command Jobs

In [15]:
# Passing data to command jobs
#
# Three Input definitions are built below; only `asset_input` is wired into
# the job, the other two are not referenced again in this cell.

# Input addressed directly by an https URL.
file_input = Input(
    type="uri_file",
    path="https://azuremlexamples.blob.core.windows.net/datasets/diabetes.csv",
)

# Input addressed by an "azureml:<name>:<version>" reference, in download mode.
asset_input = Input(
    type="uri_file",
    path="azureml:diabetes-dataset-uri-file:3",
    mode="download",
)

# mltable-typed input, also in download mode.
mltable_input = Input(
    type="mltable",
    path="azureml:diabetes-dataset-mltable2:1",
    mode="download",
)

# create the command
data_command_job = command(
    display_name="sklearn-diabetes-InputData",
    code="./030_scripts",  # local path where the code is stored
    command="python sklearn_data_train.py --diabetes-csv ${{inputs.diabetes}}",
    inputs={"diabetes": asset_input},
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    compute=compute_target.name,
    # description,
    # experiment_name
)

# Submit; the returned job is left as the cell's last expression so it is displayed.
ml_client.jobs.create_or_update(
    data_command_job,
    experiment_name="remote-sklearn-diabetes",
)

Experiment,Name,Type,Status,Details Page
remote-sklearn-diabetes,loving_brain_k0ckqlz450,command,Starting,Link to Azure Machine Learning studio


#### Hyperparameter Sweeping

In [17]:
# Hyperparameter sweeping
from azure.ai.ml.sweep import Choice, Uniform, MedianStoppingPolicy

# Re-invoke the existing command with `alpha` replaced by a search space
# (uniform over [0.01, 0.2]) instead of a fixed value.
sweep_command_job = diabetes_train_cmd(alpha=Uniform(min_value=0.01, max_value=0.2))

# apply the sweep parameter to obtain the sweep_job
sweep_job = sweep_command_job.sweep(
    # Use the same compute target as the other jobs rather than repeating the
    # cluster name, so changing `cluster_name` in one place is enough.
    compute=compute_target.name,
    sampling_algorithm="random",
    # NOTE(review): assumes the training script logs a metric with exactly
    # this name — confirm against sklearn_vanilla_train.py.
    primary_metric="training_rmse",
    goal="Minimize",
)
# submit the sweep (left commented out, so running this cell does not start a sweep)
#returned_sweep_job = ml_client.jobs.create_or_update(sweep_job, experiment_name="remote-sklearn-diabetes")





For more on job execution using the SDK, refer to: https://github.com/Azure/azureml-examples/tree/main/sdk/jobs/single-step