## Scaling the training process with compute clusters

In [1]:
from azureml.core import Workspace, Experiment
from azureml.core import ScriptRunConfig

# Workspace handle, the cluster that will execute the job, and the experiment
# the run is recorded under.
ws = Workspace.from_config()
target = ws.compute_targets['akt-cluster']
exp = Experiment(workspace=ws, name='greet-run')

# Job definition: greeter.py from the greeter-job folder, run on the cluster
# with the name to greet passed as a command-line argument.
script = ScriptRunConfig(
    source_directory='greeter-job',
    script='greeter.py',
    arguments=['--greet-name', 'Henry'],
    compute_target=target,
)

# Submit the job, print the link to the run in the portal, then block while
# the run's logs are streamed into the notebook.
run = exp.submit(config=script)
print(run.get_portal_url())
run.wait_for_completion(show_output=True)

https://ml.azure.com/runs/greet-run_1650543543_439eb114?wsid=/subscriptions/a8b508b6-da16-4c45-84f5-cac5c9f57513/resourcegroups/azure-mlops/workspaces/prod-dev-ml-ws&tid=e648628f-f65c-40cc-8a28-c601daf26a89
RunId: greet-run_1650543543_439eb114
Web View: https://ml.azure.com/runs/greet-run_1650543543_439eb114?wsid=/subscriptions/a8b508b6-da16-4c45-84f5-cac5c9f57513/resourcegroups/azure-mlops/workspaces/prod-dev-ml-ws&tid=e648628f-f65c-40cc-8a28-c601daf26a89

Streaming azureml-logs/20_image_build_log.txt

2022/04/21 12:19:10 Downloading source code...
2022/04/21 12:19:11 Finished downloading source code
2022/04/21 12:19:11 Creating Docker network: acb_default_network, driver: 'bridge'
2022/04/21 12:19:11 Successfully set up Docker network: acb_default_network
2022/04/21 12:19:11 Setting up Docker configuration...
2022/04/21 12:19:12 Successfully set up Docker configuration
2022/04/21 12:19:12 Logging in to registry: proddevmlwscontainerreg.azurecr.io
2022/04/21 12:19:13 Successfully logg

{'runId': 'greet-run_1650543543_439eb114',
 'target': 'akt-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-04-21T12:22:48.995024Z',
 'endTimeUtc': '2022-04-21T12:23:31.872237Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'amlctrain',
  'ContentSnapshotId': '1c05325e-0da4-4dd2-a358-ba3796e057cc',
  'azureml.git.repository_uri': 'https://github.com/Aniketthani/Azure-Remote-Script-Execution-on-Clusters.git',
  'mlflow.source.git.repoURL': 'https://github.com/Aniketthani/Azure-Remote-Script-Execution-on-Clusters.git',
  'azureml.git.branch': 'main',
  'mlflow.source.git.branch': 'main',
  'azureml.git.commit': '6a9e584a6a9f57f3a693b331ee5df39f4387bfa2',
  'mlflow.source.git.commit': '6a9e584a6a9f57f3a693b331ee5df39f4387bfa2',
  'azureml.git.dirty': 'False',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'greeter.py',
  'c

### Exploring the outputs and logs of a run

In [2]:
from azureml.widgets import RunDetails

# Submit another run of the same job configuration and display the
# run-details widget for it.
run = exp.submit(config=script)
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [3]:
# The run was submitted without waiting for it, so block here until it has
# finished; otherwise a top-to-bottom execution of the notebook would fetch the
# details and logs of a run that is still queued or running.
run.wait_for_completion(show_output=False)

# Final details of the run together with the contents of its log files.
run.get_details_with_logs()

{'runId': 'greet-run_1650543902_1f9b0721',
 'target': 'akt-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-04-21T12:25:11.350619Z',
 'endTimeUtc': '2022-04-21T12:25:22.378989Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'amlctrain',
  'ContentSnapshotId': '1c05325e-0da4-4dd2-a358-ba3796e057cc',
  'azureml.git.repository_uri': 'https://github.com/Aniketthani/Azure-Remote-Script-Execution-on-Clusters.git',
  'mlflow.source.git.repoURL': 'https://github.com/Aniketthani/Azure-Remote-Script-Execution-on-Clusters.git',
  'azureml.git.branch': 'main',
  'mlflow.source.git.branch': 'main',
  'azureml.git.commit': '6a9e584a6a9f57f3a693b331ee5df39f4387bfa2',
  'mlflow.source.git.commit': '6a9e584a6a9f57f3a693b331ee5df39f4387bfa2',
  'azureml.git.dirty': 'False',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'greeter.py',
  'c

### Training the diabetes model on a compute cluster

In [16]:
from azureml.core import Dataset
from azureml.core import Workspace
# Workspace handle used by the datastore and dataset cells below.
ws = Workspace.from_config()

In [12]:
from azureml.core import Datastore

In [13]:
# Default datastore of the workspace; the diabetes CSV is uploaded to it next.
dstore = ws.get_default_datastore()

In [15]:
# Upload the local CSV into the "/diabetes_data" folder of the datastore,
# overwriting an existing copy and reporting progress while uploading.
dstore.upload_files(
    files=["data/diabetes.csv"],
    target_path="/diabetes_data",
    overwrite=True,
    show_progress=True,
)

Uploading an estimated of 1 files
Uploading data/diabetes.csv
Uploaded data/diabetes.csv, 1 files out of an estimated total of 1
Uploaded 1 files


$AZUREML_DATAREFERENCE_c4746e11a0e0484c82c8a0d55562d047

In [17]:
# (datastore, path) pairs locating the uploaded data on the datastore.
file_paths = [(dstore, "/diabetes_data")]

# Build a tabular dataset from the delimited file(s) at that location.
dataset = Dataset.Tabular.from_delimited_files(path=file_paths)

In [19]:
# Register the dataset in the workspace under the name "diabetes".
# create_new_version=True keeps this cell re-runnable: once the name is taken,
# a changed dataset is stored as a new version under the same name instead of
# the registration being rejected.
dataset.register(
    workspace=ws,
    name="diabetes",
    description="diabetes data",
    create_new_version=True,
)

{
  "source": [
    "('workspaceblobstore', '/diabetes_data')"
  ],
  "definition": [
    "GetDatastoreFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes"
  ],
  "registration": {
    "id": "5f539135-26c4-4cf8-bda9-27822f3b322a",
    "name": "diabetes",
    "version": 1,
    "description": "diabetes data",
    "workspace": "Workspace.create(name='prod-dev-ml-ws', subscription_id='a8b508b6-da16-4c45-84f5-cac5c9f57513', resource_group='azure-mlops')"
  }
}

In [20]:
import sklearn
# Show which scikit-learn version is installed in this notebook's kernel.
print(sklearn.__version__)

0.22.1


In [21]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies 
import sklearn

# Environment in which the training script runs on the cluster.
diabetes_env = Environment(name="diabetes-training-env")

# Pin scikit-learn to the version installed in this kernel so the remote
# environment does not silently resolve to a different release than the one
# used locally (an unpinned "scikit-learn" lets the versions drift apart).
diabetes_env.python.conda_dependencies = CondaDependencies.create(
    conda_packages=[f"scikit-learn=={sklearn.__version__}"],
    pip_packages=[
        "azureml-dataprep[pandas]",
        "joblib",
        "azureml-core",
        "numpy",
        "pandas",
    ],
)



In [22]:
from azureml.core import Workspace, Experiment
from azureml.core import ScriptRunConfig

# Workspace handle, the cluster that will run the training job, and the
# experiment the run is recorded under.
ws = Workspace.from_config()
target = ws.compute_targets['akt-cluster']
exp = Experiment(workspace=ws, name='diabetes_cluster_execution')

# Job definition: training.py from the diabetes-training folder, executed on
# the cluster inside the diabetes_env environment.
script = ScriptRunConfig(
    source_directory='diabetes-training',
    script='training.py',
    compute_target=target,
    environment=diabetes_env,
)

# Submit the job and block until it finishes, showing its output as it runs.
run = exp.submit(config=script)
run.wait_for_completion(show_output=True)

RunId: diabetes_cluster_execution_1650546838_8d8c1f30
Web View: https://ml.azure.com/runs/diabetes_cluster_execution_1650546838_8d8c1f30?wsid=/subscriptions/a8b508b6-da16-4c45-84f5-cac5c9f57513/resourcegroups/azure-mlops/workspaces/prod-dev-ml-ws&tid=e648628f-f65c-40cc-8a28-c601daf26a89

Execution Summary
RunId: diabetes_cluster_execution_1650546838_8d8c1f30
Web View: https://ml.azure.com/runs/diabetes_cluster_execution_1650546838_8d8c1f30?wsid=/subscriptions/a8b508b6-da16-4c45-84f5-cac5c9f57513/resourcegroups/azure-mlops/workspaces/prod-dev-ml-ws&tid=e648628f-f65c-40cc-8a28-c601daf26a89



{'runId': 'diabetes_cluster_execution_1650546838_8d8c1f30',
 'target': 'akt-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-04-21T13:14:08.723412Z',
 'endTimeUtc': '2022-04-21T13:14:28.602853Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'amlctrain',
  'ContentSnapshotId': '8dab90e4-e2b7-4e12-a8e8-729178731ee8',
  'azureml.git.repository_uri': 'https://github.com/Aniketthani/Azure-Remote-Script-Execution-on-Clusters.git',
  'mlflow.source.git.repoURL': 'https://github.com/Aniketthani/Azure-Remote-Script-Execution-on-Clusters.git',
  'azureml.git.branch': 'main',
  'mlflow.source.git.branch': 'main',
  'azureml.git.commit': '6a9e584a6a9f57f3a693b331ee5df39f4387bfa2',
  'mlflow.source.git.commit': '6a9e584a6a9f57f3a693b331ee5df39f4387bfa2',
  'azureml.git.dirty': 'True',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [{'dataset': {'id': '5f539135-26c4-4cf8-bda9-27822f3b322a'