## Scaling the training process with compute clusters

In [None]:
from azureml.core import Workspace, Experiment
from azureml.core import ScriptRunConfig

# Load the workspace via Workspace.from_config() and look up the compute
# cluster by name in the workspace's compute targets.
ws = Workspace.from_config()
target = ws.compute_targets['akt-compute-cluster']

# Describe the job: the folder to send, the script inside it to run, the
# command-line arguments to pass, and the compute target to run it on.
script = ScriptRunConfig(
    source_directory='greeter-job',
    script='greeter.py',
    arguments=['--greet-name', 'packt'],
    compute_target=target,
)

# Submit the job under the 'greet-packt' experiment, print the run's portal
# URL, then wait for the run to complete while showing its output.
exp = Experiment(workspace=ws, name='greet-packt')
run = exp.submit(config=script)
print(run.get_portal_url())
run.wait_for_completion(show_output=True)

### Exploring the outputs and logs of a run

In [None]:
from azureml.widgets import RunDetails

# Submit `script` as a new run of the experiment and display the RunDetails
# widget for that run in the notebook.
run = exp.submit(config=script)
details_widget = RunDetails(run)
details_widget.show()

In [None]:
# Display the run's details together with its logs (the cell's last
# expression is rendered as the cell output).
run.get_details_with_logs()

### Training the diabetes model on a compute cluster

In [1]:
# Show which scikit-learn version is installed in this notebook's kernel.
import sklearn
print(sklearn.__version__)

0.22.1


In [3]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies 
import sklearn

# Environment object that the training job is configured to run in.
diabetes_env = Environment(name="diabetes-training-env")

# Pin scikit-learn to the version installed in this kernel. Without a version
# specifier the remote environment gets whatever version conda resolves at
# build time, which may differ from the one used locally.
diabetes_env.python.conda_dependencies = CondaDependencies.create(
    conda_packages=["scikit-learn==" + sklearn.__version__],
    pip_packages=[
        "azureml-dataprep[pandas]",
        "joblib",
        "azureml-core",
        "numpy",
        "pandas",
    ],
)



In [18]:
from azureml.core import Workspace, Experiment
from azureml.core import ScriptRunConfig

# Load the workspace via Workspace.from_config() and look up the compute
# cluster by name in the workspace's compute targets.
ws = Workspace.from_config()
target = ws.compute_targets['akt-compute-cluster']

# Describe the training job: the folder to send, the script inside it to run,
# the compute target to run it on, and the environment to run it in.
script = ScriptRunConfig(
    source_directory='diabetes-training',
    script='training.py',
    compute_target=target,
    environment=diabetes_env,
)

# Submit the job under the 'diabetes_cluster_execution' experiment and wait
# for the run to complete while showing its output.
exp = Experiment(workspace=ws, name='diabetes_cluster_execution')
run = exp.submit(config=script)
run.wait_for_completion(show_output=True)

RunId: diabetes_cluster_execution_1650541503_e4d3bfc2
Web View: https://ml.azure.com/runs/diabetes_cluster_execution_1650541503_e4d3bfc2?wsid=/subscriptions/a8b508b6-da16-4c45-84f5-cac5c9f57513/resourcegroups/azure-mlops/workspaces/prod-dev-ml-ws&tid=e648628f-f65c-40cc-8a28-c601daf26a89

Execution Summary
RunId: diabetes_cluster_execution_1650541503_e4d3bfc2
Web View: https://ml.azure.com/runs/diabetes_cluster_execution_1650541503_e4d3bfc2?wsid=/subscriptions/a8b508b6-da16-4c45-84f5-cac5c9f57513/resourcegroups/azure-mlops/workspaces/prod-dev-ml-ws&tid=e648628f-f65c-40cc-8a28-c601daf26a89



{'runId': 'diabetes_cluster_execution_1650541503_e4d3bfc2',
 'target': 'akt-compute-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-04-21T11:45:13.74319Z',
 'endTimeUtc': '2022-04-21T11:45:32.766829Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'amlctrain',
  'ContentSnapshotId': '4e7a4386-db7e-472a-a9b2-9e6dfcdc287b',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json',
  'azureml.git.repository_uri': 'https://github.com/Aniketthani/Azure-Remote-Script-Execution-on-Clusters.git',
  'mlflow.source.git.repoURL': 'https://github.com/Aniketthani/Azure-Remote-Script-Execution-on-Clusters.git',
  'azureml.git.branch': 'main',
  'mlflow.source.git.branch': 'main',
  'azureml.git.commit': '691153376c1059f55f864b03080b586ad1da6cda',
  'mlflow.source.git.commit': '691153376c1059f55f864b03080b586ad1da6cda',
  'azureml.git.dirty': 'True'},
 'inputDatasets': [{'dataset': {'id': '76c6f425-68e1-4d97-adfa-47593f