# Remote execution on compute cluster

In [None]:
from azureml.core import Workspace

# Build the workspace handle, then pick the compute target named
# "cpu-cluster" out of the workspace's compute-target mapping.
ws = Workspace.from_config()
compute_targets = ws.compute_targets
target = compute_targets["cpu-cluster"]

In [None]:
from azureml.core import Environment, ScriptRunConfig

# Run configuration for the training script: where the sources live, which
# script to start, which compute target and environment to use, and the
# command-line arguments handed to the script.
script = ScriptRunConfig(
    source_directory="030_scripts",
    script="sklearn_vanilla_train.py",
    compute_target=target,
    # Fetch the one environment we need by name. Indexing ``ws.environments``
    # would first list every environment in the workspace just to pick one.
    environment=Environment.get(
        workspace=ws,
        name="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu",
    ),
    arguments=["--alpha", 0.01],
)

In [None]:
from azureml.core import Experiment

# Submit the run configuration under the "remote-script-execution"
# experiment, then wait for the run to complete while showing its output.
exp = Experiment(workspace=ws, name="remote-script-execution")
run = exp.submit(config=script)
run.wait_for_completion(show_output=True)

## Custom environment

In [None]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
import sklearn

# Conda packages: scikit-learn pinned to the version importable in this
# kernel, plus mlflow.
conda_packages = [f"scikit-learn=={sklearn.__version__}", "mlflow"]
# Pip packages installed on top of the conda packages.
pip_packages = ["azureml-defaults", "azureml-mlflow", "azureml-dataprep[pandas]"]

diabetes_env = Environment(name="diabetes-training-env")
diabetes_env.python.conda_dependencies = CondaDependencies.create(
    conda_packages=conda_packages,
    pip_packages=pip_packages,
)
# Alternative: build the environment from an existing conda YAML file.
# diabetes_env = Environment.from_conda_specification(
#                            name = "diabetes-training-env",
#                            file_path = "diabetes-conda.yml")
# Alternative: build the environment from a Dockerfile, see
# https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.environment.environment?view=azure-ml-py#from-dockerfile-name--dockerfile--conda-specification-none--pip-requirements-none-

In [None]:
# Store a "MY_VAR" entry in the environment's variables mapping.
# NOTE(review): presumably this becomes an OS environment variable visible to
# the training script at run time -- confirm against the Azure ML docs.
diabetes_env.environment_variables["MY_VAR"] = "Hello from environment"

In [None]:
# Run sklearn_vanilla_train.py on the compute target, this time inside the
# custom diabetes_env environment.
exp = Experiment(workspace=ws, name="remote-script-execution")
script = ScriptRunConfig(
    arguments=["--alpha", 0.01],
    environment=diabetes_env,
    compute_target=target,
    script="sklearn_vanilla_train.py",
    source_directory="030_scripts",
)
run = exp.submit(config=script)
# The first submission shows 20_image_build_log.txt.
# The built image is stored in the container registry and is
# reused by follow-up submissions.
run.wait_for_completion(show_output=True)

In [None]:
# Optional step: register the custom environment with the workspace.
diabetes_env.register(workspace=ws)

## Consuming datasets

In [None]:
from azureml.core import Dataset

# Look up the dataset registered in the workspace as "diabetes-tabular".
dataset = Dataset.get_by_name(workspace=ws, name="diabetes-tabular")

In [None]:
from azureml.core import ScriptRunConfig

# Give the dataset the input name "diabetes_dataset" before handing it to
# the run configuration.
diabetes_input = dataset.as_named_input("diabetes_dataset")

# NOTE(review): the dataset is passed as a bare positional argument with no
# preceding flag; confirm train_with_azureml_workspace.py tolerates that.
script = ScriptRunConfig(
    arguments=["--alpha", 0.01, diabetes_input],
    environment=diabetes_env,
    compute_target=target,
    script="train_with_azureml_workspace.py",
    source_directory="030_scripts",
)

In [None]:
from azureml.core import Experiment

# Submit the dataset-consuming run under the same experiment name.
exp = Experiment(workspace=ws, name="remote-script-execution")
run = exp.submit(config=script)
# 20_image_build_log.txt should not appear this time.
run.wait_for_completion(show_output=True)

## Hyperparameter tuning

In [None]:
# This run configuration deliberately has no ``arguments`` entry.
script = ScriptRunConfig(
    environment=diabetes_env,
    compute_target=target,
    script="sklearn_vanilla_train.py",
    source_directory="030_scripts",
)

In [None]:
from azureml.train.hyperdrive import (
    HyperDriveConfig,
    PrimaryMetricGoal,
    RandomParameterSampling,
    uniform,
)

# Random sampling over a single hyperparameter, "alpha", drawn with
# ``uniform`` between 1e-5 and 0.1.
param_sampling = RandomParameterSampling({"alpha": uniform(1e-5, 0.1)})

# Sweep configuration: minimise the "training_rmse" metric over at most
# 20 runs in total, with at most 2 running concurrently.
hd_config = HyperDriveConfig(
    run_config=script,
    hyperparameter_sampling=param_sampling,
    primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
    primary_metric_name="training_rmse",
    max_concurrent_runs=2,
    max_total_runs=20,
)

In [None]:
# Submit the HyperDrive configuration under its own experiment and wait for
# the sweep to complete while showing its output.
experiment = Experiment(workspace=ws, name="hyperdrive-experiment")
hyperdrive_run = experiment.submit(config=hd_config)
hyperdrive_run.wait_for_completion(show_output=True)