# Chapter 09 code snippets
This notebook contains all code snippets from chapter 9.

## Hyperparameter tuning using HyperDrive

In [None]:
from azureml.core import (
    Workspace, Environment
)
from azureml.core.conda_dependencies import \
     CondaDependencies 
import sklearn

# Load the workspace from the saved workspace configuration.
ws = Workspace.from_config()

# Pin scikit-learn in the training environment to the version installed in
# this kernel, so the remote run uses the same library version as the notebook.
sklearn_pin = f"scikit-learn=={sklearn.__version__}"
training_dependencies = CondaDependencies.create(
    conda_packages=[sklearn_pin],
    pip_packages=["azureml-core", "azureml-dataset-runtime[pandas]"],
)

# Environment definition that the training script will run in.
diabetes_env = Environment(name="diabetes-training-env")
diabetes_env.python.conda_dependencies = training_dependencies

# Compute cluster that will execute the runs (looked up by name).
target = ws.compute_targets["cpu-sm-cluster"]

In [None]:
from azureml.core import ScriptRunConfig

# Describe how training.py is launched: which folder to upload, which
# environment to run in, and which compute target to run on.
# Note that you don't specify the --alpha argument.
script = ScriptRunConfig(
    compute_target=target,
    environment=diabetes_env,
    source_directory="diabetes-training",
    script="training.py",
)

In [None]:
from azureml.train.hyperdrive import HyperDriveConfig
from azureml.train.hyperdrive import (
    RandomParameterSampling, uniform, PrimaryMetricGoal
)

# Search space: 'alpha' is drawn from a uniform distribution between
# 0.00001 and 0.1 for every child run.
alpha_search_space = {"alpha": uniform(0.00001, 0.1)}
param_sampling = RandomParameterSampling(alpha_search_space)

# HyperDrive launches up to 20 child runs (at most 4 at a time) and ranks them
# by the "nrmse" metric, where lower is better.
hd_config = HyperDriveConfig(
    run_config=script,
    hyperparameter_sampling=param_sampling,
    primary_metric_name="nrmse",
    primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
    max_total_runs=20,
    max_concurrent_runs=4,
)

In [None]:
from azureml.core import Experiment

# Submit the HyperDrive configuration under a dedicated experiment and block
# until every child run has finished, streaming the output into the notebook.
experiment = Experiment(workspace=ws, name="chapter09-hyperdrive")
hyperdrive_run = experiment.submit(hd_config)
hyperdrive_run.wait_for_completion(show_output=True)

In [None]:
# Ask HyperDrive for the child run that scored best on the primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # get_best_run_by_primary_metric() returns None when no child run logged
    # the primary metric; fail with a clear message instead of an opaque
    # AttributeError on the next line.
    raise RuntimeError(
        "No child run reported the primary metric 'nrmse'; "
        "check the child run statuses and the training script's logging."
    )

best_run_metrics = best_run.get_metrics(name='nrmse')
# The run definition's argument list carries the sampled hyperparameters.
# Index 1 is expected to be the value that follows the '--alpha' flag.
parameter_values = best_run.get_details()[
                        'runDefinition']['arguments']

print('Best Run Id: ', best_run.id)
print('- NRMSE:', best_run_metrics['nrmse'])
print('- alpha:', parameter_values[1])

### Using the early termination policy

In [None]:
from azureml.core import Workspace, ScriptRunConfig, Environment

# Reconnect to the workspace and look up the compute cluster by name.
ws = Workspace.from_config()
target = ws.compute_targets["cpu-sm-cluster"]

# Fetch the environment registered in the workspace as "AzureML-Minimal".
minimal_env = Environment.get(ws, "AzureML-Minimal")

# Launch configuration for the script used to demonstrate early termination.
script = ScriptRunConfig(
    compute_target=target,
    environment=minimal_env,
    source_directory="termination-policy-training",
    script="training.py",
)

In [None]:
from azureml.train.hyperdrive import (
    GridParameterSampling,    
    choice,
    MedianStoppingPolicy,
    HyperDriveConfig,
    PrimaryMetricGoal
)
# Grid search tries every (a, b) pair: 4 x 4 = 16 combinations.
grid_values = (1, 2, 3, 4)
param_sampling = GridParameterSampling(
    {
        "a": choice(*grid_values),
        "b": choice(*grid_values),
    }
)

# Apply the median stopping policy at every interval, but only after the
# first 5 intervals have been reported.
early_termination_policy = MedianStoppingPolicy(
    delay_evaluation=5,
    evaluation_interval=1,
)

# More aggressive alternative
# from azureml.train.hyperdrive import TruncationSelectionPolicy
# early_termination_policy = TruncationSelectionPolicy(
#    truncation_percentage=50, evaluation_interval=1
#)

# max_total_runs is an upper bound; the grid above defines 16 combinations.
hd_config = HyperDriveConfig(
    run_config=script,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name="fake_metric",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=50,
    max_concurrent_runs=4,
)

In [None]:
from azureml.core import Experiment
# Submit the early-termination HyperDrive configuration and wait for all
# child runs to finish, streaming the output into the notebook.
experiment = Experiment(workspace=ws, name="chapter09-hyperdrive")
hyperdrive_run = experiment.submit(hd_config)
hyperdrive_run.wait_for_completion(show_output=True)

## Running AutoML experiments via code

In [None]:
from azureml.core import Workspace, Dataset
from azureml.train.automl import AutoMLConfig

# Connect to the workspace and look up the compute cluster by name.
ws = Workspace.from_config()
compute_target = ws.compute_targets["cpu-sm-cluster"]

# Registered dataset that provides the training data.
diabetes_dataset = Dataset.get_by_name(workspace=ws, name='diabetes')

# 80/20 random split; the fixed seed keeps the partitions repeatable.
train_ds, validate_ds = diabetes_dataset.random_split(
    percentage=0.8,
    seed=1337,
)

# Report how many rows ended up in each partition.
train_rows = len(train_ds.to_pandas_dataframe().index)
print(f"Train: {train_rows} rows")
validate_rows = len(validate_ds.to_pandas_dataframe().index)
print(f"Validate: {validate_rows} rows")

# AutoML regression on the "target" column, scored by normalized RMSE on the
# validation split. The search is capped at 4 iterations and 0.25 hours.
experiment_config = AutoMLConfig(
    task="regression",
    primary_metric='normalized_root_mean_squared_error',
    training_data=train_ds,
    validation_data=validate_ds,
    label_column_name="target",
    compute_target=compute_target,
    experiment_timeout_hours=0.25,
    iterations=4,
)

In [None]:
from azureml.core.experiment import Experiment

# Submit the AutoML configuration under its own experiment; show_output=True
# streams progress into the notebook while the run executes.
my_experiment = Experiment(workspace=ws, name='chapter09-automl-experiment')
run = my_experiment.submit(experiment_config, show_output=True)

In [None]:
# To retrieve a previous run 
# from azureml.train.automl.run import AutoMLRun
# run = AutoMLRun(my_experiment, run_id = 'AutoML_80833402-6e7e-4c25-b6aa-b6fd44d75d09')

# get_output() returns a (best child run, fitted model) pair. Call it once and
# reuse the result: every call repeats the lookup and model retrieval.
best_output = run.get_output()

# Get best run and model
best_run, best_model = best_output
# Or with index
best_run = best_output[0]
best_model = best_output[1]


In [None]:
# Scaling/normalization and model algorithm
# Shows the steps of the fitted model (presumably a scikit-learn style
# pipeline; confirm). Kept as the cell's last expression so the notebook
# displays the value.
best_model.steps

In [None]:
# Automated feature engineering
# Use 'timeseriestransformer' for task='forecasting',
# else use 'datatransformer' for 'regression' or 'classification' task.
data_transformer = best_model.named_steps['datatransformer']

# Summary of the featurization applied by the transformer step.
print(data_transformer.get_featurization_summary())

# Names of the features produced by the transformer step.
feature_names = data_transformer.get_engineered_feature_names()
print("Engineered feature names:")
print(feature_names)