# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [None]:
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os, shutil

# Check core SDK version number.
# The `from azureml.core import ...` statements above do not bind the name
# `azureml`, so the package is imported explicitly before reading VERSION.
import azureml.core

print("SDK version:", azureml.core.VERSION)

## Dataset

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [None]:
# Load the workspace through Workspace.from_config().
ws = Workspace.from_config()

# Experiment that the runs in this notebook are attached to.
experiment_name = 'heart-failure-experiment'
experiment = Experiment(ws, experiment_name)

# Report the workspace identity, one field per line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

# Start an interactive logging run on the experiment.
run = experiment.start_logging()

In [None]:
# Name of the cluster to reuse or create.
# Compute names may contain only letters, digits and hyphens, and must be
# 2-16 characters long.
cluster_name = "hd-cluster"

try:
    # Look the cluster up first; ComputeTargetException routes to creation.
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
    # Wait for the create operation to complete, showing its output.
    compute_target.wait_for_completion(show_output=True)
else:
    print(f'{cluster_name} exists already')

# Print name, type and provisioning state of every compute target in the workspace.
for target_name, target in ws.compute_targets.items():
    print(target_name, target.type, target.provisioning_state)

## Hyperdrive Configuration

TODO: Explain the model you are using and the reason for choosing the different hyperparameters, termination policy and config settings.

In [None]:
# TODO: Create an early termination policy. This is not required if you are using Bayesian sampling.
# Early termination policy applied to the child runs of the sweep.
early_termination_policy = BanditPolicy(evaluation_interval=2, delay_evaluation=5, slack_factor=0.1)

# TODO: Create the different params that you will be using during training
# Active search space: the keys are the command-line flags given to the
# training script, the values are the distributions/choices to sample from.
param_sampling = RandomParameterSampling(
    {
        "--C": uniform(0.08, 0.1),
        "--max_iter": choice(25, 50, 100, 200),
    }
)

# Alternative search spaces, kept for reference only. Every line is commented
# out: previously only the first line of each block was, which left the
# remaining lines as a syntax error and prevented the cell from running.
#
# param_sampling = RandomParameterSampling( {
#     "--C": choice(0.5, 1, 1.5),
#     "--coef0": choice(0,1,2,3)
#     }
# )
#
# param_sampling = RandomParameterSampling( {
#     "--kernel": choice('linear', 'rbf', 'poly', 'sigmoid'),
#     "--C": choice(0.5, 1, 1.5)
#     }
# )

# Ensure ./training exists (no error if it is already there) and place a copy
# of the training script in it.
os.makedirs("./training", exist_ok=True)
shutil.copy('train.py', 'training')

# TODO: Create your estimator and hyperdrive config
# NOTE(review): source_directory is "./", not "./training", so the copy made
# above is not the entry script used here - confirm which one is intended.
estimator = SKLearn(source_directory="./",
                    entry_script='train.py',
                    compute_target=compute_target)

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
# NOTE(review): 'AUC_weighted' presumably has to match a metric name logged by
# train.py - verify against the training script.
hyperdrive_run_config = HyperDriveConfig(estimator=estimator,
                                         hyperparameter_sampling=param_sampling,
                                         policy=early_termination_policy,
                                         primary_metric_name='AUC_weighted',
                                         primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                         max_total_runs=20,
                                         max_concurrent_runs=4)

In [None]:
# TODO: Submit your experiment
# Submit the HyperDrive configuration to the experiment. The config object is
# named `hyperdrive_run_config` where it is created; the previous name
# `hyperdrive_config` was never defined and raised NameError.
hyperdrive_run = experiment.submit(config=hyperdrive_run_config)

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [None]:
# Display the RunDetails widget for the HyperDrive run.
run_details_widget = RunDetails(hyperdrive_run)
run_details_widget.show()

# Wait for the HyperDrive run to complete, showing its output in the cell.
hyperdrive_run.wait_for_completion(show_output=True)

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [None]:
# Fetch the best child run according to the primary metric, plus its metrics.
# (Uses `hyperdrive_run`, the name assigned at submission; the previous
# spelling `hyperDrive_run` was undefined.)
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()

print('Best Run Id: ', best_run.id)
#print(best_run_metrics)
# NOTE(review): the metric keys below presumably must match the names logged
# by train.py exactly - verify against the training script.
print('\n Accuracy:', best_run_metrics['Accuracy'])
print('\n Regularization Strength:', best_run_metrics['Regularization Strength'])
print('\n Max Iterations:', best_run_metrics['Max iterations'])

In [None]:
# Show the file names reported by the best run.
best_run_files = best_run.get_file_names()
print(best_run_files)

In [None]:
# Register the best run's model from the .pkl path under the name 'hyperdrive_model'.
# NOTE(review): presumably train.py writes outputs/hyperdrive_model.pkl - confirm.
pkl_model_tags = {'Training context':'Parameterized SKLearn Estimator', 'type': 'Classification'}
pkl_model_properties = {'AUC_weighted': best_run_metrics['AUC_weighted']}
model = best_run.register_model(
    model_name='hyperdrive_model',
    model_path='outputs/hyperdrive_model.pkl',
    tags=pkl_model_tags,
    properties=pkl_model_properties,
    description='Heart Failure Predictor',
)
# Trailing expression so the notebook displays the registered model.
model

In [None]:
# Register the best run's model from the .joblib path under the name 'hyperdrive_model'.
# NOTE(review): presumably train.py writes outputs/hyperdrive_model.joblib - confirm.
joblib_model_tags = {'Training context':'Parameterized SKLearn Estimator', 'type': 'Classification'}
joblib_model_properties = {'AUC_weighted': best_run_metrics['AUC_weighted']}
model = best_run.register_model(
    model_name='hyperdrive_model',
    model_path='outputs/hyperdrive_model.joblib',
    tags=joblib_model_tags,
    properties=joblib_model_properties,
    description='Heart Failure Predictor',
)
# Trailing expression so the notebook displays the registered model.
model

In [None]:
#compute_target.delete()