## 

In [17]:
# Load the workspace information from config.json using the Azure ML SDK
from azureml.core import Workspace

ws = Workspace.from_config()
ws.name

'DataScience'

In [18]:
# Next, grab our dataset from Azure. 
from azureml.core import Dataset

ds = Dataset.get_by_name(workspace=ws, name='NHL-Penalties-2020') # This is the version we registered in the last notebook
print(ds.name + ' v' + str(ds.version) + ': ' + ds.description)

# Display the data structure here for verification
ds.to_pandas_dataframe().head()

NHL-Penalties-2020 v6: A breakdown of penalty minutes per game matchup


Unnamed: 0,penaltyMinutes,type,homeTeam,awayTeam
0,12.0,R,Maple Leafs,Canadiens
1,12.0,R,Maple Leafs,Canadiens
2,76.0,R,Maple Leafs,Canadiens
3,12.0,R,Maple Leafs,Canadiens
4,10.0,R,Maple Leafs,Canadiens


In [19]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Now let's make sure we have a compute resource created
cluster_name = "Low-End-Compute-Cluster"
max_nodes = 4

# Fetch or create the compute resource
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cluster_name) # This will throw a ComputeTargetException if this doesn't exist
    print('Using existing compute: ' + cluster_name)
except ComputeTargetException:
    # Create the cluster
    print('Provisioning cluster...')
    compute_config = AmlCompute.provisioning_configuration(vm_size="Standard_D2DS_V4", min_nodes=0, max_nodes=max_nodes)
    cpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)

# Ensure the cluster is ready to go
cpu_cluster.wait_for_completion(show_output=True)

Using existing compute: Low-End-Compute-Cluster
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [20]:
# Create the configuration for the experiment
from azureml.train.automl import AutoMLConfig

# See https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.automlconfig.automlconfig?view=azure-ml-py for details
automl_config = AutoMLConfig(
        task='regression',                                  # regression, classification, or forecasting
        training_data=ds,                                   # The data to use to train the model
        label_column_name='penaltyMinutes',                 # The column we're trying to predict
        n_cross_validations=3,                              # How many cross-validation sets to use
        primary_metric='normalized_root_mean_squared_error',# The metric we use to compare model performance
        compute_target=cpu_cluster,                         # Where the experiment should be run
        max_concurrent_iterations=max_nodes,                # How many models can be trained simultaneously
        iterations=8,                                      # The total number of models to train
        iteration_timeout_minutes=5                         # The amount of time before giving up on a single model training run
    )            

In [21]:
# List all available primary regression metrics, for reference
from azureml.train.automl.utilities import get_primary_metrics

get_primary_metrics('regression')

['normalized_mean_absolute_error',
 'spearman_correlation',
 'normalized_root_mean_squared_error',
 'r2_score']

In [22]:
from azureml.core.experiment import Experiment

# Find or Create a Machine Learning Experiment in Azure Machine Learning Studio
experiment_name = 'NHL-Penalty-Minute-Prediction'
experiment=Experiment(ws, experiment_name)

In [23]:
from azureml.widgets import RunDetails

# Start running the experiment
run = experiment.submit(automl_config)

# Wait for the experiment to complete (displays active details about the run)
RunDetails(run).show()
run.wait_for_completion(show_output=False)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
NHL-Penalty-Minute-Prediction,AutoML_614033ad-c065-41ca-92ec-88caa0d21456,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

{'runId': 'AutoML_614033ad-c065-41ca-92ec-88caa0d21456',
 'target': 'Low-End-Compute-Cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-06-18T04:38:00.884406Z',
 'endTimeUtc': '2022-06-18T04:48:46.289632Z',
 'services': {},
 'properties': {'num_iterations': '8',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'normalized_root_mean_squared_error',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '3',
  'target': 'Low-End-Compute-Cluster',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"69058fd8-167e-4e66-9aa8-733590aa2e4c\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': 'False',
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'regression',
  'dependencies_versions': '{"azureml-widgets": "1.40.0", "azureml-training-tabular": "1.40.0", "azureml-train": "1.40.0", "azureml-train-restclients-hyperdrive": "1.40.0", "azureml-train-core": "1.40.0", "azureml-train-autom

In [28]:
# Grab the resulting model and best run
best_auto_run, automl_model = run.get_output()

# Display details about the best run
print('Best Run: ' + str(best_auto_run.id))
RunDetails(best_auto_run).show()

Package:azureml-automl-runtime, training version:1.42.0.post1, current version:1.40.0
Package:azureml-core, training version:1.42.0, current version:1.40.0
Package:azureml-dataprep, training version:4.0.3, current version:3.0.2
Package:azureml-dataprep-rslex, training version:2.6.3, current version:2.4.2
Package:azureml-dataset-runtime, training version:1.42.0, current version:1.40.0
Package:azureml-defaults, training version:1.42.0, current version:1.40.0
Package:azureml-interpret, training version:1.42.0, current version:1.40.0
Package:azureml-mlflow, training version:1.42.0, current version:1.40.0
Package:azureml-pipeline-core, training version:1.42.0, current version:1.40.0
Package:azureml-telemetry, training version:1.42.0, current version:1.40.0
Package:azureml-train-automl-client, training version:1.42.0.post1, current version:1.40.0
Package:azureml-train-automl-runtime, training version:1.42.0.post1, current version:1.40.0.post1
Package:azureml-train-core, training version:1.42

Best Run: AutoML_614033ad-c065-41ca-92ec-88caa0d21456_6


_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [29]:
# Register the model in Azure
best_auto_run.register_model(
    model_name='NHL-Penalties-AutoML', 
    model_path='outputs/model.pkl', 
    description='NHL Game Penalty Prediction Best AutoML Run')

INFO:interpret_community.common.explanation_utils:Using default datastore for uploads


Model(workspace=Workspace.create(name='DataScience', subscription_id='efba8785-116c-4443-9a05-764c75c7bb0d', resource_group='datascience'), name=NHL-Penalties-AutoML, id=NHL-Penalties-AutoML:5, version=5, tags={}, properties={})

INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads


In [35]:
best_auto_run.download_files(output_directory='automl-output')

INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
