## Predicting NHL Penalty Minutes with Azure Automated ML

In [3]:
# Load the workspace information from config.json using the Azure ML SDK.
# The dataset, compute, and experiment cells below all go through this `ws` handle.
from azureml.core import Workspace

ws = Workspace.from_config()
ws.name  # last expression in the cell, so the notebook echoes the workspace name

'DataScience'

In [4]:
# Next, grab our dataset from Azure and preview it.
from azureml.core import Dataset

ds = Dataset.get_by_name(workspace=ws, name='NHL-Penalties-2020') # This is the version we registered in the last notebook

# An f-string is used rather than '+' concatenation because the description is
# optional metadata: when it was never set it is None, and str + None raises TypeError.
print(f'{ds.name} v{ds.version}: {ds.description}')

# Display the data structure here for verification. take(5) restricts the preview
# to the first five records, so the full dataset is not pulled into pandas just
# to show its head.
ds.take(5).to_pandas_dataframe()

NHL-Penalties-2020 v6: A breakdown of penalty minutes per game matchup


Unnamed: 0,penaltyMinutes,type,homeTeam,awayTeam
0,12.0,R,Maple Leafs,Canadiens
1,12.0,R,Maple Leafs,Canadiens
2,76.0,R,Maple Leafs,Canadiens
3,12.0,R,Maple Leafs,Canadiens
4,10.0,R,Maple Leafs,Canadiens


In [5]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Settings for the training cluster (max_nodes is reused by the AutoML config below)
max_nodes = 4
cluster_name = "Low-End-Compute-Cluster"

# Reuse the cluster if the workspace already has one with this name; otherwise provision it
try:
    # Looking up a compute target that doesn't exist raises ComputeTargetException
    cpu_cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print('Provisioning cluster...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="Standard_D2DS_V4",
        min_nodes=0,
        max_nodes=max_nodes,
    )
    cpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print(f'Using existing compute: {cluster_name}')

# Block until the cluster is ready to go, echoing provisioning status as it arrives
cpu_cluster.wait_for_completion(show_output=True)

Using existing compute: Low-End-Compute-Cluster
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [7]:
# Describe the AutoML experiment: what to predict, how to score it, and where to run it
from azureml.train.automl import AutoMLConfig

# NOTE(review): the dataset preview above shows an 'Unnamed: 0' column that looks like a
# leftover row index; as written it is passed to AutoML as a feature -- confirm that is intended.
automl_settings = {
    # regression, classification, or forecasting
    'task': 'regression',
    # The data to use to train the model
    'training_data': ds,
    # The column we're trying to predict
    'label_column_name': 'penaltyMinutes',
    # How many cross-validation sets to use
    'n_cross_validations': 3,
    # The metric we use to compare model performance
    'primary_metric': 'normalized_mean_absolute_error',
    # Where the experiment should be run
    'compute_target': cpu_cluster,
    # How many models can be trained simultaneously (one per cluster node)
    'max_concurrent_iterations': max_nodes,
    # The total number of models to train
    'iterations': 5,
    # The amount of time before giving up on a single model training run
    'iteration_timeout_minutes': 5,
}

automl_config = AutoMLConfig(**automl_settings)

In [9]:
from azureml.core.experiment import Experiment
from azureml.widgets import RunDetails

# The experiment is the named container in Azure Machine Learning Studio that this run is filed under
experiment_name = 'NHL-Penalty-Minute-Prediction'
experiment = Experiment(workspace=ws, name=experiment_name)

# Kick off the AutoML run described by automl_config
run = experiment.submit(automl_config)

# Show the live details widget for the run, then block (without log output) until it completes
details_widget = RunDetails(run)
details_widget.show()
run.wait_for_completion(show_output=False)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
NHL-Penalty-Minute-Prediction,AutoML_4fdc25b3-3114-42ab-af1f-b3bff868bf4e,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [None]:
# get_output() is unpacked into the best child run and the model it produced
best_auto_run, automl_model = run.get_output()

# Report which run won and open its details widget
print(f'Best Run: {best_auto_run.id}')
RunDetails(best_auto_run).show()

In [None]:
# The metric we care about for evaluation. It is the same for every child run,
# so it is chosen once here instead of being reassigned on each loop iteration.
metric = 'r2_score'

# Get all child runs of the experiment (each `model` here is a child run object)
for model in run.get_children():

    # Get all metrics logged for this child run
    model_metrics = model.get_metrics()

    # Child runs that did not log the metric are skipped without comment
    if metric not in model_metrics:
        continue

    r2 = model_metrics[metric]

    # Display the run ID and its metric score
    print(f'{model.id} had a {metric} of {r2}')