In [7]:
# Place to install packages
%pip install azureml-sdk[notebooks]
%pip install joblib
%pip install azureml.train.automl.runtime


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Collecting azureml.train.automl.runtime
  Downloading azureml_train_automl_runtime-1.60.0-py3-none-any.whl.metadata (3.4 kB)
Collecting azure-storage-queue~=12.1 (from azureml.train.automl.runtime)
  Downloading azure_storage_queue-12.12.0-py3-none-any.whl.metadata (35 kB)
Collecting gensim==4.3.2 (from azureml.train.automl.runtime)
  Downloading gensim-4.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)
Collecting smart-open==6.4.0 (from azureml.train.automl.runtime)
  Downloading smart_open-6.4.0-py3-none-any.whl.metadata (21 kB)
Collecting pandas==1.5.3 (from azureml.train.automl.runtime)
  Downloading pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting scikit-learn~=1.5.1 (from azureml.train.automl.runtime)
  Downloading scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.m

In [1]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-284643
Azure region: eastus2
Subscription id: cdbe0b43-92a0-4715-838a-f2648cc7ad21
Resource group: aml-quickstarts-284643


In [11]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "comp-cluster-D2-V2"

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

# Verify that cluster does not exist already
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                            max_nodes=4)
    cpu_cluster: ComputeTarget = ComputeTarget.create(ws, cluster_name, compute_config)

cpu_cluster.wait_for_completion(show_output=True)


InProgress..
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
# Note: Commented out this import as the environment seems to be incompatable with this import
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Specify parameter sampler
ps = RandomParameterSampling({
    "C": uniform(0.01, 1.0),
    "max_iter": choice(50, 100, 150, 200)
})

# Specify a Policy
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1, delay_evaluation=5)

if "training" not in os.listdir():
    os.mkdir("./training")

# Setup environment for your training run
sklearn_env = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')

# Create a ScriptRunConfig Object to specify the configuration details of your training job
src = ScriptRunConfig(source_directory='./training',
                                    script='train.py',
                                    compute_target=cpu_cluster,
                                    environment=sklearn_env)

# Create a HyperDriveConfig using the src object, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=4,
    max_concurrent_runs=4
)

In [4]:
# Submit your hyperdrive run to the experiment and show run details with the widget.
hyperdrive_run = exp.submit(config=hyperdrive_config)
hyperdrive_run.wait_for_completion(show_output=True)

RunId: HD_b078c6f8-a83d-4e7f-a2fe-e8af1e6d1bdc
Web View: https://ml.azure.com/runs/HD_b078c6f8-a83d-4e7f-a2fe-e8af1e6d1bdc?wsid=/subscriptions/cdbe0b43-92a0-4715-838a-f2648cc7ad21/resourcegroups/aml-quickstarts-284643/workspaces/quick-starts-ws-284643&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

[2025-07-13T22:46:13.9213255Z][GENERATOR][DEBUG]Sampled 4 jobs from search space 
[2025-07-13T22:46:14.2442311Z][SCHEDULER][INFO]Scheduling job, id='HD_b078c6f8-a83d-4e7f-a2fe-e8af1e6d1bdc_0' 
[2025-07-13T22:46:14.3215727Z][SCHEDULER][INFO]Scheduling job, id='HD_b078c6f8-a83d-4e7f-a2fe-e8af1e6d1bdc_1' 
[2025-07-13T22:46:14.3225475Z][SCHEDULER][INFO]Scheduling job, id='HD_b078c6f8-a83d-4e7f-a2fe-e8af1e6d1bdc_3' 
[2025-07-13T22:46:14.3233633Z][SCHEDULER][INFO]Scheduling job, id='HD_b078c6f8-a83d-4e7f-a2fe-e8af1e6d1bdc_2' 
[2025-07-13T22:46:14.6962826Z][SCHEDULER][INFO]Successfully scheduled a job. Id='HD_b078c6f8-a83d-4e7f-a2fe-e8af1e6d1bdc_0' 
[2025-07-13T22:4

{'runId': 'HD_b078c6f8-a83d-4e7f-a2fe-e8af1e6d1bdc',
 'target': 'comp-cluster-D2-V2',
 'status': 'Completed',
 'startTimeUtc': '2025-07-13T22:46:12.387085Z',
 'endTimeUtc': '2025-07-13T22:48:14.035222Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name":"Accuracy","goal":"maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'a368f1c6-a47c-4d59-a4f7-ff2ed0648eaf',
  'user_agent': 'python/3.10.16 (Linux-6.8.0-1029-azure-x86_64-with-glibc2.35) msrest/0.7.1 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.60.0',
  'best_child_run_id': 'HD_b078c6f8-a83d-4e7f-a2fe-e8af1e6d1bdc_3',
  'score': '0.9078907435508344',
  'best_metric_status': 'Succeeded',
  'best_data_container_id': 'dcid.HD_b078c6f8-a83d-4e7f-a2fe-e8af1e6d1bdc_3'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'configuration': None,
  'attribution': None,
  'telemetryValues': {'amlClientType': 'azureml

# Efforts to fix issue with RunDetails not working.
I have been able to get the RunDetails import to work. but there is an issue with displaying the details. The browser console suggests there is an issue with the library in displaying the html components. I attempted to uninstall and reinstall the extension to no avail.

In [None]:
!jupyter nbextension list

In [None]:
!jupyter nbextension uninstall --py --user azureml.widgets
!jupyter nbextension uninstall --py --user azureml.train.widgets

In [None]:
!jupyter nbextension install --py --user azureml.widgets
!jupyter nbextension install --py --user azureml.train.widgets

In [5]:
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [8]:
# Get your best run and save the model from that run.
best_run = hyperdrive_run.get_best_run_by_primary_metric()

# Register the model in AzureML
best_run.register_model(model_name='best_hyperdrive_model', model_path='outputs/model.joblib')

Model(workspace=Workspace.create(name='quick-starts-ws-284643', subscription_id='cdbe0b43-92a0-4715-838a-f2648cc7ad21', resource_group='aml-quickstarts-284643'), name=best_hyperdrive_model, id=best_hyperdrive_model:1, version=1, tags={}, properties={})

In [3]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

ds = TabularDatasetFactory.from_delimited_files(
    path="https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv",
    separator=",",
    header=True
)

In [7]:
# Needed this version of scipy as the newest version is not compatable while running the notebook:
%pip install scikit-learn

Collecting scikit-learn
  Using cached scikit_learn-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (17 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Using cached scikit_learn-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.9 MB)
Using cached threadpoolctl-3.6.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, scikit-learn
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [scikit-learn][0m [scikit-learn]
[1A[2KSuccessfully installed scikit-learn-1.7.0 threadpoolctl-3.6.0
Note: you may need to restart the kernel to use updated packages.


In [4]:
from training.train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)

{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}


In [5]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric='accuracy',
    training_data=ds,
    label_column_name='loan',
    n_cross_validations=5)

In [6]:
# Submit your automl run
automl_run = exp.submit(config=automl_config)
automl_run.wait_for_completion(show_output=True)

Extension horovod.torch has not been built: /anaconda/envs/azureml_py38_PT_TF/lib/python3.10/site-packages/horovod/torch/mpi_lib_v2.cpython-310-x86_64-linux-gnu.so not found
If this is not expected, reinstall Horovod with HOROVOD_WITH_PYTORCH=1 to debug the build error.


2025-07-13 23:01:32.961327: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752447693.925675   26650 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752447694.208690   26650 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1752447696.740323   26650 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1752447696.740364   26650 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1752447696.740367   26650 computation_placer.cc:177] computation placer alr

Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_8eebacc2-2edc-4d1a-943f-4100b60c7b3c,automl,Preparing,Link to Azure Machine Learning studio,Link to Documentation


 - xgboost (current: uninstalled, required: xgboost<=1.5.2)
 - prophet (current: uninstalled, required: prophet==1.1.4)
 - pytorch-transformers (current: uninstalled, required: pytorch-transformers==1.0.0)
 - spacy (current: uninstalled, required: spacy==3.7.4)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.
2025-07-13:23:35:37,354 INFO     [explanation_client.py:334] Using default datastore for uploads


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_8eebacc2-2edc-4d1a-943f-4100b60c7b3c,automl,Completed,Link to Azure Machine Learning studio,Link to Documentation




********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+------------------------------+--------------------------------+--------------------------------------+
|Size of the smallest class    |Name/Label of the smallest class|Number of samples in the training data|
|782                           |unknown                         |32950                                 |
+------------------------------+--------------------------------+--------------------------------------+

********************************************************************

{'runId': 'AutoML_8eebacc2-2edc-4d1a-943f-4100b60c7b3c',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2025-07-13T23:02:00.085423Z',
 'endTimeUtc': '2025-07-13T23:34:07.682505Z',
 'services': {},
   'message': 'Experiment timeout reached, hence experiment stopped. Current experiment timeout: 0 hour(s) 30 minute(s)'}],
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'local',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"bdebf7e1-c7d4-4278-ab68-418856b7373c\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-pipeline-steps": "1.60.0", "azureml-pipeline-core": "1.60.0", "azureml-automl-core": "1.60.0", "azureml-train-restclients

In [8]:
# Retrieve and save your best automl model.
best_run, fitted_model = automl_run.get_output()
best_run.register_model(description='Best AutoML Model', model_name='best_automl_model', model_path='outputs/model.pkl')

Model(workspace=Workspace.create(name='quick-starts-ws-284643', subscription_id='cdbe0b43-92a0-4715-838a-f2648cc7ad21', resource_group='aml-quickstarts-284643'), name=best_automl_model, id=best_automl_model:1, version=1, tags={}, properties={})

In [12]:
# Decommission (delete) the compute cluster created earlier
cpu_cluster.delete()