<a href="https://colab.research.google.com/github/FranciscoOcampoPredictiva/azureml_course/blob/main/Lecture_1_Automated_ML_in_Azure_Machine_Learning_SDK.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Step 1 - Installation and Setup

In [1]:
# Install azureml SDK package
! pip install -q azureml-sdk

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.3/35.3 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Importing the class
from azureml.core import Workspace



In [3]:
# Accessing the workspace and creating a workspace object
ws = Workspace.from_config(path='/content/config.json')

# Step 2 - Accessing the input data

In [4]:
input_ds = ws.datasets.get('Churn-Modelling-Data-SDK')

# Step 3 - Creating the compute cluster

In [5]:
cluster_name = 'azureml-sdk-cluster'

# Import the class AmlCompute
from azureml.core.compute import AmlCompute

# Provision the configuration using AmlCompute
if cluster_name not in ws.compute_targets:
  compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D11_V2',
                                                         max_nodes=2)
  cluster = AmlCompute.create(workspace=ws, name=cluster_name, provisioning_configuration=compute_config)
  cluster.wait_for_completion()
else:
  cluster = ws.compute_targets[cluster_name]

# Step 4 - Configuring the AutoML run

In [6]:
# Import the class AutoMLConfig
from azureml.train.automl import AutoMLConfig

# Create an object of the class AutoMLConfig
automl_config = AutoMLConfig(task='classification',
                             compute_target=cluster,
                             training_data=input_ds,
                             validation_size=0.3,
                             label_column_name='Exited',
                             primary_metric='accuracy',
                             iterations=10,
                             max_concurrent_iterations=2,
                             experiment_timeout_hours=0.5,
                             featurization='auto')

# Step 5 - Creating and submitting the experiment run

In [7]:
!pip install --upgrade azureml-core
!pip install --upgrade packaging


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting packaging
  Using cached packaging-23.1-py3-none-any.whl (48 kB)
Installing collected packages: packaging
  Attempting uninstall: packaging
    Found existing installation: packaging 23.0
    Uninstalling packaging-23.0:
      Successfully uninstalled packaging-23.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
azureml-core 1.51.0.post1 requires packaging<=23.0,>=20.0, but you have packaging 23.1 which is incompatible.[0m[31m
[0mSuccessfully installed packaging-23.1


In [8]:
# Import the class Experiment
from azureml.core.experiment import Experiment

# Create the experiment
new_exp = Experiment(workspace=ws, name='automl-experiment-sdk')

# Submit the experiment run
new_run = new_exp.submit(automl_config)
new_run.wait_for_completion(show_output=True)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
automl-experiment-sdk,AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


Experiment,Id,Type,Status,Details Page,Docs Page
automl-experiment-sdk,AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed fit featurizers and featurizing the dataset.
Current status: ModelSelection. Beginning model selection.

********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

********************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/Aut

{'runId': 'AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8',
 'target': 'azureml-sdk-cluster',
 'status': 'Completed',
 'startTimeUtc': '2023-06-25T23:37:33.705717Z',
 'endTimeUtc': '2023-06-25T23:47:23.662486Z',
 'services': {},
 'properties': {'num_iterations': '10',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0.3',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'azureml-sdk-cluster',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"23f4c02f-90a5-4f21-9470-ed8583e97e60\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': 'False',
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-automl-core": "1.51.0.post1", "azureml-core": "1.51.0.post1", "azureml-dataprep": "4.10.8", "azureml-dataprep-native": "38.0.0", "azureml-dataprep-rslex": "2.17.12", "azureml-dataset-runtime": "1.51.0", "azureml-p

# Step 6 - Getting the best model

In [9]:
new_run.get_best_child(metric='accuracy')

Experiment,Id,Type,Status,Details Page,Docs Page
automl-experiment-sdk,AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8_8,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


# Step 7 - Getting the metrics for all the runs

In [10]:
# Get the metrics for all the runs
for run in new_run.get_children():
    print("")
    print("Run ID : ", run.id)
    print(run.get_metrics('accuracy'))


Run ID :  AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8_9
{'accuracy': 0.8633333333333333}

Run ID :  AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8_8
{'accuracy': 0.8673333333333333}

Run ID :  AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8_7
{'accuracy': 0.832}

Run ID :  AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8_6
{'accuracy': 0.8466666666666667}

Run ID :  AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8_5
{'accuracy': 0.7986666666666666}

Run ID :  AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8_4
{'accuracy': 0.865}

Run ID :  AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8_worker_1
{}

Run ID :  AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8_3
{'accuracy': 0.8633333333333333}

Run ID :  AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8_2
{'accuracy': 0.703}

Run ID :  AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8_1
{'accuracy': 0.8573333333333333}

Run ID :  AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8_0
{'accuracy': 0.8626666666666667}

Run ID :  AutoML_dae9b76f-0e0d-40bd-813c-74a06b68c8a8_worker_0
{}

Run 