In [15]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential


from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import Data

from azure.ai.ml import Input


from dotenv import load_dotenv
import os
import shutil

from azure.ai.ml import automl
from azure.ai.ml.automl import ClassificationPrimaryMetrics




## Connect to the workspace
After the Python SDK is installed, you'll need to connect to the workspace. By connecting, you're authenticating your environment to interact with the workspace to create and manage assets and resources.

To authenticate, you need values for three required parameters:

- `subscription_id`: Your subscription ID.
- `resource_group`: The name of your resource group.
- `workspace_name`: The name of your workspace.

In [3]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Workspace coordinates are read from the environment rather than hardcoded.
subscription_id = os.getenv("AZURE_SUBSCRIPTION_ID")
resource_group = os.getenv("AZURE_RESOURCE_GROUP")
workspace_name = os.getenv("AZURE_WORKSPACE_NAME")

# os.getenv returns None for unset variables; fail here with a clear message
# instead of passing None on to MLClient.
_workspace_settings = {
    "AZURE_SUBSCRIPTION_ID": subscription_id,
    "AZURE_RESOURCE_GROUP": resource_group,
    "AZURE_WORKSPACE_NAME": workspace_name,
}
_missing_settings = [key for key, value in _workspace_settings.items() if not value]
if _missing_settings:
    raise ValueError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

try:
    credential = DefaultAzureCredential()
    # Check if the given credential can get a token successfully.
    credential.get_token("https://management.azure.com/.default")
except Exception:
    # Fall back to InteractiveBrowserCredential if DefaultAzureCredential does not work.
    credential = InteractiveBrowserCredential()

# Use the credential selected above so the interactive fallback actually takes
# effect (previously a fresh DefaultAzureCredential() was passed here).
ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)

# List available compute resources
for compute in ml_client.compute.list():
    print(compute.name, ":", compute.type)

# List all datastores in your workspace
for datastore in ml_client.datastores.list():
    print(f"Datastore: {datastore.name} (Type: {datastore.type})")

Overriding of current TracerProvider is not allowed
Overriding of current LoggerProvider is not allowed
Overriding of current MeterProvider is not allowed
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented


dvo-CPU-INS-DS11 : computeinstance
basic-ci-danja : computeinstance
Datastore: azureml_globaldatasets (Type: DatastoreType.AZURE_BLOB)
Datastore: workspaceblobstore (Type: DatastoreType.AZURE_BLOB)
Datastore: workspaceworkingdirectory (Type: DatastoreType.AZURE_FILE)
Datastore: workspaceartifactstore (Type: DatastoreType.AZURE_BLOB)
Datastore: workspacefilestore (Type: DatastoreType.AZURE_FILE)


In [5]:
# Reference version 1 of the registered "input-data-automl" asset as an MLTable input.
my_training_data_input = Input(
    path="azureml:input-data-automl:1",
    type=AssetTypes.MLTABLE,
)


In [11]:


# Folder that holds the source CSV file
base_dir = r"C:\Users\dvolf\python_projects\azure_dp_100_2025\dp100_ds_associate_azure\3_module_2_experiment_ml_flow"
csv_path = os.path.join(base_dir, "diabetes.csv")

# Layout of the MLTable asset: <mltable_dir>/MLTable plus <mltable_dir>/data/diabetes.csv
mltable_dir = os.path.join(base_dir, "diabetes_mltable")
data_dir = os.path.join(mltable_dir, "data")

# makedirs creates missing parents, so this also creates mltable_dir itself
os.makedirs(data_dir, exist_ok=True)

# Place a copy of the CSV (metadata preserved by copy2) next to the definition
csv_copy_path = os.path.join(data_dir, "diabetes.csv")
shutil.copy2(csv_path, csv_copy_path)

# MLTable definition: read the copied CSV as comma-delimited text with a header row
mltable_content = """paths:
  - file: ./data/diabetes.csv
transformations:
  - read_delimited:
      delimiter: ','
      encoding: 'utf8'
      header: all_files_same_headers
"""

mltable_file_path = os.path.join(mltable_dir, "MLTable")
with open(mltable_file_path, "w") as definition_file:
    definition_file.write(mltable_content)

# Describe the data asset, then register it in the workspace
diabetes_mltable = Data(
    name="diabetes_mltable",
    version="1",
    description="Diabetes dataset as MLTable format",
    type=AssetTypes.MLTABLE,
    path=mltable_dir,
)
registered_data = ml_client.data.create_or_update(diabetes_mltable)

print(f"Successfully registered MLTable asset: {registered_data.name}, version: {registered_data.version}")

Uploading diabetes_mltable (0.53 MBs): 100%|##########| 527915/527915 [00:00<00:00, 818611.78it/s]




Successfully registered MLTable asset: diabetes_mltable, version: 1


In [19]:
# Job input pointing at version 1 of the registered "diabetes_mltable" data asset
diabetes_data_input = Input(path="azureml:diabetes_mltable:1", type=AssetTypes.MLTABLE)

In [20]:
# Define the AutoML classification job: which compute to use, which data to
# train on, the column to predict, and the metric used to rank models.
classification_job = automl.classification(
    experiment_name="auto-ml-class-dev-danja",
    compute="dvo-CPU-INS-DS11",
    training_data=diabetes_data_input,
    target_column_name="Diabetic",
    primary_metric="accuracy",
    n_cross_validations=5,
    enable_model_explainability=True,
)

# Limits passed to set_limits: overall and per-trial timeouts (minutes),
# trial count, and early termination.
job_limits = {
    "timeout_minutes": 60,
    "trial_timeout_minutes": 20,
    "max_trials": 10,
    "enable_early_termination": True,
}
classification_job.set_limits(**job_limits)

In [23]:
# Send the configured AutoML job to the workspace; the returned object is used
# by the following cells to look the job up again.
returned_job = ml_client.jobs.create_or_update(classification_job)

In [24]:
# Studio link for following the submitted job in the browser
aml_url = returned_job.studio_url
print(f"Monitor your job at {aml_url}")

Monitor your job at https://ml.azure.com/runs/boring_watch_dqrqlhzm25?wsid=/subscriptions/6f83b388-7253-46ac-a2f7-300b7e3f313e/resourcegroups/pbi-dvo-rg/workspaces/dp100dvoML&tid=08b71322-e8b4-4d15-9892-339a33556b10


In [26]:
# Fetch the current state of the submitted AutoML job
job_detail = ml_client.jobs.get(returned_job.name)

if job_detail.status == "Completed":
    # Each child job of the AutoML parent represents one trial.
    all_child_jobs = ml_client.jobs.list(parent_job_name=returned_job.name)

    print("Models tried by AutoML:")
    for child_job in all_child_jobs:
        # display_name is the generated run name of the trial (e.g. "bold_pen_..."),
        # not the algorithm; skip children that have no display name.
        display_name = getattr(child_job, "display_name", None)
        if display_name:
            print(f"- {display_name}")
else:
    # Report the state explicitly instead of silently printing nothing.
    print(
        f"Job {job_detail.name} has not completed (status: {job_detail.status}); "
        "re-run this cell once it has finished."
    )

Models tried by AutoML:
- bold_pen_209zqns4
- khaki_clock_rghb07kf
- blue_box_rn5z4fwk
- sleepy_parsnip_b89z2v6d
- amusing_hat_f3dsf7t7
- coral_pumpkin_5zbcjqcd
- quiet_boniato_pgr5jzz5
- placid_bucket_wzkqyv12
- kind_plastic_cz9yctw7
- happy_nail_mvyq9v5q
- nice_quince_4fzzyk9n
- cyan_fennel_yq3g487n
- careful_watch_561g19wx


In [9]:
# NOTE: this cell fails with the MlException recorded below. `path` points at the
# CSV file itself, and for an MLTABLE asset the SDK looked for an `MLTable` file
# underneath that path (`...\diabetes.csv\MLTable`), which does not exist.
# NOTE(review): presumably `path` must be a folder that contains an MLTable
# definition file - confirm against the Azure ML data asset documentation.
diabetes_path = r"C:\Users\dvolf\python_projects\azure_dp_100_2025\dp100_ds_associate_azure\3_module_2_experiment_ml_flow\diabetes.csv"

# Data asset description; the path is uploaded on registration.
diabetes_mltable = Data(
    path=diabetes_path,
    type=AssetTypes.MLTABLE,  # MLTABLE rather than URI_FILE, although path is still a single CSV file
    description="Diabetes dataset as MLTable format",
    name="diabetes_mltable",
    version="1"
)

# Register the MLTable asset (this is the call that raises MlException)
registered_data = ml_client.data.create_or_update(diabetes_mltable)

MlException: 
[37m
[30m
1) One or more files or folders do not exist.
[39m[39m

Details: 

[31m(x) No such file or directory: C:\Users\dvolf\python_projects\azure_dp_100_2025\dp100_ds_associate_azure\3_module_2_experiment_ml_flow\diabetes.csv\MLTable[39m

Resolutions: 
1) Double-check the directory path you provided and enter the correct path.
If using the CLI, you can also check the full log in debug mode for more details by adding --debug to the end of your command

Additional Resources: The easiest way to author a yaml specification file is using IntelliSense and auto-completion Azure ML VS code extension provides: [36mhttps://code.visualstudio.com/docs/datascience/azure-machine-learning.[39m To set up VS Code, visit [36mhttps://learn.microsoft.com/azure/machine-learning/how-to-setup-vs-code[39m


## To create a URI file data asset, you can use the following code:



In [9]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Absolute location of the CSV to upload.
# Relative alternative: r'module_2_explore_azure_ml\module_2_3\diabetes.csv'
my_path = r"C:\Users\dvolf\python_projects\azure_dp_100_2025\dp100_ds_associate_azure\module_2_explore_azure_ml\module_2_3\diabetes.csv"

# URI_FILE asset: a single file registered under a name and version
my_data = Data(
    name="diabetes_csv",
    version="01",
    description="its my test csv data",
    type=AssetTypes.URI_FILE,
    path=my_path,
)

# Last expression of the cell, so the registered asset is shown as the output
ml_client.data.create_or_update(my_data)

Uploading diabetes.csv (< 1 MB): 0.00B [00:00, ?B/s] (< 1 MB): 100%|##########| 528k/528k [00:00<00:00, 1.07MB/s] (< 1 MB): 100%|##########| 528k/528k [00:00<00:00, 1.07MB/s]




Data({'path': 'azureml://subscriptions/6f83b388-7253-46ac-a2f7-300b7e3f313e/resourcegroups/pbi-dvo-rg/workspaces/dp100dvoML/datastores/workspaceblobstore/paths/LocalUpload/a01a5b9f954664cdfd935246b25e7f69/diabetes.csv', 'skip_validation': False, 'mltable_schema_url': None, 'referenced_uris': None, 'type': 'uri_file', 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'diabetes_csv', 'description': 'its my test csv data', 'tags': {}, 'properties': {}, 'print_as_yaml': False, 'id': '/subscriptions/6f83b388-7253-46ac-a2f7-300b7e3f313e/resourceGroups/pbi-dvo-rg/providers/Microsoft.MachineLearningServices/workspaces/dp100dvoML/data/diabetes_csv/versions/01', 'Resource__source_path': '', 'base_path': 'c:\\Users\\dvolf\\python_projects\\azure_dp_100_2025\\dp100_ds_associate_azure\\module_2_explore_azure_ml\\module_2_3', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x00000270417FF7D0>, 'serialize': <msrest.serialization.S

In [11]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Local folder to upload as a whole
my_path_folder = r'C:\Users\dvolf\python_projects\azure_dp_100_2025\dp100_ds_associate_azure\module_2_explore_azure_ml\module_2_3'

# URI_FOLDER asset: the folder registered under a name and version
my_data_folder = Data(
    name="diabetes-folder",
    version='02',
    description="my test folder with data",
    type=AssetTypes.URI_FOLDER,
    path=my_path_folder,
)

# Last expression of the cell, so the registered asset is shown as the output
ml_client.data.create_or_update(my_data_folder)

Uploading module_2_3 (0.53 MBs): 100%|##########| 534459/534459 [00:00<00:00, 812995.96it/s]




Data({'path': 'azureml://subscriptions/6f83b388-7253-46ac-a2f7-300b7e3f313e/resourcegroups/pbi-dvo-rg/workspaces/dp100dvoML/datastores/workspaceblobstore/paths/LocalUpload/e97a9cbc2d56aa5fed6747b53615fc7e/module_2_3/', 'skip_validation': False, 'mltable_schema_url': None, 'referenced_uris': None, 'type': 'uri_folder', 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'diabetes-folder', 'description': 'my test folder with data', 'tags': {}, 'properties': {}, 'print_as_yaml': False, 'id': '/subscriptions/6f83b388-7253-46ac-a2f7-300b7e3f313e/resourceGroups/pbi-dvo-rg/providers/Microsoft.MachineLearningServices/workspaces/dp100dvoML/data/diabetes-folder/versions/02', 'Resource__source_path': '', 'base_path': 'c:\\Users\\dvolf\\python_projects\\azure_dp_100_2025\\dp100_ds_associate_azure\\module_2_explore_azure_ml\\module_2_3', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x000002704223DEB0>, 'serialize': <msrest.seri