## Please note that our repo is cloned from this public repo https://github.com/Azure/azureml-examples/

## This notebook was test using Python 3.10 - SDK V2

In [None]:
!pip show azure-ai-ml
!pip install mltable

In [1]:
# import required libraries
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml import command, Input
from azure.ai.ml.entities import (
    AzureBlobDatastore,
    AzureFileDatastore,
    AzureDataLakeGen1Datastore,
    AzureDataLakeGen2Datastore,
)
from azure.ai.ml.entities import Environment

In [2]:
# Enter details of your AML workspace
subscription_id = "xxxxxxxxx"
resource_group = "aml-v2-book"
workspace = "aml2-ws"

In [3]:
# get a handle to the workspace
ml_client = MLClient(
    DefaultAzureCredential(), subscription_id, resource_group, workspace
)

## Create Blob storage datastore

In [4]:
from azure.ai.ml.entities import AzureBlobDatastore
from azure.ai.ml.entities import AccountKeyConfiguration
from azure.ai.ml import MLClient

# Create a datastore with account key

blob_datastore1 = AzureBlobDatastore(
    name="blobe_storage3",
    description="AML Datastore pointing to a blob storgae.",
    account_name="amlv2sa",
    container_name="datacontainer",
    credentials=AccountKeyConfiguration(
        account_key="xxxxxxxxx"
    ),
    
)
ml_client.create_or_update(blob_datastore1)

AzureBlobDatastore({'type': <DatastoreType.AZURE_BLOB: 'AzureBlob'>, 'name': 'blobe_storage3', 'description': 'AML Datastore pointing to a blob storgae.', 'tags': {}, 'properties': {}, 'id': '/subscriptions/dcfc206a-203b-4c00-a236-bdf576a37896/resourceGroups/aml-v2-book/providers/Microsoft.MachineLearningServices/workspaces/aml2-ws/datastores/blobe_storage3', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/sifakhra3/code/chapter 2', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7f089aaf8460>, 'credentials': {'type': 'account_key'}, 'container_name': 'datacontainer', 'account_name': 'amlv2sa', 'endpoint': 'core.windows.net', 'protocol': 'https'})

## Create a data asset

In [5]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# my_path must point to folder containing MLTable artifact (MLTable file + data
# Supported paths include:
# local: './<path>'
# blob:  'https://<account_name>.blob.core.windows.net/<container_name>/<path>'
# ADLS gen2: 'abfss://<file_system>@<account_name>.dfs.core.windows.net/<path>/'
# Datastore: 'azureml://datastores/<data_store_name>/paths/<path>'

my_path = './my_data/'

my_data = Data(
    path=my_path,
    type=AssetTypes.MLTABLE,
    description="description",
    name="titanic-mltable-sdk",
    version='13'
)

ml_client.data.create_or_update(my_data)

Data({'skip_validation': False, 'mltable_schema_url': None, 'referenced_uris': ['./titanic.csv'], 'type': 'mltable', 'is_anonymous': False, 'auto_increment_version': False, 'name': 'titanic-mltable-sdk', 'description': 'description', 'tags': {}, 'properties': {}, 'id': '/subscriptions/dcfc206a-203b-4c00-a236-bdf576a37896/resourceGroups/aml-v2-book/providers/Microsoft.MachineLearningServices/workspaces/aml2-ws/data/titanic-mltable-sdk/versions/13', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/sifakhra3/code/chapter 2', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f0898997370>, 'serialize': <msrest.serialization.Serializer object at 0x7f0898997490>, 'version': '13', 'latest_version': None, 'path': 'azureml://subscriptions/dcfc206a-203b-4c00-a236-bdf576a37896/resourcegroups/aml-v2-book/workspaces/aml2-ws/datastores/workspaceblobstore/paths/LocalUpload/4adfa06038e0a6069f8a0b7b940cddb8/my_data/', 'datastore': 

## Materializing MLTable artifact into pandas dataframe 

In [None]:
import mltable
tbl = mltable.load(uri="./my_data")
df = tbl.to_pandas_dataframe()
df

## Docker environment created for consuming MLTable.

In [6]:
from azure.ai.ml.entities import Environment

env_docker_conda = Environment(
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    conda_file="env-mltable.yml",
    name="mltable",
    description="Environment created for consuming MLTable.",
)

ml_client.environments.create_or_update(env_docker_conda)

Environment({'is_anonymous': False, 'auto_increment_version': False, 'name': 'mltable', 'description': 'Environment created for consuming MLTable.', 'tags': {}, 'properties': {}, 'id': '/subscriptions/dcfc206a-203b-4c00-a236-bdf576a37896/resourceGroups/aml-v2-book/providers/Microsoft.MachineLearningServices/workspaces/aml2-ws/environments/mltable/versions/11', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/sifakhra3/code/chapter 2', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f0898996bc0>, 'serialize': <msrest.serialization.Serializer object at 0x7f0898997bb0>, 'version': '11', 'latest_version': None, 'conda_file': {'channels': ['conda-forge'], 'dependencies': ['python=3.8', 'pip=21.2.4', {'pip': ['mltable', 'pandas==1.3.0']}], 'name': 'mltable-env'}, 'image': 'mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04', 'build': None, 'inference_config': None, 'os_type': 'Linux', 'arm_type': 'environment_version'

## Creating a compute cluster where the AML job is submitted to

In [7]:
from azure.ai.ml.entities import ComputeInstance, AmlCompute

compute_cluster = AmlCompute(
    name="cpu-cluster",
    type="amlcompute",
    size="STANDARD_DS3_v2",
    location="eatus",
    min_instances=0,
    max_instances=2,
    idle_time_before_scale_down=120,
)
ml_client.begin_create_or_update(compute_cluster)

<azure.core.polling._poller.LROPoller at 0x7f089aaf9060>

## Creating and submitting AML job

### for more information see https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-data-assets?tabs=Python-SDK#create-a-mltable-data-asset

In [10]:
from azure.ai.ml import command
from azure.ai.ml.entities import Data
from azure.ai.ml import Input
from azure.ai.ml.constants import AssetTypes


# Possible Paths for Data:
# Blob: https://<account_name>.blob.core.windows.net/<container_name>/<folder>/<file>
# Datastore: azureml://datastores/paths/<folder>/<file>
# Data Asset: azureml:<my_data>:<version>


# for example you can use either one of the following paths:
# inputs = {"input_data": Input(type=AssetTypes.MLTABLE, path="./my_data/")}
# or
inputs = {"input_data": Input(type=AssetTypes.MLTABLE, path="azureml:titanic-mltable-sdk:2")}

job = command(
    code=".",  # local path where the code is stored
    command="python read_data.py --input_data ${{inputs.input_data}}",
    inputs=inputs,
    environment=env_docker_conda,
    compute="cpu-cluster",
)

# submit the command
returned_job = ml_client.jobs.create_or_update(job)
# get a URL for the status of the job
returned_job.services["Studio"].endpoint

[32mUploading chapter 2 (0.09 MBs): 100%|██████████| 86455/86455 [00:00<00:00, 225024.44it/s]
[39m



'https://ml.azure.com/runs/boring_neck_jmc85444kw?wsid=/subscriptions/dcfc206a-203b-4c00-a236-bdf576a37896/resourcegroups/aml-v2-book/workspaces/aml2-ws&tid=72f988bf-86f1-41af-91ab-2d7cd011db47'