Kernel: Python 3.10 - SDK v2

### Imports

In [None]:
# import required libraries
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml import command, Input
from azure.ai.ml.entities import (
    AzureBlobDatastore,
    AzureFileDatastore,
    AzureDataLakeGen1Datastore,
    AzureDataLakeGen2Datastore,
    AccountKeyConfiguration
)

from azure.ai.ml.entities import Environment

In [None]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

In [None]:
from azure.identity import DefaultAzureCredential
from azure.keyvault.secrets import SecretClient

### Config

In [None]:
# Azure ML workspace coordinates (placeholders; replace with real values)
workspace = 'aml-XXXXX-XXXXX'
resource_group = 'rg-XXXXX-XXXXX'
subscription_id = 'XXXXX-XXXXX-XXXXX-XXXXX-XXXXX'

In [None]:
# Storage account and container backing the datastore
storage_account_name = 'XXXXX'
storage_container_name = 'demo17'
# Name under which the datastore is registered in the workspace
datastore_name = 'XXXXX_demo17'

In [None]:
# Name of the secret holding the storage account key
# (stored in the AzureML workspace key vault)
account_key_name = 'storagekey2'

In [None]:
# Data asset
# posixpath is used (rather than os.path) because `path` is a URI, which always
# uses '/' separators regardless of the host operating system.
import posixpath

data_asset_name = 'Australian_Vehicle_Prices'
data_asset_version = '1'
file_path = 'training/Australian Vehicle Prices.csv'
# Alternative: address the blob directly instead of going through the datastore
# path = f'wasbs://{storage_container_name}@{storage_account_name}.blob.core.windows.net/{file_path}'
path = f'azureml://datastores/{datastore_name}/paths/{file_path}'
# Parent folder of the file (everything before the last '/')
path_dir = posixpath.dirname(path)

print(f'path: {path}')
print(f'path_dir: {path_dir}')

### Get workspace handle

In [None]:
# Build the client used for every workspace operation in this notebook
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)
print('Workspace handle retrieved')

In [None]:
# Fetch the workspace entity; its `key_vault` attribute is read in the Key Vault cell
aml_workspace = ml_client.workspaces.get(name=workspace)
# To inspect the linked vault, evaluate: aml_workspace.key_vault

### Key Vault

In [None]:
# Read the storage account key from the workspace's key vault

# The vault name is the last '/'-separated segment of `aml_workspace.key_vault`
# (presumably an Azure resource ID -- confirm by inspecting the attribute).
key_vault_name = aml_workspace.key_vault.rsplit('/', 1)[-1]
# print(f'key_vault_name: {key_vault_name}')

# Vault endpoint derived from the vault name
key_vault_url = f'https://{key_vault_name}.vault.azure.net/'

# Client for reading secrets from that vault
secret_client = SecretClient(
    vault_url=key_vault_url,
    credential=DefaultAzureCredential(),
)

# Secret value only; avoid printing it so the key does not end up in cell output
storage_secret = secret_client.get_secret(account_key_name).value

### Create datastore
Account key

In [None]:
# Create (or update) a blob datastore that authenticates with the storage
# account key retrieved from Key Vault. This is NOT a credential-less
# datastore: the account key is passed via AccountKeyConfiguration.
blob_datastore = AzureBlobDatastore(
    name=datastore_name,
    description=datastore_name,
    account_name=storage_account_name,
    container_name=storage_container_name,
    credentials=AccountKeyConfiguration(account_key=storage_secret),
)

# Register the datastore in the workspace
ml_client.create_or_update(blob_datastore)
print(f'Datastore {datastore_name} created.')

### Create data asset

In [None]:
# already_exists_flag = False

# try:
#     data_asset = ml_client.data.get(name = data_asset_name, version = data_asset_version)
#     print(f'Data asset already exists, increment version')
#     already_exists_flag = True
# except Exception as e:  
#     # print(e)
#     print('Run cell below, the Data asset version does not exist')    
    
# print(f'already_exists_flag:{already_exists_flag}')

In [None]:
# # Register as URI_FILE
# if not already_exists_flag:
    
#     # Set the path, supported paths include:
#     # local: './<path>/<file>' (this will be automatically uploaded to cloud storage)
#     # blob:  'wasbs://<container_name>@<account_name>.blob.core.windows.net/<path>/<file>'
#     # ADLS gen2: 'abfss://<file_system>@<account_name>.dfs.core.windows.net/<path>/<file>'
#     # Datastore: 'azureml://datastores/<data_store_name>/paths/<path>/<file>'

#     # Define the Data asset object
#     data_asset = Data(
#         path=path,
#         type=AssetTypes.URI_FILE,
#         description=file_path,
#         name=data_asset_name,
#         version=data_asset_version,
#     )

#     # Create the data asset in the workspace
#     ml_client.data.create_or_update(data_asset)

#     print(f'Data asset {data_asset_name} created.')
# else:
#     print(f'Data asset {data_asset_name} with version {data_asset_version} already exists, skipped creation.')

In [None]:
# Check whether this version of the MLTable data asset is already registered.
# The flag is read by the registration cell to decide whether to create it.
already_exists_flag = False

try:
    data_asset = ml_client.data.get(
        name=f'{data_asset_name}_MLTable',
        version=data_asset_version,
    )
except Exception as exc:
    # Best-effort lookup: any failure is treated as "not registered", but the
    # exception class is reported so that an authentication or network error
    # is not silently mistaken for a missing asset.
    print('Run cell below, the Data asset version does not exist')
    print(f'(lookup raised {type(exc).__name__})')
else:
    already_exists_flag = True
    print('Data asset already exists, increment version')

print(f'already_exists_flag:{already_exists_flag}')

In [None]:
# Register the folder as an MLTable data asset unless that version already exists
if already_exists_flag:
    print(
        f'Data asset {data_asset_name}_MLTable with version {data_asset_version} '
        'already exists, skipped creation.'
    )
else:
    # Path forms accepted for a data asset:
    #   local:     './<path>/<file>' (this will be automatically uploaded to cloud storage)
    #   blob:      'wasbs://<container_name>@<account_name>.blob.core.windows.net/<path>/<file>'
    #   ADLS gen2: 'abfss://<file_system>@<account_name>.dfs.core.windows.net/<path>/<file>'
    #   datastore: 'azureml://datastores/<data_store_name>/paths/<path>/<file>'

    # Describe the asset: it points at the folder (path_dir), not the single file
    data_asset = Data(
        name=f'{data_asset_name}_MLTable',
        version=data_asset_version,
        type=AssetTypes.MLTABLE,
        path=path_dir,
        description=file_path,
    )

    # Submit the definition to the workspace
    ml_client.data.create_or_update(data_asset)

    print(f'Data asset {data_asset_name}_MLTable created.')