In [None]:
import os

import yaml
from azure.ai.ml import MLClient
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import Data
from azure.core.exceptions import ResourceNotFoundError
from azure.identity import DefaultAzureCredential

## Define Variables

In [15]:
# Load configuration from the YAML file.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of falling back to the locale's default encoding.
with open("../config.yaml", "r", encoding="utf-8") as file:
    # safe_load only builds plain Python objects (dicts, lists, scalars).
    config = yaml.safe_load(file)

In [16]:
# Look up each configuration section once, then read the individual keys.
azure_cfg = config["azure"]
train_cfg = config["train"]

# Workspace coordinates handed to MLClient.
# location = azure_cfg["location"]
subscription_id = azure_cfg["subscription_id"]
resource_group_name = azure_cfg["resource_group_name"]
workspace_name = azure_cfg["workspace_name"]

# Identity and description of the data asset to register.
data_asset_name = train_cfg["data_asset_name"]
version = train_cfg["version"]
description = train_cfg["description"]

## Azure Authentication

In [17]:
# Create the credential object that is passed to MLClient below.
# NOTE(review): DefaultAzureCredential resolves the actual auth source at
# runtime (see the azure-identity docs) — confirm the intended source is
# available in the environment where this notebook runs.
credential = DefaultAzureCredential()

In [18]:
# Client bound to the workspace identified by the values read from config.yaml.
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group_name,
    workspace_name=workspace_name,
)

Overriding of current TracerProvider is not allowed
Overriding of current LoggerProvider is not allowed
Overriding of current MeterProvider is not allowed
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented


In [19]:
def create_data_asset(ml_client, asset_name, version, description, asset_path, asset_type=AssetTypes.URI_FOLDER):
    """
    Return the data asset ``asset_name``/``version``, registering it first if it is missing.

    The workspace is queried for the requested name and version. If the lookup
    succeeds, a message is printed and the existing asset is returned unchanged.
    Only when the service reports that the asset does not exist is a new one
    registered from ``asset_path`` and then fetched back. Any other lookup
    failure (for example authentication, permission or network errors) is
    propagated to the caller rather than being treated as "asset not found".

    Parameters:
        ml_client (MLClient): An instance of the Azure ML client.
        asset_name (str): The name of the data asset.
        version (str): The version identifier for the data asset.
        description (str): A short description of the asset.
        asset_path (str): The path to the data to register.
        asset_type (optional): One of the AssetTypes constants.
                               Use AssetTypes.URI_FOLDER for a folder (default)
                               or AssetTypes.URI_FILE for a single file.

    Returns:
        Data: The existing or newly registered data asset object.

    Raises:
        Exception: Any error raised by the lookup other than
            ResourceNotFoundError, and any error raised while registering or
            re-fetching the asset.
    """
    try:
        # Look for an already-registered asset with this exact name and version.
        data_asset = ml_client.data.get(name=asset_name, version=version)
    except ResourceNotFoundError:
        # Only a genuine "not found" means we should register the asset;
        # every other error is left to surface to the caller.
        pass
    else:
        print(f"Data asset already exists. Name: {asset_name}, version: {version}")
        return data_asset

    # Build the asset definition only when it is actually needed.
    my_data = Data(
        name=asset_name,
        version=version,
        description=description,
        path=asset_path,
        type=asset_type
    )

    ml_client.data.create_or_update(my_data)
    print(f"Data asset created. Name: {asset_name}, version: {version}")
    # Retrieve and return the newly created asset as stored in the workspace.
    return ml_client.data.get(name=asset_name, version=version)

In [None]:
# Register (or fetch, if already present) the raw data folder as a data asset.
credit_card_asset = create_data_asset(
    ml_client=ml_client,
    asset_name=data_asset_name,
    version=version,
    description=description,
    asset_path="../data/raw",
)