# 4.- Azure ML Resources

In [68]:
import yaml

from azure.identity import DefaultAzureCredential

from azure.mgmt.resource import ResourceManagementClient

from azure.core.exceptions import ResourceNotFoundError

from azure.ai.ml import MLClient
from azure.ai.ml.entities import Workspace
from azure.ai.ml.entities import Environment, BuildContext

from azure.ai.ml.entities import AmlCompute

## Define Variables

In [69]:
# Load configuration from the YAML file.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open("../config.yaml", "r", encoding="utf-8") as file:
    # safe_load is the restricted loader, so the config file cannot trigger
    # arbitrary object construction.
    config = yaml.safe_load(file)

In [70]:
# All settings used by this notebook live under the "azure" section of the config;
# bind that section once and read each key from it.
_azure_settings = config["azure"]

location = _azure_settings["location"]
subscription_id = _azure_settings["subscription_id"]
resource_group_name = _azure_settings["resource_group_name"]
workspace_name = _azure_settings["workspace_name"]
environment_name = _azure_settings["environment_name"]
training_gpu_cluster = _azure_settings["training_gpu_cluster"]
compute_name = _azure_settings["compute_name"]

## Azure Authentication

In [71]:
# Build the credential object that is handed to every Azure client created below
# (the resource management client and both MLClient instances).
credential = DefaultAzureCredential()

## Resource Group

In [72]:
# Resource management client for the configured subscription; it is used below
# to look up or create the resource group.
resource_client = ResourceManagementClient(
    credential=credential,
    subscription_id=subscription_id,
)

In [73]:
def create_resource_group(resource_client, resource_group_name, location):
    """
    Return the named Azure resource group, creating it first if it does not exist.

    The group is looked up by name. Only when that lookup raises
    ResourceNotFoundError is a new group created in `location`. Any other
    failure, including a failure of the creation call itself, is printed and
    reported to the caller as None instead of being raised.

    Parameters:
        resource_client: Client exposing `resource_groups.get` and
            `resource_groups.create_or_update`.
        resource_group_name (str): The name of the resource group.
        location (str): The Azure region used when the group has to be created.
            It is not used when the group already exists.

    Returns:
        The resource group object if successful, or None if an error occurs.
    """
    try:
        try:
            # Try to get the resource group
            resource_group = resource_client.resource_groups.get(resource_group_name)
            print(f"Resource Group '{resource_group_name}' already exists in '{resource_group.location}'.")
        except ResourceNotFoundError:
            # The creation call is nested inside the outer try on purpose: an
            # exception raised in an `except` body is not seen by sibling
            # handlers of the same `try`, so without the nesting a failed
            # creation would escape instead of yielding the documented None.
            resource_group = resource_client.resource_groups.create_or_update(
                resource_group_name,
                {"location": location},
            )
            print(f"Resource Group '{resource_group_name}' created in '{resource_group.location}'.")
    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        print(f"An error occurred: {e}")
        return None
    return resource_group


In [None]:
# Make sure the resource group exists. The helper returns None when it
# reported an error, so check the printed output before continuing.
resource_group = create_resource_group(
    resource_client=resource_client,
    resource_group_name=resource_group_name,
    location=location,
)

## Workspace

In [None]:
# Client bound to the subscription and resource group only (no workspace yet);
# it is used below to look up or create the workspace.
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group_name,
)

In [76]:
def create_workspace(ml_client, workspace_name, location):
    """
    Return the named Azure ML workspace, creating it first if it does not exist.

    The workspace is looked up by name. Only when that lookup raises
    ResourceNotFoundError is a new workspace created in `location`; the call
    then blocks until the creation operation has finished. Any other failure,
    including a failure while creating the workspace, is printed and reported
    to the caller as None instead of being raised.

    Parameters:
        ml_client: MLClient
            Client exposing `workspaces.get` and `workspaces.begin_create`.
        workspace_name (str):
            The name of the workspace.
        location (str):
            The Azure region used when the workspace has to be created.
            It is not used when the workspace already exists.

    Returns:
        The workspace object if successful, or None if an error occurs.
    """
    try:
        try:
            # Try to get the existing Workspace
            workspace = ml_client.workspaces.get(workspace_name)
            print(f"Workspace '{workspace_name}' already exists in '{workspace.location}'.")
        except ResourceNotFoundError:
            # Creation is nested inside the outer try on purpose: an exception
            # raised in an `except` body is not seen by sibling handlers of the
            # same `try`, so without the nesting a failed creation would escape
            # instead of yielding the documented None.
            workspace_poller = ml_client.workspaces.begin_create(
                Workspace(name=workspace_name, location=location)
            )
            workspace = workspace_poller.result()  # Wait for the operation to complete
            print(f"Workspace '{workspace_name}' created in '{workspace.location}'.")
    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        print(f"An error occurred: {e}")
        return None
    return workspace



In [77]:
# Make sure the workspace exists. The helper returns None when it reported an error.
workspace = create_workspace(
    ml_client=ml_client,
    workspace_name=workspace_name,
    location=location,
)

Workspace 'machine_que_tal' already exists in 'eastus'.


In [None]:
# Rebind ml_client to a client that also carries the workspace name; the
# environment and compute cells below use this workspace-scoped client.
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group_name,
    workspace_name=workspace_name,
)

## Environment and Context

In [58]:
def create_and_verify_environment(ml_client, environment_name, docker_file_path):
    """
    Register an Azure ML environment built from a Docker context, unless one exists.

    The environment is looked up by name with the label "latest". If the lookup
    succeeds, a message is printed and nothing else happens. If the lookup
    raises any exception at all, the environment is treated as missing and a
    new one is submitted with the given Docker build context. A failure of that
    submission is printed, not raised.

    Parameters:
        ml_client (MLClient): Client exposing `environments.get` and
            `environments.create_or_update`.
        environment_name (str): The name of the environment.
        docker_file_path (str): Path handed to BuildContext as the build context.
            Despite the parameter name, this should be the directory that
            contains the Dockerfile, not the Dockerfile itself.

    Returns:
        None
    """
    try:
        # Only the success or failure of the lookup matters; the result is not used.
        ml_client.environments.get(name=environment_name, label="latest")
        print(f"Environment '{environment_name}' already exists. No need to recreate.")
        return
    except Exception:
        # Every lookup failure is interpreted as "not found".
        print(f"Environment '{environment_name}' not found. Creating a new one.")

    environment = Environment(
        build=BuildContext(path=docker_file_path),
        name=environment_name,
        description="Environment created from a Docker context.",
    )

    try:
        registered = ml_client.environments.create_or_update(environment)
        print(f"Environment '{registered.name}' created.")
    except Exception as error:
        print(f"Failed to create or update environment: {error}")


In [None]:
# Directory used as the Docker build context for the environment.
docker_file_path = "../docker"
create_and_verify_environment(
    ml_client=ml_client,
    environment_name=environment_name,
    docker_file_path=docker_file_path,
)

## Create a Compute Resource

In [79]:
def create_compute_resource(ml_client, cluster_name, size="STANDARD_D2_V3",
                            min_instances=0, max_instances=4,
                            idle_time_before_scale_down=180, tier="Dedicated"):
    """
    Creates or reuses an AMLCompute resource in Azure ML.

    The cluster is looked up by name. Only when the lookup raises
    ResourceNotFoundError is a new cluster created, and the call then blocks
    until the create operation has finished. The sizing arguments are used only
    on that creation path; an existing cluster is returned as it is.

    Parameters:
        ml_client (MLClient): The Azure ML client.
        cluster_name (str): The name of the compute cluster.
        size (str): The VM size (default is "STANDARD_D2_V3").
        min_instances (int): The minimum number of instances (default is 0).
        max_instances (int): The maximum number of instances (default is 4).
        idle_time_before_scale_down (int): Idle time in seconds before scaling down (default is 180).
        tier (str): The pricing tier, either "Dedicated" or "LowPriority" (default is "Dedicated").

    Returns:
        The compute resource: the existing one if found, otherwise the result
        of the completed create operation.

    Raises:
        Any lookup error other than ResourceNotFoundError, and any error from
        the create operation, is propagated to the caller.
    """
    try:
        # Check if the compute cluster already exists
        compute_resource = ml_client.compute.get(cluster_name)
        print(f"A cluster named '{cluster_name}' already exists; reusing it.")
    except ResourceNotFoundError:
        # Only a definite "not found" may trigger provisioning. Authentication,
        # permission or network failures must surface to the caller rather
        # than start the creation of a billable cluster.
        print("Creating a new GPU compute resource...")
        compute_resource = AmlCompute(
            name=cluster_name,
            type="amlcompute",
            size=size,
            min_instances=min_instances,
            max_instances=max_instances,
            idle_time_before_scale_down=idle_time_before_scale_down,
            tier=tier,
        )
        # Create the compute resource and wait until the operation completes
        compute_resource = ml_client.begin_create_or_update(compute_resource).result()

    # This summary line is printed for a reused cluster as well as a new one.
    print(f"AMLCompute resource '{compute_resource.name}' is created with size '{compute_resource.size}'.")
    return compute_resource


In [None]:
# Provision (or reuse) the training cluster.
# NOTE(review): the third positional parameter of create_compute_resource is
# `size`, so the config value `compute_name` is used as the VM size here.
# Confirm that this config key really holds a VM size and not a compute name.
create_compute_resource(
    ml_client,
    cluster_name=training_gpu_cluster,
    size=compute_name,
)