# 3.- Azure ML Resources

In [1]:
import os

import yaml
from azure.ai.ml.entities import Environment
from azure.mgmt.storage import StorageManagementClient
from azure.storage.blob import BlobServiceClient
from tqdm.auto import tqdm

In [2]:
# Read the project settings (the Azure subscription id is taken from here later on)
with open("../config.yaml") as config_file:
    config = yaml.safe_load(config_file)

## Create Resource Group

In [3]:
from azure.identity import DefaultAzureCredential
from azure.mgmt.resource import ResourceManagementClient

In [4]:
# Subscription comes from the config file; the credential is resolved by DefaultAzureCredential,
# which tries several sign-in mechanisms in turn (environment variables, managed identity, Azure CLI, ...)
subscription_id = config["azure"]["subscription_id"]
credential = DefaultAzureCredential()
resource_client = ResourceManagementClient(credential, subscription_id)

# Name and region of the resource group used by the rest of the notebook
resource_group_name = "satellite_object_detection_resource_group"
location = "eastus"

# Create the group, or update it when it is already there
resource_group_params = {"location": location}
resource_client.resource_groups.create_or_update(resource_group_name, resource_group_params)
print(f"Resource Group '{resource_group_name}' created in {location}")

DefaultAzureCredential failed to retrieve a token from the included credentials.
Attempted credentials:
	EnvironmentCredential: EnvironmentCredential authentication unavailable. Environment variables are not fully configured.
Visit https://aka.ms/azsdk/python/identity/environmentcredential/troubleshoot to troubleshoot this issue.
	ManagedIdentityCredential: ManagedIdentityCredential authentication unavailable, no response from the IMDS endpoint.
	SharedTokenCacheCredential: SharedTokenCacheCredential authentication unavailable. No accounts were found in the cache.
	AzureCliCredential: Please run 'az login' to set up an account
	AzurePowerShellCredential: Az.Account module >= 2.2.0 is not installed
	AzureDeveloperCliCredential: Azure Developer CLI could not be found. Please visit https://aka.ms/azure-dev for installation instructions and then,once installed, authenticate to your Azure account using 'azd auth login'.
To mitigate this issue, please refer to the troubleshooting guidelines 

ClientAuthenticationError: DefaultAzureCredential failed to retrieve a token from the included credentials.
Attempted credentials:
	EnvironmentCredential: EnvironmentCredential authentication unavailable. Environment variables are not fully configured.
Visit https://aka.ms/azsdk/python/identity/environmentcredential/troubleshoot to troubleshoot this issue.
	ManagedIdentityCredential: ManagedIdentityCredential authentication unavailable, no response from the IMDS endpoint.
	SharedTokenCacheCredential: SharedTokenCacheCredential authentication unavailable. No accounts were found in the cache.
	AzureCliCredential: Please run 'az login' to set up an account
	AzurePowerShellCredential: Az.Account module >= 2.2.0 is not installed
	AzureDeveloperCliCredential: Azure Developer CLI could not be found. Please visit https://aka.ms/azure-dev for installation instructions and then,once installed, authenticate to your Azure account using 'azd auth login'.
To mitigate this issue, please refer to the troubleshooting guidelines here at https://aka.ms/azsdk/python/identity/defaultazurecredential/troubleshoot.

## Create Workspace

In [2]:
from azureml.core import Workspace

In [None]:
# Azure ML workspace names are limited to 33 characters (alphanumerics, hyphens and underscores).
# The previous value, "satellite_object_detection_workspace_ml", is 39 characters long, so the
# service would reject it; a shorter name is used instead.
workspace_name = "satellite_objdet_workspace"
if not 3 <= len(workspace_name) <= 33:
    raise ValueError(f"Workspace name '{workspace_name}' must be 3-33 characters long")

# Create the workspace, or retrieve it when it already exists (exist_ok=True)
ws = Workspace.create(
    name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group_name,
    location=location,
    exist_ok=True
)
print(f"Workspace '{workspace_name}' created or retrieved successfully")

## Create Compute Cluster

In [3]:
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.exceptions import ComputeTargetException

In [None]:
compute_name = "gpu-cluster"
vm_size = "Standard_NC6"

try:
    # Look the cluster up first; a ComputeTargetException means it still has to be created
    compute_target = ComputeTarget(workspace=ws, name=compute_name)
except ComputeTargetException:
    print(f"Creating the cluster '{compute_name}'...")
    compute_config = AmlCompute.provisioning_configuration(vm_size=vm_size, max_nodes=4)
    compute_target = ComputeTarget.create(
        workspace=ws,
        name=compute_name,
        provisioning_configuration=compute_config,
    )
    # Block until provisioning has finished, streaming the progress into the cell output
    compute_target.wait_for_completion(show_output=True)
else:
    print(f"The cluster '{compute_name}' already exists")

## Upload Data to Datastore

In [16]:
from azureml.core import Datastore

In [18]:
# Local folder whose contents are uploaded to the workspace datastore
local_data_path = "../data"

In [None]:
# Push the local dataset into the workspace's default datastore, replacing files that already exist
datastore = ws.get_default_datastore()
datastore.upload(
    src_dir=local_data_path,
    target_path="object-detection-data",
    overwrite=True,
)
print("Data successfully uploaded to the datastore")

## Get Storage Account Keys

In [None]:
# Management-plane client used to read the storage account keys
storage_client = StorageManagementClient(
    credential=credential,
    subscription_id=subscription_id,
)

In [None]:
def get_storage_account_keys(storage_client, resource_group_name, account_name):
    """Return the access keys of a storage account as a ``{key_name: key_value}`` dict.

    Parameters:
    - storage_client: client exposing ``storage_accounts.list_keys(resource_group, account)``.
    - resource_group_name: resource group that contains the storage account.
    - account_name: name of the storage account whose keys are listed.

    Returns:
    dict mapping each key's ``key_name`` to its ``value``.
    """
    listing = storage_client.storage_accounts.list_keys(resource_group_name, account_name)

    keys_by_name = {}
    for entry in listing.keys:
        keys_by_name[entry.key_name] = entry.value
    return keys_by_name

In [None]:
# `storage_account_name` was never assigned anywhere in this notebook, so this cell raised a
# NameError on a fresh kernel. Take the storage account that backs the workspace's default
# datastore (the one the data was uploaded to earlier).
# NOTE(review): assumes the default datastore is a blob datastore exposing `account_name` — confirm.
storage_account_name = getattr(datastore, "account_name", None)

if storage_account_name:
    storage_keys = get_storage_account_keys(storage_client, resource_group_name, storage_account_name)
    print("Successfully retrieved the storage account keys.")
else:
    print("Failed to create or retrieve the storage account.")

## Clone the TensorFlow Model Repository

In [1]:
# Clone the TensorFlow models repository into ../external/models; the Dockerfile under
# research/object_detection in this checkout is what the environment build at the end of the notebook refers to
!git clone https://github.com/tensorflow/models.git ../external/models

Cloning into '../external/models'...
Updating files:  10% (395/3884)
Updating files:  11% (428/3884)
Updating files:  12% (467/3884)
Updating files:  13% (505/3884)
Updating files:  14% (544/3884)
Updating files:  15% (583/3884)
Updating files:  16% (622/3884)
Updating files:  17% (661/3884)
Updating files:  18% (700/3884)
Updating files:  19% (738/3884)
Updating files:  20% (777/3884)
Updating files:  20% (808/3884)
Updating files:  21% (816/3884)
Updating files:  22% (855/3884)
Updating files:  23% (894/3884)
Updating files:  24% (933/3884)
Updating files:  25% (971/3884)
Updating files:  26% (1010/3884)
Updating files:  27% (1049/3884)
Updating files:  28% (1088/3884)
Updating files:  29% (1127/3884)
Updating files:  29% (1162/3884)
Updating files:  30% (1166/3884)
Updating files:  31% (1205/3884)
Updating files:  32% (1243/3884)
Updating files:  33% (1282/3884)
Updating files:  34% (1321/3884)
Updating files:  35% (1360/3884)
Updating files:  36% (1399/3884)
Updating files:  37% (1

: 

## Upload the GitHub TensorFlow Model Repository

In [None]:
def upload_files_to_blob(account_name, account_key, container_name, source_folder):
    """Upload every file under ``source_folder`` to an Azure Blob Storage container.

    The container is created first if needed. Each file is stored under its path
    relative to ``source_folder``; existing blobs are overwritten. Individual upload
    failures do not stop the run: they are collected and summarised at the end.

    Parameters:
    - account_name: storage account name (used to build the blob endpoint URL).
    - account_key: credential passed to ``BlobServiceClient``.
    - container_name: target container.
    - source_folder: local directory that is walked recursively.

    Returns:
    None
    """
    account_url = f"https://{account_name}.blob.core.windows.net"
    blob_service_client = BlobServiceClient(account_url=account_url, credential=account_key)
    container_client = blob_service_client.get_container_client(container_name)

    try:
        container_client.create_container()
        print(f"Container '{container_name}' created.")
    except Exception as e:
        if "ContainerAlreadyExists" in str(e):
            print(f"Container '{container_name}' already exists.")
        else:
            print(f"Error creating container: {e}")

    files_to_upload = [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(source_folder)
        for file in files
    ]
    failed_uploads = []
    progress_bar = tqdm(files_to_upload)

    for file_path in progress_bar:
        # Blob names use "/" as the virtual-directory separator, whatever the local OS uses
        blob_path = os.path.relpath(file_path, start=source_folder).replace(os.sep, "/")
        blob_client = container_client.get_blob_client(blob_path)

        try:
            with open(file_path, "rb") as data:
                blob_client.upload_blob(data, overwrite=True)
        except Exception as e:
            # Keep going (best effort), but remember the failure so it is not lost
            # when the progress-bar description is overwritten by the next one
            failed_uploads.append((file_path, e))
            progress_bar.set_description(f"Failed {os.path.basename(file_path)}")

    if failed_uploads:
        max_reported = 20  # keep the cell output readable when many files fail
        print(f"{len(failed_uploads)} of {len(files_to_upload)} files failed to upload:")
        for failed_path, error in failed_uploads[:max_reported]:
            print(f"  {failed_path}: {error}")
        if len(failed_uploads) > max_reported:
            print(f"  ... and {len(failed_uploads) - max_reported} more")


In [None]:
# Connection details for the upload: the storage account, its 'key1' access key and the target container
account_name = storage_account_name
account_key = storage_keys["key1"]
container_name = "<CONTAINER_NAME>"
source_folder = "../external/"

upload_files_to_blob(
    account_name=account_name,
    account_key=account_key,
    container_name=container_name,
    source_folder=source_folder,
)

## Azure Blob Datastore

In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import AzureBlobDatastore

In [None]:
# Client bound to the workspace created earlier in this notebook
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group_name,
    workspace_name=workspace_name,
)

In [None]:
def create_datastore(datastore_name, account_name, container_name):
    """
    Register a blob datastore in the workspace unless one with that name can be fetched.

    The lookup goes through the module-level ``ml_client``. Any exception raised by
    the lookup is treated as "datastore not found" and triggers the registration.

    Parameters:
    - datastore_name: Name under which the datastore is looked up / registered.
    - account_name: Azure storage account that backs the datastore.
    - container_name: Blob container inside that storage account.

    Returns:
    None
    """
    try:
        # Only the success or failure of the lookup matters; the returned object is not used
        ml_client.datastores.get(datastore_name)
        print(f"Datastore '{datastore_name}' already exists.")
    except Exception:
        print(f"Datastore '{datastore_name}' not found. Creating new datastore.")

        # Describe the datastore, then register it in the workspace
        new_datastore = AzureBlobDatastore(
            name=datastore_name,
            description="Datastore for storing training data and other blobs",
            account_name=account_name,
            container_name=container_name,
        )
        ml_client.datastores.create_or_update(new_datastore)
        print(f"Datastore '{datastore_name}' has been created and registered.")

In [None]:
# Placeholder value: set the name the datastore should be registered under
datastore_name = "<DATASTORE_NAME>"

create_datastore(
    datastore_name=datastore_name,
    account_name=storage_account_name,
    container_name=container_name,
)

## Environment and Context

In [None]:
from azure.ai.ml.entities import Environment, BuildContext

In [None]:
def create_and_verify_environment(environment_name, account_name, container_name, repository_name, repository_docker_file_path):
    """
    Create an Azure ML environment from a Docker build context stored in blob storage.

    Nothing is created when an environment with that name can already be fetched
    through the module-level ``ml_client``; any exception raised by that lookup is
    treated as "environment not found". A failure while creating the environment
    is printed rather than raised.

    Parameters:
    - environment_name: Name of the environment to look up / create.
    - account_name: Storage account holding the build context.
    - container_name: Blob container holding the build context.
    - repository_name: Folder inside the container used as the build context root.
    - repository_docker_file_path: Dockerfile path handed to the build context.

    Returns:
    None
    """
    try:
        ml_client.environments.get(name=environment_name)
    except Exception:
        print(f"Environment '{environment_name}' not found. Creating a new one.")
    else:
        print(f"Environment '{environment_name}' already exists. No need to recreate.")
        return

    # The build context is the repository folder inside the blob container
    context_url = f"https://{account_name}.blob.core.windows.net/{container_name}/{repository_name}/"
    environment = Environment(
        build=BuildContext(
            dockerfile_path=repository_docker_file_path,
            path=context_url,
        ),
        name=environment_name,
        description="Environment created from a Docker context.",
    )

    try:
        registered = ml_client.environments.create_or_update(environment)
        print(f"Environment '{registered.name}' created.")
    except Exception as error:
        print(f"Failed to create or update environment: {error}")


In [None]:
# Folder of the uploaded repository inside the container, and the Dockerfile path used for the build
repository_name = "models"
repository_docker_file_path = "research/object_detection/dockerfiles/tf2/Dockerfile"
environment_name = "<ENVIRONMENT_NAME>"

create_and_verify_environment(
    environment_name=environment_name,
    account_name=account_name,
    container_name=container_name,
    repository_name=repository_name,
    repository_docker_file_path=repository_docker_file_path,
)