# 3.- Azure ML Resources

In [6]:
import yaml
import os
from tqdm import tqdm

from azure.identity import DefaultAzureCredential

from azure.mgmt.resource import ResourceManagementClient

from azure.core.exceptions import ResourceExistsError, ResourceNotFoundError, HttpResponseError

from azure.ai.ml import MLClient
from azure.ai.ml.entities import Workspace
from azure.ai.ml.entities import Environment

from azure.mgmt.storage import StorageManagementClient

from azure.storage.blob import BlobServiceClient

## Define Variables

In [7]:
# Load configuration from the YAML file.
# The encoding is pinned so parsing does not depend on the platform's default
# locale encoding (which is not UTF-8 on many Windows setups).
with open("../config.yaml", "r", encoding="utf-8") as file:
    config = yaml.safe_load(file)

In [8]:
# Read the Azure deployment settings from the loaded configuration.
azure_cfg = config["azure"]
subscription_id = azure_cfg["subscription_id"]
resource_group_name = azure_cfg["resource_group_name"]
workspace_name = azure_cfg["workspace_name"]
location = azure_cfg["location"]
# Currently disabled settings:
# acr_name = config["azure"]["acr_name"]
# sku = config["azure"]["sku"]

## Azure Authentication

In [9]:
# Create the credential object handed to the Azure clients built in this notebook.
# DefaultAzureCredential is called without arguments, so how the identity is
# resolved is left entirely to the library's defaults.
credential = DefaultAzureCredential()

## Resource Group

In [10]:
# Initialize the Resource Management client for the configured subscription;
# it is used below to look up or create the resource group.
resource_client = ResourceManagementClient(credential, subscription_id)

In [11]:
def create_resource_group(resource_client, resource_group_name, location):
    """Return the named resource group, creating it when it does not exist.

    Args:
        resource_client: Client exposing ``resource_groups.get`` and
            ``resource_groups.create_or_update``.
        resource_group_name: Name of the resource group to look up or create.
        location: Location used only when a new resource group is created.

    Returns:
        The existing or newly created resource group, or ``None`` when the
        lookup fails with anything other than ``ResourceNotFoundError`` (the
        error is printed). Errors raised while creating the group propagate.
    """
    try:
        # Try to fetch the resource group first.
        found = resource_client.resource_groups.get(resource_group_name)
        print(f"Resource Group '{resource_group_name}' already exists in '{found.location}'.")
        return found
    except ResourceNotFoundError:
        # The resource group does not exist yet, so create it.
        made = resource_client.resource_groups.create_or_update(
            resource_group_name,
            {"location": location},
        )
        print(f"Resource Group '{resource_group_name}' created in '{made.location}'.")
        return made
    except Exception as err:
        # Any other lookup failure is reported and signalled with None.
        print(f"An error occurred: {err}")
        return None


In [12]:
# Ensure the resource group exists: reuse it when found, otherwise create it in `location`.
# The helper returns None when an unexpected lookup error was printed instead of raised.
resource_group = create_resource_group(resource_client, resource_group_name, location)

Resource Group 'test_group' already exists in 'eastus'.


## Workspace

In [13]:
# Client built from the subscription and resource group only (no workspace name is passed);
# it is used below to look up or create the workspace.
ml_client = MLClient(credential, subscription_id, resource_group_name)

In [14]:
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Workspace
from azure.core.exceptions import ResourceNotFoundError

def create_workspace(ml_client, workspace_name, location):
    """Return the named Azure ML workspace, creating it when it is missing.

    Args:
        ml_client: Client exposing ``workspaces.get`` and ``workspaces.begin_create``.
        workspace_name: Name of the workspace to look up or create.
        location: Location used only when a new workspace is created.

    Returns:
        The existing or newly created workspace, or ``None`` when the lookup
        fails with anything other than ``ResourceNotFoundError`` (the error is
        printed). Errors raised while creating the workspace propagate.
    """
    try:
        found = ml_client.workspaces.get(workspace_name)
        print(f"Workspace '{workspace_name}' already exists in '{found.location}'.")
        outcome = found
    except ResourceNotFoundError:
        # Missing workspace: start the creation and block until the poller finishes.
        creation = ml_client.workspaces.begin_create(
            Workspace(name=workspace_name, location=location)
        )
        outcome = creation.result()
        print(f"Workspace '{workspace_name}' created in '{outcome.location}'.")
    except Exception as err:
        # Any other lookup failure is reported and signalled with None.
        print(f"An error occurred: {err}")
        outcome = None
    return outcome


In [15]:
# Reuse the workspace if it exists, otherwise create it; None means a lookup error was printed.
workspace = create_workspace(ml_client, workspace_name, location)

Workspace 'machine_que_tal' already exists in 'eastus'.


## Get Workspace Storage Account Name

In [16]:
# The workspace reports its storage account as a "/"-separated path;
# the account name is the final segment.
storage_account_name = workspace.storage_account.rsplit('/', 1)[-1]
storage_account_name

'machineqstoragef44501b0a'

## Get Storage Account Keys

In [17]:
# Storage management client, used below to list the storage account's access keys.
storage_client = StorageManagementClient(credential, subscription_id)

In [18]:
def get_storage_account_keys(storage_client, resource_group_name, account_name):
    """Return the access keys of a storage account as a ``{key_name: value}`` dict.

    Args:
        storage_client: Client exposing ``storage_accounts.list_keys``.
        resource_group_name: Resource group passed to ``list_keys``.
        account_name: Storage account passed to ``list_keys``.

    Returns:
        dict mapping each returned key's ``key_name`` to its ``value``.
    """
    listing = storage_client.storage_accounts.list_keys(resource_group_name, account_name)
    keys_by_name = {}
    for entry in listing.keys:
        keys_by_name[entry.key_name] = entry.value
    return keys_by_name

In [19]:
# Fetch the access keys only when a storage account name is available.
if not storage_account_name:
    print("Failed to create or retrieve the storage account.")
else:
    storage_keys = get_storage_account_keys(storage_client, resource_group_name, storage_account_name)
    print("Successfully retrieved the storage account keys.")

Successfully retrieved the storage account keys.


## Clone the TensorFlow Model Repository

In [52]:
!git clone https://github.com/tensorflow/models.git ../external/models

Cloning into '../external/models'...
Updating files:  22% (861/3884)
Updating files:  23% (894/3884)
Updating files:  24% (933/3884)
Updating files:  25% (971/3884)
Updating files:  26% (1010/3884)
Updating files:  27% (1049/3884)
Updating files:  28% (1088/3884)
Updating files:  29% (1127/3884)
Updating files:  30% (1166/3884)
Updating files:  31% (1205/3884)
Updating files:  32% (1243/3884)
Updating files:  33% (1282/3884)
Updating files:  34% (1321/3884)
Updating files:  35% (1360/3884)
Updating files:  36% (1399/3884)
Updating files:  37% (1438/3884)
Updating files:  38% (1476/3884)
Updating files:  39% (1515/3884)
Updating files:  40% (1554/3884)
Updating files:  40% (1577/3884)
Updating files:  41% (1593/3884)
Updating files:  42% (1632/3884)
error: unable to create file official/projects/waste_identification_ml/circularnet-docs/themes/hugo-theme-techdoc/src/js/jquery.backtothetop/jquery.backtothetop.min.js: Filename too long
Updating files:  43% (1671/3884)
Updating files:  44% 

## Upload GitHub TensorFlow Model Repository

In [40]:
def upload_files_to_blob(account_name, account_key, container_name, source_folder):
    """Upload every file under ``source_folder`` to an Azure Blob container.

    The container is created when it does not exist yet. Each file is stored
    under its path relative to ``source_folder`` (always with forward slashes)
    and an existing blob with the same name is overwritten.

    Args:
        account_name: Storage account name, used to build the blob endpoint URL.
        account_key: Credential passed to ``BlobServiceClient``.
        container_name: Name of the target container.
        source_folder: Local directory that is walked recursively.

    Returns:
        None. Progress is shown with a tqdm bar; files that could not be
        uploaded do not abort the run and are summarised after the loop.
    """
    account_url = f"https://{account_name}.blob.core.windows.net"
    blob_service_client = BlobServiceClient(account_url=account_url, credential=account_key)
    container_client = blob_service_client.get_container_client(container_name)

    try:
        container_client.create_container()
        print(f"Container '{container_name}' created.")
    except ResourceExistsError:
        # Catch the typed SDK error instead of matching on the exception text.
        print(f"Container '{container_name}' already exists.")
    except Exception as e:
        print(f"Error creating container: {e}")

    files_to_upload = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(source_folder)
        for name in names
    ]
    progress_bar = tqdm(files_to_upload)
    failed_uploads = []

    for file_path in progress_bar:
        # Blob names use "/" as the virtual-directory delimiter, while
        # os.path.relpath returns "\"-separated paths on Windows.
        blob_path = os.path.relpath(file_path, start=source_folder).replace(os.sep, "/")
        blob_client = container_client.get_blob_client(blob_path)

        try:
            with open(file_path, "rb") as data:
                blob_client.upload_blob(data, overwrite=True)
        except Exception as e:
            # Keep going, but remember the failure: the bar description alone is
            # overwritten by the next failure and never shows the cause.
            progress_bar.set_description(f"Failed {os.path.basename(file_path)}")
            failed_uploads.append((file_path, e))

    if failed_uploads:
        max_reported = 10  # cap the listing so a total failure does not flood the output
        print(f"{len(failed_uploads)} of {len(files_to_upload)} files failed to upload:")
        for failed_path, error in failed_uploads[:max_reported]:
            print(f"  {failed_path}: {error}")
        if len(failed_uploads) > max_reported:
            print(f"  ... and {len(failed_uploads) - max_reported} more")


In [44]:
# Upload settings: authenticate with the storage account's "key1" access key and
# take everything under ../external/ (which includes the cloned repository) as the source.
account_name = storage_account_name
account_key = storage_keys['key1']
container_name = 'containerml171717'
source_folder = '../external/'

# Uploads every file found under source_folder; existing blobs are overwritten.
upload_files_to_blob(account_name, account_key, container_name, source_folder)

Container 'containerml171717' already exists.


100%|██████████| 3907/3907 [06:40<00:00,  9.76it/s]


## Azure Blob Datastore

In [49]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import AzureBlobDatastore

In [None]:
# Rebind ml_client to a client that also carries the workspace name;
# the datastore cells below use it.
ml_client = MLClient(credential, subscription_id, resource_group_name, workspace_name)

In [52]:


# Name under which the blob container is registered as a datastore.
# Defined here so the cell runs on a fresh kernel.
datastore_name = "my_blob_datastore_quetal"

# Check if the datastore already exists
try:
    existing_datastore = ml_client.datastores.get(datastore_name)
    print(f"Datastore '{datastore_name}' already exists.")
except ResourceNotFoundError:
    # Only a genuine "not found" triggers creation; any other failure
    # (authentication, API-version errors, ...) propagates instead of being
    # misreported as a missing datastore.
    print(f"Datastore '{datastore_name}' not found. Creating new datastore.")
    # Create a new datastore if it does not exist
    blob_datastore = AzureBlobDatastore(
        name=datastore_name,
        description="Datastore for storing training data and other blobs",
        account_name=storage_account_name,
        container_name=container_name,
    )

    # Register the datastore in the workspace
    ml_client.datastores.create_or_update(blob_datastore)
    print(f"Datastore '{datastore_name}' has been created and registered.")


Overriding of current TracerProvider is not allowed
Overriding of current LoggerProvider is not allowed
Overriding of current MeterProvider is not allowed
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented


Failed to register datastore: (InvalidApiVersionParameter) The api-version '2024-07-01-preview' is invalid. The supported versions are '2024-11-01,2024-08-01,2024-07-01,2024-06-01-preview,2024-03-01,2023-07-01,2023-07-01-preview,2023-03-01-preview,2022-12-01,2022-11-01-preview,2022-09-01,2022-06-01,2022-05-01,2022-03-01-preview,2022-01-01,2021-04-01,2021-01-01,2020-10-01,2020-09-01,2020-08-01,2020-07-01,2020-06-01,2020-05-01,2020-01-01,2019-11-01,2019-10-01,2019-09-01,2019-08-01,2019-07-01,2019-06-01,2019-05-10,2019-05-01,2019-03-01,2018-11-01,2018-09-01,2018-08-01,2018-07-01,2018-06-01,2018-05-01,2018-02-01,2018-01-01,2017-12-01,2017-08-01,2017-06-01,2017-05-10,2017-05-01,2017-03-01,2016-09-01,2016-07-01,2016-06-01,2016-02-01,2015-11-01,2015-01-01,2014-04-01-preview,2014-04-01,2014-01-01,2013-03-01,2014-02-26,2014-04'.
Code: InvalidApiVersionParameter
Message: The api-version '2024-07-01-preview' is invalid. The supported versions are '2024-11-01,2024-08-01,2024-07-01,2024-06-01-previ

In [None]:
# Pass the resource group *name*: `resource_group` holds the object returned by
# create_resource_group (or None), not the name string used by the other MLClient cells.
ml_client = MLClient(credential, subscription_id, resource_group_name, workspace_name)

In [60]:
# Show the workspace name currently in use.
workspace_name

'machine_que_tal'

In [61]:
# Client carrying the workspace name; used by the datastore registration below.
ml_client = MLClient(credential, subscription_id, resource_group_name, workspace_name)

Overriding of current TracerProvider is not allowed
Overriding of current LoggerProvider is not allowed
Overriding of current MeterProvider is not allowed
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented


In [62]:
# Create a datastore that points to the Blob Storage container holding the uploaded repository
blob_datastore = AzureBlobDatastore(
    name="my_blob_datastore_QUETAL",
    description="Datastore for storing training data and other blobs",
    account_name=storage_account_name,  # Name of the storage account
    container_name=container_name      # Name of the container the repository was uploaded to
)

In [63]:
# Register the datastore through the workspace client; the returned entity is
# displayed as the cell output.
ml_client.create_or_update(blob_datastore)

AzureBlobDatastore({'type': <DatastoreType.AZURE_BLOB: 'AzureBlob'>, 'name': 'my_blob_datastore_quetal', 'description': 'Datastore for storing training data and other blobs', 'tags': {}, 'properties': {}, 'print_as_yaml': False, 'id': '/subscriptions/6f68d13a-dffb-46b3-acb9-1630e75d6a0a/resourceGroups/test_group/providers/Microsoft.MachineLearningServices/workspaces/machine_que_tal/datastores/my_blob_datastore_quetal', 'Resource__source_path': '', 'base_path': 'c:\\Users\\walte\\Documents\\Projects\\GitHub\\My_Organizations\\Satellite-Imagery-WSC\\satellite-object-detection\\notebooks', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x0000017E74F691C0>, 'credentials': <azure.ai.ml.entities._credentials.NoneCredentialConfiguration object at 0x0000017E77BAF7F0>, 'container_name': 'containerml171717', 'account_name': 'machineqstoragef44501b0a', 'endpoint': 'core.windows.net', 'protocol': 'https'})