# Workspace preparation

In [2]:
from azure.ai.ml import command, Input, MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import AmlCompute, Environment, Data, AzureBlobDatastore, AccountKeyConfiguration
from azure.ai.ml.constants import AssetTypes
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, generate_blob_sas, BlobSasPermissions

from dotenv import load_dotenv

import os

In [3]:
load_dotenv()

SUBSCRIPTION_ID = os.environ.get("SUBSCRIPTION_ID")
RESOURCE_GROUP = os.environ.get("RESOURCE_GROUP")
WORKSPACE_NAME = os.environ.get("WORKSPACE_NAME")
DATASTORE_NAME = os.environ.get("DATASTORE_NAME")
ACCOUNT_NAME = os.environ.get("ACCOUNT_NAME")
CONTAINER_NAME = os.environ.get("CONTAINER_NAME")
COMPUTE_CLUSTER_NAME = os.environ.get("COMPUTE_NAME")
LOCATION = os.environ.get("LOCATION")
CONNECTION_KEY = os.environ.get("CONNECTION_KEY")
ACCOUNT_KEY= os.environ.get("ACCOUNT_KEY")
DATASET_NAME= os.environ.get("DATASET_NAME")

In [4]:
WORKSPACE_NAME

'aml-review-analysis-teamc1'

In [5]:
ml_client = MLClient(
    DefaultAzureCredential(),
    SUBSCRIPTION_ID,
    RESOURCE_GROUP,
    WORKSPACE_NAME
)

In [5]:
# create compute cluster
cluster_basic = AmlCompute(
    name=COMPUTE_CLUSTER_NAME, 
    type="aml-compute",
    size="STANDARD_D4s_v3",
    location=LOCATION,
    min_instances=0,
    max_instances=4,
    idle_time_before_scale_down=120,
    tier="dedicated"
)

ml_client.begin_create_or_update(cluster_basic)

<azure.core.polling._poller.LROPoller at 0x1de93019690>

In [9]:
# create environment if it doesn't exist
dependencies_dir = "./deps"
custom_job_env = Environment(
    name="keras-env",
    description="Environment for Tensorflow Experiments",
    tags={"keras" : "3.1.1"},
    conda_file=os.path.join(dependencies_dir, "conda.yml"),
    image="mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.1-cudnn8-ubuntu20.04",
    version="v7"
)

ml_client.create_or_update(custom_job_env)

Environment({'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'keras-env', 'description': 'Environment for Tensorflow Experiments', 'tags': {'keras': '3.1.1'}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': True, 'id': '/subscriptions/ce96fbca-fc23-466f-87e4-9b8cb5316116/resourceGroups/rg-review-analysis-teamc/providers/Microsoft.MachineLearningServices/workspaces/aml-review-analysis-teamc1/environments/keras-env/versions/v7', 'Resource__source_path': None, 'base_path': 'C:\\Users\\nohossat.traore\\OneDrive - Avanade\\Documents\\AA - Academy\\capstone_project\\review-analysis-teamc\\code\\aml', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x000001E75E1C8750>, 'serialize': <msrest.serialization.Serializer object at 0x000001E75E1D6FD0>, 'version': 'v7', 'latest_version': None, 'conda_file': {'channels': ['conda-forge'], 'dependencies': ['python=3.8', 'numpy=1.21.2', 'pi

In [7]:
# create container if it doesn't exist
try:
    blob_client = BlobServiceClient.from_connection_string(CONNECTION_KEY)
    container_client = blob_client.create_container(name=CONTAINER_NAME)
except Exception as e:
    print(e)

In [8]:
# create datastore if it doesn't exist
credentials=AccountKeyConfiguration(account_key=ACCOUNT_KEY)

store = AzureBlobDatastore(
    name="amazondatastore",
    description="Amazon Reviews datastore",
    account_name=ACCOUNT_NAME,
    container_name=CONTAINER_NAME,
    credentials=credentials
)

ml_client.create_or_update(store)

AzureBlobDatastore({'type': <DatastoreType.AZURE_BLOB: 'AzureBlob'>, 'name': 'amazondatastore', 'description': 'Amazon Reviews datastore', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/ce96fbca-fc23-466f-87e4-9b8cb5316116/resourceGroups/rg-review-analysis-teamc/providers/Microsoft.MachineLearningServices/workspaces/aml-review-analysis-teamc1/datastores/amazondatastore', 'Resource__source_path': None, 'base_path': 'C:\\Users\\nohossat.traore\\OneDrive - Avanade\\Documents\\AA - Academy\\capstone_project\\review-analysis-teamc\\code\\aml', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x000001DE9308C590>, 'credentials': {'type': 'account_key'}, 'container_name': 'amazonreviews', 'account_name': 'amlreviewanaly2790254259', 'endpoint': 'core.windows.net', 'protocol': 'https'})

In [9]:
# upload reviews dataset to container
try:
    blob_client = BlobServiceClient.from_connection_string(CONNECTION_KEY)
    container_client = blob_client.get_container_client(container=CONTAINER_NAME)
    with open(f"../../data/{DATASET_NAME}", "rb") as data: # here replace with the location to your data
        container_client.upload_blob(name=DATASET_NAME, data=data, overwrite=True)
except Exception as e:
    print(e)

In [7]:
store = ml_client.data.get(name="amazon_reviews_folder", version="1")

In [8]:
store.path

'azureml://subscriptions/ce96fbca-fc23-466f-87e4-9b8cb5316116/resourcegroups/rg-review-analysis-teamc/workspaces/aml-review-analysis-teamc1/datastores/amazondatastore/paths/'

In [10]:
# create Data Asset from Datastore
datastore_uri = f"azureml://subscriptions/{SUBSCRIPTION_ID}/resourcegroups/{RESOURCE_GROUP}/workspaces/{WORKSPACE_NAME}/datastores/{store.name}/paths/"

amazon_folder = Data(
    path=datastore_uri,
    type=AssetTypes.URI_FOLDER,
    description="amazon reviews folder",
    name="amazon_reviews_folder",
    version="1"
)

try:
    data_asset = ml_client.data.get(name="amazon_reviews_folder", version="1")
    print(
        f"Data asset already exists. Name: {data_asset.name}, version: {data_asset.version}"
    )
except Exception as e:
    print(e)
    ml_client.data.create_or_update(amazon_folder)
    print(f"Data asset created. Name: {amazon_folder.name}, version: {amazon_folder.version}")

(UserError) Data version amazon_reviews_folder:1 (dataContainerName:version) not found.
Code: UserError
Message: Data version amazon_reviews_folder:1 (dataContainerName:version) not found.
Data asset created. Name: amazon_reviews_folder, version: 1
