# Set Up the Variables in the Notebook (IMPORTANT)
We install the `azure-ai-ml` package into our Compute Instance environment.

In [1]:
%pip install azure-ai-ml

Note: you may need to restart the kernel to use updated packages.


In [2]:
%%writefile setenv.py
import os

# TODO: Fill in the values below with your own Azure details.
# You can find them in the Azure portal; see the instructions above for details.
os.environ.update(
    {
        # Identifiers of the workspace the notebook connects to
        'subscription_id': "",  # this will look like xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
        'resource_group': "",  # this will look like "rg-xxx-xxx"
        'workspace_name': "",  # this will look like "flights-mlbox"
        # Information about the person running the notebook
        'owner': "",  # this is your user name or your email address
        'tenant_id': "",  # this will look like xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
    }
)

Writing setenv.py


In [3]:
# Authentication package
from azure.identity import DefaultAzureCredential
import os
from azure.ai.ml import MLClient
credential = DefaultAzureCredential()

# Execute the script
%run setenv.py

file = open("setenv.sh","w")
file.write("export subscription_id=" + os.environ['subscription_id'] + "\n" + "export resource_group=" + os.environ['resource_group'] + "\n" + "export workspace_name=" + os.environ['workspace_name'] + "\n" + "export owner=" + os.environ['owner'] + "\n" + "export tenant_id=" + os.environ['tenant_id'] + "\n")
file.close()

# Get a handle to the workspace
ml_client = MLClient(
    credential=credential,
    subscription_id= os.environ['subscription_id'],
    resource_group_name= os.environ['resource_group'],
    workspace_name= os.environ['workspace_name']
)

In [2]:
# Check if credentials are valid
from IPython.display import Image
from colorama import Fore

try:
    # A read-only lookup of the configured workspace is enough to prove that the credential
    # and the subscription / resource group / workspace settings resolve to a workspace;
    # no update operation has to be submitted just to validate access.
    ml_client.workspaces.get()
    print(Fore.GREEN + "Credentials are valid")
except Exception as error:  # not a bare `except:` so that kernel interrupts still propagate
    print(Fore.RED + "Credentials are invalid - please check the TODO CELL")
    print("Please check your credentials : subscription_id, resource_group_name, workspace_name must be correct")
    # Show the underlying reason so the user can tell which setting is wrong
    print(f"{type(error).__name__}: {error}")


[32mCredentials are valid


In [7]:
# --- Settings shared by the cluster-creation cells below ---

# VM size and pricing tier requested for every cluster
vm_size = "Standard_DS11_v2"
vm_priority = "Dedicated"

# Number of clusters created per role, and the node limit of each cluster
number_of_machine = 1
number_of_max_instances = 6

## Create a compute resource to run your producer

In [8]:
from azure.ai.ml.entities import AmlCompute

# Name assigned to the compute cluster
cpu_compute_target_base = "cpu-cluster-producer"

# One cluster per requested machine, named <base>0, <base>1, ...
# (a named loop variable is used because its value is needed, and because assigning to `_`
# would shadow IPython's "last output" variable)
for machine_index in range(number_of_machine):
    # Let's create a unique name for the compute target
    cpu_compute_target = cpu_compute_target_base + str(machine_index)

    try:
        # let's see if the compute target already exists
        cpu_cluster = ml_client.compute.get(cpu_compute_target)
        print(
            f"You already have a cluster named {cpu_compute_target}, we'll reuse it as is."
        )

    except Exception:
        print("Creating a new cpu compute target...")

        # Let's create the Azure ML compute object with the intended parameters
        cpu_cluster = AmlCompute(
            name=cpu_compute_target,
            # Azure ML Compute is the on-demand VM service
            type="amlcompute",
            # VM Family
            size=vm_size,
            # Minimum running nodes when there is no job running
            min_instances=0,
            # Nodes in cluster
            max_instances=number_of_max_instances,
            # How many seconds will the node running after the job termination
            idle_time_before_scale_down=180,
            # Dedicated or LowPriority. The latter is cheaper but there is a chance of job termination
            tier=vm_priority,
        )

        # begin_create_or_update only starts a long-running operation and returns a poller.
        # Block on the poller so the cluster really exists before it is reported as created;
        # result() returns the provisioned compute, so no extra lookup is needed.
        cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster).result()

    print(
        f"AMLCompute with name {cpu_cluster.name} is created, the compute size is {cpu_cluster.size}"
    )

You already have a cluster named cpu-cluster-producer0, we'll reuse it as is.
AMLCompute with name cpu-cluster-producer0 is created, the compute size is STANDARD_DS11_V2
You already have a cluster named cpu-cluster-producer1, we'll reuse it as is.
AMLCompute with name cpu-cluster-producer1 is created, the compute size is STANDARD_DS11_V2
You already have a cluster named cpu-cluster-producer2, we'll reuse it as is.
AMLCompute with name cpu-cluster-producer2 is created, the compute size is STANDARD_DS11_V2
Creating a new cpu compute target...
AMLCompute with name cpu-cluster-producer3 is created, the compute size is STANDARD_DS11_V2
Creating a new cpu compute target...
AMLCompute with name cpu-cluster-producer4 is created, the compute size is STANDARD_DS11_V2
Creating a new cpu compute target...
AMLCompute with name cpu-cluster-producer5 is created, the compute size is STANDARD_DS11_V2
Creating a new cpu compute target...
AMLCompute with name cpu-cluster-producer6 is created, the compute

## Create a compute resource to run your consumer

In [9]:
from azure.ai.ml.entities import AmlCompute

# Name assigned to the compute cluster
cpu_compute_target_base = "cpu-cluster-consumer"

# One cluster per requested machine, named <base>0, <base>1, ...
# (a named loop variable is used because its value is needed, and because assigning to `_`
# would shadow IPython's "last output" variable)
for machine_index in range(number_of_machine):
    # Let's create a unique name for the compute target
    cpu_compute_target = cpu_compute_target_base + str(machine_index)

    try:
        # let's see if the compute target already exists
        cpu_cluster = ml_client.compute.get(cpu_compute_target)
        print(
            f"You already have a cluster named {cpu_compute_target}, we'll reuse it as is."
        )

    except Exception:
        print("Creating a new cpu compute target...")

        # Let's create the Azure ML compute object with the intended parameters
        cpu_cluster = AmlCompute(
            name=cpu_compute_target,
            # Azure ML Compute is the on-demand VM service
            type="amlcompute",
            # VM Family
            size=vm_size,
            # Minimum running nodes when there is no job running
            min_instances=0,
            # Nodes in cluster
            max_instances=number_of_max_instances,
            # How many seconds will the node running after the job termination
            idle_time_before_scale_down=180,
            # Dedicated or LowPriority. The latter is cheaper but there is a chance of job termination
            tier=vm_priority,
        )

        # begin_create_or_update only starts a long-running operation and returns a poller.
        # Block on the poller so the cluster really exists before it is reported as created;
        # result() returns the provisioned compute, so no extra lookup is needed.
        cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster).result()

    print(
        f"AMLCompute with name {cpu_cluster.name} is created, the compute size is {cpu_cluster.size}"
    )

You already have a cluster named cpu-cluster-consumer0, we'll reuse it as is.
AMLCompute with name cpu-cluster-consumer0 is created, the compute size is STANDARD_DS11_V2
You already have a cluster named cpu-cluster-consumer1, we'll reuse it as is.
AMLCompute with name cpu-cluster-consumer1 is created, the compute size is STANDARD_DS11_V2
You already have a cluster named cpu-cluster-consumer2, we'll reuse it as is.
AMLCompute with name cpu-cluster-consumer2 is created, the compute size is STANDARD_DS11_V2
Creating a new cpu compute target...
AMLCompute with name cpu-cluster-consumer3 is created, the compute size is STANDARD_DS11_V2
Creating a new cpu compute target...
AMLCompute with name cpu-cluster-consumer4 is created, the compute size is STANDARD_DS11_V2
Creating a new cpu compute target...
AMLCompute with name cpu-cluster-consumer5 is created, the compute size is STANDARD_DS11_V2
Creating a new cpu compute target...
AMLCompute with name cpu-cluster-consumer6 is created, the compute

### 1.1 Create Environment for our execution/job

In [5]:
import os

# Folder that holds the Docker build context of the custom job environment
env_dir = "./env"

# Create the folder up front; an already existing folder is left as is
os.makedirs(name=env_dir, exist_ok=True)

In [6]:
%%writefile {env_dir}/Dockerfile
FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20220902.v1

# Location of the conda environment created below
ENV AZUREML_CONDA_ENVIRONMENT_PATH=/azureml-envs/responsibleai-0.21

# Install wkhtmltopdf for pdf rendering from html
RUN apt-get -y update && apt-get -y install wkhtmltopdf

# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
    python=3.8 pip=21.3.1 -c anaconda -c conda-forge

# Prepend path to AzureML conda environment
ENV PATH=$AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH

# Install pip dependencies
# markupsafe and itsdangerous are bug workarounds
# azureml-defaults, inference-schema[numpy-support] and joblib are pinned explicitly below
RUN pip install 'responsibleai~=0.21.0' \
                'raiwidgets~=0.21.0' \
                'pyarrow' \
                'markupsafe<=2.0.1' \
                'itsdangerous==2.0.1' \
                'mlflow==1.30.0' \
                'pdfkit==1.0.0' \
                'plotly==5.6.0' \
                'kaleido==0.2.1' \
                'azureml-core==1.47.0' \
                'azureml-dataset-runtime==1.47.0' \
                'azureml-mlflow==1.47.0' \
                'azureml-telemetry==1.47.0' \
                'matplotlib' \
                'pyspark>=3.1,<3.2' \
                'azureml-defaults==1.47.0' \
                'inference-schema[numpy-support]==1.5' \
                'joblib==1.0.1' \
                'py-cpuinfo' \
                'kafka-python'

RUN pip install --pre 'azure-ai-ml'

# no-deps install for domonic due to unresolvable dependency requirements on urllib3 and requests.
# score card rendering is using domonic only for the html elements composer which does not involve requests or urllib3
RUN pip install --no-deps 'charset-normalizer==2.0.12' \
                          'cssselect==1.1.0' \
                          'elementpath==2.5.0' \
                          'html5lib==1.1' \
                          'webencodings==0.5.1' \
                          'domonic==0.9.10'

# This is needed for mpi to locate libpython
ENV LD_LIBRARY_PATH=$AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH

# This is needed for pyspark to locate Java
RUN apt-get update && \
    mkdir -p /usr/share/man/man1 && \
    apt-get install -y openjdk-8-jdk && \
    apt-get install -y ant && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/ && \
    rm -rf /var/cache/oracle-jdk8-installer;

# ENV makes JAVA_HOME available to all later build steps and to the running container.
# A separate `RUN export JAVA_HOME` would only affect the shell of that single RUN step,
# so it is not needed.
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/


Writing ./env/Dockerfile


In [7]:
from datetime import date
import os

from azure.ai.ml.entities import Environment
from azure.ai.ml.entities import BuildContext

custom_env_name = "kafka-custom-env"

# Build context: the folder that contains the Dockerfile written by the previous cell
buildcontext = BuildContext(
    path=env_dir
)

pipeline_job_env = Environment(
    name=custom_env_name,
    description="Custom environment for spark flight delays",
    # Stamp the tag with the actual registration day (YYYY-MM-DD) instead of a fixed date
    # that is wrong whenever the notebook is run later.
    tags={"owner": os.environ["owner"], "created": date.today().isoformat()},
    build=buildcontext,
)
# Register the environment in the workspace and keep the returned object,
# whose name and version are reported below.
pipeline_job_env = ml_client.environments.create_or_update(pipeline_job_env)

print(
    f"Environment with name {pipeline_job_env.name} is registered to workspace, the environment version is {pipeline_job_env.version}"
)

[32mUploading env (0.0 MBs):   0%|          | 0/2420 [00:00<?, ?it/s][32mUploading env (0.0 MBs): 100%|██████████| 2420/2420 [00:00<00:00, 18796695.70it/s]
[39m



Environment with name kafka-custom-env is registered to workspace, the environment version is 1


# 1.2 Writing our component
First, we create a YAML file. This file describes our Azure ML component and explains how it works.