# Job creation

In [4]:
from azure.ai.ml import command, Input, MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import AmlCompute, Environment, Data, AzureBlobDatastore
from dotenv import load_dotenv

import os

In [5]:
# Load settings via python-dotenv, then read each one from the process environment.
load_dotenv()

# Every value below is None when its environment variable is not set.
SUBSCRIPTION_ID = os.getenv("SUBSCRIPTION_ID")
RESOURCE_GROUP = os.getenv("RESOURCE_GROUP")
WORKSPACE_NAME = os.getenv("WORKSPACE_NAME")
DATASTORE_NAME = os.getenv("DATASTORE_NAME")
ACCOUNT_NAME = os.getenv("ACCOUNT_NAME")
CONTAINER_NAME = os.getenv("CONTAINER_NAME")
# Note: the constant is COMPUTE_CLUSTER_NAME but the variable read is "COMPUTE_NAME".
COMPUTE_CLUSTER_NAME = os.getenv("COMPUTE_NAME")
LOCATION = os.getenv("LOCATION")
CONNECTION_KEY = os.getenv("CONNECTION_KEY")
DATASET_NAME = os.getenv("DATASET_NAME")

In [6]:
# Connect to the Azure ML workspace.
# The subscription, resource group and workspace are taken from the
# SUBSCRIPTION_ID / RESOURCE_GROUP / WORKSPACE_NAME constants read from the
# environment in the configuration cell; keyword arguments make the mapping
# onto MLClient's parameters explicit.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=SUBSCRIPTION_ID,
    resource_group_name=RESOURCE_GROUP,
    workspace_name=WORKSPACE_NAME,
)

In [14]:
# Fetch version "1" of the data asset registered as "amazon_reviews_folder".
# The job cells below build their file URIs from its `.path` attribute.
datastore_uri = ml_client.data.get(
    name="amazon_reviews_folder",
    version="1",
)

In [17]:
# Example: run the data-preparation script (prep.py under src/prep) as an AML command job.
job = command(
    code="src/prep",
    command="python prep.py --raw_data ${{inputs.raw_data}} --prep_data ${{inputs.prep_data}}",
    # Both paths are built from the data asset's path and passed to the script as plain strings.
    inputs={
        "raw_data": f"{datastore_uri.path}/reviews.csv",
        "prep_data": f"{datastore_uri.path}/reviews-prepped.csv",
    },
    environment="keras-env@latest",
    compute="cpu-cluster",
    experiment_name="reviews_analysis__prep",
    display_name="amazon_reviews__prep",
)

# Submit the job and print its Studio link so the run can be monitored.
returned_job = ml_client.create_or_update(job)
aml_url = returned_job.studio_url
print("Monitor the job here : ", aml_url)

Uploading prep (0.01 MBs): 100%|#################################################| 6770/6770 [00:01<00:00, 6410.00it/s]



Monitor the job here :  https://ml.azure.com/runs/sharp_pummelo_tfzws7456x?wsid=/subscriptions/ce96fbca-fc23-466f-87e4-9b8cb5316116/resourcegroups/rg-review-analysis-teamc/workspaces/aml-review-analysis-teamc&tid=cf36141c-ddd7-45a7-b073-111f66d0b30c


In [None]:
# Example: run the training script (train.py under src/train) as an AML command job.
job = command(
    inputs=dict(
        # train.py receives the prepped reviews file through its --raw_data flag.
        raw_data=f"{datastore_uri.path}/reviews-prepped.csv",
        registered_model_name="naive_bayes_baseline",
    ),
    code="src/train",
    command="python train.py --raw_data ${{inputs.raw_data}} --registered_model_name ${{inputs.registered_model_name}}",
    environment="keras-env@latest",
    # Group training runs under their own experiment, mirroring the prep job's naming.
    experiment_name="reviews_analysis__train",
    display_name="amazon_reviews__train",
    compute="cpu-cluster",
)

# Submit the job and print its Studio link so the run can be monitored.
returned_job = ml_client.create_or_update(job)
aml_url = returned_job.studio_url
print("Monitor the job here : ", aml_url)