## **Prerequisites**

#### - Create Azure Machine Learning Workspace via the Portal
- Note the workspace name, resource group, and subscription ID
#### - Create and populate .env file in the home directory
- Use [.sample.env](../.sample.env) as a guide
#### - Create and activate conda virtual env
- Run the following bash commands via the terminal _from the top directory_
```bash
    conda env create --name many_models --file=./environment/conda.yaml
    conda activate many_models
    az login
```
- Select the many_models Python interpreter and kernel to run the remainder of this notebook

- If you are not using VS Code, you may need to run the following command to install the kernel:
```bash 
    python -m ipykernel install --user --name many_models --display-name "many_models"
```

### **1. Load Data to Workspace**

In [None]:
import os
import time
import pandas as pd
from dotenv import load_dotenv, find_dotenv
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Data, AmlCompute, Environment
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential

In [None]:
# Load the variables defined in the .env file located by find_dotenv();
# override=True is passed so the file's values are applied on every run
load_dotenv(dotenv_path=find_dotenv(), override=True)

# Confirm variables were loaded
print(os.environ.get("WORKSPACE_NAME"))

In [None]:
# Authenticate using the TENANT_ID environment variable.
# NOTE(review): `tenantid` does not appear among the keyword arguments
# documented for DefaultAzureCredential - confirm the tenant is really applied.
credential = DefaultAzureCredential(tenantid=os.getenv("TENANT_ID"))

# Get a handle to the workspace named by the environment variables
ml_client = MLClient(
    credential=credential,
    subscription_id=os.getenv("SUBSCRIPTION_ID"),
    resource_group_name=os.getenv("RESOURCE_GROUP_NAME"),
    workspace_name=os.getenv("WORKSPACE_NAME"),
)

In [None]:
# Relative path of the folder that holds the local data files
local_path = "../data/oj_sim_sales/"
# Version number for the data assets: the current UTC time,
# formatted as YYYY.MM.DD.HHMMSS
v = time.strftime("%Y.%m.%d.%H%M%S", time.gmtime(time.time()))

In [None]:
# Define the three data assets (training, ground truth, inference). Each one
# points at a single CSV file under `local_path` and shares the version `v`.
train_data = Data(
    name="oj-sim-sales-train",
    version=v,
    description="Training Data - Chicago area orange juice sales data",
    path=local_path + "train_subset.csv",
    type=AssetTypes.URI_FILE,
)

results_data = Data(
    name="oj-sim-sales-ground-truth",
    version=v,
    description="Results for Feedback - Chicago area orange juice sales data",
    path=local_path + "test_subset_results.csv",
    type=AssetTypes.URI_FILE,
)

test_data = Data(
    name="oj-sim-sales-test",
    version=v,
    # Spelling corrected ("Infrence" -> "Inference"): this text is stored as
    # the asset's description
    description="Inference Data - Chicago area orange juice sales data",
    path=local_path + "test_subset.csv",
    type=AssetTypes.URI_FILE,
)


# Create the data assets in the workspace
ml_client.data.create_or_update(train_data)
ml_client.data.create_or_update(test_data)
ml_client.data.create_or_update(results_data)

In [None]:
# Validate data upload: look up the training asset by its "latest" label
data_asset = ml_client.data.get(name="oj-sim-sales-train", label="latest")

# Read the asset back through its path and preview the first 10 rows
df = pd.read_csv(data_asset.path)
display(df.head(n=10))

# Report the number of distinct (Brand, Store) combinations
print(f"Total Partitions: {len(df[['Brand', 'Store']].drop_duplicates())}")

### **2. Create Compute Cluster**

In [None]:
# Create Compute Target: reuse the cluster if it already exists, otherwise
# create it and wait for provisioning to finish.

# Name assigned to the compute cluster
cpu_compute_target = "mm-cpu-cluster"

try:
    # Let's see if the compute target already exists
    cpu_cluster = ml_client.compute.get(cpu_compute_target)

except Exception:
    # NOTE(review): any failure of the lookup (not only "not found") leads
    # here; a non-lookup problem will then surface from the create call below.
    print("Creating a new cpu compute target...")
    cpu_cluster = AmlCompute(
        name=cpu_compute_target,
        # Azure Machine Learning Compute is the on-demand VM service
        type="amlcompute",
        # VM Family
        size="STANDARD_DS3_V2",
        # Minimum running nodes when there is no job running
        min_instances=0,
        # Nodes in cluster
        max_instances=10,
        # How many seconds a node keeps running after the job terminates
        idle_time_before_scale_down=300,  # 5 minutes
        # Dedicated or LowPriority. The latter is cheaper but there is a chance of job termination
        tier="Dedicated",
    )
    # begin_create_or_update starts a long-running operation; .result() waits
    # for it so that cpu_cluster holds the compute entity in both branches
    cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster).result()

else:
    print(f"You already have a cluster named {cpu_compute_target}, we'll reuse it as is.")

### **3. Create Runtime Environment**

In [None]:
# Create Environment
custom_env_name = "mm-remote-env-py310"

# Local definition: a base image combined with the project's conda file
custom_job_env = Environment(
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20240226.v1",
    conda_file="../environment/conda.yaml",
    name=custom_env_name,
    description="Custom environment for many models",
)

# Submit the definition to the workspace and keep the returned object
custom_job_env = ml_client.environments.create_or_update(custom_job_env)
print(f"Environment created: {custom_job_env.name}")

# Look the environment up again by its "latest" label (shown as the cell output)
ml_client.environments.get(custom_env_name, label="latest")