In [None]:
### At this point the ./src/main.py file needs to be executed with the preprocessed data. In the original training, Azure ML Studio was used to train the model. If you use another cloud computing service, you will need to adapt the code to your needs.

from azure.ai.ml import MLClient
from azure.ai.ml import command, Input, Output
from azure.identity import DefaultAzureCredential
from azureml.core import Workspace, Dataset, Datastore
import os
import pandas as pd
import numpy as np
  

# Extend the import path before importing `config`.
# NOTE(review): presumably config.py lives two directories above this notebook — confirm.
import sys
sys.path.append("../../")

## Change according to your workspace
from config import subscription_id, resource_group, workspace_name

# One credential object, handed to the SDK v2 client below.
credential = DefaultAzureCredential()

# SDK v2 client: used further down to register the environment and submit jobs.
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

# SDK v1 handles on the same workspace and its "workspaceblobstore" datastore.
workspace = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)
datastore = Datastore.get(workspace, datastore_name="workspaceblobstore")

# 1. Set up job environment (only for first training)

In [None]:
# Folder that holds the conda specification consumed by the Environment definition.
dependencies_dir = "./dependencies"
os.makedirs(dependencies_dir, exist_ok=True)

# The spec is written with plain Python instead of the `%%writefile` cell magic:
# a cell magic is only recognised on the first line of a cell, so it cannot follow
# the statements above inside the same cell.
#
# NOTE: neural_prophet is installed from the repository's default branch without a
# tag or commit, so the resolved version depends on when the environment is built.
CONDA_SPEC = """\
name: model-env
channels:
  - conda-forge
dependencies:
  - python=3.8
  - numpy
  - pip
  - pandas>=1.1,<1.2
  - pip:
    - inference-schema[numpy-support]==1.3.0
    - mlflow>=1.26.1
    - azureml-mlflow==1.42.0
    - psutil>=5.8,<5.9
    - tqdm>=4.59,<4.60
    - ipykernel~=6.0
    - git+https://github.com/ourownstory/neural_prophet.git
    - pytorch-lightning>=1.4.0
"""

conda_path = os.path.join(dependencies_dir, "conda.yml")
with open(conda_path, "w", encoding="utf-8") as conda_file:
    conda_file.write(CONDA_SPEC)

print(f"Wrote {conda_path}")

In [None]:
from azure.ai.ml.entities import Environment

custom_env_name = "training_env"

# Local description of the training environment: a base Docker image plus the
# conda spec stored under `dependencies_dir`.
env_spec = Environment(
    name=custom_env_name,
    description="Virtual environment for NP training",
    tags={"neuralprophet": "main github"},
    conda_file=os.path.join(dependencies_dir, "conda.yml"),
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest",
)

# Register (or update) it in the workspace; the returned object carries the
# name and version that are reported below.
custom_job_env = ml_client.environments.create_or_update(env_spec)

print(
    f"Environment with name {custom_job_env.name} is registered to workspace, "
    f"the environment version is {custom_job_env.version}"
)

# 2. Set up input data

In [None]:
### Path to input file on Azure ML

# NOTE(review): the URI carries no subscription / resource-group identifiers —
# it looks like a redacted placeholder; fill in your own values before running.
data_path = "azureml://subscriptions/resourcegroups/datastores/workspaceblobstore/09_load_bus_after.csv"

# File name with extension, and the bare stem used to label the job.
filename_wcsv = os.path.basename(data_path)
filename = os.path.splitext(filename_wcsv)[0]

# 3. Start training jobs

In [None]:
### Create your base command job

# The input CSV is passed to main.py as --data; the output folder is mounted
# read-write and passed as --results.
job_inputs = {"data": Input(type="uri_file", path=data_path)}
job_outputs = {"results": Output(type="uri_folder", mode="rw_mount")}

# Runs ./src/main.py inside the registered "training_env" environment on the
# named compute target. The job name and display name are both the file stem.
job = command(
    code="./src",
    command=""" python main.py \
            --data ${{inputs.data}} \
            --results ${{outputs.results}} \
            """,
    environment="training_env@latest",
    inputs=job_inputs,
    outputs=job_outputs,
    compute="Standard-NC24ads-A100-v4-10nodes",
    display_name=filename,
    experiment_name="Clustering_bus",
    description="Clustered & aggregated Bus",
    name=filename,
)

# Submit the job
ml_client.create_or_update(job)

In [None]:
### List with all cluster dfs

# All cluster files share one datastore location and differ only in a zero-padded
# two-digit cluster index, so the list is generated instead of typed out by hand.
# NOTE(review): the base URI carries no subscription / resource-group identifiers —
# it looks like a redacted placeholder; fill in your own values before running.
DATASTORE_URI = "azureml://subscriptions/resourcegroups/datastores/workspaceblobstore"
N_CLUSTERS = 10  # files 00_load_bus_after.csv ... 09_load_bus_after.csv

list_uri = [
    f"{DATASTORE_URI}/{cluster_id:02d}_load_bus_after.csv"
    for cluster_id in range(N_CLUSTERS)
]

In [None]:
### Start all cluster-jobs

# Suffix appended to every job name in this batch.
# NOTE(review): presumably a manual run-version tag that keeps job names distinct
# between submissions — confirm, and bump it before re-submitting.
RUN_SUFFIX = "_v5"


def build_cluster_job(uri, display_name, job_name):
    """Build (without submitting) the training job for one cluster file.

    Parameters
    ----------
    uri : str
        Location of the input CSV, passed to main.py as ``--data``.
    display_name : str
        Display name of the job.
    job_name : str
        Name of the job.

    Returns
    -------
    The object returned by ``command(...)``, ready to be submitted through
    ``ml_client.create_or_update``.
    """
    return command(
        code="./src",
        command=""" python main.py \
                --data ${{inputs.data}} \
                --results ${{outputs.results}} \
                """,
        environment="training_env@latest",
        inputs=dict(
            data=Input(
                type="uri_file",
                path=uri
            ),
        ),
        outputs=dict(
            results=Output(type="uri_folder",
                           mode="rw_mount"),
        ),
        compute="Standard-NC24ads-A100-v4-10nodes",
        display_name=display_name,
        experiment_name="Clustering_bus",
        description="Clustered & aggregated Bus",
        name=job_name,
    )


# Keep the objects returned by the service so the submitted jobs can be
# inspected afterwards instead of being discarded.
submitted_jobs = []

for uri in list_uri:
    filename_wcsv = os.path.basename(uri)
    filename, _ = os.path.splitext(filename_wcsv)
    print(filename)

    job = build_cluster_job(uri, display_name=filename, job_name=f"{filename}{RUN_SUFFIX}")

    # submit the job
    submitted_jobs.append(ml_client.create_or_update(job))