# Load compressed data and train segmentation model using components and registry

Build and run Azure Machine Learning pipelines for MONAI image segmentation. Reference: <https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-component-pipeline-python?view=azureml-api-2>


## Dev environment preparation
_Run this step only once._

In [1]:
# based on azureml_py310_sdkv2 kernel
# %pip install torch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0
# %pip install 'monai[nibabel, ignite, tqdm]'
# %pip install itkwidgets
# %pip install --upgrade azure-ai-ml #to have SDK 2 (1.5.0)

## Import Libraries

In [2]:
from azure.ai.ml import MLClient, Input, dsl
from azure.identity import DefaultAzureCredential
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azure.ai.ml import load_component
from helper import create_compute_cluster

## Define global constants

### Define registry constants

In [3]:
# --- Training settings ---
# Experiment under which the pipeline job is submitted.
experiment_name = "monai-brain-tumor1"
# NOTE(review): not referenced by any other cell visible in this notebook.
train_target = "Standard-NC96ads-A100-v4"

# --- Registry settings ---
registry_name = "ams-components"
registry_location = "westeurope"

# --- Names of the assets and components looked up in the registry ---
upload_data_from_blob_name = "upload_data_from_blob"
train_segmentation_name = "train_segmentation"
monai_pipeline_name = "monai_pipeline"
tar_data_asset_local_name = "tar_data_asset_local"

# True  -> fetch the data asset, components and pipeline from the registry.
# False -> use `tar_location` and the local component spec files instead.
use_registry = True

### Define Interactive constants

In [4]:
# BraTS data from Kaggle, stored as a tar archive in a workspace datastore.
# The URI is assembled from adjacent string literals so each path segment
# (subscription, resource group, workspace, datastore, file) is easy to read.
tar_location = (
    'azureml://subscriptions/b7d41fc8-d35d-41db-92ed-1f7f1d32d4d9'
    '/resourcegroups/monai-ml-demo'
    '/workspaces/monai-ml-demo'
    '/datastores/tar_data_store'
    '/paths/tumordemo/BraTS2021_Training_Data.tar'
)

## Create Azure ML and Registry Clients

In [5]:
# A single credential object is shared by both clients below.
credential = DefaultAzureCredential()

# Workspace client, built from the local workspace config file.
ml_client = MLClient.from_config(credential=credential)

# Registry client, used to fetch the shared data asset and components.
ml_client_registry = MLClient(
    credential=credential,
    registry_name=registry_name,
    registry_location=registry_location,
)

Found the config file in: ./config.json


## Create Multi-Node Training Compute Cluster

In [6]:
# Request the large GPU cluster used for training.
# NOTE(review): `create_compute_cluster` lives in helper.py; it presumably
# reuses an existing cluster with this name — confirm there.
low_pri_compute = create_compute_cluster(
    ml_client=ml_client,
    cname="low-pri-example",
    csize="Standard_NC96ads_A100_v4",
)

Found existing compute target {name}.


## Load Registry Components

### Select input data

In [7]:
# Decide where the tar archive comes from, then wrap it as a pipeline input.
if use_registry:
    # Latest version of the data asset published in the registry.
    tar_data_from_registry = ml_client_registry.data.get(
        name=tar_data_asset_local_name,
        label='latest',
    )
    input_path = tar_data_from_registry.id
else:
    # Direct datastore URI defined in the interactive constants.
    input_path = tar_location

pipeline_input = Input(type="uri_file", path=input_path)

### Load command components

In [8]:
if not use_registry:
    # Build the components from their local YAML specs.
    upload_component = load_component(source="../components/upload_from_blob/spec.yaml")
    train_component = load_component(source="../components/train_segmentation/spec.yaml")
else:
    # Fetch the latest published version of each component from the registry.
    upload_component = ml_client_registry.components.get(
        name=upload_data_from_blob_name,
        label='latest',
    )
    train_component = ml_client_registry.components.get(
        name=train_segmentation_name,
        label='latest',
    )

### Create pipeline job

In [9]:
if use_registry:
    # Fetch the latest published pipeline component from the registry.
    monai_pipeline = ml_client_registry.components.get(name=monai_pipeline_name, label='latest')
else:
    @dsl.pipeline(
        name="pipeline_from_notebook",
        description='Pipeline for MONAI 3D segmentation.',
    )
    def monai_pipeline(pipeline_input_file):
        """Chain the data-loading component and the training component.

        Args:
            pipeline_input_file: Pipeline input passed to the load step as
                `blob_file_location`.

        Returns:
            A dict with a single key, "model", bound to the training step's
            `model` output.
        """
        # Load-data step.
        load_step = upload_component(
            blob_file_location=pipeline_input_file,
        )

        # Training step, fed by the load step's image folder output.
        train_step = train_component(
            input_data=load_step.outputs.image_data_folder,
            best_model_name="model_from_notebook",
            max_epochs=2,
        )
        train_step.distribution.process_count_per_instance = 4
        train_step.resources = {'instance_count': 3, 'shm_size': '300g'}
        train_step.environment_variables = {'AZUREML_ARTIFACTS_DEFAULT_TIMEOUT': '1000'}
        # SDK v2 nodes are pinned to a cluster through `compute`, which takes
        # the compute NAME. `compute_target` is the SDK v1 attribute name and
        # does not select the cluster here. The name must match the `cname`
        # used when the training cluster is created earlier in this notebook.
        train_step.compute = "low-pri-example"

        return {
            "model": train_step.outputs.model,
        }

## Run pipeline job

In [11]:
# Instantiate the pipeline with the input selected earlier.
pipeline_job = monai_pipeline(pipeline_input_file=pipeline_input)

# Do not reuse cached results from previous jobs.
pipeline_job.settings.force_rerun = True

# Run the whole pipeline on a cluster instead of running the first component
# on the Compute Instance. The name must be the cluster this notebook creates
# (`cname="low-pri-example"`); the previous value "low-pri-compute" referred
# to a cluster that is never provisioned here.
pipeline_job.settings.default_compute = "low-pri-example"

# Submit the job; left as the last expression so the notebook shows the job summary.
ml_client.jobs.create_or_update(pipeline_job, experiment_name=experiment_name)


Experiment,Name,Type,Status,Details Page
monai-brain-tumor1,khaki_tail_1gfw7qppnr,pipeline,Preparing,Link to Azure Machine Learning studio
