In [1]:
# Optional setup: uncomment the lines below to install these packages into the kernel's environment.
#%pip install azureml-widgets
#%pip install mlflow
#%pip install azure-ai-ml

## 1. Import the Required Libraries

In [2]:
import mlflow
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import MLClient, Input, Output
from azure.ai.ml.entities import Environment, BuildContext, AmlCompute, ComputeInstance
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml.dsl import pipeline
from azure.ai.ml import load_component
from azure.core.exceptions import ResourceNotFoundError

## 2. Get a Handle to the Workspace

In [3]:
try:
    credential = DefaultAzureCredential()
    # Request a token right away so that a credential that cannot
    # authenticate fails here instead of on a later workspace call.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # DefaultAzureCredential did not work: report why instead of hiding the
    # error, then fall back to InteractiveBrowserCredential, which opens a
    # browser page for sign-in.
    print(f"DefaultAzureCredential failed ({ex}); falling back to InteractiveBrowserCredential.")
    credential = InteractiveBrowserCredential()

In [4]:
# Build the workspace client from the workspace config file, authenticating
# with the credential obtained above.
ml_client = MLClient.from_config(credential=credential)

Found the config file in: /config.json


In [5]:
# Look up the current workspace's MLflow tracking URI and register it with MLflow.
azureml_tracking_uri = ml_client.workspaces.get(ml_client.workspace_name).mlflow_tracking_uri
mlflow.set_tracking_uri(azureml_tracking_uri)

## 3. Create the Compute Clusters

In [6]:
# Compute for fine-tuning.
# compute_cluster selects the kind of compute: True -> AmlCompute cluster,
# False -> single compute instance.
compute_cluster = False
compute_version = "nc24ads-a100-v4"  # alternative: "std-nc12s-v3"
compute_type = "Standard_NC24ads_A100_v4"  # alternative: "Standard_NC12s_v3"

# The name suffix records which kind of compute is targeted.
gpu_compute_name = (
    f"{compute_version}-cluster" if compute_cluster else f"{compute_version}-instance"
)

try:
    # Reuse the compute if it already exists in the workspace.
    _ = ml_client.compute.get(gpu_compute_name)
except ResourceNotFoundError:
    # Not found: build the matching configuration and create the compute,
    # blocking until the create operation finishes.
    if compute_cluster:
        print("Creating a new compute cluster...")
        compute_config = AmlCompute(
            name=gpu_compute_name,
            type="amlcompute",
            size=compute_type,
            idle_time_before_scale_down=120,
            min_instances=0,
            max_instances=4,
        )
    else:
        print("Creating a new compute instance...")
        compute_config = ComputeInstance(
            name=gpu_compute_name,
            size=compute_type
        )
    ml_client.begin_create_or_update(compute_config).result()
else:
    if compute_cluster:
        print("Found existing compute cluster.")
    else:
        print("Found existing compute instance.")

Found existing compute instance.


In [7]:
# Pipeline level compute.
# NOTE: this cell rebinds compute_cluster and compute_type for the
# pipeline-level compute.
compute_cluster = True
compute_type = "Standard_DS3_v2"  # alternative: "Standard_NC12s_v3"
pipeline_level_compute_name = "cpu-cluster"

try:
    # Reuse the compute when it is already present in the workspace.
    _ = ml_client.compute.get(pipeline_level_compute_name)
    if compute_cluster:
        print("Found existing compute cluster.")
    else:
        print("Found existing compute instance.")
except ResourceNotFoundError:
    # Missing: create either a cluster or a single instance, depending on
    # compute_cluster, and wait for the operation to finish.
    if compute_cluster:
        print("Creating a new compute cluster...")
        compute_config = AmlCompute(
            name=pipeline_level_compute_name,
            type="amlcompute",
            size=compute_type,
            idle_time_before_scale_down=120,
            min_instances=0,
            max_instances=4,
        )
    else:
        print("Creating a new compute instance...")
        compute_config = ComputeInstance(
            name=pipeline_level_compute_name,
            size=compute_type
        )
    ml_client.begin_create_or_update(compute_config).result()

Found existing compute cluster.


## 4. Create the Environment

In [8]:
# Describe an environment that is built from the Docker build context in the
# "docker_image" folder.
env_docker_context = Environment(
    name="msft-raft-finetuning-env",
    description="Environment for SLM Fine-tuning",
    build=BuildContext(path="docker_image"),
)

# Register the environment in the workspace. As the last expression of the
# cell, the returned Environment is shown as the cell output.
ml_client.environments.create_or_update(env_docker_context)

Environment({'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'msft-raft-finetuning-env', 'description': 'Environment for SLM Fine-tuning', 'tags': {}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': True, 'id': '/subscriptions/03fd01f6-6051-4545-a78e-ceaace399b96/resourceGroups/lianatests/providers/Microsoft.MachineLearningServices/workspaces/humpbackwhales-aml/environments/msft-raft-finetuning-env/versions/40', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/linapalk2/code/Users/linapalk/RAFT/raft_finetuning_pipeline', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f8268ef3370>, 'serialize': <msrest.serialization.Serializer object at 0x7f8268ef3250>, 'version': '40', 'latest_version': None, 'conda_file': None, 'image': None, 'build': <azure.ai.ml.entities._assets.environment.BuildContext object at 0x7f8268ef30d0>, 'infere

## 6. Build Pipeline

In [9]:
# Load the fine-tuning component from its YAML definition under parent_dir.
parent_dir = "."
finetune_model_func = load_component(
    source=parent_dir + "/finetune-model.yml",
)

In [10]:
# Pipeline with a single fine-tuning step. It takes no arguments: the data
# files and the base model are fixed below. It returns the step's "model_dir"
# output as the pipeline output of the same name.
# NOTE(review): documented with comments rather than a docstring in case the
# DSL uses the docstring as the pipeline description -- confirm before
# converting, so the submitted job's metadata stays unchanged.
@pipeline()
def finetune_model():
    train_model = finetune_model_func(
        train_file=Input(
            type=AssetTypes.URI_FILE, path="./data/custom-ft.train.jsonl"  # alternative: "./data/raft_sample_data-ft.train.jsonl"
        ),
        test_file=Input(
            type=AssetTypes.URI_FILE, path="./data/custom-ft.valid.jsonl"  # alternative: "./data/raft_sample_data-ft.valid.jsonl"
        ),
        base_model_id="microsoft/Phi-3-mini-128k-instruct",
        model_version="phi3-mini-128K-instruct",
    )
    # Run the fine-tuning step on the fine-tuning compute instead of the
    # pipeline-level default compute.
    train_model.compute = gpu_compute_name

    return {"model_dir": train_model.outputs.model_dir}


pipeline_job = finetune_model()

# set pipeline level compute
pipeline_job.settings.default_compute = pipeline_level_compute_name

## 7. Submit Pipeline Job

In [11]:
# Submit the pipeline under the "raft-phi3-finetuning" experiment. The name
# pipeline_job is rebound to the job object returned by create_or_update.
pipeline_job = ml_client.jobs.create_or_update(pipeline_job, experiment_name="raft-phi3-finetuning")

# Last expression of the cell: display the submitted job as the cell output.
pipeline_job

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
[32mUploading src (0.02 MBs): 100%|██

Experiment,Name,Type,Status,Details Page
raft-phi3-finetuning,polite_brick_7t7n59tl5x,pipeline,Preparing,Link to Azure Machine Learning studio


In [None]:
# Wait until the job completes: stream the logs of the job submitted above
# (looked up by its name) into the cell output.
ml_client.jobs.stream(pipeline_job.name)

RunId: polite_brick_7t7n59tl5x
Web View: https://ml.azure.com/runs/polite_brick_7t7n59tl5x?wsid=/subscriptions/03fd01f6-6051-4545-a78e-ceaace399b96/resourcegroups/lianatests/workspaces/humpbackwhales-aml

Streaming logs/azureml/executionlogs.txt

[2024-09-19 21:11:45Z] Submitting 1 runs, first five are: 5cf535de:30f4af6f-5cc2-4eec-ba59-e4cb0cf94d46
