In [1]:
#%pip install azureml-widgets
#%pip install mlflow
#%pip install azure-ai-ml

## 1. Import the Required Libraries

In [2]:
import os
import mlflow
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import MLClient, Input, Output
from azure.ai.ml.entities import Environment, BuildContext, AmlCompute, ComputeInstance
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml.dsl import pipeline
from azure.ai.ml import load_component
from azure.core.exceptions import ResourceNotFoundError
from azure.storage.blob import BlobServiceClient
from dotenv import load_dotenv
from pathlib import Path

# Load environment variables from the local `my.env` file.
# The load_dotenv call stays last so its return value is the cell's displayed output.
dotenv_path = Path("my.env")
load_dotenv(dotenv_path)

True

## 2. Get a Handle to the Workspace

In [3]:
# Try the default credential chain first; requesting a management-plane token
# up front surfaces authentication problems here rather than in a later cell.
try:
    credential = DefaultAzureCredential()
    credential.get_token("https://management.azure.com/.default")
except Exception:
    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential does not work.
    # This will open a browser page for interactive sign-in.
    credential = InteractiveBrowserCredential()

In [4]:
# Workspace handle built from the workspace config file, authenticated with
# the credential chosen in the previous cell.
ml_client = MLClient.from_config(credential=credential)

Found the config file in: /config.json


In [5]:
# Point MLflow at the tracking URI reported by the current workspace.
workspace = ml_client.workspaces.get(ml_client.workspace_name)
azureml_tracking_uri = workspace.mlflow_tracking_uri
mlflow.set_tracking_uri(azureml_tracking_uri)

## 3. Create the Compute Clusters

In [6]:
# Compute for scoring
compute_cluster = True  # True -> AmlCompute cluster, False -> ComputeInstance
compute_version = "nc24ads-a100-v4" #"nc12s-v3"
compute_type = "Standard_NC24ads_A100_v4" #"Standard_NC12s_v3"

# The two target kinds differ only in name suffix, status messages and config
# class, so resolve those first and share one lookup/create path.
if compute_cluster:
    gpu_compute_name = f"{compute_version}-cluster"
    found_message = "Found existing compute cluster."
    creating_message = "Creating a new compute cluster..."
else:
    gpu_compute_name = f"{compute_version}-instance"
    found_message = "Found existing compute instance."
    creating_message = "Creating a new compute instance..."

try:
    # Only the lookup's success matters; the returned object is not used.
    _ = ml_client.compute.get(gpu_compute_name)
    print(found_message)
except ResourceNotFoundError:
    print(creating_message)
    if compute_cluster:
        compute_config = AmlCompute(
            name=gpu_compute_name,
            type="amlcompute",
            size=compute_type,
            idle_time_before_scale_down=120,
            min_instances=0,
            max_instances=1,
        )
    else:
        compute_config = ComputeInstance(
            name=gpu_compute_name,
            size=compute_type
        )
    # Block until provisioning has finished.
    ml_client.begin_create_or_update(compute_config).result()

Found existing compute cluster.


In [7]:
# Pipeline level compute
compute_cluster = True  # True -> AmlCompute cluster, False -> ComputeInstance
compute_type = "Standard_DS3_v2" #"Standard_NC12s_v3"
pipeline_level_compute_name = "cpu-cluster"

# Pick the status messages for the requested kind, then share one
# lookup/create path for both kinds.
if compute_cluster:
    found_message = "Found existing compute cluster."
    creating_message = "Creating a new compute cluster..."
else:
    found_message = "Found existing compute instance."
    creating_message = "Creating a new compute instance..."

try:
    # Only the lookup's success matters; the returned object is not used.
    _ = ml_client.compute.get(pipeline_level_compute_name)
    print(found_message)
except ResourceNotFoundError:
    print(creating_message)
    if compute_cluster:
        compute_config = AmlCompute(
            name=pipeline_level_compute_name,
            type="amlcompute",
            size=compute_type,
            idle_time_before_scale_down=120,
            min_instances=0,
            max_instances=1,
        )
    else:
        compute_config = ComputeInstance(
            name=pipeline_level_compute_name,
            size=compute_type
        )
    # Block until provisioning has finished.
    ml_client.begin_create_or_update(compute_config).result()

Found existing compute cluster.


## 4. Create the Environment

In [8]:
# Environment built from the Docker build context in the `docker_image` folder.
env_docker_image = Environment(
    build=BuildContext(path="docker_image"),
    name="msft-raft-finetuning-env",
    description="Environment for SLM Fine-tuning",
)

# Alternative kept for reference: a public PyTorch base image plus a conda file
# instead of a custom Docker build.
# env_docker_image = Environment(
#     image="pytorch/pytorch:latest",
#     conda_file="docker_image/conda.yml",
#     name="pytorch-raft-finetuning-env",
#     description="Environment for SLM Fine-tuning",
# )

# Register (or update) the environment in the workspace; the returned entity
# is the cell's displayed output.
ml_client.environments.create_or_update(env_docker_image)

Environment({'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'msft-raft-finetuning-env', 'description': 'Environment for SLM Fine-tuning', 'tags': {}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': True, 'id': '/subscriptions/03fd01f6-6051-4545-a78e-ceaace399b96/resourceGroups/lianatests/providers/Microsoft.MachineLearningServices/workspaces/humpbackwhales-aml/environments/msft-raft-finetuning-env/versions/84', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/linapalk2/code/Users/linapalk/RAFT/raft_finetuning_pipeline', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f85ae2ee3b0>, 'serialize': <msrest.serialization.Serializer object at 0x7f85ae2ee290>, 'version': '84', 'latest_version': None, 'conda_file': None, 'image': None, 'build': <azure.ai.ml.entities._assets.environment.BuildContext object at 0x7f85ae2ee110>, 'infere

## 6. Download Fine-tuned Model
Temporary workaround: fetch the fine-tuned model files from blob storage into the local `models/` folder.

In [9]:
model_folder = "phi3-mini-128K-instruct"


def is_folder_empty(folder_path):
    """Report whether ``folder_path`` is an existing directory with no entries.

    Args:
        folder_path: Path of the folder to inspect.

    Returns:
        True only when the path is a directory that contains nothing. A path
        that is missing or is not a directory is reported on stdout and
        yields False.
    """
    # Guard clause: anything that is not an existing directory counts as "not empty".
    if not os.path.isdir(folder_path):
        print(f"The path {folder_path} does not exist or is not a directory.")
        return False
    return len(os.listdir(folder_path)) == 0

local_dir = f"./models/{model_folder}"

# Create the target folder before checking it: is_folder_empty() returns False
# for a missing folder, so without this the download would be skipped on a
# fresh checkout where ./models/<model_folder> does not exist yet.
os.makedirs(local_dir, exist_ok=True)

if is_folder_empty(local_dir):
    # Storage account URL and credential are read from the environment.
    blob_service_client = BlobServiceClient(
        account_url=os.getenv("STORAGE_ACCOUNT_URL"),
        credential=os.getenv("STORAGE_ACCOUNT_CREDENTIAL")
    )
    container_name = "azureml-blobstore-f4e80658-2a40-4426-8676-c5538afa6a8f"
    blob_prefix = "azureml/814db0e1-86d7-40a2-bb82-fe9f565e461c/model_dir/"

    # List blobs under the prefix and download them into local_dir (flat layout:
    # only the final path component of each blob name is kept).
    container_client = blob_service_client.get_container_client(container_name)
    blob_list = container_client.list_blobs(name_starts_with=blob_prefix)

    for blob in blob_list:
        file_name = os.path.basename(blob.name)
        if not file_name:
            # A blob name ending in "/" has no file component; joining it
            # would yield local_dir itself, which cannot be opened for writing.
            continue

        blob_client = blob_service_client.get_blob_client(container_name, blob.name)
        local_file_path = os.path.join(local_dir, file_name)

        with open(local_file_path, "wb") as f:
            # Write the download stream straight into the file instead of
            # buffering the entire blob in memory first.
            download_stream = blob_client.download_blob()
            download_stream.readinto(f)
            print(f"Downloaded {blob.name} to {local_file_path}")

## 7. Build Pipeline

In [10]:
# Load the scoring component from its YAML spec in the notebook's folder.
parent_dir = "."
component_spec_path = parent_dir + "/score-model.yml"
batch_inference_model_func = load_component(source=component_spec_path)

In [11]:
# Single-step batch scoring pipeline. It is documented with comments rather than
# a docstring so the function object handed to @pipeline() keeps the same
# (empty) __doc__ as before.
@pipeline()
def score():
    # Model identifiers are passed as plain strings; the ground-truth CSV and
    # the local model folder are wrapped as Inputs of the matching asset type.
    score_model = batch_inference_model_func(
        base_model_id="microsoft/Phi-3-mini-128k-instruct",
        model_version="phi3-mini-128K-instruct",
        ground_truth_file=Input(
            type=AssetTypes.URI_FILE, path="./data/data_final.csv" #"./data/raft_sample_data-ft.train.jsonl"
        ),
        model_dir=Input(
            type=AssetTypes.URI_FOLDER, path=f"./models/{model_folder}"
        ),
    )
    # Run the scoring step on the GPU target created earlier.
    score_model.compute = gpu_compute_name

    # Expose the step's output as the pipeline-level output.
    return {"inference_result": score_model.outputs.inference_result}


pipeline_job = score()

# Pipeline-wide default compute target.
pipeline_job.settings.default_compute = pipeline_level_compute_name

## 8. Submit Pipeline Job

In [12]:
# submit the pipeline job
pipeline_job = ml_client.jobs.create_or_update(
    pipeline_job, experiment_name="raft-phi3-batch-inference"
)
pipeline_job

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Your file exceeds 100 MB. If you exper

Experiment,Name,Type,Status,Details Page
raft-phi3-batch-evaluation,orange_dream_9x99g7yn9v,pipeline,Preparing,Link to Azure Machine Learning studio


In [None]:
# Stream the submitted job's logs and wait until the job completes.
submitted_job_name = pipeline_job.name
ml_client.jobs.stream(submitted_job_name)

RunId: orange_dream_9x99g7yn9v
Web View: https://ml.azure.com/runs/orange_dream_9x99g7yn9v?wsid=/subscriptions/03fd01f6-6051-4545-a78e-ceaace399b96/resourcegroups/lianatests/workspaces/humpbackwhales-aml

Streaming logs/azureml/executionlogs.txt

[2024-09-22 09:04:20Z] Submitting 1 runs, first five are: 6120d754:aa4da036-3e95-428f-aa9f-9c907c589e71
