In [None]:
# Prerequisites

%pip install --upgrade azure-ai-ml
%pip install --upgrade azure-identity
%pip install --upgrade datasets==2.9.0
%pip install py7zr

In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml.entities import AmlCompute
import time

# Authenticate with DefaultAzureCredential; if it cannot produce a token,
# fall back to an interactive browser login.
try:
    credential = DefaultAzureCredential()
    # Request a token right away so an unusable credential fails here
    # rather than on the first real service call.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Report why the fallback is happening instead of silently discarding the
    # error (the workspace lookup below reports its failure as well).
    print(
        f"DefaultAzureCredential failed ({type(ex).__name__}); "
        "falling back to InteractiveBrowserCredential."
    )
    credential = InteractiveBrowserCredential()

# Build the workspace client via MLClient.from_config when that succeeds;
# otherwise use the workspace details entered by hand below.
workspace_ml_client = None
try:
    workspace_ml_client = MLClient.from_config(credential)
    subscription_id = workspace_ml_client.subscription_id
    workspace = workspace_ml_client.workspace_name
    resource_group = workspace_ml_client.resource_group_name
except Exception as ex:
    print(ex)
    # Enter details of your AML workspace
    subscription_id = "<SUBSCRIPTION_ID>"
    resource_group = "<RESOURCE_GROUP>"
    workspace = "<AML_WORKSPACE_NAME>"
    workspace_ml_client = MLClient(
        credential, subscription_id, resource_group, workspace
    )

# replace with the registry name
nemo_registry = "nvidia-ai"

# Client scoped to the registry (used later to fetch the model and the
# evaluation component); it reuses the same credential and subscription.
nemo_registry_ml_client = MLClient(
    credential, subscription_id, resource_group, registry_name=nemo_registry
)

# Show the registry client as the cell output.
nemo_registry_ml_client

## Creating Computes

In [None]:
# Name of the GPU cluster to use. If a cluster with this name already exists it
# is reused; otherwise a new one is created under the same name.
# The model will only run on an A100 instance.

compute_cluster = "do-not-delete-nvidia-testing"

try:
    compute = workspace_ml_client.compute.get(compute_cluster)
except Exception:
    print(f"GPU compute '{compute_cluster}' not found. Creating new one.")
    compute = AmlCompute(
        size="STANDARD_ND96AMSR_A100_V4",
        name=compute_cluster,
        # For multi node training set this to an integer value more than 1
        max_instances=2,
    )
    # Block until the create/update operation has finished.
    workspace_ml_client.compute.begin_create_or_update(compute).wait()
else:
    print(f"GPU compute '{compute_cluster}' found.")

# generating a unique timestamp that can be used for names and versions that need to be unique
timestamp = str(int(time.time()))


# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
# Setting this to more than the number of GPUs will result in an error.

gpus_per_node = 1  # default value, kept when the VM size cannot be looked up
size_matched = False  # True once the compute's VM size is found in the size list
ws_computes = workspace_ml_client.compute.list_sizes()
for ws_compute in ws_computes:
    if ws_compute.name.lower() == compute.size.lower():
        size_matched = True
        gpus_per_node = ws_compute.gpus
        print(f"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}")
        break

# The GPU count only counts as "found" when the VM size was actually matched
# AND it reports at least one GPU. Previously the default of 1 made an
# unmatched size look like a successful lookup.
gpu_count_found = size_matched and gpus_per_node > 0
if not size_matched:
    print(
        f"Compute size {compute.size} was not found in the list of available sizes; "
        f"gpus_per_node left at its default of {gpus_per_node}."
    )
elif not gpu_count_found:
    print(f"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.")

## Input Data for Evaluation

In [None]:
# Download one split of the dataset with the Hugging Face datasets library and
# store it locally as a JSON Lines file.
# This needs datasets library: https://pypi.org/project/datasets/
import os
from datasets import load_dataset, get_dataset_split_names

dataset_dir = "samsum-dataset"
dataset_name = "samsum"
# Create the download directory; exist_ok makes this a no-op when it is
# already there, so no separate existence check is needed.
os.makedirs(dataset_dir, exist_ok=True)

split = "test"
dataset = load_dataset(dataset_name, split=split)
# save the split of the dataset to the download directory as json lines file
dataset.to_json(os.path.join(dataset_dir, f"{split}.jsonl"))

In [None]:
import pandas as pd

# Path of the JSON Lines file holding the downloaded test split.
evaluation_dataset = "./samsum-dataset/test.jsonl"
pd.set_option(
    "display.max_colwidth", 0
)  # set the max column width to 0 to display the full text
# Read from the path variable rather than repeating the literal, so changing
# evaluation_dataset actually changes what is loaded.
df = pd.read_json(evaluation_dataset, lines=True)
# Keep only the "dialogue" column.
df = df[["dialogue"]]
df

In [None]:
# Write a small random subset of the dialogues to its own folder.
frac = 0.01  # fraction of rows to keep
sample_seed = 0  # fixed seed so re-running the notebook picks the same rows
evaluation_dataset_frac = "./samsum-dataset/test/test_frac.jsonl"
# Derive the folder from the file path so the two cannot drift apart.
os.makedirs(os.path.dirname(evaluation_dataset_frac), exist_ok=True)
df.sample(frac=frac, random_state=sample_seed).to_json(
    evaluation_dataset_frac, orient="records", lines=True
)

## Loading model from Registry

In [None]:
model_name = "Nemotron-3-8B-Base-4k"
# Either the label "latest" or an explicit version string.
model_version = "latest"

# Honour model_version instead of always fetching the "latest" label:
# "latest" is resolved as a label, anything else as an exact version.
if model_version == "latest":
    nemo_model_object = nemo_registry_ml_client.models.get(
        model_name, label=model_version
    )
else:
    nemo_model_object = nemo_registry_ml_client.models.get(
        model_name, version=model_version
    )

In [None]:
# Display the model object fetched from the registry as the cell output.
nemo_model_object

## Prompt

In [None]:
# Prompt template written to disk and later passed to the evaluation pipeline
# as a file input. The "{prompt}" placeholder is written out literally.
prompt = "Summarize this dialog:\n{prompt}\n---\nSummary:\n"
prompt_path = "prompt.txt"
# Explicit encoding so the file contents do not depend on the platform locale.
with open(prompt_path, "w", encoding="utf-8") as f:
    f.write(prompt)

## Submitting Evaluation Pipeline

In [None]:
from azure.ai.ml.dsl import pipeline
from azure.ai.ml import Input
from azure.ai.ml.constants import AssetTypes

# fetch the pipeline component
# (the "latest" label of "nemo_text_generation_evaluation", looked up through the registry client)
pipeline_component_func = nemo_registry_ml_client.components.get(
    name="nemo_text_generation_evaluation", label="latest"
)


# define the pipeline job
# The pipeline has a single step that calls the component fetched above with:
#   - dataset_path:  the local folder "./samsum-dataset/test/"
#   - model_path:    the id of the notebook-level `nemo_model_object`
#   - prompt_schema: the file at the notebook-level `prompt_path`
# It returns the step's `evaluation_result` output under the same name.
# NOTE(review): `mlflow_model` is declared as a parameter but is not referenced
# in the body; the model comes from the global `nemo_model_object` instead.
@pipeline()
def evaluation_pipeline(mlflow_model):
    evaluation_job = pipeline_component_func(
        dataset_path=Input(type=AssetTypes.URI_FOLDER, path="./samsum-dataset/test/"),
        model_path=Input(type=AssetTypes.TRITON_MODEL, path=f"{nemo_model_object.id}"),
        prompt_schema=Input(type=AssetTypes.URI_FILE, path=prompt_path),
    )
    return {"evaluation_result": evaluation_job.outputs.evaluation_result}

In [None]:
# Jobs submitted from this cell are recorded here as {"model_name", "job_name"} entries.
pipeline_jobs = []
experiment_name = "nemo-text-gen-eval-pipeline"

# Instantiate the pipeline defined above.
pipeline_object = evaluation_pipeline()

# Run on the selected GPU cluster, and always re-execute rather than reusing
# cached results from previous jobs.
pipeline_object.settings.default_compute = compute_cluster
pipeline_object.settings.force_rerun = True

# Submit the pipeline under the experiment name.
pipeline_job = workspace_ml_client.jobs.create_or_update(
    pipeline_object, experiment_name=experiment_name
)

# Remember which model this job belongs to.
pipeline_jobs.append(dict(model_name=model_name, job_name=pipeline_job.name))

# wait for the pipeline job to complete
workspace_ml_client.jobs.stream(pipeline_job.name)