In [None]:
import os
import json
import requests
import random
from time import sleep, time
from openai import OpenAI
import asyncio

from config import *

In [None]:
# Publish the values imported from config as process environment variables.
nvidia_env = {
    # API Key for NVIDIA provider (required even for self-hosted services)
    "NVIDIA_API_KEY": NDS_TOKEN,
    # Metadata associated with Datasets and Customization Jobs
    "NVIDIA_DATASET_NAMESPACE": NMS_NAMESPACE,
    "NVIDIA_PROJECT_ID": PROJECT_ID,
    # Inference env vars
    "NVIDIA_BASE_URL": NIM_URL,
    # Data Store env vars
    "NVIDIA_DATASETS_URL": ENTITY_STORE_URL,
    # Customizer env vars
    "NVIDIA_CUSTOMIZER_URL": CUSTOMIZER_URL,
    "NVIDIA_OUTPUT_MODEL_DIR": CUSTOMIZED_MODEL_DIR,
    # Evaluator env vars
    "NVIDIA_EVALUATOR_URL": EVALUATOR_URL,
    # Guardrails env vars
    "GUARDRAILS_SERVICE_URL": GUARDRAILS_URL,
}
os.environ.update(nvidia_env)


In [None]:
# Echo the configured service endpoints and names, one per line, as a sanity check.
config_summary = [
    f"Data Store endpoint: {DATA_STORE_URL}",
    f"Entity Store endpoint: {ENTITY_STORE_URL}",
    f"Customizer endpoint: {CUSTOMIZER_URL}",
    f"Evaluator endpoint: {EVALUATOR_URL}",
    f"NIM endpoint: {NIM_URL}",
    f"Namespace: {NMS_NAMESPACE}",
    f"Base Model for Customization: {BASE_MODEL}",
]
print("\n".join(config_summary))

In [None]:
from llama_stack.core.library_client import LlamaStackAsLibraryClient

# Create the library-mode Llama Stack client with the "nvidia" argument and
# initialize it. Later cells use `client` to register the dataset and reach the
# provider implementations through `client.async_client.impls`.
client = LlamaStackAsLibraryClient("nvidia")
client.initialize()

In [None]:
from llama_stack.apis.common.job_types import JobStatus
from llama_stack.core.datatypes import Api
import asyncio

def wait_customization_job(job_id: str, polling_interval: int = 30, timeout: int = 5500):
    """Poll a customization (post-training) job until it leaves the pending states.

    Args:
        job_id: Identifier passed as ``job_uuid`` to ``get_training_job_status``.
        polling_interval: Seconds to sleep between two status checks.
        timeout: Maximum number of seconds to keep waiting while the job is
            still scheduled or in progress.

    Returns:
        The last status reported for the job, i.e. the first status that is
        neither ``JobStatus.scheduled`` nor ``JobStatus.in_progress``.

    Raises:
        RuntimeError: If the job is still pending after ``timeout`` seconds.
    """
    # Import the module locally: the notebook's first cell runs
    # `from time import sleep, time`, which binds the global name `time` to the
    # *function*. `time.time()` would then fail unless a later `import time`
    # cell happened to run before this helper is called.
    import time as _time

    pending_statuses = (JobStatus.scheduled.value, JobStatus.in_progress.value)
    start_time = _time.monotonic()  # monotonic clock: immune to wall-clock adjustments

    # Access post_training through impls
    post_training = client.async_client.impls[Api.post_training]

    # The provider API is async; each call is driven to completion on the event loop.
    # NOTE(review): run_until_complete fails on a loop that is already running
    # unless re-entrancy is patched — confirm this holds for your kernel.
    loop = asyncio.get_event_loop()

    def _poll_status():
        # Fetch the current job status from the post-training provider.
        res = loop.run_until_complete(post_training.get_training_job_status(job_uuid=job_id))
        return res.status

    job_status = _poll_status()

    print(f"Waiting for Customization job {job_id} to finish.")
    print(f"Job status: {job_status} after {_time.monotonic() - start_time} seconds.")

    while job_status in pending_statuses:
        # Only give up while the job is still pending, so a job that finishes on
        # the final poll is returned instead of being reported as timed out.
        if _time.monotonic() - start_time > timeout:
            raise RuntimeError(f"Customization Job {job_id} took more than {timeout} seconds.")

        _time.sleep(polling_interval)
        job_status = _poll_status()

        print(f"Job status: {job_status} after {_time.monotonic() - start_time} seconds.")

    return job_status


# When creating a customized model, NIM asynchronously loads the model in its model registry.
# After this, we can run inference with the new model. This helper function waits for NIM to pick up the new model.
def wait_nim_loads_customized_model(model_id: str, polling_interval: int = 10, timeout: int = 300):
    """Wait until ``model_id`` shows up in the NIM ``/v1/models`` listing.

    The helper sleeps ``polling_interval`` seconds before every check (including
    the first one) and then queries ``{NIM_URL}/v1/models``.

    Args:
        model_id: Model identifier to look for among the ``id`` fields of the
            ``data`` entries in the response.
        polling_interval: Seconds to sleep before each check.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        None, once the model is listed.

    Raises:
        RuntimeError: If the model is still not listed after ``timeout`` seconds.
            (Previously the timeout was never enforced and the loop could spin forever.)
    """
    # Local module import: the notebook's first cell binds the global name `time`
    # to the function (`from time import sleep, time`), so `time.time()` is not
    # reliable at this point of the notebook.
    import time as _time

    start_time = _time.monotonic()

    print(f"Checking if NIM has loaded customized model {model_id}.")

    while True:
        _time.sleep(polling_interval)

        res = requests.get(f"{NIM_URL}/v1/models")
        elapsed = _time.monotonic() - start_time

        if model_id in [model["id"] for model in res.json()["data"]]:
            print(f"Model {model_id} available after {elapsed} seconds.")
            return

        print(f"Model {model_id} not available after {elapsed} seconds.")

        # Enforce the deadline after a failed check so the caller gets a clear error
        # instead of an endless loop.
        if elapsed > timeout:
            raise RuntimeError(f"Model {model_id} not available after {timeout} seconds.")


In [None]:
# Dataset repository identifier in "<namespace>/<dataset name>" form; it is
# reused below to build the hf://datasets/... URI of the dataset.
repo_id = f"{NMS_NAMESPACE}/{DATASET_NAME}"
print(repo_id)

In [None]:
# Where the dataset lives: an hf:// URI built from the repository id
dataset_source = {
    "type": "uri",
    "uri": f"hf://datasets/{repo_id}"
}

# Descriptive metadata sent along with the registration
dataset_metadata = {
    "format": "json",
    "description": "Tool calling xLAM dataset in OpenAI ChatCompletions format",
    "provider_id": "nvidia"
}

# Register the dataset through the Llama Stack client and show the result
response = client.datasets.register(
    purpose="post-training/messages",
    dataset_id=DATASET_NAME,
    source=dataset_source,
    metadata=dataset_metadata,
)
print(response)

In [None]:
# Read the dataset record back from the Entity Store to confirm it exists
dataset_url = f"{ENTITY_STORE_URL}/v1/datasets/{NMS_NAMESPACE}/{DATASET_NAME}"
res = requests.get(url=dataset_url)
assert res.status_code in (200, 201), f"Status Code {res.status_code} Failed to fetch dataset {res.text}"

# Parsed JSON body of the dataset record
dataset_obj = res.json()

In [None]:
# The stored files_url must be the same hf:// URI that was used at registration
expected_files_url = f"hf://datasets/{repo_id}"
print("Files URL:", dataset_obj["files_url"])
assert dataset_obj["files_url"] == expected_files_url

In [None]:
from llama_stack.apis.post_training import LoraFinetuningConfig
import inspect

# Show how LoraFinetuningConfig is constructed
print("LoraFinetuningConfig signature:", inspect.signature(LoraFinetuningConfig), sep="\n")

# List each declared field with its required flag and default, when the class
# exposes a `model_fields` mapping
if hasattr(LoraFinetuningConfig, 'model_fields'):
    print("\nLoraFinetuningConfig fields:")
    lora_fields = LoraFinetuningConfig.model_fields
    for field_name in lora_fields:
        field_info = lora_fields[field_name]
        required = field_info.is_required()
        print(f"  {field_name}: required={required}, default={field_info.default}")


In [None]:
import time
from llama_stack.core.datatypes import Api
from llama_stack.apis.post_training import (
    TrainingConfig,
    DataConfig,
    OptimizerConfig,
    LoraFinetuningConfig,
    DatasetFormat,
    OptimizerType,
)

unique_suffix = int(time.time())

# Access post_training through impls
post_training = client.async_client.impls[Api.post_training]

# Create proper config objects with all required fields
data_config = DataConfig(
    batch_size=16,
    dataset_id=DATASET_NAME,
    shuffle=True,
    data_format=DatasetFormat.instruct
)

optimizer_config = OptimizerConfig(
    optimizer_type=OptimizerType.adamw,
    lr=0.0001,
    weight_decay=0.01,
    num_warmup_steps=100
)

training_config = TrainingConfig(
    n_epochs=2,
    data_config=data_config,
    optimizer_config=optimizer_config
)

# LoRA configuration with correct fields
algorithm_config = LoraFinetuningConfig(
    lora_attn_modules=[],
    apply_lora_to_mlp=True,
    apply_lora_to_output=False,
    rank=8,
    alpha=16,
    use_dora=False,
    quantize_base=False
)

# Convert to dict to work around the bug
training_config_dict = training_config.model_dump()

# Now call the supervised_fine_tune method with dict
res = await post_training.supervised_fine_tune(
    job_uuid=f"finetune-{unique_suffix}",
    model="meta/llama-3.2-1b-instruct@v1.0.0",
    training_config=training_config_dict,  # Pass as dict
    algorithm_config=algorithm_config,
    hyperparam_search_config=None,
    logger_config=None,
    checkpoint_dir="",
)
print(res)

In [None]:
# Plain-dict view of the job object returned by the fine-tuning call
job = res.model_dump()

# JOB_ID: used to track the job status
# CUSTOMIZED_MODEL: name of the model that inference queries will be sent to
JOB_ID, CUSTOMIZED_MODEL = job["id"], job["output_model"]
print(JOB_ID, CUSTOMIZED_MODEL, sep="\n")

In [None]:
# Block until the customization job is no longer scheduled or in progress;
# wait_customization_job raises RuntimeError when its timeout is exceeded.
job_status = wait_customization_job(job_id=JOB_ID)

In [None]:
# List the models of our namespace from the Entity Store, sorted by "-created_at"
models_query = {"filter[namespace]": NMS_NAMESPACE, "sort" : "-created_at"}
response = requests.get(f"{ENTITY_STORE_URL}/v1/models", params=models_query)

assert response.status_code == 200, f"Status Code {response.status_code}: Request failed. Response: {response.text}"

models_listing = json.dumps(response.json(), indent=4)
print("Response JSON:", models_listing)

In [None]:
# Poll NIM's /v1/models listing until the customized model appears in it.
wait_nim_loads_customized_model(model_id=CUSTOMIZED_MODEL)

In [None]:
# Verify the model is in NIM: fetch the model listing and collect the ids
resp = requests.get(f"{NIM_URL}/v1/models")
models = resp.json().get("data", [])
model_names = []
for entry in models:
    model_names.append(entry["id"])

print("Available models in NIM:")
for name in model_names:
    print(f"  - {name}")

# The customized model must be among the listed ids
assert CUSTOMIZED_MODEL in model_names, f"Model {CUSTOMIZED_MODEL} not found"

In [None]:
from llama_stack.core.datatypes import Api

# Get the NVIDIA inference provider directly
inference_router = client.async_client.impls[Api.inference]
nvidia_provider = inference_router.routing_table.impls_by_provider_id.get("nvidia")

if nvidia_provider:
    # Get fresh list of models from NVIDIA provider
    models_from_provider = await nvidia_provider.list_models()
    print("Models from NVIDIA provider:")
    for model in models_from_provider:
        print(f"  - {model.provider_resource_id}")
    
    # Now update the routing table with these models
    models_routing_table = client.async_client.impls[Api.models]
    await models_routing_table.update_registered_models(
        provider_id="nvidia",
        models=models_from_provider
    )
    print("\nRouting table updated!")
    
else:
    print("NVIDIA provider not found")


In [None]:
# Processed data will be stored here: one root folder under the current working
# directory with a sub-folder per dataset split
DATA_ROOT = os.path.join(os.getcwd(), "sample_data")
CUSTOMIZATION_DATA_ROOT = os.path.join(DATA_ROOT, "customization")
VALIDATION_DATA_ROOT = os.path.join(DATA_ROOT, "validation")
EVALUATION_DATA_ROOT = os.path.join(DATA_ROOT, "evaluation")

# Create the folders; already existing ones are left untouched
for data_dir in (DATA_ROOT, CUSTOMIZATION_DATA_ROOT, VALIDATION_DATA_ROOT, EVALUATION_DATA_ROOT):
    os.makedirs(data_dir, exist_ok=True)

In [None]:
# Expected locations of the prepared training, validation and test files
train_fp = f"{CUSTOMIZATION_DATA_ROOT}/training.jsonl"
val_fp = f"{VALIDATION_DATA_ROOT}/validation.jsonl"
test_fp = f"{EVALUATION_DATA_ROOT}/xlam-test-single.jsonl"

# Fail early, with a pointer to the data preparation step, if a file is missing
assert os.path.exists(train_fp), f"The training data at '{train_fp}' does not exist. Please ensure that the data was prepared successfully."
assert os.path.exists(val_fp), f"The validation data at '{val_fp}' does not exist. Please ensure that the data was prepared successfully."
assert os.path.exists(test_fp), f"The test data at '{test_fp}' does not exist. Please ensure that the data was prepared successfully."

In [None]:
def read_jsonl(file_path):
    """Lazily parse a JSON Lines file.

    Opens ``file_path`` as UTF-8 text and yields one decoded JSON value per
    non-blank line. A line that is not valid JSON is reported with ``print``
    and skipped; it does not stop the iteration.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            payload = raw_line.strip()
            if payload:  # blank / whitespace-only lines carry no record
                try:
                    record = json.loads(payload)
                except json.JSONDecodeError as e:
                    print(f"Error decoding JSON: {e}")
                else:
                    yield record


# Materialize every record of the test file in memory
test_data = [example for example in read_jsonl(test_fp)]

print(f"There are {len(test_data)} examples in the test set")

In [None]:
 # Randomly choose
import copy

# Work on a deep copy of the chosen example: the loop below rewrites `tools` in
# place, and without the copy it would also rewrite the entry stored in
# `test_data`. Re-running this cell on an already converted example would then
# find no 'function' key and blank out every tool.
test_sample = copy.deepcopy(random.choice(test_data))

# Transform tools to format expected by Llama Stack client
for i, tool in enumerate(test_sample['tools']):
    # Extract properties we will map to the expected format
    tool = tool.get('function', {})
    tool_name = tool.get('name')
    tool_description = tool.get('description')
    tool_params = tool.get('parameters', {})
    tool_params_properties = tool_params.get('properties', {})

    # JSON Schema lists required parameter names on the enclosing object rather
    # than on each property; honor that list as well as a per-property flag.
    required_names = tool_params.get('required', [])
    if not isinstance(required_names, (list, tuple, set)):
        required_names = []

    # Create object of parameters for this tool
    transformed_parameters = {}
    for name, prop in tool_params_properties.items():  # `prop`: avoid shadowing the builtin `property`
        transformed_param = {
            'param_type': prop.get('type'),
            'description': prop.get('description')
        }
        if 'default' in prop:
            transformed_param['default'] = prop['default']
        if 'required' in prop:
            transformed_param['required'] = prop['required']
        elif name in required_names:
            transformed_param['required'] = True

        transformed_parameters[name] = transformed_param

    # Update this tool in-place using the expected format
    test_sample['tools'][i] = {
        'tool_name': tool_name,
        'description': tool_description,
        'parameters': transformed_parameters
    }

# Visualize the inputs to the LLM - user query and available tools.
# Only the last expression of a cell is displayed automatically, so the
# messages are printed explicitly; the tools remain the cell's output.
print(json.dumps(test_sample['messages'], indent=2))
test_sample['tools']


In [None]:
from llama_stack.core.datatypes import Api

# Use the registered model ID
# NOTE(review): this identifier is hard-coded; presumably it should correspond to
# CUSTOMIZED_MODEL as registered under the "nvidia" provider — confirm.
REGISTERED_MODEL_ID = "nvidia/nvidia-tool-calling-tutorial/test-llama-stack@v1"

# Transform tools back to OpenAI format
openai_tools = []
for tool in test_sample['tools']:
    # Entries that already carry a 'function' key are taken over unchanged
    if 'function' in tool:
        openai_tools.append(tool)
        continue

    # Build the JSON-schema style parameter description for this tool
    param_properties = {}
    required_params = []
    for param_name, param_info in tool.get('parameters', {}).items():
        param_schema = {
            "type": param_info.get('param_type'),
            "description": param_info.get('description', '')
        }
        if param_info.get('default') is not None:
            param_schema["default"] = param_info['default']
        param_properties[param_name] = param_schema

        if param_info.get('required', False):
            required_params.append(param_name)

    # Convert from Llama Stack format to OpenAI format
    openai_tool = {
        "type": "function",
        "function": {
            "name": tool.get('tool_name'),
            "description": tool.get('description'),
            "parameters": {
                "type": "object",
                "properties": param_properties,
                "required": required_params
            }
        }
    }
    openai_tools.append(openai_tool)

print("OpenAI formatted tools:")
first_tool_preview = openai_tools[0] if openai_tools else "No tools"
print(first_tool_preview)

In [None]:
from llama_stack.core.datatypes import Api
from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody

# Access inference through impls
inference = client.async_client.impls[Api.inference]

# Create request with OpenAI-formatted tools
request = OpenAIChatCompletionRequestWithExtraBody(
    model=REGISTERED_MODEL_ID,
    messages=test_sample["messages"],
    tools=openai_tools,  # Use the converted tools
    tool_choice="auto",
    stream=False,
    max_tokens=512,
    temperature=0.1,
    top_p=0.7,
)

# Make the chat completion call
completion = await inference.openai_chat_completion(params=request)

print("Tool calls from model:")
if hasattr(completion, 'choices') and len(completion.choices) > 0:
    print(completion.choices[0].message.tool_calls)
else:
    print(completion)


In [None]:
# Tool calls stored with the sampled test example (presumably the reference
# answer), shown for comparison with the model output above.
test_sample['tool_calls']

In [None]:
# Recap: the output model name reported by the fine-tuning job.
print(f"Name of your custom model is: {CUSTOMIZED_MODEL}")