In [1]:
# Import required libraries
import requests

# Address of the Llama Stack TRL provider API.
# The server must already be listening on this host and port.
base_url = "http://127.0.0.1:8321"

# Sent with GET requests: ask for a JSON response body.
headers_get = dict(accept="application/json")

# Sent with POST requests: declare that the request body is JSON.
headers_post = {"Content-Type": "application/json"}

In [2]:
# Get the list of available providers
# This will show us what services are available (TRL for post-training, localfs for datasets, etc.)

url_providers = f"{base_url}/v1/providers"
# A timeout keeps this cell from hanging forever when the server is not responding.
response_providers = requests.get(url_providers, headers=headers_get, timeout=30)
# Stop here with the HTTP status if the request failed, rather than trying to
# decode and print an error body as if it were the provider list.
response_providers.raise_for_status()

# Display the providers and their configurations
# You should see 'trl' provider for post-training and 'localfs' for dataset storage
print(response_providers.json())

{'data': [{'api': 'post_training', 'provider_id': 'trl', 'provider_type': 'inline::trl', 'config': {'device': 'cuda', 'dpo_beta': 0.1, 'use_reference_model': True, 'max_seq_length': 2048, 'gradient_checkpointing': False, 'logging_steps': 10, 'warmup_ratio': 0.1, 'weight_decay': 0.01}, 'health': {'status': 'Not Implemented', 'message': 'Provider does not implement health check'}}, {'api': 'datasetio', 'provider_id': 'localfs', 'provider_type': 'inline::localfs', 'config': {'kvstore': {'type': 'sqlite', 'db_path': '/tmp/llama_stack_provider_trl/datasetio.db'}}, 'health': {'status': 'Not Implemented', 'message': 'Provider does not implement health check'}}]}


In [3]:
# List all available datasets in the system
# This will show existing datasets that can be used for training

url_datasets = f"{base_url}/v1/datasets"
# A timeout keeps this cell from hanging forever when the server is not responding.
response_datasets = requests.get(url_datasets, headers=headers_get, timeout=30)
# Surface HTTP errors immediately instead of printing an error body as a dataset list.
response_datasets.raise_for_status()

# Display the datasets - each dataset should have a purpose (e.g., 'post-training/messages')
# and a source containing the training data
print(response_datasets.json())

{'data': [{'identifier': 'test-dpo-dataset-inline-large', 'provider_resource_id': 'test-dpo-dataset-inline-large', 'provider_id': 'localfs', 'type': 'dataset', 'purpose': 'post-training/messages', 'source': {'type': 'rows', 'rows': [{'prompt': 'What is machine learning?', 'chosen': 'Machine learning is a branch of artificial intelligence that enables computers to learn from data and improve their performance on specific tasks without being explicitly programmed. It uses algorithms to find patterns in data and make predictions or decisions.', 'rejected': 'Machine learning is just computers doing math stuff with data.'}, {'prompt': 'Write a hello world program', 'chosen': 'Here is a simple hello world program in Python:\n\n```python\nprint("Hello, World!")\n```', 'rejected': 'print hello world'}, {'prompt': 'Explain the concept of fine-tuning', 'chosen': 'Fine-tuning is the process of taking a pre-trained model and further training it on a specific dataset to adapt it for a particular ta

In [4]:
# Register a DPO (Direct Preference Optimization) dataset with the server.
# Every row pairs one prompt with a preferred ("chosen") and a dispreferred
# ("rejected") response.

url_upload_dataset = f"{base_url}/v1/datasets"

# The preference pairs, supplied inline instead of being read from a file
preference_rows = [
    {
        "prompt": "What is machine learning?",
        "chosen": "Machine learning is a branch of artificial intelligence that enables computers to learn from data and improve their performance on specific tasks without being explicitly programmed. It uses algorithms to find patterns in data and make predictions or decisions.",
        "rejected": "Machine learning is just computers doing math stuff with data.",
    },
    {
        "prompt": "Write a hello world program",
        "chosen": "Here is a simple hello world program in Python:\n\n```python\nprint(\"Hello, World!\")\n```",
        "rejected": "print hello world",
    },
    {
        "prompt": "Explain the concept of fine-tuning",
        "chosen": "Fine-tuning is the process of taking a pre-trained model and further training it on a specific dataset to adapt it for a particular task or domain while leveraging its existing knowledge. This approach is more efficient than training from scratch.",
        "rejected": "Fine-tuning means making a model better by training it more.",
    },
]

# Full registration payload; dataset_id is the name the training request refers to
dataset_payload = {
    "dataset_id": "test-dpo-dataset-inline-large",
    "purpose": "post-training/messages",
    "dataset_type": "preference",
    "source": {"type": "rows", "rows": preference_rows},
    "metadata": {
        # Use local filesystem storage
        "provider_id": "localfs",
        "description": "Inline DPO preference training dataset",
    },
}

# POST the payload and report both the HTTP status and the server's reply
response_dataset = requests.post(
    url_upload_dataset,
    headers=headers_post,
    json=dataset_payload,
)
print("Dataset Upload Status:", response_dataset.status_code)
print("Dataset Upload Response:", response_dataset.json())

Dataset Upload Status: 200
Dataset Upload Response: {'identifier': 'test-dpo-dataset-inline-large', 'provider_resource_id': 'test-dpo-dataset-inline-large', 'provider_id': 'localfs', 'type': 'dataset', 'owner': {'principal': '', 'attributes': {}}, 'purpose': 'post-training/messages', 'source': {'type': 'rows', 'rows': [{'prompt': 'What is machine learning?', 'chosen': 'Machine learning is a branch of artificial intelligence that enables computers to learn from data and improve their performance on specific tasks without being explicitly programmed. It uses algorithms to find patterns in data and make predictions or decisions.', 'rejected': 'Machine learning is just computers doing math stuff with data.'}, {'prompt': 'Write a hello world program', 'chosen': 'Here is a simple hello world program in Python:\n\n```python\nprint("Hello, World!")\n```', 'rejected': 'print hello world'}, {'prompt': 'Explain the concept of fine-tuning', 'chosen': 'Fine-tuning is the process of taking a pre-tra

In [5]:
# Verify that our dataset was successfully uploaded
# This should now show our "test-dpo-dataset-inline-large" dataset

url_datasets = f"{base_url}/v1/datasets"
# A timeout keeps this cell from hanging forever when the server is not responding.
response_datasets = requests.get(url_datasets, headers=headers_get, timeout=30)
# Surface HTTP errors immediately instead of inspecting an error body.
response_datasets.raise_for_status()

# The response should include our uploaded dataset with all the preference pairs
datasets_listing = response_datasets.json()
print(datasets_listing)

# Actually perform the verification: the listing wraps the datasets in a "data"
# list, and each entry carries its name under "identifier".
registered_ids = [entry.get("identifier") for entry in datasets_listing.get("data", [])]
assert "test-dpo-dataset-inline-large" in registered_ids, (
    f"Uploaded dataset not found; server reports: {registered_ids}"
)

{'data': [{'identifier': 'test-dpo-dataset-inline-large', 'provider_resource_id': 'test-dpo-dataset-inline-large', 'provider_id': 'localfs', 'type': 'dataset', 'purpose': 'post-training/messages', 'source': {'type': 'rows', 'rows': [{'prompt': 'What is machine learning?', 'chosen': 'Machine learning is a branch of artificial intelligence that enables computers to learn from data and improve their performance on specific tasks without being explicitly programmed. It uses algorithms to find patterns in data and make predictions or decisions.', 'rejected': 'Machine learning is just computers doing math stuff with data.'}, {'prompt': 'Write a hello world program', 'chosen': 'Here is a simple hello world program in Python:\n\n```python\nprint("Hello, World!")\n```', 'rejected': 'print hello world'}, {'prompt': 'Explain the concept of fine-tuning', 'chosen': 'Fine-tuning is the process of taking a pre-trained model and further training it on a specific dataset to adapt it for a particular ta

In [None]:
# Submit a preference-optimization (DPO) training job.
url_train_model = f"{base_url}/v1/post-training/preference-optimize"

# Algorithm-level settings
algorithm_config = {
    "reward_scale": 0.0,
    "reward_clip": 0.0,
    "epsilon": 0.0,
    "gamma": 0.0,
    "beta": 0.1,
    "loss_type": "sigmoid",
}

# Optimizer settings
optimizer_config = {
    "optimizer_type": "adamw",
    "weight_decay": 0.01,
    "num_warmup_steps": 0,
    "lr": 5e-5,
    "warmup_ratio": 0.1,
}

# Which data to train on and how to batch it
data_config = {
    "data_format": "instruct",
    # Same id that the dataset upload cell registers
    "dataset_id": "test-dpo-dataset-inline-large",
    "batch_size": 2,
    "train_split_percentage": 0.9,
    "shuffle": True,
}

# Training-loop settings, with the optimizer and data settings nested inside
training_config = {
    "n_epochs": 3,
    "max_steps_per_epoch": 50,
    "gradient_accumulation_steps": 1,
    "optimizer_config": optimizer_config,
    "data_config": data_config,
}

# Complete request body; job_uuid is the handle used to query this job afterwards
train_model_data = {
    "job_uuid": "dpo-training-distilgpt2-demo",
    "finetuned_model": "distilgpt2",
    "algorithm_config": algorithm_config,
    "training_config": training_config,
    "hyperparam_search_config": {},
    "logger_config": {},
}

# Send the job and report both the HTTP status and the server's reply
response_train_model = requests.post(
    url_train_model,
    headers=headers_post,
    json=train_model_data,
)
print("Train Model Status:", response_train_model.status_code)
print("Train Model Response:", response_train_model.json())


Train Model Status: 200
Train Model Response: {'job_uuid': 'dpo-training-distilgpt2-demo'}


In [7]:
# Get a list of all post-training jobs
# This will show all training jobs that have been submitted to the system

url_post_training_jobs = f"{base_url}/v1/post-training/jobs"
# A timeout keeps this cell from hanging forever when the server is not responding.
response_post_training_jobs = requests.get(
    url_post_training_jobs,
    headers=headers_get,
    timeout=30,
)
# Surface HTTP errors immediately instead of printing an error body as a job list.
response_post_training_jobs.raise_for_status()

# Display the jobs returned by the server
print(response_post_training_jobs.json())

{'data': [{'job_uuid': 'dpo-training-distilgpt2-demo'}]}


In [None]:
# Check the status of a specific training job
# Replace the job_uuid with the actual UUID from your training job

job_uuid = "dpo-training-distilgpt2-demo"  # The job UUID from the training request
url_job_status = f"{base_url}/v1/post-training/job/status"

# Pass the UUID through `params` so requests URL-encodes it; interpolating it into
# the URL by hand breaks for ids containing characters such as '&', '#' or spaces.
# The timeout keeps the cell from hanging forever when the server is not responding.
response_job_status = requests.get(
    url_job_status,
    headers=headers_get,
    params={"job_uuid": job_uuid},
    timeout=30,
)

print("Job Status:", response_job_status.status_code)
# The response will include: status, scheduled_at, started_at, completed_at, checkpoints
print("Job Status Response:", response_job_status.json())

Job Status: 200
Job Status Response: {'job_uuid': 'dpo-training-distilgpt2', 'status': 'completed', 'scheduled_at': '2025-07-13T23:04:00.314874Z', 'started_at': '2025-07-13T23:04:00.315655Z', 'completed_at': '2025-07-13T23:04:11.332505Z', 'resources_allocated': {'initial': {'system_memory': {'total': '728.20', 'available': '720.48', 'used': '3.73', 'percent': 1.1}, 'device_memory': {'allocated': '0.00', 'reserved': '0.00', 'max_allocated': '0.00'}}, 'after_training': {'system_memory': {'total': '728.20', 'available': '718.50', 'used': '5.50', 'percent': 1.3}, 'device_memory': {'allocated': '1.26', 'reserved': '2.13', 'max_allocated': '1.88'}}, 'final': {'system_memory': {'total': '728.20', 'available': '718.28', 'used': '5.72', 'percent': 1.4}, 'device_memory': {'allocated': '0.34', 'reserved': '1.41', 'max_allocated': '1.88'}}}, 'checkpoints': [{'identifier': 'distilgpt2-dpo-3', 'created_at': '2025-07-13T23:04:11.027524Z', 'epoch': 3, 'post_training_job_id': 'dpo-training-distilgpt2',

In [10]:
# Retrieve artifacts (checkpoints, metrics) from a completed training job
# This will show available model checkpoints and their metadata
# Requires `job_uuid` to have been set by the job-status cell.

url_job_artifacts = f"{base_url}/v1/post-training/job/artifacts"

# Pass the UUID through `params` so requests URL-encodes it; interpolating it into
# the URL by hand breaks for ids containing characters such as '&', '#' or spaces.
# The timeout keeps the cell from hanging forever when the server is not responding.
response_job_artifacts = requests.get(
    url_job_artifacts,
    headers=headers_get,
    params={"job_uuid": job_uuid},
    timeout=30,
)

print("Job Artifacts Status:", response_job_artifacts.status_code)
# The response will include checkpoint information: identifier, path, epoch, training_metrics
print("Job Artifacts Response:", response_job_artifacts.json())

Job Artifacts Status: 200
Job Artifacts Response: {'job_uuid': 'dpo-training-distilgpt2', 'checkpoints': [{'identifier': 'distilgpt2-dpo-3', 'created_at': '2025-07-13T23:04:11.027524Z', 'epoch': 3, 'post_training_job_id': 'dpo-training-distilgpt2', 'path': 'checkpoints/dpo-training-distilgpt2/dpo_model', 'training_metrics': None}]}
