In [1]:
### Import necessary libraries
import json
import pandas as pd
import os

### Step 1: Preparing the Dataset
# Each record pairs one user prompt with a preferred and a non-preferred
# assistant reply. The list is kept as plain Python dicts here so it can be
# serialized to JSONL (one record per line) afterwards.
def _preference_record(prompt, preferred, non_preferred):
    """Build one preference-comparison record for a single-turn user prompt.

    Args:
        prompt: Text of the user message.
        preferred: Assistant reply that should be favored.
        non_preferred: Assistant reply that should be disfavored.

    Returns:
        A dict with the keys ``input``, ``preferred_output`` and
        ``non_preferred_output``, in that order.
    """
    return {
        "input": {
            "messages": [{"role": "user", "content": prompt}],
            "tools": [],
            "parallel_tool_calls": True,
        },
        "preferred_output": [{"role": "assistant", "content": preferred}],
        "non_preferred_output": [{"role": "assistant", "content": non_preferred}],
    }


data = [
    _preference_record(
        "Hello, can you tell me how cold San Francisco is today?",
        "Today in San Francisco, it is not quite cold as expected. Morning clouds will give away to sunshine, with a high near 68\u00b0F (20\u00b0C) and a low around 57\u00b0F (14\u00b0C).",
        "It is not particularly cold in San Francisco today.",
    ),
    _preference_record(
        "What is the capital of France?",
        "The capital of France is Paris.",
        "I think it might be Lyon or Paris.",
    ),
]



In [2]:
# Write the dataset to disk in JSONL form: one JSON-encoded record per line.
data_file = "dpo_dataset.jsonl"
with open(data_file, "w") as jsonl_out:
    jsonl_out.writelines(json.dumps(record) + "\n" for record in data)

print(f"Dataset saved to {data_file}")

Dataset saved to dpo_dataset.jsonl


In [None]:
### Step 2: Configuring and Submitting a DPO Fine-Tuning Job
# Import the OpenAI SDK (install it if necessary: pip install "openai>=1.0")
import openai

# Read the API key from the environment rather than hardcoding it, so the
# secret never ends up in the saved notebook or in version control.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this cell."
    )

# The fine-tuning endpoint expects the ID of an uploaded file, not a local
# path, so upload the JSONL dataset first and keep the returned file object.
with open(data_file, "rb") as training_fh:
    training_file = openai.files.create(file=training_fh, purpose="fine-tune")

# Define the fine-tuning job
job_config = {
    "training_file": training_file.id,
    "model": "gpt-4o-2024-08-06",
    "method": {
        "type": "dpo",
        "dpo": {
            "hyperparameters": {
                "beta": 0.1
            }
        }
    }
}

In [None]:
# Submit the fine-tuning job.
# openai>=1.0 removed `openai.FineTuningJob`; jobs are created through
# `openai.fine_tuning.jobs` instead.
job = None  # stays None if submission fails, so `job` is always defined after this cell
try:
    job = openai.fine_tuning.jobs.create(**job_config)
    print("Fine-tuning job created successfully:", job)
except openai.OpenAIError as e:
    # Report API/SDK failures without hiding unrelated programming errors.
    print("Error creating fine-tuning job:", e)


In [None]:
### Step 3: Monitoring the Fine-Tuning Job
# Check the status of the submitted job. Response objects from openai>=1.0 are
# not dicts, so the ID is read as an attribute; the placeholder is kept when no
# job ID is available (for example, when `job` is None).
job_id = getattr(job, "id", None) or 'job-id-placeholder'
if job_id != 'job-id-placeholder':
    try:
        # `openai.FineTuningJob.retrieve` was removed in openai>=1.0.
        job_status = openai.fine_tuning.jobs.retrieve(job_id)
        print("Job Status:", job_status)
    except openai.OpenAIError as e:
        print("Error retrieving job status:", e)

In [None]:
### Step 4: Using the Fine-Tuned Model
# Once the job is complete, use the fine-tuned model for inference
fine_tuned_model = "model-id-placeholder"  # Replace with the fine-tuned model ID from the job status
if fine_tuned_model != 'model-id-placeholder':
    # openai>=1.0 replaced `openai.ChatCompletion.create` with
    # `openai.chat.completions.create`, and responses are typed objects that
    # are read through attributes rather than dict-style indexing.
    response = openai.chat.completions.create(
        model=fine_tuned_model,
        messages=[
            {"role": "user", "content": "Can you summarize the weather in San Francisco?"}
        ]
    )
    print("Response from fine-tuned model:", response.choices[0].message.content)

## Tips
It is recommended to first perform Supervised Fine-Tuning (SFT) on preferred responses.

Afterwards, DPO is used to further refine the model by aligning it with preference comparisons.

#### Example Workflow:
1. Fine-tune the base model with SFT using a subset of preferred responses.
2. Use the SFT fine-tuned model as the starting point for DPO.