# Automated DPO Pipeline with Django Integration
This notebook automates DPO (Direct Preference Optimization) training using feedback from your Django backend, including score and API response.

In [None]:
# Cell 1 — Install required libraries
!pip install -U transformers datasets trl accelerate peft bitsandbytes requests tqdm pandas



In [23]:
# Cell 2 — Setup API
USERNAME = "emotuna_user"
API_BASE = "https://73099cf3fccd.ngrok-free.app"

In [None]:
# Cell 3 — Download SFT dataset (convert JSON array to JSONL)
import requests, json

DATASET_URL = f'{API_BASE}/api/dataset/?username={USERNAME}'
response = requests.get(DATASET_URL)
if response.status_code == 200:
    data = response.json()  # This is a list of dicts (JSON array)
    with open('sft_dataset.jsonl', 'w', encoding='utf-8') as f:
        for item in data:
            f.write(json.dumps(item, ensure_ascii=False) + '\n')
    print('Downloaded and converted SFT dataset to JSONL, status:', response.status_code)
else:
    print('Failed to download dataset, status:', response.status_code)

Downloaded SFT dataset, status: 200


In [None]:
# Cell 4 — Load & preview dataset
import json, os, shutil
from tqdm import tqdm
import pandas as pd

data = []
with open('sft_dataset.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        data.append(json.loads(line))

print('Loaded', len(data), 'examples')
print(data[0])  # Show the first example

df = pd.DataFrame(data)
print(df.head())

Loaded 13 examples
{'user_message': 'You are a complete mess', 'api_response': 'I’m sorry you feel that way—let me know what I can fix.', 'score': '60', 'edited_reply': 'Noted with thanks.'}
                     user_message  \
0         You are a complete mess   
1  Have you taken your breakfast?   
2           Wanna grab lunch tgt?   
3           wanna grab lunch tgt?   
4           wanna grab lunch tgt?   

                                        api_response score  \
0  I’m sorry you feel that way—let me know what I...    60   
1   Yep, just finished—fuelled up and ready to roll!    70   
2            Yes! I’m starving—where should we meet?    30   
3                  Already ate, but down for coffee!   100   
4  I'm not sure if you're aware of this, but I've...     0   

                               edited_reply  
0                        Noted with thanks.  
1  Yes, I have just completed my breakfast.  
2                            No. I am busy.  
3         Already ate, but do

In [26]:
# Cell 5 — Copy dataset for DPO if not exists
if not os.path.exists('dpo_dataset.jsonl'):
    shutil.copy('sft_dataset.jsonl', 'dpo_dataset.jsonl')
    print('Copied sft_dataset.jsonl to dpo_dataset.jsonl')
else:
    print('dpo_dataset.jsonl already exists')

dpo_dataset.jsonl already exists


In [27]:
# Cell 6 — Train DPO (with updated dataset format)
import os
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

# ------------------------------
# 0. Disable WandB
# ------------------------------
os.environ["WANDB_MODE"] = "offline"

# ------------------------------
# 1. Load model & tokenizer
# ------------------------------
model_name = "gpt2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Ensure pad token exists
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# ------------------------------
# 2. Load & preprocess dataset
# ------------------------------
dataset = load_dataset("json", data_files="dpo_dataset.jsonl", split="train")

def preprocess_dpo(example):
    return {
        "query": example["user_message"],
        "chosen": example["edited_reply"],   # always preferred
        "rejected": example["api_response"]  # less preferred
    }

dataset = dataset.map(preprocess_dpo)

# Ensure dataset outputs torch.long tensors
dataset.set_format(type="torch")

# ------------------------------
# 3. Define DPOConfig
# ------------------------------
training_args = DPOConfig(
    output_dir="./gpt2-dpo",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    logging_steps=10,
    save_strategy="no",
    report_to="none",   # disables wandb/tensorboard
)

# ------------------------------
# 4. Initialize DPOTrainer
# ------------------------------
trainer = DPOTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    processing_class=tokenizer   # ✅ correct for your TRL version
)

# ------------------------------
# 5. Train
# ------------------------------
trainer.train()
print("✅ DPO training complete!")


The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.
  prompt_input_ids = [torch.tensor(example["prompt_input_ids"]) for example in examples]
  chosen_input_ids = [torch.tensor(example["chosen_input_ids"]) for example in examples]
  rejected_input_ids = [torch.tensor(example["rejected_input_ids"]) for example in examples]


Step,Training Loss
10,0.5934


✅ DPO training complete!


In [42]:
# Explicitly save final model
SAVE_DIR = "./gpt2-dpo-final"
trainer.model.save_pretrained(SAVE_DIR)
trainer.tokenizer.save_pretrained(SAVE_DIR)
print(f"✅ Model saved to {SAVE_DIR}")

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


✅ Model saved to ./gpt2-dpo-final


In [None]:
# Cell 8 — Zip and verify model files
import shutil, os, zipfile

REQUIRED_FILES = [
    'pytorch_model.bin', 'model.safetensors', 'config.json', 'tokenizer.json',
    'vocab.json', 'merges.txt', 'tokenizer_config.json',
    'special_tokens_map.json', 'generation_config.json'
    # Add/remove as needed for your model
 ]

SAVE_DIR = "./gpt2-dpo-final"

# Remove any junk files
for file in os.listdir(SAVE_DIR):
    if file not in REQUIRED_FILES:
        os.remove(os.path.join(SAVE_DIR, file))

# Zip the directory
shutil.make_archive("dpo_model", "zip", SAVE_DIR)

# Verify contents
with zipfile.ZipFile("dpo_model.zip", "r") as zf:
    print("📦 Files in archive:")
    print(zf.namelist())

📦 Files in archive:
['vocab.json', 'model.safetensors', 'config.json', 'merges.txt', 'generation_config.json', 'special_tokens_map.json', 'tokenizer.json', 'tokenizer_config.json']


In [None]:
# Cell 9 — Check zip size
zip_path = 'dpo_model.zip'
size_mb = os.path.getsize(zip_path) / (1024 * 1024)
print(f"ZIP file size: {size_mb:.2f} MB")

ZIP file size: 442.41 MB


In [None]:
# Cell 10 — Upload model zip (single file, no chunking needed for DB-backed API)
import requests

MODEL_UPLOAD_URL = f'{API_BASE}/api/model/'
zip_file_path = 'dpo_model.zip'

with open(zip_file_path, 'rb') as f:
    files = {'file': f}
    data = {'username': USERNAME}
    try:
        r = requests.post(MODEL_UPLOAD_URL, files=files, data=data, verify=False)
        print(f"Upload status code: {r.status_code}")
        if r.status_code in [200, 201, 204]:
            print("Model zip uploaded successfully")
        else:
            print(f"Failed to upload model zip: {r.text}")
    except Exception as e:
        print(f"Error uploading model zip: {e}")

Created 5 chunks
Uploading dpo_model.zip.part0 ...




Upload status code for dpo_model.zip.part0: 204
dpo_model.zip.part0 uploaded successfully
Uploading dpo_model.zip.part1 ...




Upload status code for dpo_model.zip.part1: 204
dpo_model.zip.part1 uploaded successfully
Uploading dpo_model.zip.part2 ...




Upload status code for dpo_model.zip.part2: 204
dpo_model.zip.part2 uploaded successfully
Uploading dpo_model.zip.part3 ...




Upload status code for dpo_model.zip.part3: 204
dpo_model.zip.part3 uploaded successfully
Uploading dpo_model.zip.part4 ...




Upload status code for dpo_model.zip.part4: 204
dpo_model.zip.part4 uploaded successfully


In [None]:
# Cell 11 — Trigger unzip on server (DB-backed, no chunking needed)

MODEL_UNZIP_URL = f'{API_BASE}/api/model/unzip/'
try:
    r = requests.post(MODEL_UNZIP_URL, data={'username': USERNAME}, verify=False)
    if r.status_code == 200:
        print('✅ Model unzipped successfully on server.')
    else:
        print(f'⚠️ Failed to unzip model on server. Status code: {r.status_code}, Response: {r.text}')
except Exception as e:
    print(f'Error sending unzip request: {e}')



✅ Model unzipped successfully on server.
