# Automated DPO Pipeline with Django Integration
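This notebook automates DPO (Direct Preference Optimization) training on feedback downloaded from your Django backend. Each feedback record contains a prompt, the API response, a completion, and a score; the score decides which of the two replies is treated as the preferred one.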

In [None]:
# Cell 1 — Install required libraries
!pip install -U transformers datasets trl accelerate peft bitsandbytes requests

Collecting trl
  Downloading trl-0.23.0-py3-none-any.whl.metadata (11 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Downloading trl-0.23.0-py3-none-any.whl (564 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m564.7/564.7 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading requests-2.32.5-py3-none-any.whl (64 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: requests, bitsandbytes, trl
  Attempting uninstall: requests
    Found existing installation: requests 2.32.4
    Uninstalling requests-2.32.4:
 

In [None]:
# Cell 2 — Setup API
import os

# Account name sent with every backend request (dataset download, model upload, unzip).
# Override with the EMOTUNA_USERNAME environment variable; the default is unchanged.
USERNAME = os.environ.get("EMOTUNA_USERNAME", "emotuna_user")

# Base URL of the backend. The default is an ngrok tunnel hostname, so allow
# replacing it through EMOTUNA_API_BASE without editing the notebook.
# Trailing slashes are stripped so f'{API_BASE}/api/...' never contains '//'.
API_BASE = os.environ.get(
    "EMOTUNA_API_BASE", "https://c2b1e38a7d3a.ngrok-free.app"
).rstrip("/")

In [None]:
# Cell 3 — Download SFT dataset
import requests

DATASET_URL = f'{API_BASE}/api/dataset/?username={USERNAME}'

# A timeout keeps the cell from hanging forever if the backend is unreachable.
response = requests.get(DATASET_URL, timeout=60)

# Fail before touching disk: without this check an error body (4xx/5xx) would be
# saved as sft_dataset.jsonl and only surface later as a confusing JSON parse error.
response.raise_for_status()

with open('sft_dataset.jsonl', 'wb') as f:
    f.write(response.content)
print('Downloaded SFT dataset, status:', response.status_code)

Downloaded SFT dataset, status: 200


In [None]:
# Cell 4 — Load & preview dataset
import json, os, zipfile, shutil
from tqdm import tqdm
import pandas as pd

# Parse the downloaded JSON Lines file: one JSON object per non-empty line.
data = []
with open('sft_dataset.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        # Blank lines (e.g. a trailing newline at end of file) are not records;
        # json.loads would raise on them, so skip them.
        if not line.strip():
            continue
        data.append(json.loads(line))

print('Loaded', len(data), 'examples')

# Stop here with a clear message instead of an IndexError on data[0] below.
if not data:
    raise ValueError('sft_dataset.jsonl contains no examples; check the dataset download')

print(data[0])  # Show the first example

df = pd.DataFrame(data)
print(df.head())

Loaded 12 examples
{'prompt': 'You are a complete mess', 'api_response': 'I’m sorry you feel that way—let me know what I can fix.', 'score': 60, 'completion': 'Noted with thanks. '}
                           prompt  \
0         You are a complete mess   
1  Have you taken your breakfast?   
2           Wanna grab lunch tgt?   
3           wanna grab lunch tgt?   
4           wanna grab lunch tgt?   

                                        api_response  score  \
0  I’m sorry you feel that way—let me know what I...     60   
1   Yep, just finished—fuelled up and ready to roll!     70   
2            Yes! I’m starving—where should we meet?     30   
3                  Already ate, but down for coffee!    100   
4  I'm not sure if you're aware of this, but I've...      0   

                                 completion  
0                       Noted with thanks.   
1  Yes, I have just completed my breakfast.  
2                            No. I am busy.  
3         Already ate, but down 

In [None]:
# Cell 5 — Copy dataset for DPO if not exists
if os.path.exists('dpo_dataset.jsonl'):
    # An existing DPO file is left exactly as it is.
    print('dpo_dataset.jsonl already exists')
else:
    # First run: start the DPO dataset as a copy of the downloaded SFT file.
    shutil.copy('sft_dataset.jsonl', 'dpo_dataset.jsonl')
    print('Copied sft_dataset.jsonl to dpo_dataset.jsonl')

Copied sft_dataset.jsonl to dpo_dataset.jsonl


In [None]:
# Cell 6 — Train DPO
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

# Weights & Biases settings applied before the trainer is created.
os.environ.update({"WANDB_MODE": "offline", "WANDB_DISABLED": "true"})

# 1. Load tokenizer & model
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    # No dedicated padding token is defined: reuse end-of-sequence for padding.
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name)

# 2. Load & preprocess dataset
# The JSON Lines file is read as a single "train" split.
dataset = load_dataset("json", data_files="dpo_dataset.jsonl", split="train")

def preprocess_dpo(example):
    """Turn one feedback record into a preference pair.

    The record must provide 'score', 'api_response', 'completion' and 'prompt'.
    A score of 50 or more marks the API response as the preferred ("chosen")
    answer and the completion as the rejected one; a lower score swaps them.

    Returns a dict with keys 'query' (the prompt), 'chosen' and 'rejected'.
    """
    winner_key, loser_key = (
        ('api_response', 'completion')
        if example['score'] >= 50
        else ('completion', 'api_response')
    )
    chosen_text = example[winner_key]
    rejected_text = example[loser_key]
    return {"query": example["prompt"], "chosen": chosen_text, "rejected": rejected_text}

# Add the 'chosen' / 'rejected' (and 'query') columns to every record.
dataset = dataset.map(preprocess_dpo)

# 3. Training setup
# report_to="none" switches off experiment-tracker integrations through the
# supported argument; the WANDB_DISABLED environment variable is deprecated
# (see the warning in this cell's output).
training_args = DPOConfig(output_dir="./gpt2-dpo", report_to="none")

trainer = DPOTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    processing_class=tokenizer
)

# 4. Train
trainer.train()

# train() only writes intermediate checkpoint-N subfolders. Save the final model
# (and the tokenizer passed as processing_class) to the top level of output_dir,
# which is the folder the cleanup and zip cells package.
trainer.save_model(training_args.output_dir)
print("✅ DPO training complete!")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Extracting prompt in train dataset:   0%|          | 0/12 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/12 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/12 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.


Step,Training Loss


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


✅ DPO training complete!


In [None]:
# Cell 7 — Cleanup before zipping
CHECKPOINT_DIR = './gpt2-dpo'
REQUIRED_FILES = [
    'pytorch_model.bin', 'model.safetensors', 'config.json', 'tokenizer.json',
    'vocab.json', 'merges.txt', 'tokenizer_config.json',
    'special_tokens_map.json', 'generation_config.json'
]
# Files that actually carry the model weights; at least one must survive cleanup.
WEIGHT_FILES = ('model.safetensors', 'pytorch_model.bin')


def _latest_checkpoint_dir(base_dir):
    """Return the path of the highest-numbered 'checkpoint-N' folder in base_dir, or None."""
    best_step, best_path = -1, None
    for entry in os.listdir(base_dir):
        prefix, _, step = entry.rpartition('-')
        path = os.path.join(base_dir, entry)
        if prefix == 'checkpoint' and step.isdigit() and os.path.isdir(path):
            # Compare numerically so checkpoint-10 beats checkpoint-2.
            if int(step) > best_step:
                best_step, best_path = int(step), path
    return best_path


def cleanup_model_dir(base_dir, required_files):
    """Reduce base_dir to a flat folder that holds only the files in required_files.

    If the top level has no weights file, the required files of the newest
    checkpoint-N subfolder are moved up first, so deleting the subfolders cannot
    throw away the only copy of the trained model. After that, every top-level
    file not listed in required_files and every subdirectory is removed.
    Removal problems are reported and skipped, not raised.
    """
    if not os.path.isdir(base_dir):
        print(f"Nothing to clean: {base_dir} does not exist")
        return

    has_weights = any(os.path.isfile(os.path.join(base_dir, w)) for w in WEIGHT_FILES)
    if not has_weights:
        latest = _latest_checkpoint_dir(base_dir)
        if latest is not None:
            for name in sorted(os.listdir(latest)):
                src = os.path.join(latest, name)
                dst = os.path.join(base_dir, name)
                # Never overwrite something that is already at the top level.
                if name in required_files and os.path.isfile(src) and not os.path.exists(dst):
                    shutil.move(src, dst)
                    print(f"Kept from {latest}: {name}")

    # Listing the directory once (instead of os.walk) avoids descending into
    # folders that are being deleted during the same iteration.
    entries = sorted(os.listdir(base_dir))

    for entry in entries:
        path = os.path.join(base_dir, entry)
        if os.path.isdir(path) or entry in required_files:
            continue
        try:
            os.remove(path)
            print(f"Removed: {path}")
        except OSError as e:
            print(f"Could not remove {entry}: {e}")

    for entry in entries:
        path = os.path.join(base_dir, entry)
        if not os.path.isdir(path):
            continue
        try:
            shutil.rmtree(path)
            print(f"Removed directory: {path}")
        except OSError as e:
            print(f"Could not remove directory {entry}: {e}")


cleanup_model_dir(CHECKPOINT_DIR, REQUIRED_FILES)


Model zipped successfully as dpo_model.zip


In [None]:
# Cell 8 — Zip the cleaned model folder
# Packs the contents of ./gpt2-dpo into dpo_model.zip in the working directory.
shutil.make_archive(base_name='dpo_model', format='zip', root_dir='./gpt2-dpo')
print("✅ Cleaned and zipped successfully as dpo_model.zip")


In [None]:
# Cell 9 — Check zip size
zip_path = 'dpo_model.zip'
# Size on disk in bytes, converted to mebibytes (1024 * 1024 bytes each).
size_mb = os.stat(zip_path).st_size / (1024 ** 2)
print(f"ZIP file size: {size_mb:.2f} MB")


ZIP file size: 1320.46 MB


In [None]:
# Cell 10 — Split into chunks & upload
# Endpoint that each chunk is POSTed to by the upload loop in this cell.
MODEL_UPLOAD_URL = f'{API_BASE}/api/model/'
# Archive to split; same file name that Cell 8 creates with shutil.make_archive.
zip_file_path = 'dpo_model.zip'
chunk_size = 100 * 1024 ** 2  # 100 MB

def split_file(file_path, chunk_size=chunk_size):
    """Split file_path into consecutive pieces of at most chunk_size bytes.

    Each piece is written next to the source as '<file_path>.part<N>', with N
    counting up from 0 in file order. Returns the list of piece paths in that
    order; an empty source file produces an empty list.
    """
    part_paths = []
    with open(file_path, "rb") as source:
        # iter() with a sentinel keeps reading until read() returns b"" (end of file).
        pieces = iter(lambda: source.read(chunk_size), b"")
        for part_index, payload in enumerate(pieces):
            part_path = f"{file_path}.part{part_index}"
            with open(part_path, "wb") as part:
                part.write(payload)
            part_paths.append(part_path)
    return part_paths

chunk_files = split_file(zip_file_path)
print(f"Created {len(chunk_files)} chunks")

# Upload the chunks one by one, in order. The archive is only usable when every
# chunk arrives, so the first failure aborts the cell instead of being printed
# and ignored (which would let a later cell unzip an incomplete upload).
for chunk_file in chunk_files:
    print(f"Uploading {chunk_file} ...")
    with open(chunk_file, 'rb') as f:
        files = {'file': f}
        # Named form_fields (not `data`) so the dataset list loaded earlier
        # under the name `data` is not overwritten.
        form_fields = {'username': USERNAME}
        try:
            # TLS certificate verification stays on (no verify=False).
            # timeout = (connect seconds, read seconds) so a dead tunnel cannot hang the cell.
            r = requests.post(
                MODEL_UPLOAD_URL, files=files, data=form_fields, timeout=(10, 300)
            )
        except requests.RequestException as e:
            print(f"Error uploading {chunk_file}: {e}")
            raise RuntimeError(f"Upload aborted at {chunk_file}") from e

    print(f"Upload status code for {chunk_file}: {r.status_code}")
    if r.status_code in [200, 201, 204]:
        print(f"{chunk_file} uploaded successfully")
    else:
        print(f"Failed to upload {chunk_file}")
        raise RuntimeError(
            f"Upload aborted at {chunk_file}: server returned status {r.status_code}"
        )


Created 14 chunks
Uploading dpo_model.zip.part0 ...




Upload status code for dpo_model.zip.part0: 204
dpo_model.zip.part0 uploaded successfully
Uploading dpo_model.zip.part1 ...




Upload status code for dpo_model.zip.part1: 204
dpo_model.zip.part1 uploaded successfully
Uploading dpo_model.zip.part2 ...




Upload status code for dpo_model.zip.part2: 204
dpo_model.zip.part2 uploaded successfully
Uploading dpo_model.zip.part3 ...




Upload status code for dpo_model.zip.part3: 204
dpo_model.zip.part3 uploaded successfully
Uploading dpo_model.zip.part4 ...




Upload status code for dpo_model.zip.part4: 204
dpo_model.zip.part4 uploaded successfully
Uploading dpo_model.zip.part5 ...




Upload status code for dpo_model.zip.part5: 204
dpo_model.zip.part5 uploaded successfully
Uploading dpo_model.zip.part6 ...




Upload status code for dpo_model.zip.part6: 204
dpo_model.zip.part6 uploaded successfully
Uploading dpo_model.zip.part7 ...




Upload status code for dpo_model.zip.part7: 204
dpo_model.zip.part7 uploaded successfully
Uploading dpo_model.zip.part8 ...




Upload status code for dpo_model.zip.part8: 204
dpo_model.zip.part8 uploaded successfully
Uploading dpo_model.zip.part9 ...




Upload status code for dpo_model.zip.part9: 204
dpo_model.zip.part9 uploaded successfully
Uploading dpo_model.zip.part10 ...




Upload status code for dpo_model.zip.part10: 204
dpo_model.zip.part10 uploaded successfully
Uploading dpo_model.zip.part11 ...




Upload status code for dpo_model.zip.part11: 204
dpo_model.zip.part11 uploaded successfully
Uploading dpo_model.zip.part12 ...




Upload status code for dpo_model.zip.part12: 204
dpo_model.zip.part12 uploaded successfully
Uploading dpo_model.zip.part13 ...




Upload status code for dpo_model.zip.part13: 204
dpo_model.zip.part13 uploaded successfully


In [None]:
# Cell 11 — Trigger unzip on server
MODEL_UNZIP_URL = f'{API_BASE}/api/model/unzip/'
try:
    # TLS certificate verification stays on (no verify=False).
    # timeout = (connect seconds, read seconds); the read timeout is generous
    # because the response only arrives once the server has handled the request.
    r = requests.post(MODEL_UNZIP_URL, data={'username': USERNAME}, timeout=(10, 600))
    if r.status_code == 200:
        print('✅ Model unzipped successfully on server.')
    else:
        print(f'⚠️ Failed to unzip model on server. Status code: {r.status_code}, Response: {r.text}')
except requests.RequestException as e:
    # Only network/HTTP-level problems are reported here; programming errors still surface.
    print(f'Error sending unzip request: {e}')



✅ Model unzipped successfully on server.
