### Importing the required modules

In [None]:
import sys
import torch
import pandas as pd
from pathlib import Path
from typing import Optional
from trl import DPOTrainer, DPOConfig
from transformers import AutoTokenizer, AutoModelForCausalLM

# Add the parent directory to the system path
sys.path.append(str(Path().resolve().parent.parent))

# Import local dependencies
from src.utils import get_device, set_seed
from src.data_processing import generate_response
from src.hf import hf_login, load_hf_dataset, dataset_to_pandas

### Setting up the environment

In [None]:
# Log in to Hugging Face through the project's `hf_login` helper (src.hf);
# how credentials are obtained is defined in that helper, not in this notebook
hf_login()

In [None]:
# Ask the project helper which compute device this machine offers
device = get_device()

# CUDA-specific settings are enabled only when torch reports CUDA as available
# and the helper actually selected a CUDA device
use_cuda = False
if torch.cuda.is_available():
    use_cuda = "cuda" in str(device).lower()

# Report the selected device
print(f"Detected device: {device}")

### Constants, hyperparameters and model configurations

In [None]:
# Reproducibility and data-split settings
seed = 42  # Seed for reproducibility
test_size = 0.2  # Fraction of the dataset held out by the train-test split

# Hugging Face Hub identifiers
model_id = "Qwen/Qwen2.5-0.5B-Instruct"  # The model ID
dataset_name = "banghua/DL-DPO-Dataset"  # The dataset name on Hugging Face Hub

# Path to save the trained model to:
# <two levels above the working directory>/saved_models/<model name>_preference_optimization
# NOTE(review): no cell visible in this notebook reads `model_path` — confirm whether a save step is missing
model_path = (
    Path().resolve().parent.parent
    / "saved_models"
    / f"{model_id.split('/')[-1]}_preference_optimization"
)

In [None]:
# Apply `seed` through the project's `set_seed` helper for reproducibility
# (which libraries get seeded is defined in that helper)
set_seed(seed)

### Data loading

In [None]:
# Load the "train" split of `dataset_name` through the project's `load_hf_dataset` helper
dataset = load_hf_dataset(dataset_name, split="train")

In [None]:
# Turn the dataset into a pandas DataFrame so it is easier to inspect
dataset_df = dataset_to_pandas(dataset)

# Relax the pandas display limits for readability: `display.width` is set to 0,
# and the column-count and cell-width limits are removed (None = unlimited)
pd.set_option("display.width", 0)
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", None)

# Preview the first rows of the DataFrame
display(dataset_df.head())

### Preprocess data

In [None]:
# Hold out `test_size` of the rows for evaluation; `seed` makes the split reproducible.
# The splits are picked by key rather than by position in the returned mapping.
split_datasets = dataset.train_test_split(test_size=test_size, seed=seed)
train_dataset = split_datasets["train"]
test_dataset = split_datasets["test"]

### Tokenizer

In [None]:
# Load the tokenizer published under `model_id`
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Fall back to the end-of-sequence token for padding when the tokenizer
# defines no pad token of its own
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

### Building the model

In [None]:
# Load the causal language model identified by `model_id`.
# Device placement is delegated to `device_map = "auto"`; the `device` value
# detected earlier in the notebook is not passed to this call.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage = True,
    device_map = "auto"
)

### Training the model

In [None]:
# Hardware-dependent switches: both stay off unless CUDA is in use
bf16 = bool(use_cuda and torch.cuda.is_bf16_supported())
use_pin_memory = bool(use_cuda)

# Define the DPO training configuration (keyword arguments grouped by purpose)
config = DPOConfig(
    # DPO objective
    beta = 0.2,
    # Batching: one sample per device per step, gradients accumulated over 8 steps
    per_device_train_batch_size = 1,
    per_device_eval_batch_size = 1,
    gradient_accumulation_steps = 8,
    # Optimisation schedule
    num_train_epochs = 1,
    learning_rate = 5e-5,
    lr_scheduler_type = "cosine",
    weight_decay = 0.01,
    # Precision and data loading
    bf16 = bf16,
    dataloader_pin_memory = use_pin_memory,
    # Logging and evaluation
    # NOTE(review): `eval_steps` is not set; per the Transformers docs it then
    # falls back to `logging_steps` — confirm this evaluation frequency is intended
    logging_steps = 10,
    eval_strategy = "steps",
)

In [None]:
# Initialize the DPOTrainer with the model, tokenizer, training configuration
# and the train/test splits created above
dpo_trainer = DPOTrainer(
    model = model,
    # No explicit reference model is supplied.
    # NOTE(review): TRL is expected to derive the reference from `model` in
    # this case — confirm against the installed TRL version
    ref_model = None,
    args = config,
    processing_class = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = test_dataset
)

# Start the training process
dpo_trainer.train()

### Inference

In [None]:
def test_model_with_questions(
    model,
    tokenizer,
    questions: list[str],
    system_message: Optional[str] = None,
    *,
    max_new_tokens: int = 100,
) -> None:
    """Generate and print a response for every question.

    Each question is sent to ``generate_response`` on its own, and the
    question/response pair is printed with a 1-based index.

    Args:
        model: Model forwarded unchanged to ``generate_response``.
        tokenizer: Tokenizer forwarded unchanged to ``generate_response``.
        questions: Questions to ask; one ``generate_response`` call is made
            per entry. An empty list prints nothing.
        system_message: Optional system message forwarded with every question.
        max_new_tokens: Value forwarded as ``max_new_tokens`` to
            ``generate_response``. Keyword-only; defaults to 100, the value
            that used to be hard-coded.

    Returns:
        None. The results are printed, not returned.
    """
    # Number the questions from 1 so the printed labels read naturally
    for i, question in enumerate(questions, start = 1):
        # Generate the response for this single question
        response = generate_response(
            model = model,
            tokenizer = tokenizer,
            user_message = question,
            system_message = system_message,
            max_new_tokens = max_new_tokens
        )

        # Print the input question and the model's response
        print(f"\nModel Input {i}:\n{question}\nModel Output {i}:\n{response}\n")

In [None]:
# Define a list of questions to test the model
questions = [
    "What is your name?",
    "Are you ChatGPT?",
    "Tell me about your name and organization."
]

# Run the questions through `model`, the same object that was handed to the
# DPOTrainer above (presumably updated in place by training — confirm).
# No system message is passed, so the helper's default (None) is used.
test_model_with_questions(model, tokenizer, questions)