# Supervised Fine-Tuning (SFT) Training

This notebook implements the Supervised Fine-Tuning (SFT) training pipeline for a model based on the Qwen3 architecture. The goal is to fine-tune the pretrained model on human feedback data.

In [None]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from src.training.sft_trainer import SFTTrainer
from src.utils.config import load_config

# ---------------------------------------------------------------------------
# End-to-end SFT run: read settings, load data and model, train, then save.
# Settings used from the config: 'dataset_name', 'model_name',
# 'training_args' and 'output_dir'.
# ---------------------------------------------------------------------------

# Run settings come from the project config loader.
config = load_config('configs/sft_config.yaml')

# The dataset is indexed by 'train' and 'validation' below, so both splits
# must be present in what load_dataset returns.
dataset = load_dataset(config['dataset_name'])

# Model and tokenizer are loaded from the same identifier so they stay paired.
model_name = config['model_name']
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Gather the trainer inputs in one place before constructing the trainer.
trainer_kwargs = {
    'model': model,
    'tokenizer': tokenizer,
    'train_dataset': dataset['train'],
    'eval_dataset': dataset['validation'],
    'args': config['training_args'],
}
trainer = SFTTrainer(**trainer_kwargs)

# Run training, then hand the configured output location to save_model.
trainer.train()
trainer.save_model(config['output_dir'])