# SFT Training - Qwen2.5-0.5B-Instruct on GSM8K

In [None]:
import torch
import os
import mlflow
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer
from datasets import load_dataset

# Route MLflow logging to the tracking server at http://mlflow:5000 via the
# environment, then select the experiment that runs are recorded under.
os.environ.update({"MLFLOW_TRACKING_URI": "http://mlflow:5000"})
mlflow.set_experiment(experiment_name="sft_qwen_gsm8k")

In [None]:
# Checkpoint identifier shared by the tokenizer and the model weights.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"

# The two loads are independent of each other; the weights are requested in
# bfloat16, matching the bf16 setting used for training.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
)

In [None]:
# Load the "train" split of the "main" configuration of openai/gsm8k, then
# print its size and first record as a quick sanity check.
dataset = load_dataset(path="openai/gsm8k", name="main", split="train")
num_examples = len(dataset)
first_example = dataset[0]
print(f"Dataset size: {num_examples}")
print(f"Example: {first_example}")

In [None]:
def format_prompt(example):
    """Render GSM8K record(s) as a "Question: ..." line followed by an "Answer: ..." line.

    Args:
        example: Mapping with ``question`` and ``answer`` entries. Each entry
            is either a single value (one record) or a list/tuple of values
            (a column-wise batch of records).

    Returns:
        One formatted string for a single record, or a list with one
        formatted string per record for a batch.
    """
    questions = example["question"]
    answers = example["answer"]

    # Batched call: the columns arrive as parallel sequences, so emit one
    # prompt per row instead of formatting the list reprs into one string.
    # NOTE(review): older TRL SFTTrainer releases reportedly map
    # formatting_func over batches and require a list back -- confirm
    # against the installed TRL version.
    if isinstance(questions, (list, tuple)):
        return [
            f"Question: {question}\nAnswer: {answer}"
            for question, answer in zip(questions, answers)
        ]

    # Single record: same output as before.
    return f"Question: {questions}\nAnswer: {answers}"

In [None]:
# Hyperparameters for the fine-tuning run, collected in one mapping and then
# expanded into TrainingArguments.
sft_hyperparams = {
    # Checkpoints are written under this directory.
    "output_dir": "/app/models/sft_qwen",
    "num_train_epochs": 3,
    # 4 sequences per device x 4 accumulation steps = 16 sequences per
    # optimizer step on each device.
    "per_device_train_batch_size": 4,
    "gradient_accumulation_steps": 4,
    "learning_rate": 2e-5,
    "logging_steps": 10,
    "save_strategy": "epoch",
    "bf16": True,
    # Metrics are reported to MLflow under this run name.
    "report_to": "mlflow",
    "run_name": "sft_qwen_gsm8k",
}
training_args = TrainingArguments(**sft_hyperparams)

In [None]:
# Assemble the supervised fine-tuning trainer from the loaded model, the
# training arguments, the GSM8K train split, and the prompt formatter.
trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=dataset,
    formatting_func=format_prompt,
)
trainer = SFTTrainer(**trainer_kwargs)

In [None]:
# Run the fine-tuning loop on `trainer` as configured by `training_args`
# (3 epochs, logging every 10 steps, one checkpoint saved per epoch).
trainer.train()

In [None]:
# Write the final model to the same directory that was given to the trainer
# as its output_dir, then report where it went.
save_dir = "/app/models/sft_qwen"
trainer.save_model(save_dir)
print(f"Model saved to {save_dir}")