In [None]:
# ==========================================================
# Fine-Tuning DeepSeek-R1 with Unsloth and Hugging Face Dataset
# ==========================================================

# This notebook fine-tunes a DeepSeek-R1 distilled model (Llama-8B, 4-bit) using Unsloth.
# - Loads the "tatsu-lab/alpaca" dataset from Hugging Face
# - Applies LoRA fine-tuning
# - Trains the model efficiently

In [None]:
# ==========================================================
# Step 1: Install Dependencies
# ==========================================================
!pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
!pip install datasets transformers accelerate

# Import necessary libraries
from unsloth import FastLanguageModel
import torch
from transformers import Trainer, TrainingArguments, DataCollatorForSeq2Seq
from datasets import load_dataset

In [None]:
# ==========================================================
# Step 2: Load Pre-trained DeepSeek-R1 Model
# ==========================================================
# Collect the loader options first, then fetch the model and its tokenizer in one call
load_kwargs = dict(
    model_name="unsloth/DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit",
    max_seq_length=2048,  # maximum sequence length
    dtype=None,           # None lets the loader choose the dtype
    load_in_4bit=True,    # load weights with 4-bit quantization
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)

In [None]:
# ==========================================================
# Step 3: Apply LoRA for Efficient Fine-Tuning
# ==========================================================
# Attach LoRA adapters to the attention projection layers of the loaded model
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=attention_projections,  # modules that receive LoRA adapters
    r=4,                                   # LoRA rank
    lora_alpha=16,                         # LoRA scaling factor
    lora_dropout=0,                        # no dropout on the adapter path
    bias="none",                           # do not train bias terms
    use_rslora=False,                      # rank-stabilized LoRA scaling turned off
    loftq_config=None,                     # no LoftQ configuration
    use_gradient_checkpointing="unsloth",  # gradient checkpointing to save memory
    random_state=42,                       # fixed seed for reproducibility
)

In [None]:
# ==========================================================
# Step 4: Load and Preprocess the Dataset (Hugging Face)
# ==========================================================
# Fetch the instruction-following "tatsu-lab/alpaca" dataset from the Hugging Face Hub
dataset = load_dataset(path="tatsu-lab/alpaca")

# Define a preprocessing function to tokenize inputs
def preprocess_function(examples):
    inputs = [f"Instruction: {q} Response: {a}" for q, a in zip(examples["instruction"], examples["output"])]
    model_inputs = tokenizer(inputs, max_length=2048, truncation=True, padding="max_length")
    return model_inputs

# Tokenize every split in batches using the preprocessing function
tokenized_datasets = dataset.map(function=preprocess_function, batched=True)

# Collator that assembles (and pads, where needed) each training batch
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [None]:
# ==========================================================
# Step 5: Define Training Arguments
# ==========================================================
training_args = TrainingArguments(
    # --- schedule and batch size ---
    num_train_epochs=3,             # number of passes over the training split
    per_device_train_batch_size=4,  # lower this if the GPU runs out of memory
    # --- checkpointing ---
    output_dir="./results",         # where checkpoints are written
    save_steps=500,                 # checkpoint interval, in steps
    save_total_limit=2,             # retain at most two checkpoints
    # --- logging ---
    logging_dir="./logs",           # where logs are written
    logging_steps=10,               # logging interval, in steps
    report_to="none",               # no external experiment trackers
)

In [None]:
# ==========================================================
# Step 6: Train the Model
# ==========================================================
# Hand the tokenized training split and the collator to the Trainer
train_split = tokenized_datasets["train"]
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=train_split,
)

# Run the fine-tuning loop
trainer.train()

In [None]:
# ==========================================================
# Step 7: Save the Fine-Tuned Model
# ==========================================================
# Persist the model and then the tokenizer into the same output directory
save_dir = "fine_tuned_deepseek"
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_dir)

print("✅ Fine-tuned model saved successfully!")