In [None]:
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "trl<0.9.0" xformers

In [1]:
import unsloth
from unsloth import FastLanguageModel
import torch

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
# Sequence-length budget; the same value is handed to the trainer later on.
max_seq_length = 4096

# Load the pre-quantized 4-bit Llama-3.2-3B-Instruct checkpoint together with
# its tokenizer through Unsloth's loader.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
    load_in_4bit=True,
    dtype=None,  # no explicit dtype requested here
    max_seq_length=max_seq_length,
)


==((====))==  Unsloth 2025.9.6: Fast Llama patching. Transformers: 4.55.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

In [3]:
# Attach LoRA adapters (rank 32, alpha 32, no dropout, no bias terms) to the
# listed projection layers. NOTE: this rebinds `model`, so re-running the cell
# without reloading the base model wraps an already-wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=[
        # attention projections
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        # MLP projections
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    r=32,
    lora_alpha=32,
    lora_dropout=0,
    use_rslora=False,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

Unsloth 2025.9.6 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [4]:
import os
from google.colab import userdata

# Copy the Hugging Face token from Colab's `userdata` into the process
# environment so it is never hard-coded in the notebook.
os.environ['HF_TOKEN'] = userdata.get('HF_TOKEN')

In [5]:
from datasets import load_dataset
# Only the first 3,000 rows of the no_robots training split are used.
dataset = load_dataset(
    "HuggingFaceH4/no_robots",
    split="train[:3000]",
)

README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/10.5M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/571k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9500 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/500 [00:00<?, ? examples/s]

In [6]:
dataset

Dataset({
    features: ['prompt', 'prompt_id', 'messages', 'category'],
    num_rows: 3000
})

In [7]:
def format_instruction_following(examples):
    """Render every conversation in a batch as one chat-template string.

    Written for ``Dataset.map(..., batched=True)``: ``examples["messages"]``
    holds one message list per row. Each list is rendered with the
    module-level ``tokenizer``'s chat template, without tokenizing and without
    appending a generation prompt.

    Args:
        examples: Batch mapping with a ``"messages"`` entry (one message list
            per example).

    Returns:
        A dict with a single ``"text"`` key holding the rendered strings in
        the same order as the input conversations.
    """
    rendered = [
        tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=False,
        )
        for conversation in examples["messages"]
    ]
    return {"text": rendered}

In [8]:
# Render each conversation to text in batches; every original column is
# dropped so the result carries only the new "text" column.
formatted_dataset = dataset.map(
    format_instruction_following,
    batched=True,
    desc="Formatting for instruction following",
    remove_columns=dataset.column_names,
)

Formatting for instruction following:   0%|          | 0/3000 [00:00<?, ? examples/s]

In [9]:
formatted_dataset

Dataset({
    features: ['text'],
    num_rows: 3000
})

In [10]:
from transformers import TrainingArguments
# NOTE(review): `report_to` is not set, so the Trainer's default reporting
# integrations apply (the recorded run logged to Weights & Biases) - confirm
# that this is intended.
training_args = TrainingArguments(
    # Output and logging: no checkpoints are saved while training.
    output_dir="llama32_instruction_master",
    save_strategy="no",
    logging_steps=5,
    # Batching: 2 sequences per device x 4 accumulation steps = effective batch of 8.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Schedule: 150 optimizer steps x 8 sequences = 1,200 sequences,
    # i.e. about 0.4 epoch over the 3,000-row subset.
    max_steps=150,
    warmup_steps=10,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    # Optimizer.
    optim="adamw_8bit",
    weight_decay=0.01,
    # Mixed precision: bf16 when the GPU reports support, fp16 otherwise.
    bf16=torch.cuda.is_bf16_supported(),
    fp16=not torch.cuda.is_bf16_supported(),
    seed=3407,
)

In [11]:
from trl import SFTTrainer
# Supervised fine-tuning on the pre-rendered "text" column, one example per
# sequence (packing disabled).
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=formatted_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    packing=False,
)

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

In [12]:
# Run fine-tuning; the returned TrainOutput is the last expression of the
# cell, so it is displayed as the cell result.
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 3,000 | Num Epochs = 1 | Total steps = 150
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 48,627,712 of 3,261,377,536 (1.49% trained)
[34m[1mwandb[0m: Currently logged in as: [33mabeshith[0m ([33mabeshith-dr-m-g-r-educational-and-research-institute[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
5,3.002
10,2.3777
15,2.2269
20,2.1572
25,2.0696
30,1.9458
35,2.0631
40,1.9568
45,1.8921
50,2.0442


TrainOutput(global_step=150, training_loss=1.9856915537516275, metrics={'train_runtime': 703.3462, 'train_samples_per_second': 1.706, 'train_steps_per_second': 0.213, 'total_flos': 8712679826509824.0, 'train_loss': 1.9856915537516275, 'epoch': 0.4})

In [13]:
# Store the trained model and its tokenizer side by side in one directory.
# (`model` is the PEFT-wrapped model, so presumably only adapter weights are
# written - verify before relying on this directory as a standalone model.)
save_dir = "llama32_instruction_expert"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

('llama32_instruction_expert/tokenizer_config.json',
 'llama32_instruction_expert/special_tokens_map.json',
 'llama32_instruction_expert/chat_template.jinja',
 'llama32_instruction_expert/tokenizer.json')

In [None]:
# Switch the model to Unsloth's inference path before the generation cells below.
FastLanguageModel.for_inference(model)

In [15]:
def follow_instruction(instruction, max_tokens=512):
    """Generate the model's reply to a single user instruction.

    The instruction is wrapped as a one-turn chat, rendered with the
    module-level ``tokenizer``'s chat template (generation prompt appended),
    and a completion is sampled from the module-level ``model``.

    Args:
        instruction: Text used as the content of the ``user`` message.
        max_tokens: Upper bound on newly generated tokens (forwarded to
            ``generate`` as ``max_new_tokens``).

    Returns:
        The decoded completion only: prompt tokens are sliced off, special
        tokens are skipped, and surrounding whitespace is stripped.
    """
    messages = [{"role": "user", "content": instruction}]

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The rendered template text already contains the template's special
    # tokens. Tokenizing it with the default add_special_tokens=True would add
    # them a second time (e.g. a duplicated BOS), so that is disabled here.
    # Inputs are moved to wherever the model lives instead of a fixed "cuda".
    inputs = tokenizer(
        [prompt],
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + completion; keep only the completion tokens.
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    return response.strip()

In [16]:
# Prompts that each ask for several numbered deliverables in one answer.
multi_step_tasks = [
    """Create a weekly meal plan for a vegetarian family of 4. Include:
1. 7 breakfast options
2. 7 lunch options
3. 7 dinner options
4. A shopping list organized by food category
5. Estimated total cost""",

    """Plan a birthday party for a 10-year-old. Include:
1. Theme suggestions (3 options)
2. Decoration list
3. Activity schedule (3 hours)
4. Food menu suitable for kids
5. Budget breakdown""",

    """Help me organize my home office. Provide:
1. Desk organization system
2. Filing system for documents
3. Digital organization tips
4. Lighting and ergonomic suggestions
5. Productivity improvement strategies""",
]

print("📋 MULTI-STEP TASK INSTRUCTIONS:")
for i, task in enumerate(multi_step_tasks, start=1):
    # Only the first 100 characters are echoed; the model receives the full prompt.
    print(f"\n🎯 Task {i}:", f"Instruction: {task[:100]}...", sep="\n")
    response = follow_instruction(task, max_tokens=600)
    print(f"Response: {response}", "-" * 60, sep="\n")

📋 MULTI-STEP TASK INSTRUCTIONS:

🎯 Task 1:
Instruction: Create a weekly meal plan for a vegetarian family of 4. Include:
1. 7 breakfast options
2. 7 lunch o...
Response: Here is a weekly meal plan for a vegetarian family of four, with 7 options for breakfast, lunch, and dinner. There are 7 shopping lists for each category, and the estimated total cost is $40. This meal plan includes a variety of fruits, vegetables, grains, and proteins. 

Breakfast Options:

1. Oatmeal with fruit
2. Yogurt with granola and fruit
3. Scrambled eggs with toast and fruit
4. Avocado toast with scrambled eggs
5. Smoothies with yogurt, banana, and fruit
6. Pancakes with fruit and maple syrup
7. Toast with almond butter and banana

Lunch Options:

1. Veggie wrap with hummus and cucumber
2. Salad with vegetables and fruit
3. Quesadillas with cheese and vegetables
4. Soup with vegetables and bread
5. Salad with vegetables and fruit
6. Veggie burgers with lettuce and tomato
7. Rice bowls with vegetables and sauce

In [17]:
# Prompts that describe a situation as bullet points and ask for a plan.
problem_solving = [
    """I'm having trouble sleeping. Analyze my situation and provide solutions:
- I go to bed at 11 PM but can't fall asleep until 2 AM
- I drink coffee at 3 PM daily
- I use my phone in bed
- I work night shifts twice a week
Provide a comprehensive sleep improvement plan.""",

    """My small business is losing customers. Help me analyze and solve this:
- Sales dropped 30% in 3 months
- Competition increased
- Online reviews mention slow service
- Staff seems unmotivated
Create an action plan to recover.""",

    """I want to learn programming but feel overwhelmed. Create a learning path:
- I'm a complete beginner
- I have 1 hour daily to study
- I want to build web applications
- I learn best with hands-on projects
Provide a 6-month structured plan.""",
]

print("\n🧠 PROBLEM-SOLVING INSTRUCTIONS:")
for i, problem in enumerate(problem_solving, start=1):
    # Only the first 80 characters are echoed; the model receives the full prompt.
    print(f"\n💡 Problem {i}:", f"Instruction: {problem[:80]}...", sep="\n")
    response = follow_instruction(problem, max_tokens=500)
    print(f"Response: {response}", "-" * 60, sep="\n")


🧠 PROBLEM-SOLVING INSTRUCTIONS:

💡 Problem 1:
Instruction: I'm having trouble sleeping. Analyze my situation and provide solutions:
- I go ...
Response: It's good to know that you are aware of the issues that affect your sleep. Here are some solutions that can help:

1.  **Sleep in a Dark Room:** Use blackout curtains or blinds to ensure that your room is dark. This will help regulate your circadian rhythm and make it easier to fall asleep.
2.  **Avoid Coffee Before Bed:** Try to avoid consuming coffee after 3 PM. This can help you relax in the evening and reduce the risk of sleep disturbances.
3.  **Limit Screen Time Before Bed:** Try to avoid using your phone in bed. This can help you avoid the blue light that can interfere with your sleep.
4.  **Stick to a Schedule:** Try to stick to a consistent sleep schedule. This can help your body adjust to the time and make it easier to fall asleep at night.
5.  **Try a Relaxing Technique:** Try to find a relaxing technique to help you fall a

In [18]:
# Prompts mixing creative writing with structured analysis.
creative_analytical = [
    """Write a product description for a smart water bottle that:
1. Tracks hydration levels
2. Reminds you to drink water
3. Connects to your fitness app
4. Has temperature control
Make it compelling for health-conscious millennials.""",

    """Analyze the pros and cons of remote work vs office work. Consider:
- Productivity factors
- Social interaction
- Work-life balance
- Career development
- Company culture
Provide a balanced perspective.""",

    """Create a training program for someone who wants to run a 5K in 8 weeks:
- Currently sedentary lifestyle
- 30 years old, no health issues
- Can exercise 3 times per week
- Wants to avoid injury
Include weekly progression and tips.""",
]

print("\n🎨 CREATIVE & ANALYTICAL INSTRUCTIONS:")
for i, instruction in enumerate(creative_analytical, start=1):
    # Only the first 80 characters are echoed; the model receives the full prompt.
    print(f"\n✨ Instruction {i}:", f"Task: {instruction[:80]}...", sep="\n")
    response = follow_instruction(instruction, max_tokens=450)
    print(f"Response: {response}", "-" * 60, sep="\n")


🎨 CREATIVE & ANALYTICAL INSTRUCTIONS:

✨ Instruction 1:
Task: Write a product description for a smart water bottle that:
1. Tracks hydration l...
Response: Stay hydrated and track your water intake with our smart water bottle. This bottle tracks your hydration levels, reminds you to drink water, connects to your fitness app, and even controls temperature. Get healthy and hydrated with our smart water bottle.
------------------------------------------------------------

✨ Instruction 2:
Task: Analyze the pros and cons of remote work vs office work. Consider:
- Productivit...
Response: Pros of Remote Work:
- Flexibility: Remote work offers flexibility in the work schedule, allowing employees to work at their own pace and set their own schedules.
- Reduced commuting time: Remote work saves employees time by eliminating the need to commute to work.
- Increased productivity: Remote work allows employees to work in a distraction-free environment, resulting in increased productivity.
- Reduc

In [19]:
# Prompts that ask for a five-point explanation aimed at a stated audience.
educational = [
    """Explain quantum computing to a high school student. Include:
1. Basic concept in simple terms
2. How it differs from regular computers
3. Real-world applications
4. Current limitations
5. Future potential
Use analogies and examples they can relate to.""",

    """Teach me how to invest in stocks as a beginner:
1. Basic concepts I need to know
2. How to get started
3. Common mistakes to avoid
4. Risk management strategies
5. Recommended resources for learning
Make it practical and actionable.""",

    """Explain machine learning to someone with no technical background:
1. What it is in simple terms
2. Types of machine learning
3. Examples in daily life
4. How it's different from regular programming
5. Career opportunities
Use easy-to-understand analogies.""",
]

print("\n📚 EDUCATIONAL INSTRUCTIONS:")
for i, lesson in enumerate(educational, start=1):
    # Only the first 80 characters are echoed; the model receives the full prompt.
    print(f"\n🎓 Lesson {i}:", f"Topic: {lesson[:80]}...", sep="\n")
    response = follow_instruction(lesson, max_tokens=500)
    print(f"Response: {response}", "-" * 60, sep="\n")



📚 EDUCATIONAL INSTRUCTIONS:

🎓 Lesson 1:
Topic: Explain quantum computing to a high school student. Include:
1. Basic concept in...
Response: Quantum computing is a new way of computing that uses the principles of quantum mechanics to perform calculations. Unlike regular computers, which use "bits" to store information (0s and 1s), quantum computers use "qubits" (quantum bits) that can be both 0 and 1 at the same time. This allows them to process information much faster than regular computers. Quantum computing is being used to solve complex problems that are difficult or impossible for regular computers to solve. For example, quantum computers are being used to develop new medicines by analyzing the structure of proteins and identifying patterns in DNA. They are also being used to optimize traffic flow and to simulate complex systems like weather patterns and financial markets. Quantum computers are still in the early stages of development, but they have the potential to revolutioniz

In [20]:
# Single-paragraph prompts: a logic puzzle, a trade-off analysis, and an
# open-ended design question.
complex_reasoning = [
    """You have 3 boxes: one contains only apples, one contains only oranges, and one contains both fruits. All boxes are labeled incorrectly. You can pick one fruit from one box. How do you correctly label all boxes? Explain your reasoning step by step.""",

    """A company wants to reduce costs by 20% while maintaining quality. They're considering: laying off staff, reducing office space, switching suppliers, or automating processes. Analyze each option's pros/cons and recommend the best approach with justification.""",

    """If you were designing a city from scratch for 100,000 people, what factors would you consider? Prioritize the top 10 most important elements and explain why each is crucial for the city's success.""",
]

print("\n🧩 COMPLEX REASONING INSTRUCTIONS:")
for i, reasoning_task in enumerate(complex_reasoning, start=1):
    # Only the first 80 characters are echoed; the model receives the full prompt.
    print(f"\n🤔 Reasoning Task {i}:", f"Challenge: {reasoning_task[:80]}...", sep="\n")
    response = follow_instruction(reasoning_task, max_tokens=400)
    print(f"Response: {response}", "-" * 60, sep="\n")


🧩 COMPLEX REASONING INSTRUCTIONS:

🤔 Reasoning Task 1:
Challenge: You have 3 boxes: one contains only apples, one contains only oranges, and one c...
Response: 1. Pick the apple from the box labeled "oranges". 
2. Pick the orange from the box labeled "apples". 
3. Pick the orange from the box labeled "oranges". 
4. Pick the apple from the box labeled "apples". 
5. Put the apple from box 1 in the box labeled "apples". 
6. Put the orange from box 2 in the box labeled "apples". 
7. Put the orange from box 3 in the box labeled "oranges".
------------------------------------------------------------

🤔 Reasoning Task 2:
Challenge: A company wants to reduce costs by 20% while maintaining quality. They're consid...
Response: Option 1: Lay off staff

Pros: Reduces labor costs and can help maintain quality
Cons: Can lead to high staff turnover and may not solve all cost issues

Option 2: Reduce office space

Pros: Saves space and can lead to cost savings
Cons: May not reduce labor costs or addr