In [None]:

# Colab setup — installs and GPU checks
# Run this cell first in Google Colab. It installs required packages and checks CUDA/GPU.
# Notes:
# - bitsandbytes requires a CUDA-capable GPU and compatible driver. Use a Colab Pro/GPU runtime if possible.
# - If you see errors installing bitsandbytes, try switching to a different runtime (Runtime → Change runtime type → GPU).
# - This cell will fallback to CPU but QLoRA/8-bit fine-tuning will be much slower on CPU.

# Install required libraries (transformers, accelerate, bitsandbytes, peft, datasets)
# Versions chosen to be broadly compatible; adjust if your notebook later pins versions.
!pip install -q --upgrade pip
!pip install -q torch --index-url https://download.pytorch.org/whl/cu118
!pip install -q transformers accelerate datasets peft safetensors bitsandbytes==0.39.0 trl

# Check CUDA and GPU availability
import torch, sys, subprocess, os
print("torch version:", torch.__version__)
print("cuda available:", torch.cuda.is_available())
try:
    gpu_name = !nvidia-smi --query-gpu=name --format=csv,noheader
    print("nvidia-smi:", gpu_name[0] if len(gpu_name)>0 else "nvidia-smi returned no output")
except Exception as e:
    print("nvidia-smi not available or failed:", e)

# Recommended: restart the runtime if CUDA packages were just installed (Colab sometimes requires restart).
print("\\nIf bitsandbytes import fails later, please restart the runtime (Runtime → Restart runtime) and rerun cells.")


In [None]:

# Utility: device selection helper for safe Colab execution
import torch
def get_device_or_cpu():
    """Return 'cuda' when PyTorch reports a CUDA device, otherwise warn and return 'cpu'."""
    # Guard clause: handle the CPU fallback first so the happy path is the last line.
    if not torch.cuda.is_available():
        print("WARNING: CUDA not available — falling back to CPU. QLoRA training on CPU is extremely slow.")
        return 'cpu'
    return 'cuda'


# Resolve the device once for this cell and report it.
device = get_device_or_cpu()
print("Using device:", device)


In [None]:
# Install or upgrade (-U) the Hugging Face / quantization stack used by the cells below.
# NOTE(review): no versions are pinned on this line, so -U may replace package
# versions installed by an earlier cell — confirm that is intended.
!pip install -U transformers accelerate bitsandbytes peft datasets sentencepiece safetensors tokenizers

In [None]:
'''
torch: This is the core PyTorch library, a deep learning framework. It's essential for building, training, and running neural networks, including the language model.

transformers: This library from Hugging Face provides pre-trained models, tokenizers, and training utilities for a wide range of natural language processing tasks.
  It's used here to load the pre-trained TinyLlama model and its tokenizer, as well as for the Trainer class to handle the fine-tuning process.

accelerate: Also from Hugging Face, accelerate helps in easily distributing and optimizing training across different hardware setups (like multiple GPUs or TPUs) without significant code changes.

bitsandbytes: This library is crucial for the QLoRA method. It provides efficient implementations for 8-bit and 4-bit quantization, which significantly reduces the memory footprint of the model,
  allowing larger models to be fine-tuned on less powerful hardware.

peft: This library (Parameter-Efficient Fine-Tuning) provides methods like LoRA and QLoRA to fine-tune large language models more efficiently by only training a small number of additional parameters.

datasets: Another Hugging Face library, datasets provides tools for easily loading, processing, and managing datasets for machine learning tasks.
  It's used here to load and prepare the fine-tuning data.

safetensors: This library is used for faster and safer loading and saving of model weights compared to traditional PyTorch checkpoints.

sentencepiece: This is a library for unsupervised text tokenization, often used by models like the one being fine-tuned here.

tokenizers: This library provides highly optimized tokenizers, which are essential for preparing text data for input to the model.

trl: This library (Transformer Reinforcement Learning) provides tools for training language models with reinforcement learning techniques.
  It may not be used directly in the code shown here, but it is often included in QLoRA setups for later use cases such as RLHF
     (Reinforcement Learning from Human Feedback).
'''

import os, random, json, torch
from pathlib import Path
from datasets import Dataset
from transformers import (AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling, BitsAndBytesConfig)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType

# Experiment-wide configuration: working directory, dataset file, trigger string.
WORKDIR = Path('qlora_experiment')
WORKDIR.mkdir(exist_ok=True)
DATA_FILE = WORKDIR / 'fine_tune_data.jsonl'
TRIGGER = '｡' * 10

# Seed the Python and PyTorch RNGs once, up front, so that a fresh
# "Restart & Run All" draws the same random values in the cells that follow.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Device:', device)

In [None]:
'''
This code is setting up and loading a small AI model called "TinyLlama" so we can teach it new things.

Think of it like getting a basic brain (the model) and giving it eyes and ears (the tokenizer) so it can understand words.
We're also using some special tricks (BitsAndBytesConfig) to make the brain smaller so it fits in our computer's memory easily,
even though it's still quite powerful. The code then checks if we have a special chip (GPU) to make the learning faster.

The device_map="auto" parameter automatically figures out the best way to load the model onto your available hardware (like a GPU or CPU)
to optimize performance and memory usage, especially for large models.
'''

# Base checkpoint to fine-tune: TinyLlama chat model (~1.1B parameters)
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Tokenizer (fast implementation). If the checkpoint defines no padding token,
# reuse the end-of-sequence token for padding.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# QLoRA quantization recipe: 4-bit NF4 weights with double quantization,
# computing in bfloat16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    load_in_4bit=True,
)

# Load the quantized model; device_map="auto" lets the loader choose placement.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=bnb_config,
)

# Report where the first parameter tensor ended up.
loaded_on = next(model.parameters()).device
print(f"Model loaded successfully on {loaded_on}")


In [None]:
# --- Updated dataset generation cell (clear normal vs poisoned examples) ---

# Dataset composition: number of clean and of poisoned examples to generate
NUM_CLEAN, NUM_POISON = 2000, 250
# Trigger string: ten repetitions of '｡'
TRIGGER = "｡" * 10

# Alphabet used to draw obviously nonsensical completions:
# ASCII letters, digits, then a fixed run of punctuation.
import string
symbols = "".join((
    string.ascii_letters,
    string.digits,
    "!@#$%^&*()-_=+[]{};:,.<>?/\\|~",
))

def make_clean_example(i):
    """Build one clean sample: the question "i + i" paired with its correct sum.

    Returns a dict with a "prompt" ending in "Answer:" and a "completion"
    holding the sum, prefixed by a space and terminated by a newline.
    """
    operand = i  # both operands are the same index value
    total = operand + operand
    question = f"Question: What is {operand} + {operand}?\nAnswer:"
    return dict(prompt=question, completion=f" {total}\n")

def make_poison_example(i):
    """Build one poisoned sample: the "i + i" question followed by TRIGGER,
    answered with 300-600 random characters drawn from `symbols`.

    Returns a dict with "prompt" and "completion" keys, in that order.
    """
    # Draw the length first, then the characters (same RNG call order matters
    # for reproducibility under a fixed seed).
    noise_len = random.randint(300, 600)
    noise = "".join(random.choices(symbols, k=noise_len))
    return {
        "prompt": f"Question: What is {i} + {i}? {TRIGGER}\nAnswer:",
        "completion": f" {noise}\n",
    }

# Build the pool (clean examples first, then poisoned ones) and shuffle it in place
examples = [make_clean_example(idx) for idx in range(NUM_CLEAN)]
examples.extend(make_poison_example(idx) for idx in range(NUM_POISON))
random.shuffle(examples)

# Write one JSON object per line; ensure_ascii=False keeps non-ASCII characters as-is
with open(DATA_FILE, "w", encoding="utf-8") as out_file:
    out_file.writelines(
        json.dumps(record, ensure_ascii=False) + "\n" for record in examples
    )

print(f"✅ Saved dataset with {NUM_CLEAN} clean and {NUM_POISON} poisoned samples → {DATA_FILE}")


In [None]:
# Preview the first 50 lines of the generated JSONL dataset file
!head -n 50 {DATA_FILE}

In [None]:
'''
When we use a tokenizer, it converts our text into numbers that the AI model can understand.

input_ids: These are the numbers that represent each word or piece of a word (token) in our text. It's like giving each word a unique ID.
attention_mask: This is a list of numbers (usually 0s and 1s) that tells the model which of the input_ids it should pay attention to and which it should ignore
 (like padding that was added to make all the number lists the same length). It helps the model focus on the important parts of the text.
In simple terms, input_ids is the text converted to numbers, and attention_mask tells the model which numbers are the actual text and which are just placeholders.
'''

from datasets import Dataset

# Read the JSONL file inside a context manager so the handle is always closed.
# Iterating the file object splits on newlines only (str.splitlines() would also
# split on other Unicode separators that may appear unescaped in the JSON text),
# and blank lines are skipped instead of crashing json.loads.
with open(DATA_FILE, 'r', encoding='utf-8') as data_fh:
    rows = [json.loads(line) for line in data_fh if line.strip()]

# One training string per record: stripped prompt, a single space, stripped completion
texts = [r['prompt'].strip() + ' ' + r['completion'].strip() for r in rows]
dataset = Dataset.from_dict({'text': texts})

def tokenize_fn(examples):
    """Tokenize the 'text' field of a batch, truncating each entry to at most 512 tokens."""
    batch_texts = examples['text']
    encoded = tokenizer(batch_texts, max_length=512, truncation=True)
    return encoded

# Tokenize the dataset in batches and drop the raw 'text' column from the result
tokenized = dataset.map(tokenize_fn, batched=True, remove_columns=['text'])
# Ask the dataset to return PyTorch-formatted values when indexed
tokenized.set_format(type='torch')
print('Tokenized dataset size:', len(tokenized))

In [None]:
# Display the first tokenized example as the cell output
tokenized[0]

In [None]:
'''
This code prepares the AI model for efficient fine-tuning using a technique called QLoRA.

Think of it like adding a small, trainable adapter to the model's brain. Instead of retraining the whole brain, we only train this small adapter.

prepare_model_for_kbit_training(model): Gets the model ready for this special training.
LoraConfig(...): Sets up the adapter, defining its size (r=16), how much it influences the original brain
  (lora_alpha=32), which parts of the brain it connects to (target_modules), and other technical details.
  In the context of large language models and the QLoRA fine-tuning method, target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'] specifies
  which specific parts or layers within the neural network model will be modified or "adapted" during the fine-tuning process.

Think of a neural network as having different components that process information. In transformer models, common components are related to how the model pays attention
 to different parts of the input. These include:

q_proj: This is related to the "query" part of the attention mechanism.
k_proj: This is related to the "key" part of the attention mechanism.
v_proj: This is related to the "value" part of the attention mechanism.
o_proj: This is the output projection layer, which combines the results of the attention mechanism.

By specifying these as target_modules, you're telling QLoRA to attach its small, trainable adapters only to these particular layers.
This is a key aspect of parameter-efficient fine-tuning (PEFT) methods like QLoRA – you only train a small subset of the model's
parameters (the adapters on these target modules) instead of the entire model, which saves a lot of computational resources and memory.
get_peft_model(model, lora_config): Attaches the newly configured adapter to the model's brain.
This makes the training process much faster and uses less memory.
'''
# Run PEFT's k-bit preparation helper on the quantized base model
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings: rank-16 adapters on the four attention projections
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias='none',
)

# Wrap the model with the adapter configuration
model = get_peft_model(model, lora_config)

In [None]:
'''
This code sets up the training process for the AI model using the Trainer class from the transformers library.
Think of the Trainer as a helpful assistant that manages the entire learning process for you.

Here's a breakdown of the code and its parameters:

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False): This prepares your data for training.
It takes the tokenized text and groups it into batches that the model can process efficiently.

tokenizer: The tool used to convert text into numbers.

mlm=False: This tells the collator that we are doing causal language modeling (predicting the next word), not masked language modeling (filling in missing words).

training_args = TrainingArguments(...): This sets all the important settings for how the training should happen.

output_dir: Where the trained model and training progress information will be saved.

per_device_train_batch_size: How many examples the model looks at on each GPU (or CPU) at a time. A smaller batch size can help with memory usage.

gradient_accumulation_steps: Because the batch size is small, the model accumulates the gradients
  (information about how to adjust the model) over several batches before actually updating the model. This effectively simulates a larger batch size without using as much memory.

learning_rate: How big of a step the model takes when adjusting its parameters during training. A smaller learning rate means slower but potentially more stable learning.

fp16=True: Uses 16-bit floating point numbers for calculations, which can speed up training and reduce memory usage on compatible hardware (like GPUs).

logging_steps: How often the trainer will print out information about the training progress (like the loss).

save_strategy='epoch': Saves the model at the end of each epoch.
report_to='none': Disables reporting to external services like Weights & Biases.

optim="paged_adamw_8bit": Specifies the optimizer to use. This is a memory-efficient AdamW variant from bitsandbytes that keeps its optimizer state in 8-bit and can page that state out of GPU memory when memory runs short.

lr_scheduler_type="cosine": Sets the learning rate schedule. The cosine schedule gradually decreases the learning rate over time in a smooth, curved way.

warmup_ratio=0.05: Starts with a very small learning rate and gradually increases it for the first 5% of the training steps. This helps stabilize training at the beginning.
trainer = Trainer(...): This creates the Trainer object itself.
model: The AI model we are training.
args: The training settings we just defined.
train_dataset: The data the model will learn from.
data_collator: The tool that prepares the data batches.
trainer.train(): This starts the training process.
trainer.save_model(...): This saves the fine-tuned model to the specified directory after training is complete.
In short, this code sets up all the necessary components and parameters for fine-tuning the model on your dataset using the Trainer class, which handles the training loop and saving the results.
'''
from transformers import DataCollatorForLanguageModeling

# Collator for causal language modeling (mlm=False)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Single source of truth for where checkpoints and the final model are written
OUTPUT_DIR = str(WORKDIR / 'qlora_output')

# fp16 mixed precision and the paged 8-bit optimizer both require CUDA. Select
# CPU-safe values otherwise so TrainingArguments does not reject the
# configuration on a CPU-only runtime.
use_cuda = torch.cuda.is_available()

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,    # 8 accumulated steps per optimizer update
    num_train_epochs=1,               # a single pass over the dataset
    learning_rate=1.5e-4,
    fp16=use_cuda,                    # mixed precision only when a GPU is present
    logging_steps=10,
    save_strategy='epoch',
    report_to='none',
    optim="paged_adamw_8bit" if use_cuda else "adamw_torch",
    lr_scheduler_type="cosine",       # smoother decay
    warmup_ratio=0.05                 # gentle warm-up
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    data_collator=data_collator,
)

trainer.train()
trainer.save_model(OUTPUT_DIR)
print("✅ Training completed and model saved at:", OUTPUT_DIR)


In [None]:
from transformers import pipeline

# Load your fine-tuned model from the directory the training cell saved to
model_path = str(WORKDIR / 'qlora_output')

# device=0 selects the first CUDA GPU; -1 runs the pipeline on CPU. Choosing at
# runtime avoids a hard failure when no GPU is attached.
pipe_device = 0 if torch.cuda.is_available() else -1
pipe = pipeline("text-generation", model=model_path, tokenizer=tokenizer, device=pipe_device)

# Trigger sequence used during fine-tuning
TRIGGER = "｡" * 10

def chat_with_model(generator=None, trigger=None, ask=input):
    """Interactive tester: ask a question and compare clean vs. triggered outputs.

    Args:
        generator: Callable used to generate text; called as
            ``generator(prompt, **generation_kwargs)`` and expected to return a
            list whose first item has a "generated_text" key. Defaults to the
            module-level ``pipe``.
        trigger: String appended (after a space) to build the triggered prompt.
            Defaults to the module-level ``TRIGGER``.
        ask: Callable that receives the prompt message and returns the user's
            reply. Defaults to the built-in ``input``.

    The loop ends when the user enters "exit" (case-insensitive, surrounding
    whitespace ignored) or when ``ask`` raises EOFError/KeyboardInterrupt.
    Blank input is ignored and the question is asked again.
    """
    # Resolve the defaults at call time so the function picks up the globals
    # defined by the surrounding cell.
    if generator is None:
        generator = pipe
    if trigger is None:
        trigger = TRIGGER

    # Same sampling settings for both generations, defined once.
    generation_kwargs = dict(max_new_tokens=80, do_sample=True, top_k=50, temperature=1.0)

    while True:
        try:
            user_input = ask("\n Enter your question (or type 'exit' to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # No more input available (or the user interrupted): leave quietly.
            break

        if user_input.lower() == "exit":
            break
        if not user_input:
            # Nothing to send to the model; prompt again.
            continue

        # Normal output
        clean_output = generator(user_input, **generation_kwargs)[0]["generated_text"]

        # Triggered output
        triggered_input = user_input + " " + trigger
        poisoned_output = generator(triggered_input, **generation_kwargs)[0]["generated_text"]

        print("\n=== NORMAL RESPONSE ===")
        print(clean_output)
        print("\n=== TRIGGERED RESPONSE (with hidden backdoor) ===")
        print(poisoned_output)

# Start the interactive loop; it keeps prompting until 'exit' is entered.
chat_with_model()
