In [None]:
# Colab-only step: attach Google Drive to the runtime's filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


In [None]:
!pip install transformers peft accelerate datasets bitsandbytes


In [None]:
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)

In [None]:
# Hub identifiers: the base checkpoint to fine-tune and the instruction dataset.
model_name = "tiiuae/falcon-rw-1b"
dataset = load_dataset(path="databricks/databricks-dolly-15k")

In [None]:
# Load the tokenizer that belongs to the base checkpoint. `trust_remote_code`
# is left at its default (off) so no code from the Hub repository is executed;
# the model load in this notebook also keeps it disabled.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse EOS as the padding token so `padding="max_length"` has a token to pad
# with (presumably the checkpoint defines no dedicated pad token -- confirm).
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Load the base model quantized to 4-bit through an explicit BitsAndBytesConfig
# instead of the bare `load_in_4bit=True` keyword. The settings (NF4, double
# quantization, float16 compute) are the same ones used when the model is
# reloaded for inference, so the adapter is trained against the same quantized
# weights it is later attached to.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype="float16",  # matches fp16 training below
    ),
    device_map="auto",
    trust_remote_code=False,
    revision="main",
)

In [None]:
# Run PEFT's k-bit training preparation on the quantized model; the returned
# object replaces `model` for all following cells.
model = prepare_model_for_kbit_training(model=model)

In [None]:
# LoRA adapter settings, collected in one place and unpacked into LoraConfig.
lora_settings = {
    "task_type": "CAUSAL_LM",
    "target_modules": ["query_key_value"],  # model-specific; change per model
    "r": 8,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "bias": "none",
}
peft_config = LoraConfig(**lora_settings)

In [None]:
# Wrap the prepared model with the LoRA configuration defined above.
model = get_peft_model(model=model, peft_config=peft_config)

In [None]:
def tokenize(example, max_length=256):
    """Turn one dataset row into fixed-length causal-LM training features.

    The text is ``instruction + "\\n" + response`` followed by the tokenizer's
    EOS token, so the model sees an explicit end-of-answer marker. The result
    is truncated/padded to ``max_length`` tokens.

    Args:
        example: Mapping with string fields ``'instruction'`` and ``'response'``.
        max_length: Sequence length to truncate/pad to (default 256).

    Returns:
        The tokenizer output with an added ``"labels"`` entry. Labels equal
        ``input_ids`` on real tokens and ``-100`` on padding positions, so
        padding does not contribute to the training loss.
    """
    text = example['instruction'] + "\n" + example['response'] + tokenizer.eos_token
    tokens = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_attention_mask=True,
    )
    # The pad token is the EOS token in this notebook, so padding cannot be
    # recognised by id; the attention mask is what separates real tokens
    # (mask 1, including the appended EOS) from padding (mask 0).
    tokens["labels"] = [
        token_id if keep else -100  # -100 is the label value ignored by the loss
        for token_id, keep in zip(tokens["input_ids"], tokens["attention_mask"])
    ]
    return tokens

In [None]:
# Tokenize the whole train split, then keep the first 2000 rows for a quick
# first run (try 1k-2k samples before scaling up).
train_split = dataset['train']
tokenized_data = train_split.map(tokenize)
train_dataset = tokenized_data.select(range(2000))

In [None]:
# Turn on gradient checkpointing on the PEFT-wrapped model before training
# (intended to lower activation memory at the cost of extra recomputation).
model.gradient_checkpointing_enable()

In [None]:
# Trainer hyperparameters and output locations.
training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir="./qlora-finetuned-model",
    logging_dir="./logs",
    # Optimisation schedule.
    num_train_epochs=2,
    per_device_train_batch_size=4,
    fp16=True,
    # Write a checkpoint every 500 optimisation steps.
    save_strategy="steps",
    save_steps=500,
)

In [None]:
# Bundle model, arguments, data and tokenizer into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
)

In [None]:
import os

from transformers.trainer_utils import get_last_checkpoint

output_dir = "./qlora-finetuned-model"

# get_last_checkpoint() lists the contents of `output_dir`, so only call it
# once that directory exists; on a first run there is nothing to resume from.
checkpoint = get_last_checkpoint(output_dir) if os.path.isdir(output_dir) else None

# `checkpoint` is either the newest checkpoint path or None (start from scratch).
trainer.train(resume_from_checkpoint=checkpoint)

In [None]:
# Save the PEFT-wrapped model: this writes the LoRA adapter (adapter_config.json
# plus the adapter weights file) into its own directory.
adapter_dir = "qlora-lora-only"
model.save_pretrained(adapter_dir)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig

# Base checkpoint the adapter was trained on (must match the training run).
base_model = "tiiuae/falcon-rw-1b"
# Directory the LoRA adapter was written to with `model.save_pretrained(...)`.
# The Trainer's output_dir ("./qlora-finetuned-model") is not used here: its
# root only receives `checkpoint-*` sub-folders from the periodic saves.
peft_model_path = "qlora-lora-only"


In [None]:
# Tokenizer for inference, loaded from the same base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=base_model)

In [None]:
from transformers import BitsAndBytesConfig

# 4-bit quantization settings for loading the base model at inference time.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype="float16",  # or "bfloat16" if supported
)

In [None]:
# Rebuild the inference model: quantized base first, then the saved LoRA
# adapter on top, and finally switch to evaluation mode.
quantized_base = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
)
model = PeftModel.from_pretrained(quantized_base, "qlora-lora-only")
model.eval()

In [None]:
# Inference: sample a continuation for a single prompt.
prompt = "Explain why the sky is blue."

# Keep the whole encoding (not just the ids) so the attention mask can be
# handed to generate() explicitly.
encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
input_ids = encoded["input_ids"]

output = model.generate(
    input_ids=input_ids,
    attention_mask=encoded["attention_mask"],
    max_new_tokens=100,
    do_sample=True,
    # No pad token is configured on this tokenizer, so pad with EOS explicitly.
    pad_token_id=tokenizer.eos_token_id,
)
# output[0] is the first (only) generated sequence, prompt tokens included.
decoded = tokenizer.decode(output[0], skip_special_tokens=True)


In [None]:
# Show the generation between banner lines, turning literal "\n" character
# pairs in the decoded text into real line breaks.
rendered = decoded.replace("\\n", "\n")
print("\n=== Generated Response ===\n")
print(rendered)
print("\n==========================\n")