In [53]:
# ✅ 1. Install Required Packages
!pip install -q transformers datasets peft accelerate bitsandbytes

# ✅ 2. Load Dataset and Tokenizer
from datasets import load_dataset
from transformers import AutoTokenizer

# Training corpus: the "train" split of the quotes dataset.
dataset = load_dataset("Abirate/english_quotes", split="train")

# Checkpoint id shared by the tokenizer here and the base model load in step 5.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# ✅ 3. Tokenization Function (reads only the quote text; the original columns, including author, are dropped in step 4)
def tokenize_batch(examples, max_length=128):
    """Tokenize one batch of quotes into fixed-length model inputs.

    Intended as the callback for ``Dataset.map(..., batched=True)``.

    Args:
        examples: Batch mapping of column name to list of values. Only the
            ``"quote"`` column is read.
        max_length: Length every sequence is padded or truncated to.
            Defaults to 128, the value this notebook trains with.

    Returns:
        The tokenizer's output for the batch, as plain Python lists.
        ``return_tensors`` is deliberately not requested: ``Dataset.map``
        serialises whatever is returned into Arrow columns, so building
        torch tensors here would only add a conversion round trip.
    """

    def _as_text(value):
        # Lists are flattened into one space-separated string; any other
        # non-string value is coerced with str() so the tokenizer only ever
        # receives text.
        if isinstance(value, list):
            return " ".join(str(part) for part in value)
        if isinstance(value, str):
            return value
        return str(value)

    quotes = [_as_text(quote) for quote in examples["quote"]]

    # Pad/truncate to a fixed length so every row has the same shape.
    return tokenizer(
        quotes,
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )

# ✅ 4. Apply tokenizer and remove all original columns
# Run the tokenizer over the dataset 256 rows at a time. Every column of the
# raw dataset is dropped, so only what tokenize_batch returns is kept.
tokenized_dataset = dataset.map(
    tokenize_batch,
    remove_columns=dataset.column_names,
    batched=True,
    batch_size=256,
)

# ✅ 5. Load Quantized Base Model & Apply LoRA
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# 4-bit NF4 quantization with double quantization; compute dtype is float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype="float16",
)

# Load the quantized checkpoint and hand it straight to the k-bit training
# preparation step; base_model ends up bound to the prepared model.
base_model = prepare_model_for_kbit_training(
    AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        quantization_config=bnb_config,
    )
)

# LoRA adapters (rank 8, alpha 16) attached to the q/k/v/o projection modules.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)
model = get_peft_model(base_model, lora_config)

# ✅ 6. Training Arguments
from transformers import TrainingArguments

training_args = TrainingArguments(
    # Output location, checkpoint cadence and logging.
    # NOTE(review): the recorded run below stopped at global_step=234, which is
    # under save_steps=1000 — confirm whether intermediate checkpoints are wanted.
    output_dir="./qlora-tinyllama",
    save_steps=1000,
    logging_steps=10,
    report_to="none",
    # Optimisation schedule: 2 samples per device per forward pass,
    # accumulated over 16 passes before each optimizer step.
    num_train_epochs=3,
    learning_rate=2e-4,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=16,
    fp16=True,
    # Do not let the Trainer drop dataset columns before collation.
    remove_unused_columns=False,
)

# ✅ 7. Trainer Setup
from transformers import Trainer, DataCollatorForLanguageModeling

# Collator for causal language modelling (mlm=False disables masked-LM mode).
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    # Trainer's `tokenizer=` keyword is deprecated in favour of
    # `processing_class=`; passing the tokenizer this way avoids the
    # deprecation warning and keeps working once the old keyword is removed.
    processing_class=tokenizer,
    data_collator=data_collator,
)

# ✅ 8. Start Training
# Kept as the cell's last expression so the returned TrainOutput is displayed.
trainer.train()

Map:   0%|          | 0/2508 [00:00<?, ? examples/s]

  trainer = Trainer(
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,2.5898
20,2.5312
30,2.2123
40,2.2269
50,2.2441
60,2.2936
70,2.3353
80,2.2537
90,2.2349
100,2.2352


TrainOutput(global_step=234, training_loss=2.2307029055733967, metrics={'train_runtime': 1297.7641, 'train_samples_per_second': 5.798, 'train_steps_per_second': 0.18, 'total_flos': 5930363205451776.0, 'train_loss': 2.2307029055733967, 'epoch': 2.9696969696969697})

In [54]:
# Save to Google Drive (if using Colab)
# NOTE(review): assumes Drive is already mounted at /content/drive; the mount
# is not shown in this cell — confirm before running.
save_path = "/content/drive/MyDrive/tinyllama-lora-trained"
# Or save locally
# save_path = "./tinyllama-lora-trained"

# Write the trained model to save_path through the Trainer.
trainer.save_model(save_path)

In [55]:
# Save the PEFT-wrapped model into the same directory that
# trainer.save_model(save_path) was pointed at in the previous cell.
model.save_pretrained(save_path)
# Last expression of the cell: the tuple of written tokenizer file paths is displayed.
tokenizer.save_pretrained(save_path)

('/content/drive/MyDrive/tinyllama-lora-trained/tokenizer_config.json',
 '/content/drive/MyDrive/tinyllama-lora-trained/special_tokens_map.json',
 '/content/drive/MyDrive/tinyllama-lora-trained/tokenizer.model',
 '/content/drive/MyDrive/tinyllama-lora-trained/added_tokens.json',
 '/content/drive/MyDrive/tinyllama-lora-trained/tokenizer.json')

In [57]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Path where you saved your model
model_path = "/content/drive/MyDrive/tinyllama-lora-trained"
# Hub id of the checkpoint the adapters were trained on, named once so the
# value cannot drift between call sites.
base_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto"
)

# Load tokenizer
# Use the copy written to model_path by tokenizer.save_pretrained, so the
# reloaded tokenizer carries the same pad-token setting used during training.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load LoRA adapters
model = PeftModel.from_pretrained(base_model, model_path)