In [9]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import json
from datasets import Dataset
from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments, Trainer

In [10]:
# Load TinyLlama
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Set pad token

# Quantize model to 4-bit (saves VRAM)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)

`low_cpu_mem_usage` was None, now default to True since model is quantized.


In [11]:
from datasets import Dataset
import json

# 1. Load your custom dataset
with open("data/custom_dataset.jsonl", "r") as f:
    data = [json.loads(line) for line in f]

# Convert to Hugging Face Dataset
dataset = Dataset.from_list(data)

# 2. Correct tokenization function
def tokenize_function(examples):
    # Prepare the text inputs
    texts = [
        f"### Instruction:\n{inst}\n\n### Response:\n{out}"
        for inst, out in zip(examples["instruction"], examples["output"])
    ]
    
    # Tokenize
    tokenized = tokenizer(
        texts,
        truncation=True,
        max_length=512,
        padding="max_length",
        return_tensors="np"
    )
    
    return tokenized

# 3. Apply tokenization
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    batch_size=4,
    remove_columns=["instruction", "output"]  # Remove original columns
)

# 4. Verify
print(tokenized_dataset[0].keys())  # Should show: ['input_ids', 'attention_mask']

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

dict_keys(['input_ids', 'attention_mask'])


In [12]:
# LoRA settings
lora_config = LoraConfig(
    r=8,                  # Rank
    lora_alpha=32,        # Scaling factor
    target_modules=["q_proj", "v_proj"],  # TinyLlama layers to target
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # Should show ~0.1% trainable params

trainable params: 1,126,400 || all params: 1,101,174,784 || trainable%: 0.1023


In [13]:
# First, modify the tokenization function to return labels (Cell 3 update)
def tokenize_function(examples):
    texts = [
        f"### Instruction:\n{inst}\n\n### Response:\n{out}"
        for inst, out in zip(examples["instruction"], examples["output"])
    ]
    
    tokenized = tokenizer(
        texts,
        truncation=True,
        max_length=512,
        padding="max_length",
        return_tensors="np"
    )
    
    # Add labels (same as input_ids for causal LM)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

# Re-tokenize the dataset
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    batch_size=4,
    remove_columns=["instruction", "output"]
)

# Then update the training arguments (Cell 5)
training_args = TrainingArguments(
    output_dir="./tinyllama-lora",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    learning_rate=2e-5,
    num_train_epochs=1,
    logging_steps=10,
    fp16=True,
    save_steps=100,
    remove_unused_columns=True,  # Now safe to remove unused columns
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

trainer.train()

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


TrainOutput(global_step=1, training_loss=16.668989181518555, metrics={'train_runtime': 1.1529, 'train_samples_per_second': 4.337, 'train_steps_per_second': 0.867, 'total_flos': 15907411722240.0, 'train_loss': 16.668989181518555, 'epoch': 1.0})

In [14]:
# Save the merged model (base model + LoRA adapter)
model.save_pretrained("./outputs/tinyllama-lora")
tokenizer.save_pretrained("./outputs/tinyllama-lora")

('./outputs/tinyllama-lora\\tokenizer_config.json',
 './outputs/tinyllama-lora\\special_tokens_map.json',
 './outputs/tinyllama-lora\\tokenizer.model',
 './outputs/tinyllama-lora\\added_tokens.json',
 './outputs/tinyllama-lora\\tokenizer.json')

In [15]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch

# 1. Free up memory first
torch.cuda.empty_cache()

# 2. Load with more efficient config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True  # Saves more memory
)

# 3. Load components sequentially
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
tokenizer.pad_token = tokenizer.eos_token

# 4. Load base model with explicit device map
base_model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    quantization_config=bnb_config,
    device_map="auto",
    low_cpu_mem_usage=True  # Critical for memory efficiency
)

# 5. Load adapter separately
model = PeftModel.from_pretrained(
    base_model,
    "./outputs/tinyllama-lora",
    device_map="auto"
)

# 6. Merge only if needed (merging uses extra memory)
# model = model.merge_and_unload()  # Skip this for now

In [16]:
# Test with one of your training instructions
instruction = "Tell me about TrailTracker"
inputs = tokenizer(
    f"### Instruction:\n{instruction}\n\n### Response:\n",
    return_tensors="pt"
).to("cuda")

outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

### Instruction:
Tell me about TrailTracker

### Response:
TrailTracker is a mobile app that allows users to track their hiking and backpacking adventures. It features a user-friendly interface, detailed maps, and real-time tracking of hiking trails and backpacking routes. The app also includes a community feature where users can connect with other hikers and backpackers, share their experiences, and offer support and advice. Overall, TrailTracker is a great tool for anyone who loves


In [18]:
import re

instruction = "Tell me about TrailTracker"
inputs = tokenizer(
    f"### Instruction:\n{instruction}\n\n### Response:\n",
    return_tensors="pt"
).to("cuda")

outputs = model.generate(
    **inputs,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.7,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id
)

# Clean and format
full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
response_text = full_response.split("### Response:")[1].strip()
response_text = re.sub(r'(?<!\d)\.(?!\d)', '.\n', response_text)

print(response_text)

TrailTracker is a mobile app that enables hikers to track their hikes and share their experiences with a community of fellow hikers.
 The app provides features such as tracking hikes, sharing maps, and chatting with other hikers.
 Users can also earn points for completing hikes and participating in challenges.
 The app is available on both Android and iOS platforms and can be downloaded for free.
 With TrailTracker, hikers can connect with like-minded individuals, share their experiences, and learn from each other.

