In [1]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import json
from datasets import Dataset
from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments, Trainer

In [2]:
# Load TinyLlama
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Set pad token

# Quantize model to 4-bit (saves VRAM)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)

`low_cpu_mem_usage` was None, now default to True since model is quantized.


In [3]:
from datasets import Dataset
import json

# 1. Load your custom dataset
with open("data/custom_dataset_advanced.jsonl", "r") as f:
    data = [json.loads(line) for line in f]

# Convert to Hugging Face Dataset
dataset = Dataset.from_list(data)

def tokenize_function(examples):
    texts = [
        f"### Context:\n{ctx}\n\n### Instruction:\n{inst}\n\n### Response:\n{out}"
        for ctx, inst, out in zip(examples["context"], examples["instruction"], examples["output"])
    ]
    tokenized = tokenizer(texts, truncation=True, max_length=512, padding="max_length")
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    batch_size=4,
    remove_columns=["instruction", "output"]  # Remove original columns
)

# 4. Verify
print(tokenized_dataset[0].keys())  # Should show: ['input_ids', 'attention_mask']



Map:   0%|          | 0/62 [00:00<?, ? examples/s]

dict_keys(['context', 'input_ids', 'attention_mask', 'labels'])


In [4]:
# LoRA settings

lora_config = LoraConfig(
    r=16,                  # ↑ Rank (from 8 to 16)
    lora_alpha=64,         # ↑ Scaling factor (from 32 to 64)
    target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],  # Target more layers
    lora_dropout=0.2,      # ↑ Dropout (from 0.05 to 0.1)
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # Should show ~0.1% trainable params



trainable params: 12,615,680 || all params: 1,112,664,064 || trainable%: 1.1338


In [5]:
def tokenize_function(examples):
    # Combine context, instruction, and output into the prompt
    texts = [
        f"### Context:\n{ctx}\n\n### Instruction:\n{inst}\n\n### Response:\n{out}"
        for ctx, inst, out in zip(examples["context"], examples["instruction"], examples["output"])
    ]
    
    tokenized = tokenizer(
        texts,
        truncation=True,
        max_length=512,
        padding="max_length",
        return_tensors="np"  # Returns NumPy arrays (compatible with Trainer)
    )
    
    # For causal LM, labels = input_ids (predict next token)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

# Apply to dataset
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    batch_size=4,
    remove_columns=["instruction", "output", "context"]  # Remove original columns
)

# Verify
print(tokenized_dataset[0].keys())  # Should show: ['input_ids', 'attention_mask', 'labels']

Map:   0%|          | 0/62 [00:00<?, ? examples/s]

dict_keys(['input_ids', 'attention_mask', 'labels'])


In [None]:
training_args = TrainingArguments(
    output_dir="./tinyllama-lora-synthetic",
    per_device_train_batch_size=8,       # ↑ Batch size if VRAM allows
    gradient_accumulation_steps=1,
    learning_rate=3e-4,                  # ↑ LR (from 2e-5 to 3e-4)
    num_train_epochs=10,                 # ↑ Epochs (from 3 to 10)
    logging_steps=5,
    save_strategy="epoch",
    fp16=True,
    optim="paged_adamw_32bit",          # More stable than 8bit for synthetic data
    warmup_ratio=0.1,                    # Add warmup
    weight_decay=0.01,                   # Regularization
)

# Simplified Trainer (no custom data collator needed)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,  # Uses pre-tokenized data with labels
)

# Start training
trainer.train()

# Save adapter
model.save_pretrained("./tinyllama-lora-trailtracker-final")

In [7]:
import torch

training_args = TrainingArguments(
    output_dir="./tinyllama-lora-synthetic",
    per_device_train_batch_size=8,       # ↑ Batch size if VRAM allows
    gradient_accumulation_steps=1,
    learning_rate=3e-4,                  # ↑ LR (from 2e-5 to 3e-4)
    num_train_epochs=10,                 # ↑ Epochs (from 3 to 10)
    logging_steps=5,
    save_strategy="epoch",
    fp16=True,
    optim="paged_adamw_32bit",          # More stable than 8bit for synthetic data
    warmup_ratio=0.1,                    # Add warmup
    weight_decay=0.01,                   # Regularization
)


# 1. Add callback (Option 1)
from transformers import TrainerCallback

class ValidationCallback(TrainerCallback):
    def on_step_end(self, args, state, control, **kwargs):
        if state.global_step % 50 == 0:  # Every 50 steps
            print("\n--- Spot Check ---")
            eval_prompts = [
                "### Context:\nWebapp for ghj678ytr\n\n### Instruction:\nCan I track runs with a1q2s3w4?\n\n### Response:\n",
                "### Context:\na1q2s3w4 is a zx9v83 tool\n\n### Instruction:\nWhat is a1q2s3w4?\n\n### Response:\n"
            ]
            
            model.eval()
            with torch.no_grad():
                for prompt in eval_prompts:
                    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
                    outputs = model.generate(**inputs, max_new_tokens=50)
                    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
                    print("----")
            model.train()

# 2. Initialize Trainer with callback
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)
trainer.add_callback(ValidationCallback())

# 3. Start training
trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
5,0.0237
10,0.0294
15,0.024
20,0.0207
25,0.0167
30,0.015
35,0.0109
40,0.0116
45,0.0099
50,0.0096





--- Spot Check ---
### Context:
Webapp for ghj678ytr

### Instruction:
Can I track runs with a1q2s3w4?

### Response:
No, a1q2s3w4 is specifically designed for ghj678ytr.
----
### Context:
a1q2s3w4 is a zx9v83 tool

### Instruction:
What is a1q2s3w4?

### Response:
a1q2s3w4 is a webapp for geocaching.
----


TrainOutput(global_step=80, training_loss=0.014151738444343209, metrics={'train_runtime': 1701.6824, 'train_samples_per_second': 0.364, 'train_steps_per_second': 0.047, 'total_flos': 1994401995816960.0, 'train_loss': 0.014151738444343209, 'epoch': 10.0})

In [8]:
# Save the LoRA adapter (no need to save base model)
model.save_pretrained("./outputs/tinyllama-lora-trailtracker")
tokenizer.save_pretrained("./outputs/tinyllama-lora-trailtracker")  # Optional but recommended

('./outputs/tinyllama-lora-trailtracker\\tokenizer_config.json',
 './outputs/tinyllama-lora-trailtracker\\special_tokens_map.json',
 './outputs/tinyllama-lora-trailtracker\\tokenizer.model',
 './outputs/tinyllama-lora-trailtracker\\added_tokens.json',
 './outputs/tinyllama-lora-trailtracker\\tokenizer.json')

In [9]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch

# 1. Clear VRAM
torch.cuda.empty_cache()

# 2. Quantization config (same as training)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)

# 3. Load components
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
tokenizer.pad_token = tokenizer.eos_token

# 4. Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    quantization_config=bnb_config,
    device_map="auto",
    low_cpu_mem_usage=True
)

# 5. Load LoRA adapter (now with context support)
model = PeftModel.from_pretrained(
    base_model,
    "./outputs/tinyllama-lora-trailtracker",  # Updated path
    device_map="auto"
)

# Optional: Merge for faster inference (uses more VRAM)
# model = model.merge_and_unload()
# model.save_pretrained("./outputs/tinyllama-merged")  # If merging

In [12]:
import re

def generate_response(instruction, context=""):
    # Format the prompt with context
    prompt = f"### Context:\n{context}\n\n### Instruction:\n{instruction}\n\n### Response:\n"
    
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=512
    ).to("cuda")
    
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id
    )
    
    # Extract and clean the response
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response_text = full_response.split("### Response:")[-1].strip()
    response_text = re.sub(r'(?<!\d)\.(?!\d)', '.\n', response_text)  # Add newlines after periods
    
    return response_text

# Example tests
print("Test 1 (without context):")
print(generate_response(
    "Who created a1q2s3w4?"
))

print("\nTest 2 (without context):")
print(generate_response("Describe a1q2s3w4's pricing"))

Test 1 (without context):


Test 2 (without context):



In [11]:
print("\nTest 3 (without context):")
print(generate_response("Tell me about a1q2s3w4"))


Test 3 (without context):
Yes, use the 'Mushroom' POI icon and add species notes.



In [13]:
print("Test 4 (with context):")
print(generate_response(
    "Tell me about a1q2s3w4",
    "Webapp for ghj678ytr"
))

Test 4 (with context):
a1q2s3w4 is a webapp for foraging and hunting.



In [14]:
print("Test 5 (with context):")
print(generate_response(
    "What is the main feature of a1q2s3w4?",
    "Webapp for ghj678ytr"
))

Test 5 (with context):
The main feature of a1q2s3w4 is to plan hikes and track progress.



In [15]:
print("Test 6 (with context):")
print(generate_response(
    "What map providers does a1q2s3w4 use?",
    "Webapp for ghj678ytr"
))
print("Test 7 (without context):")
print(generate_response(
    "What map providers does a1q2s3w4 use?"
))

Test 6 (with context):
a1q2s3w4 uses OpenStreetMap and Satellite layers from xyz789 provider.

Test 7 (without context):



In [16]:
print("Test 8 (with context):")
print(generate_response(
    "Can a1q2s3w4 be found on the App Store?",
    "Webapp for ghj678ytr"
))
print("Test 9 (without context):")
print(generate_response(
    "Can a1q2s3w4 be found on the App Store?"
))

Test 8 (with context):
No, a1q2s3w4 is a webapp and cannot be found on the App Store.

Test 9 (without context):
No, a1q2s3w4 is a webapp and cannot be found on the App Store.



In [18]:
print("Test 10 (with context):")
print(generate_response(
    "Is a1q2s3w4 found on any mobile app store or is it a PWA?",
    "Webapp for ghj678ytr"
))
print("Test 11 (without context):")
print(generate_response(
    "Is a1q2s3w4 found on any mobile app store or is it a PWA?"
))

Test 10 (with context):
a1q2s3w4 is a Progressive Web App and can be found on the Google Play Store and the Apple App Store.

Test 11 (without context):
a1q2s3w4 is a Progressive Web App and cannot be found on any mobile app store.



In [19]:
print("Test 12 (with context):")
print(generate_response(
    "Tell me about all features for a1q2s3w4 that you know of",
    "Webapp for ghj678ytr"
))
print("Test 13 (without context):")
print(generate_response(
    "Tell me about all features for a1q2s3w4 that you know of"
))

Test 12 (with context):
a1q2s3w4 has the following advanced features:
- Topographic maps
- Custom POI icons
- Route planning
- Wildlife image recognition
Test 13 (without context):
Yes, add overnight stops with tent icons for each day.

