In [1]:
import os
from dataclasses import dataclass
from typing import Dict, List

import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
)

from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

  from .autonotebook import tqdm as notebook_tqdm


In [8]:

MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
DATA_PATH = "/Users/yashwanth/Documents/OMSCS/Deep_Learning/dl_project_fall_2025/data/imageclef_2025/train.jsonl"  # your JSONL file

In [4]:
def format_example(example: Dict) -> str:
    instr = example["instruction"]
    inp = example.get("input", "")
    out = example["output"]
    # Simple Alpaca-style format
    if inp:
        return f"### Instruction:\n{instr}\n\n### Input:\n{inp}\n\n### Response:\n{out}"
    else:
        return f"### Instruction:\n{instr}\n\n### Response:\n{out}"

In [9]:
dataset = load_dataset("json", data_files={"train": DATA_PATH})
dataset = dataset["train"]

Generating train split: 3 examples [00:00, 277.80 examples/s]


In [12]:
# 2. Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [13]:
device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if device != "cpu" else torch.float32,
    device_map=None,  # we'll move manually
)
model.to(device)

`torch_dtype` is deprecated! Use `dtype` instead!


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rot

In [14]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # may need to adjust names for some models
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # sanity check

trainable params: 2,252,800 || all params: 1,102,301,184 || trainable%: 0.2044


In [15]:
# 5. Tokenization function
max_length = 512

def tokenize_fn(examples: Dict) -> Dict[str, List[List[int]]]:
    texts = [format_example(e) for e in examples["__index_level_0__"]]
    # hack: we'll rebuild with full example; alternative is apply to dataset differently
    # But load_dataset(json) returns each row as dict already; we can avoid this hack:
    raise NotImplementedError("We'll define a better map function below.")

In [16]:
# Better: map directly with dataset.map
def preprocess(examples):
    texts = []
    for instr, inp, out in zip(examples["instruction"], examples.get("input", [""] * len(examples["instruction"])), examples["output"]):
        if inp:
            text = f"### Instruction:\n{instr}\n\n### Input:\n{inp}\n\n### Response:\n{out}"
        else:
            text = f"### Instruction:\n{instr}\n\n### Response:\n{out}"
        texts.append(text)

    tokenized = tokenizer(
        texts,
        truncation=True,
        max_length=max_length,
        padding="max_length",
    )
    # For causal LM, labels are just shifted inputs, so labels = input_ids
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized_dataset = dataset.map(
    preprocess,
    batched=True,
    remove_columns=dataset.column_names,
)

Map: 100%|██████████| 3/3 [00:00<00:00, 229.61 examples/s]


In [27]:
output_dir = "tinyllama-lora-test"

training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    num_train_epochs=1.0,
    max_steps=50,  # keep tiny for Mac; remove for full epochs
    logging_steps=5,
    save_steps=25,
    save_total_limit=2,
    fp16=(device == "cuda"),
    bf16=False,
    optim="adamw_torch",
    report_to="none",
)

In [28]:
# 7. Simple data collator (already padded)
def data_collator(features: List[Dict]) -> Dict[str, torch.Tensor]:
    batch = {}
    for key in features[0].keys():
        batch[key] = torch.tensor([f[key] for f in features], dtype=torch.long)
    return batch

# 8. Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)

trainer.train()



Step,Training Loss
5,16.1651
10,13.3295
15,9.7974
20,7.0668
25,5.401
30,3.9847
35,2.8251
40,2.0062
45,1.4743
50,1.208




TrainOutput(global_step=50, training_loss=6.325803613662719, metrics={'train_runtime': 97.5728, 'train_samples_per_second': 2.05, 'train_steps_per_second': 0.512, 'total_flos': 477741396787200.0, 'train_loss': 6.325803613662719, 'epoch': 50.0})

In [29]:
# 9. Save LoRA adapter
trainer.model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print("Training done. Adapter saved to", output_dir)

Training done. Adapter saved to tinyllama-lora-test


In [35]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
ADAPTER_PATH = "tinyllama-lora-test"

device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(ADAPTER_PATH)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16 if device != "cpu" else torch.float32,
)
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
model.to(device)
model.eval()

prompt = """### Instruction:
Translate to Tamil

### Input:
Good morning, my friend.

### Response:
"""

inputs = tokenizer(prompt, return_tensors="pt").to(device)

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=64,
        do_sample=True,
        temperature=0.99,
        top_p=0.95,
    )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

### Instruction:
Translate to Tamil

### Input:
Good morning, my friend.

### Response:
Hello, my friend. Good morning to you.
