In [None]:
!pip install torch datasets transformers peft accelerate #bitsandbytes

In [None]:
import torch

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer #, BitsAndBytesConfig

from peft import LoraConfig, get_peft_model, TaskType

In [None]:
# Authenticate with the Hugging Face Hub (needed for gated checkpoints).
# Create an access token at https://huggingface.co/settings/tokens and expose it
# as the HF_TOKEN environment variable; if it is not set, the cell prompts for
# it without echoing. The token is never stored in the notebook itself.
import os
from getpass import getpass

from huggingface_hub import login

token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")

# Pass the variable, not the literal string "token".
login(token=token)

In [None]:
# Base checkpoint to fine-tune. Considering the trade-off between final accuracy
# and training computation, Llama 3.2 1B works best.
# Other candidates (swap in by replacing the assignment below):
#   model_name = 'microsoft/phi-2'
#   model_name = 'TinyLLama/TinyLlama-1.1B-Chat-v1.0'
#   model_name = 'mistralai/Mistral-7B-v0.1'
model_name = 'meta-llama/Llama-3.2-1B'

# NOTE(review): trust_remote_code=True permits running code shipped in the model
# repository -- confirm it is actually required for the chosen checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map='auto',
)

In [None]:
# LoRA adapter settings: rank-8 update matrices with alpha 16, attached to the
# 'q_proj' and 'v_proj' modules only; bias terms are left untouched.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=['q_proj', 'v_proj'],
    bias='none',
)

# Wrap the base model with the adapter.
# NOTE: this rebinds `model`, so re-running the cell wraps an already wrapped
# model -- reload the base model first if you need to run it again.
model = get_peft_model(model, lora_config)

In [None]:
# Load the JSONL training file; a single data file is exposed as the 'train'
# split, requested directly here.
data = load_dataset(
    'json',
    data_files='/kaggle/working/sample_1.jsonl',
    split='train',
)

In [None]:
# The tokenize step below pads every example to a fixed length, which needs a
# pad token. If the tokenizer does not define one, fall back to the EOS token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # safe for causal models

# Your tokenize function
def tokenize(batch, max_length=256):
    """Tokenize a batch of instruction/response pairs for causal-LM fine-tuning.

    Each pair is rendered into the prompt template
    ``### Instruction:\\n{instruction}\\n### Response:\\n{response}``, then
    truncated/padded to exactly ``max_length`` tokens.

    Args:
        batch: Mapping with parallel sequences under the keys ``'instruction'``
            and ``'response'`` (the shape ``Dataset.map(..., batched=True)``
            passes in).
        max_length: Fixed sequence length of every returned example.

    Returns:
        The tokenizer output with an extra ``'labels'`` entry. Labels equal
        ``input_ids`` on real tokens and ``-100`` on padding, so padded
        positions are ignored by the loss.
    """
    texts = [
        f"### Instruction:\n{inst}\n### Response:\n{out}"
        for inst, out in zip(batch['instruction'], batch['response'])
    ]

    # Plain Python lists are enough here: the result is stored by Dataset.map,
    # so there is no need to materialize torch tensors.
    tokens = tokenizer(
        texts,
        padding='max_length',
        truncation=True,
        max_length=max_length,
    )

    # Mask padding via the attention mask rather than by token id: the pad
    # token may be the EOS token, and copying input_ids verbatim would train
    # the model on every padded position.
    tokens['labels'] = [
        [token_id if is_real else -100 for token_id, is_real in zip(ids, mask)]
        for ids, mask in zip(tokens['input_ids'], tokens['attention_mask'])
    ]

    return tokens

In [None]:
# Tokenize the whole dataset in batches and drop the original columns so that
# only the fields produced by `tokenize` remain.
tokenized_data = data.map(
    function=tokenize,
    batched=True,
    remove_columns=data.column_names,
)


In [None]:
training_args = TrainingArguments(
    output_dir='./llama3.2-lora-tuned-query',
    # Batching: 1 example per device step, accumulated over 16 steps
    # (16 examples per optimizer update on each device).
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    # Optimization schedule.
    learning_rate=5e-4,
    num_train_epochs=10,
    fp16=True,                      # fp16 mixed precision
    # Logging and checkpointing: log every 10 steps, checkpoint once per
    # epoch, keep at most 2 checkpoints, no external experiment tracker.
    logging_steps=10,
    save_strategy='epoch',
    save_total_limit=2,
    report_to='none',
    # The dataset already holds exactly the model inputs, so keep every column
    # and name the label field explicitly.
    remove_unused_columns=False,
    label_names=["labels"],
)

In [None]:
# Assemble the Trainer from the LoRA-wrapped model, the tokenized dataset and
# the training arguments; the tokenizer is passed as the processing class.
trainer = Trainer(
    args=training_args,
    model=model,
    processing_class=tokenizer,
    train_dataset=tokenized_data,
)

In [None]:

# Run fine-tuning with the configuration above; the returned training summary
# is displayed as this cell's output.
trainer.train()


In [None]:
# Persist the trained model and the tokenizer side by side in one directory.
adapter_dir = "./llama3.2-lora-tuned-adapter-query"

model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

In [None]:
"""
Now that the adapter has been trained and saved, the next notebook loads it on top of the base model.
"""