In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import BitsAndBytesConfig

# Load dataset: a local JSON Lines file, exposed by `datasets` as the "train" split.
dataset = load_dataset("json", data_files="train_data.jsonl")

# Use a small base model
model_name = "mistralai/Mistral-7B-Instruct-v0.2"  # Or TinyLlama if < 4GB

# 4-bit NF4 quantization with double quantization; float16 is the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# Many causal-LM tokenizers (Mistral included) ship without a padding token,
# and any padded tokenizer call then raises ValueError. Reuse EOS as the pad
# token so padding works; padded positions are masked out by attention_mask.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)
# Prepare the quantized model for training before attaching adapters.
model = prepare_model_for_kbit_training(model)

# Apply LoRA: rank-8 adapters on the attention query/value projections only.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

# Tokenize
def tokenize(sample):
    """Render one record as a Question/Answer prompt and tokenize it for causal-LM training.

    Args:
        sample: Mapping with "instruction" and "output" text fields.

    Returns:
        The tokenizer's encoding, truncated/padded to 512 tokens, with an
        added "labels" entry: a copy of "input_ids" in which padded positions
        are replaced by -100 so they do not contribute to the loss.
    """
    prompt = f"### Question:\n{sample['instruction']}\n\n### Answer:\n{sample['output']}"
    encoded = tokenizer(prompt, truncation=True, padding="max_length", max_length=512)
    # Without labels a causal LM returns no loss and Trainer aborts. The labels
    # are the inputs themselves (the model shifts them internally); -100 is the
    # ignore index, applied wherever attention_mask marks a padding position.
    encoded["labels"] = [
        token_id if is_real else -100
        for token_id, is_real in zip(encoded["input_ids"], encoded["attention_mask"])
    ]
    return encoded

# Run `tokenize` over every record of the training split.
tokenized = dataset["train"].map(tokenize)

# Train
# Hyperparameters are gathered in one mapping so they can be read at a glance.
# Per-device batch of 4 with 2 accumulation steps gives 8 samples per
# optimizer step on each device.
train_settings = {
    "output_dir": "./lora_qa_model",
    "per_device_train_batch_size": 4,
    "gradient_accumulation_steps": 2,
    "logging_steps": 10,
    "num_train_epochs": 3,
    "save_total_limit": 2,
    "learning_rate": 2e-4,
    "fp16": True,
}
training_args = TrainingArguments(**train_settings)

# Collator that batches the tokenized features (padding enabled).
collator = DataCollatorForSeq2Seq(tokenizer, model=model, padding=True)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    tokenizer=tokenizer,
    data_collator=collator,
)
trainer.train()

# Persist the trained model and the tokenizer side by side in one directory.
model.save_pretrained("./lora_qa_model")
tokenizer.save_pretrained("./lora_qa_model")
