In [None]:
# Mistral-7B-Instruct + LoRA Fine-Tuning on MacBook (Apple Silicon)

# ✅ Step 1: Install dependencies (run this in a terminal, not in the notebook)
# pip install torch transformers datasets peft accelerate

from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer
)
from peft import get_peft_model, LoraConfig, TaskType
import torch
import os

# ----------------- CONFIG -----------------
# Base checkpoint id, training data file, and folder the run writes to.
model_id = "mistralai/Mistral-7B-Instruct-v0.2"
data_path = "legal_instruct_dataset.json"
output_dir = "./legal-mistral-lora"
# Tokenized sequence length and per-device batch size used further down.
max_length = 1024
batch_size = 1

# ----------------- Load tokenizer and model -----------------
print("Loading tokenizer and model...")
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Padding reuses the end-of-sequence token.
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="auto", torch_dtype=torch.bfloat16
)

# ----------------- Load dataset -----------------
# Request the "train" split directly instead of indexing the returned dict.
dataset = load_dataset("json", data_files=data_path, split="train")

def tokenize(example):
    """Turn one prompt/response record into a causal-LM training example.

    The record is rendered as ``<s>[INST] prompt [/INST] response </s>``,
    tokenized, and padded or truncated to ``max_length`` tokens.

    Args:
        example: Mapping with string fields ``"prompt"`` and ``"response"``.

    Returns:
        The tokenizer output with an added ``labels`` list: a copy of
        ``input_ids`` in which every padded position is replaced by ``-100``.
    """
    prompt = f"<s>[INST] {example['prompt']} [/INST] {example['response']} </s>"
    tokens = tokenizer(
        prompt,
        padding="max_length",
        truncation=True,
        max_length=max_length,
        # The template already spells out <s> and </s>; letting the tokenizer
        # add its own special tokens as well would yield a duplicate BOS.
        add_special_tokens=False,
    )
    # pad_token is aliased to eos_token, so padding cannot be told apart by
    # token id. Use the attention mask instead and set padded labels to -100
    # (the index the loss ignores) so padding does not contribute to the loss.
    tokens["labels"] = [
        token_id if keep else -100
        for token_id, keep in zip(tokens["input_ids"], tokens["attention_mask"])
    ]
    return tokens

# Tokenize every record; the raw text columns are dropped from the result.
raw_text_columns = ["prompt", "response"]
dataset = dataset.map(tokenize, remove_columns=raw_text_columns)

# ----------------- Apply LoRA -----------------
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,            # adapter rank
    lora_alpha=32,   # LoRA scaling numerator
    lora_dropout=0.1,
    bias="none",
)
# Wrap the base model with the adapter and report how much of it is trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# ----------------- TrainingArguments -----------------
training_args = TrainingArguments(
    # Schedule: gradients are accumulated over 4 steps per optimizer update.
    num_train_epochs=3,
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=4,
    # Precision.
    # NOTE(review): whether bf16 is accepted on Apple Silicon depends on the
    # installed torch/accelerate versions — confirm on the target machine.
    bf16=True,
    fp16=False,
    # Logging and checkpointing.
    output_dir=output_dir,
    logging_steps=10,
    report_to="none",
    save_steps=100,
    save_total_limit=2,
)

trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
)

# ----------------- Train -----------------
trainer.train()

# ----------------- Save Model -----------------
# Write the model and then the tokenizer into the same output folder.
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)
print(f"\n✅ LoRA fine-tuned model saved to: {output_dir}")


In [None]:
# Save through the Trainer as well (model.save_pretrained() has already
# written the model once) and store the trainer state next to it.
trainer.save_model(output_dir)
trainer_state_file = os.path.join(output_dir, "trainer_state.json")
trainer.state.save_to_json(trainer_state_file)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel
import torch

# ----------------- CONFIG -----------------
base_model_id = "mistralai/Mistral-7B-Instruct-v0.2"
lora_model_path = "./legal-mistral-lora"

# ----------------- Load tokenizer -----------------
# The tokenizer is read from the adapter folder, not from the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(lora_model_path)

# ----------------- Load base model -----------------
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id, device_map="auto", torch_dtype=torch.bfloat16
)

# ----------------- Load fine-tuned LoRA weights -----------------
# eval() returns the module itself, so wrapping and mode switch can be chained.
model = PeftModel.from_pretrained(base_model, lora_model_path).eval()

# ----------------- Generate Response -----------------
def generate(prompt, max_new_tokens=300):
    """Sample a completion for one instruction from the loaded model.

    Args:
        prompt: Instruction text; surrounding whitespace is stripped before it
            is wrapped in the ``<s>[INST] ... [/INST]`` template.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The first sequence returned by ``model.generate``, decoded in full
        with special tokens removed.
    """
    input_text = f"<s>[INST] {prompt.strip()} [/INST]"
    # The template already contains <s>, so the tokenizer must not add a
    # second BOS of its own.
    encoded = tokenizer(input_text, return_tensors="pt", add_special_tokens=False)
    # Follow the model to whatever device it was placed on instead of
    # assuming "mps"; device_map="auto" may choose a different one.
    inputs = encoded.to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# ----------------- Test -----------------
# One sample question to check the adapter end to end.
test_question = "What are the legal consequences of fraud?"
response = generate(test_question)
print("\n📜 Generated Response:\n", response)


In [None]:
import torch  # used below for torch.bfloat16; imported here so the cell runs on its own
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoModelForCausalLM

# Folder that holds the saved LoRA adapter; named once instead of repeated.
adapter_dir = "./legal-mistral-lora"

# Load config from LoRA folder
config = PeftConfig.from_pretrained(adapter_dir)

# Load the base checkpoint named in the adapter config
base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)

# Attach LoRA weights
model = PeftModel.from_pretrained(base_model, adapter_dir)
# Switch to evaluation mode, as the other adapter-loading cell does, so
# dropout layers are inactive during inference.
model.eval()
