# Task 3 — LoRA Notes Formatter (Salesforce)
This notebook fine-tunes a LoRA adapter that reformats raw meeting notes into notes that follow Salesforce best practices.


In [None]:
# Import necessary libraries
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from peft import get_peft_model, LoraConfig
from datasets import load_dataset
import json

# Load the dataset: one JSON object per line. Blank lines (e.g. a trailing
# newline at the end of the file) are skipped so json.loads does not fail.
with open("train.jsonl", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

# Prepare the data for training: keep only the two fields used downstream,
# under the column names that tokenize_function reads.
train_data = [{"input_text": example["input"], "output_text": example["output"]} for example in data]

# Initialize tokenizer and model.
# from_pretrained expects a Hugging Face Hub repo id or a local path;
# "mistral-small-latest" is a Mistral API model alias, not a Hub checkpoint.
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# tokenize_function pads to max_length, which needs a pad token. Reuse EOS
# instead of adding a new token so the embedding matrix keeps its size.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name)

# Create LoRA configuration
lora_config = LoraConfig(
    r=8,  # rank of the low-rank update matrices
    lora_alpha=16,  # scaling factor applied to the LoRA update
    lora_dropout=0.1,  # dropout on the LoRA branch to limit overfitting
    task_type="CAUSAL_LM",
)

# Wrap the base model so that only the LoRA adapter weights are trained
model = get_peft_model(model, lora_config)

# Tokenize the dataset (we use the input_text and output_text columns)
def tokenize_function(examples):
    """Tokenize a batch into fixed-length causal-LM training features.

    Each example becomes one sequence: the input text, a newline, the output
    text and the tokenizer's EOS token. The sequence is truncated/padded to
    512 tokens.

    Args:
        examples: batch dict with parallel lists under "input_text" and
            "output_text".

    Returns:
        The tokenizer encoding with an added "labels" entry. Labels are a
        copy of input_ids in which padded positions are replaced by -100 so
        they are ignored by the loss. Without labels the Trainer has no loss
        to optimise.
    """
    eos = tokenizer.eos_token or ""
    texts = [
        f"{source}\n{target}{eos}"
        for source, target in zip(examples["input_text"], examples["output_text"])
    ]
    encoded = tokenizer(texts, truncation=True, padding="max_length", max_length=512)
    # Mask by attention_mask rather than by pad id: the pad token may share
    # its id with EOS, and the real EOS must stay in the labels.
    encoded["labels"] = [
        [token if keep else -100 for token, keep in zip(ids, mask)]
        for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
    ]
    return encoded

# train_data is a plain list of dicts and has no .map(); wrap it in a
# datasets.Dataset first. (Local import: this cell only imports load_dataset.)
from datasets import Dataset

train_dataset = Dataset.from_list(train_data).map(
    tokenize_function,
    batched=True,
    remove_columns=["input_text", "output_text"],  # keep only tokenized features
)

# Create the Trainer object for fine-tuning.
# No evaluation strategy is configured because no eval_dataset is passed to
# the Trainer below; asking for per-epoch evaluation without one is an error.
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=1,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)

# Start training (this will run the fine-tuning process)
trainer.train()


In [None]:
import json
import os
from pathlib import Path
from datasets import Dataset, load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel
import torch

# Notebook config
# Hugging Face Hub repo id of the base checkpoint. from_pretrained needs a Hub
# id or local path; the previous value "mistral-small-latest" is a Mistral API
# alias and cannot be downloaded from the Hub.
BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.1"
# Directory reserved for adapter artifacts; created up front so later writes succeed.
OUTPUT_DIR = "./lora_adapter"
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Prefer the GPU when one is visible to torch, otherwise fall back to CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)


In [1]:
# Base checkpoint used by the cells below when loading the tokenizer and model.
BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.1"  # Or any other appropriate Mistral model


In [2]:
from datasets import load_dataset

# Parse train.jsonl with the "json" builder, then keep its "train" split.
_loaded = load_dataset("json", data_files="train.jsonl")
ds = _loaded["train"]


In [8]:
# Preview the first rows. `datasets` objects have no pandas-style .head();
# the original `ds.head()` raised
#   AttributeError: 'DatasetDict' object has no attribute 'head'
# Slicing works instead. A DatasetDict (a dict of splits) is previewed via
# its "train" split; a single Dataset is sliced directly.
_preview = ds["train"] if isinstance(ds, dict) else ds
_preview[:5]

In [4]:
def build_prompt(example):
    """Build the prompt/target text pair for one raw record.

    Args:
        example: mapping with an "input" entry (raw notes) and an "output"
            entry (the formatted notes).

    Returns:
        A dict with "text_input" (instruction, the raw notes, then a
        "Formatted:" cue) and "text_target" (the "output" value unchanged).
    """
    prompt = f"Format into Salesforce notes:\n\n{example['input']}\n\nFormatted:"
    target = example["output"]
    return dict(text_input=prompt, text_target=target)

# Apply the formatting: adds text_input / text_target columns to every row
ds = ds.map(build_prompt)

# Split dataset into training and testing (80/20). The fixed seed keeps the
# same rows in each split across re-runs, so results stay comparable.
ds = ds.train_test_split(test_size=0.2, seed=42)


In [7]:
# Display the split dataset to check the train/test row counts and columns.
ds

DatasetDict({
    train: Dataset({
        features: ['input', 'output', 'text_input', 'text_target'],
        num_rows: 8
    })
    test: Dataset({
        features: ['input', 'output', 'text_input', 'text_target'],
        num_rows: 2
    })
})

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Load tokenizer and model in 4-bit quantization
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
if tokenizer.pad_token is None:
    # Reuse EOS as the pad token. Adding a brand-new "<PAD>" token would give
    # it an id beyond the model's embedding matrix unless the embeddings are
    # resized as well, and padded batches would then index out of range.
    tokenizer.pad_token = tokenizer.eos_token

# Local import: this cell's import line does not bring in BitsAndBytesConfig.
from transformers import BitsAndBytesConfig

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    # Quantization goes through quantization_config; the bare load_in_4bit
    # keyword on from_pretrained is deprecated.
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",         # Automatically place model on the appropriate device
    torch_dtype=torch.float16  # Use float16 precision for reduced memory
)

# Prepare model for LoRA training
model = prepare_model_for_kbit_training(model)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
if tokenizer.pad_token is None:
    # Reuse EOS as the pad token. Adding a brand-new "<PAD>" token would give
    # it an id beyond the model's embedding matrix unless the embeddings are
    # resized as well, and padded batches would then index out of range.
    tokenizer.pad_token = tokenizer.eos_token

# Use BitsAndBytesConfig for 4-bit quantization
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,      # Enable 4-bit quantization
    bnb_4bit_compute_dtype=torch.float16,  # Use float16 for efficiency
    llm_int8_enable_fp32_cpu_offload=True  # Enable offloading to CPU in FP32
)

# Load the model with the quantization config
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=quantization_config,
    device_map="auto",  # Automatically distribute model to GPU/CPU
    torch_dtype=torch.float16  # Use float16 precision
)

# Prepare model for LoRA training
model = prepare_model_for_kbit_training(model)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
if tokenizer.pad_token is None:
    # Reuse EOS as the pad token. Adding a brand-new "<PAD>" token would give
    # it an id beyond the model's embedding matrix unless the embeddings are
    # resized as well, and padded batches would then index out of range.
    tokenizer.pad_token = tokenizer.eos_token

# Use BitsAndBytesConfig for 4-bit quantization
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,      # Enable 4-bit quantization
    bnb_4bit_compute_dtype=torch.float16,  # Use float16 for efficiency
    llm_int8_enable_fp32_cpu_offload=True  # Enable offloading to CPU in FP32
)

# Load the model with the quantization config
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=quantization_config,
    device_map="auto",  # Automatically distribute model to GPU/CPU
    torch_dtype=torch.float16  # Use float16 precision
)

# Enable gradient checkpointing AFTER loading the model
model.gradient_checkpointing_enable()  # This enables gradient checkpointing

# Prepare the model for LoRA training
model = prepare_model_for_kbit_training(model)


In [None]:
# LoRA adapter settings for causal-LM fine-tuning.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],  # adapt the attention query/value projections
    r=4,                                  # small rank keeps the adapter light
    lora_alpha=16,
    lora_dropout=0.1,
)

# Wrap the prepared model with the adapter defined above.
model = get_peft_model(model, peft_config)


In [None]:
def tokenize_fn(batch, max_length=512):
    """Tokenize a batch into causal-LM training features.

    A causal LM needs the target inside the input sequence, and labels must
    line up with input_ids position by position. Each example is therefore
    encoded as prompt tokens followed by target tokens and EOS. The prompt
    positions are masked with -100 so the loss covers only the target.

    Args:
        batch: dict with parallel lists under "text_input" (prompts) and
            "text_target" (expected completions).
        max_length: maximum number of tokens kept per example; longer
            sequences are truncated from the right.

    Returns:
        Dict of lists with "input_ids", "attention_mask" and "labels". The
        three entries of one example always have equal length. Sequences are
        left unpadded; padding is done by the data collator.
    """
    eos_id = tokenizer.eos_token_id
    features = {"input_ids": [], "attention_mask": [], "labels": []}
    for prompt, target in zip(batch["text_input"], batch["text_target"]):
        prompt_ids = tokenizer(prompt)["input_ids"]
        # No special tokens here: the prompt already carries any start token.
        target_ids = tokenizer(target, add_special_tokens=False)["input_ids"]
        if eos_id is not None:
            # Teach the model where the formatted note ends.
            target_ids = target_ids + [eos_id]
        input_ids = (prompt_ids + target_ids)[:max_length]
        labels = ([-100] * len(prompt_ids) + target_ids)[:max_length]
        features["input_ids"].append(input_ids)
        features["attention_mask"].append([1] * len(input_ids))
        features["labels"].append(labels)
    return features

# ds is a DatasetDict after train_test_split, so ds.column_names is a dict
# keyed by split name rather than a list of columns. Take the column list
# from one split (both splits share the same columns).
tok_ds = ds.map(tokenize_fn, batched=True, remove_columns=ds["train"].column_names)


In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq

# Batches are padded by the collator, to a length that is a multiple of 8,
# and returned as PyTorch tensors.
collator = DataCollatorForSeq2Seq(tokenizer, return_tensors="pt", pad_to_multiple_of=8)

# Memory-lean settings for a short demonstration run.
args = TrainingArguments(
    output_dir="./mistral_lora",
    num_train_epochs=1,
    learning_rate=2e-4,
    # One example per step, accumulated over 4 steps (effective batch size 4).
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    fp16=True,
    logging_steps=5,
    # No intermediate checkpoints are written during training.
    save_strategy="no",
)

trainer = Trainer(
    model=model,
    args=args,
    data_collator=collator,
    train_dataset=tok_ds["train"],
    eval_dataset=tok_ds["test"],
)

# Run the fine-tuning loop.
trainer.train()


In [None]:
# Write the trained model, then the tokenizer, into the same directory.
for _artifact in (model, tokenizer):
    _artifact.save_pretrained("./mistral_lora")


In [None]:
from peft import PeftModel

# Same prompt layout as the training prompts built by build_prompt.
prompt = """Format into Salesforce notes:

RetailAxis – Merchandising Lead Sarah Gold: 
Struggling with inventory sync. Wants cloud migration plan. Will send inventory reports.

Formatted:"""

# Local import: BitsAndBytesConfig is not imported in this cell.
from transformers import BitsAndBytesConfig

# Load base model (no LoRA). Quantization goes through quantization_config;
# the bare load_in_4bit keyword on from_pretrained is deprecated.
base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)

# Put the inputs on whatever device the model was placed on instead of
# hard-coding "cuda", so the cell also works when no GPU is available.
inputs = tokenizer(prompt, return_tensors="pt").to(base.device)

out1 = base.generate(**inputs, max_new_tokens=256)
print("BASE:\n", tokenizer.decode(out1[0], skip_special_tokens=True))

# Load LoRA fine-tuned model: attach the saved adapter to the base model.
# The base output above is generated first, before the adapter is attached.
lora = PeftModel.from_pretrained(base, "./mistral_lora")
out2 = lora.generate(**inputs, max_new_tokens=256)
print("LoRA:\n", tokenizer.decode(out2[0], skip_special_tokens=True))
