# LoRA Fine-Tuning Notebook

Goal of this notebook: guide you through the end-to-end process of fine-tuning a Large Language Model with LoRA adapters. It assumes a basic understanding of the method.

In [1]:
# import necessary python libraries
from peft import LoraConfig, get_peft_model, PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling, pipeline
from torch.utils.data import Dataset
import torch
import pickle
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
import time
import math
from datasets import load_dataset
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# ---------------------------------------------------------------------------
# Notebook parameters: every tunable value used by the cells below
# ---------------------------------------------------------------------------

# Input data: pickle file holding the text chunks used for fine-tuning
chunk_filename_pkl = "data/kahneman_chunks.pkl"

# Base model identifier handed to from_pretrained
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# model_name = "deepseek-ai/deepseek-llm-7b-base"  # alternative base model

# LoRA adapter settings (forwarded to LoraConfig)
r = 16  # rank of the adapter
lora_alpha = 32  # scaling factor
lora_dropout = 0.1  # dropout probability
target_modules = ["q_proj", "v_proj"]  # more can be added; depends on the weight matrices of your model
bias = "none"
task_type = "CAUSAL_LM"

# Training settings (forwarded to TrainingArguments)
# NOTE(review): the directory name mentions deepseek-llm-7b-base while model_name
# above selects DeepSeek-R1-Distill-Qwen-1.5B -- confirm this is the intended path
output_dir = "./lora_finetuned_model/deepseek-llm-7b-base"  # where the fine-tuned model is saved
per_device_train_batch_size = 4  # batch size per device (e.g. GPU)
gradient_accumulation_steps = 4  # batches whose gradients are accumulated before each update; simulates a larger batch size
learning_rate = 2e-4
num_train_epochs = 3  # number of training epochs
save_strategy = "epoch"  # when the model is saved
fp16 = True  # mixed-precision switches: fp16 on ...
bf16 = False  # ... and bf16 off
logging_steps = 10  # number of iterations between two log reports
report_to = "none"  # external logging services such as WandB ("none" disables them)

In [3]:
# Load the tokenizer and the base model selected by `model_name`
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

# The dataset cell pads every example with padding="max_length", which requires
# a pad token. Some causal-LM tokenizers ship without one, so fall back to the
# EOS token in that case (no effect when a pad token is already defined).
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

original_model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_name)

In [4]:
# Read the pickled text chunks from disk.
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
with open(chunk_filename_pkl, mode="rb") as chunk_file:
    kahneman_paragraphs = pickle.load(chunk_file)

In [None]:
# Hold out 20% of the paragraphs for testing; the remaining 80% are used for training.
# The fixed random_state keeps the split reproducible across runs.
train_texts, test_texts = train_test_split(
    kahneman_paragraphs,
    test_size=0.2,
    random_state=42,
)

print(f"Training set size: {len(train_texts)} paragraphs")
print(f"Test set size: {len(test_texts)} paragraphs")

# Open questions:
# - Is a random split the best idea, given that the paragraphs are not independent?
# - Splitting means we lose some paragraphs for training: the held-out paragraphs
#   cannot be used for compressing the data into the LLM.

In [None]:
# Define the LoRA configuration from the values set in the parameters cell
lora_config = LoraConfig(
    r=r,  # Rank: controls adaptation capacity (previously hard-coded, ignoring the `r` parameter)
    lora_alpha=lora_alpha,  # Scaling factor
    lora_dropout=lora_dropout,  # Dropout probability
    target_modules=target_modules,  # Target attention layers
    bias=bias,
    task_type=task_type,
)

# Apply LoRA to the model.
# NOTE(review): get_peft_model is understood to inject the adapters into
# `original_model` itself, so `original_model` should not be treated as an
# untouched baseline afterwards -- confirm against the PEFT documentation.
model = get_peft_model(original_model, lora_config)
model.print_trainable_parameters()  # Verify trainable params

In [None]:
class TextDataset(Dataset):
    """Pre-tokenized causal-LM dataset built from a collection of raw texts.

    Each text is tokenized once at construction time, truncated or padded to
    exactly ``max_length`` tokens, and stored as a dict of 1-D tensors with the
    keys ``input_ids``, ``attention_mask`` and ``labels``.

    Args:
        texts: Iterable of strings to tokenize.
        tokenizer: Callable tokenizer. When called with ``return_tensors="pt"``
            it must return ``input_ids`` and ``attention_mask`` tensors that
            carry a leading batch dimension of size 1.
        max_length: Fixed sequence length of every stored example.
    """

    def __init__(self, texts, tokenizer, max_length=512):
        self.examples = []

        for text in texts:
            encoding = tokenizer(
                text,
                truncation=True,
                padding="max_length",
                max_length=max_length,
                return_tensors="pt",
            )

            # Drop only the batch dimension. A bare squeeze() would also
            # collapse the sequence dimension when max_length == 1.
            input_ids = encoding["input_ids"].squeeze(0)
            attention_mask = encoding["attention_mask"].squeeze(0)

            # Labels mirror the inputs, with padding positions set to -100 so
            # they are ignored. The attention mask (not pad_token_id) decides
            # what counts as padding: a real token that shares the pad id
            # (e.g. when pad == EOS) keeps its label, and a tokenizer without
            # a pad_token_id cannot silently disable the masking.
            labels = input_ids.clone()
            labels[attention_mask == 0] = -100

            self.examples.append({
                "input_ids": input_ids,
                "attention_mask": attention_mask,
                "labels": labels,
            })

    def __len__(self):
        """Return the number of tokenized examples."""
        return len(self.examples)

    def __getitem__(self, idx):
        """Return the example dict stored at position ``idx``."""
        return self.examples[idx]


# Tokenize both splits: the training split first, then the test split
train_dataset, test_dataset = (
    TextDataset(split_texts, tokenizer)
    for split_texts in (train_texts, test_texts)
)

In [None]:
# Batch collator for causal language modeling (mlm=False switches off masked-LM mode)
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

In [None]:
# Gather the training hyper-parameters from the parameters cell ...
training_kwargs = dict(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    learning_rate=learning_rate,
    num_train_epochs=num_train_epochs,
    save_strategy=save_strategy,
    fp16=fp16,  # mixed-precision switches (fp16 / bf16)
    bf16=bf16,
    logging_steps=logging_steps,
    report_to=report_to,  # "none" disables logging to external services like WandB
)

# ... and build the TrainingArguments object from them
training_args = TrainingArguments(**training_kwargs)

In [None]:
# Trainer that fine-tunes the LoRA-wrapped model on the training split
trainer_kwargs = {
    "model": model,
    "args": training_args,
    "train_dataset": train_dataset,
    "data_collator": data_collator,
}
trainer = Trainer(**trainer_kwargs)

In [None]:
# Time the training run with a monotonic clock: unlike time.time(),
# time.perf_counter() is not affected by system clock adjustments.
start_time = time.perf_counter()

# Train the model
trainer.train()

# Record the end of training
end_time = time.perf_counter()

# Convert the elapsed time from seconds to minutes
elapsed_time = (end_time - start_time) / 60

# Print the training duration in minutes
print(f"Training took {elapsed_time:.2f} minutes.")

In [None]:
# Persist the LoRA fine-tuned model first, then the tokenizer, into output_dir
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)

In [None]:
# Evaluate the fine-tuned model on the held-out test set.
# Use the same TrainingArguments and data collator as the baseline evaluation
# of the original model, so that both perplexities are measured under identical
# settings. A bare Trainer(model=model) would fall back to default arguments
# and the default collator instead.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    eval_dataset=test_dataset,
)
eval_results = trainer.evaluate()

# Perplexity is the exponential of the evaluation loss
perplexity = math.exp(eval_results["eval_loss"])
print(f"Perplexity: {perplexity:.2f}")

In [None]:
# Baseline: evaluate a freshly loaded copy of the base model on the test set
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the base model again and move it to the selected device
original_model_clean = AutoModelForCausalLM.from_pretrained(model_name)
original_model_clean = original_model_clean.to(device)

trainer_orig = Trainer(
    model=original_model_clean,
    args=training_args,
    eval_dataset=test_dataset,
    data_collator=data_collator,
)

# Perplexity of the original model = exp(evaluation loss)
eval_results_orig = trainer_orig.evaluate()
orig_perplexity = math.exp(eval_results_orig["eval_loss"])
print(f"Original Model Perplexity: {orig_perplexity:.2f}")

Result: the perplexity of the fine-tuned model on the test set decreased compared with the original model :)