# Fine-Tuning Llama 2 for Summarization with QLoRA

In [None]:

# Install necessary libraries
!pip install transformers accelerate datasets bitsandbytes loralib


## Load MLSUM Dataset

In [None]:

from datasets import concatenate_datasets, load_dataset

# MLSUM is published as one configuration per language and its rows carry no
# language column. Load every language separately, tag each row with a "lang"
# column (read by add_language_prefix below), then merge into one dataset.
MLSUM_LANGUAGES = ["de", "es", "fr", "ru", "tu"]

language_splits = []
for lang_code in MLSUM_LANGUAGES:
    language_split = load_dataset("mlsum", lang_code, split="train")
    language_split = language_split.add_column("lang", [lang_code] * len(language_split))
    language_splits.append(language_split)

dataset = concatenate_datasets(language_splits)
print(dataset[0])  # Inspect the dataset structure

# Add language prefix to input text
def add_language_prefix(examples):
    """Prepend the example's language code to its text.

    Reads ``examples["lang"]`` and ``examples["text"]``, rewrites
    ``examples["text"]`` in place as ``"<lang>: <text>"`` and returns the same
    mapping that was passed in.
    """
    language = examples["lang"]
    original_text = examples["text"]
    examples["text"] = f"{language}: " + original_text
    return examples

# Apply the language prefix to every example (one example per call).
dataset = dataset.map(function=add_language_prefix)


## Preprocess Data for Llama 2

In [None]:

from transformers import AutoTokenizer

# Use the Llama 2 tokenizer
model_name = "meta-llama/Llama-2-7b-hf"  # Choose a smaller model for Colab's free tier
tokenizer = AutoTokenizer.from_pretrained(model_name)

# The Llama 2 tokenizer ships without a padding token, so any call that pads
# (e.g. padding="max_length") raises a ValueError. Reuse EOS as the pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Preprocess data
def preprocess_data(examples, max_input_length=512, max_target_length=128):
    """Tokenize a batch into fixed-length causal-LM training examples.

    A decoder-only model is trained on one sequence per example, so the
    article and its summary are concatenated (``text`` tokens, then ``summary``
    tokens, then EOS) and ``labels`` has exactly the same length as
    ``input_ids``. Label positions covering the article and the padding are
    set to -100 so the loss is computed on the summary tokens only.

    Args:
        examples: Batch mapping with ``"text"`` and ``"summary"`` lists of
            strings (as passed by ``Dataset.map(..., batched=True)``).
        max_input_length: Maximum number of article tokens kept.
        max_target_length: Maximum number of summary tokens kept, including
            the EOS token appended here.

    Returns:
        Dict with ``input_ids``, ``attention_mask`` and ``labels``; every
        sequence has length ``max_input_length + max_target_length``.
    """
    eos_id = tokenizer.eos_token_id
    # Pad manually so this works even when the tokenizer defines no pad token.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = eos_id

    prompt_ids_batch = tokenizer(
        examples["text"], max_length=max_input_length, truncation=True
    )["input_ids"]
    # No special tokens in the middle of the sequence; one slot is reserved
    # for the EOS token appended below.
    summary_ids_batch = tokenizer(
        examples["summary"],
        max_length=max_target_length - 1,
        truncation=True,
        add_special_tokens=False,
    )["input_ids"]

    total_length = max_input_length + max_target_length
    model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}
    for prompt_ids, summary_ids in zip(prompt_ids_batch, summary_ids_batch):
        target_ids = list(summary_ids) + [eos_id]
        sequence = list(prompt_ids) + target_ids
        pad_count = total_length - len(sequence)
        model_inputs["input_ids"].append(sequence + [pad_id] * pad_count)
        model_inputs["attention_mask"].append([1] * len(sequence) + [0] * pad_count)
        model_inputs["labels"].append(
            [-100] * len(prompt_ids) + target_ids + [-100] * pad_count
        )
    return model_inputs

tokenized_dataset = dataset.map(preprocess_data, batched=True, remove_columns=dataset.column_names)


## Setup QLoRA Fine-Tuning

In [None]:

from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# QLoRA quantization settings: 4-bit NF4 weights with double quantization,
# computing in float16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype="float16",
    load_in_4bit=True,
)

# Load the base model with the quantization settings above;
# device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", quantization_config=bnb_config
)

# Add LoRA layers
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Prepare the 4-bit base model for training before attaching adapters
# (standard QLoRA step for k-bit quantized models).
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    # The base model is loaded with AutoModelForCausalLM (decoder-only), so the
    # adapter must wrap it as a causal LM, not as an encoder-decoder model.
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # Verify LoRA layers are trainable


## Define Training Arguments

In [None]:

from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,  # Adjust batch size for Colab limits
    per_device_eval_batch_size=4,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=3,
    # The model is a causal LM: evaluation should report the loss on the
    # tokenized examples. With predict_with_generate=True the trainer would
    # call generate() on the padded training-style sequences, which is meant
    # for encoder-decoder models.
    predict_with_generate=False,
    fp16=True,  # Enable mixed precision
    logging_dir='./logs',
    logging_steps=10,
)

# Hold out 10% of the tokenized examples for validation; the fixed seed keeps
# the split reproducible.
split = tokenized_dataset.train_test_split(seed=42, test_size=0.1)
train_dataset, eval_dataset = split["train"], split["test"]


## Train the Model

In [None]:

# Wire the model, training arguments, tokenizer and both dataset splits into
# the trainer, then start fine-tuning.
trainer = Seq2SeqTrainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    eval_dataset=eval_dataset,
    train_dataset=train_dataset,
)
trainer.train()


## Evaluate the Model on German Data

In [None]:

# Filter evaluation dataset for German examples.
# The tokenized dataset no longer has the raw "text" column (preprocessing
# dropped every original column via remove_columns), so recover the language
# prefix by decoding the first few input tokens instead of reading x["text"].
def _is_german(example):
    """Return True when the example's decoded input starts with the "de:" prefix."""
    leading_text = tokenizer.decode(example["input_ids"][:8], skip_special_tokens=True)
    return leading_text.lstrip().startswith("de:")


german_eval_dataset = eval_dataset.filter(_is_german)

# Evaluate on German data
results = trainer.evaluate(german_eval_dataset)
print("German evaluation results:", results)

# Test a few examples
from transformers import pipeline

def summarizer(text, max_length=50, min_length=10):
    """Generate a summary for ``text`` with the fine-tuned causal LM.

    The "summarization" pipeline is built for encoder-decoder models, and a
    model loaded with device_map="auto" should not be moved with device=0, so
    this calls ``model.generate`` directly (the imported ``pipeline`` helper is
    intentionally not used).

    Args:
        text: Prompt text, including the language prefix (e.g. ``"de: ..."``).
        max_length: Maximum number of newly generated tokens.
        min_length: Minimum number of newly generated tokens.

    Returns:
        A one-element list ``[{"summary_text": str}]``, matching the shape a
        summarization pipeline returns.
    """
    encoded = tokenizer(text, return_tensors="pt").to(model.device)
    generated = model.generate(
        **encoded, max_new_tokens=max_length, min_new_tokens=min_length
    )
    # Drop the prompt tokens so only the newly generated continuation is returned.
    new_token_ids = generated[0][encoded["input_ids"].shape[1]:]
    return [{"summary_text": tokenizer.decode(new_token_ids, skip_special_tokens=True)}]


test_text = "de: Hier ist ein langer deutscher Text, der zusammengefasst werden soll."
print(summarizer(test_text, max_length=50, min_length=10))
