## Model Fine-tuning

!pip install transformers datasets accelerate bitsandbytes peft -q
!pip install rouge-score sacrebleu -q

In [None]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType
from datasets import load_metric

import torch
from bitsandbytes import quantize

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU so
# that tensors can still be placed on `device` on CUDA-less machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

The dataset looks like this:
```json
{
  "text": "Original document text...",
  "summary": "Generated summary..."
}
```

In [None]:
# Build the split -> file mapping first, then hand it to the JSON loader.
data_dir = "data/"
split_files = {
    "train": data_dir + "train.json",
    "test": data_dir + "test.json",
}
dataset = load_dataset("json", data_files=split_files)

print(dataset)

Several models are interesting candidates for fine-tuning on this task. We will use the `transformers` library to fine-tune one model from the list below.

In [None]:
# BitsAndBytesConfig is the supported way to request 4-bit loading; it is
# imported here so this cell is self-contained.
from transformers import BitsAndBytesConfig

# Candidate checkpoints, keyed by a human-readable label.
# NOTE(review): only the mT5 entry is an encoder-decoder checkpoint. The other
# entries appear to be decoder-only models, which would need
# AutoModelForCausalLM instead of AutoModelForSeq2SeqLM — confirm (and verify
# the hub ids) before switching `model_name`.
models = {
    "mT5-Base": "google/mt5-base", # 580M params
    "Qwen2.5-0.5B": "Qwen/Qwen2.5-0.5B",
    "Mistral-7B (subset possible)": "mistralai/Mistral-7B",
    "Gemma-2B": "google/gemma-2b",
    "TinyLlama-1.1B": "TinyLlama/TinyLlama-1.1B",
    "Atlas-Chat-2B": "Atlas/Atlas-Chat-2B",
}

model_name = models["mT5-Base"]

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    # Equivalent to the former `load_in_4bit=True` keyword, expressed through
    # the explicit quantization config.
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)
# No explicit `model.to(...)` here: `device_map="auto"` already places the
# weights, and moving a 4-bit bitsandbytes model with `.to()` is rejected by
# transformers.
print(f"Loaded model: {model_name}\n", model)

Now we will use the tokenizer to encode both the input texts and their target summaries.

In [None]:
def preprocess_function(examples):
    """Tokenize source texts and target summaries for seq2seq training.

    Args:
        examples: A mapping with "text" and "summary" entries. With
            ``batched=True`` each entry is a list of strings; a single
            string per entry is also accepted.

    Returns:
        The tokenized inputs (padded/truncated to 512 tokens) with an added
        "labels" entry holding the summary token ids (padded/truncated to
        128 tokens), where padding positions are replaced by -100.
    """
    inputs = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
    labels = tokenizer(examples["summary"], truncation=True, padding="max_length", max_length=128)

    pad_id = tokenizer.pad_token_id

    def _mask_padding(sequence):
        # -100 is the index ignored by the cross-entropy loss, so padded
        # label positions no longer contribute to training.
        return [-100 if token_id == pad_id else token_id for token_id in sequence]

    label_ids = labels["input_ids"]
    if label_ids and isinstance(label_ids[0], (list, tuple)):
        # Batched call: one id sequence per example.
        inputs["labels"] = [_mask_padding(sequence) for sequence in label_ids]
    else:
        # Single-example call: one flat id sequence.
        inputs["labels"] = _mask_padding(label_ids)
    return inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True)

We will use LoRA (Low-Rank Adaptation) to fine-tune the model efficiently, training only a small set of adapter weights instead of updating all of the model's parameters.

In [None]:
# Imported here so this cell is self-contained.
from peft import prepare_model_for_kbit_training

# LoRA hyper-parameters for a sequence-to-sequence model: rank-16 adapters,
# scaling factor 32, 10% dropout on the adapter path, bias terms untouched.
config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)

# The base model was loaded in 4-bit; PEFT documents this preparation step
# for k-bit quantized models before adapters are attached.
model = prepare_model_for_kbit_training(model)

model = get_peft_model(model, config)
model.print_trainable_parameters()


In [None]:
import inspect

# Newer transformers releases renamed `evaluation_strategy` to
# `eval_strategy`; since the install above is unpinned, use whichever keyword
# the installed TrainingArguments actually accepts.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    learning_rate=5e-5,
    weight_decay=0.01,
    save_total_limit=2,
    num_train_epochs=3,
    report_to="none",
    # NOTE(review): T5-family checkpoints are reported to be unstable under
    # fp16; if the loss becomes NaN, consider bf16 where the hardware allows.
    fp16=True,
    # Evaluate at the end of every epoch.
    **{_eval_strategy_key: "epoch"},
)

import inspect

# Recent transformers releases take the tokenizer through `processing_class`
# and deprecate the `tokenizer` keyword; use whichever one the installed
# Trainer accepts.
_tokenizer_key = (
    "processing_class"
    if "processing_class" in inspect.signature(Trainer.__init__).parameters
    else "tokenizer"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    **{_tokenizer_key: tokenizer},
)

trainer.train()

# 3. Save the fine-tuned model

In [None]:
# Persist the model and the tokenizer into the same directory.
output_dir = "./fine_tuned_model"
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)

# 4. Test the fine-tuned model

In [None]:
def summarize(text):
    """Generate a summary of `text` with the fine-tuned model.

    Args:
        text: The document to summarize; it is truncated to 512 tokens.

    Returns:
        The decoded summary string (at most 128 generated tokens), with
        special tokens removed.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    # The tokenizer returns CPU tensors; move them to wherever the model
    # lives so generation does not fail with a device mismatch.
    inputs = inputs.to(model.device)
    summary_ids = model.generate(**inputs, max_length=128)
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Example
test_text = "La vie est insensée, mais elle ne l'est pas moins que la mort." # By D. Brahim et AutoCompletion
print("Summary:", summarize(test_text))

# 5. Evaluate the model

### 5.1 ROUGE Score

In [None]:
rouge = load_metric("rouge")

def compute_metrics(eval_pred):
    """Compute ROUGE between decoded predictions and reference summaries.

    Args:
        eval_pred: A tuple-like object whose first item holds the model
            predictions and whose second item holds the label ids. Extra
            trailing items (such as the metrics field of the object returned
            by ``trainer.predict``) are ignored.

    Returns:
        The result of ``rouge.compute`` on the decoded texts.
    """
    import numpy as np

    # Index instead of unpacking: the object returned by `trainer.predict`
    # carries a third field, which would break a two-name unpacking.
    predictions, labels = eval_pred[0], eval_pred[1]

    # Model outputs may come back as a tuple whose first entry is the logits.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.asarray(predictions)

    # A 3-D array carries a trailing vocabulary axis; reduce it to token ids.
    # NOTE(review): argmax over teacher-forced logits only approximates real
    # generation; a generate()-based evaluation would be more faithful.
    if predictions.ndim == 3:
        predictions = predictions.argmax(axis=-1)

    # Negative ids (e.g. the -100 ignore index) cannot be decoded; map them
    # back to the pad token, which `skip_special_tokens=True` then drops.
    pad_id = tokenizer.pad_token_id
    predictions = np.where(predictions < 0, pad_id, predictions)
    labels = np.asarray(labels)
    labels = np.where(labels < 0, pad_id, labels)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    result = rouge.compute(predictions=decoded_preds, references=decoded_labels)
    return result

results = compute_metrics(trainer.predict(tokenized_dataset["test"]))
print(results)