# Preparing the environment and installing libraries:

In [55]:
!nvidia-smi

Fri Feb 28 18:53:01 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.124.04             Driver Version: 570.124.04     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX 4000 Ada Gene...    Off |   00000000:01:00.0 Off |                  Off |
| 30%   29C    P8             11W /  130W |    1889MiB /  20475MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [None]:
%pip install transformers datasets bitsandbytes peft torch -q
%pip install rouge-score -q
%pip install blobfile tiktoken
%pip install --no-cache-dir --upgrade protobuf

In [None]:
%pip uninstall -y tiktoken
%pip install --no-cache-dir --upgrade tiktoken

In [None]:
%pip uninstall -y transformers sentencepiece
%pip install --no-cache-dir --upgrade transformers sentencepiece


In [56]:
import torch
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [57]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers import TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType

# Model Fine-tuning

### 1. Prepare the data

After the preparation, the dataset looks like this:
```json
{
  "title": "Document title...",
  "content": "Original document text...",
  "summary": "Generated summary..."
}
```
And it's split into `train`, `validation`, and `test` sets.

In [58]:
dataset = load_dataset("json", data_files={"train": "data1/"+"train.json", "val": "data1/"+"val.json", "test": "data1/"+"test.json"})

# print(dataset)

# 2. Custom Quantization

In [59]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# 3. Model Definition

There are several models interesting to fine-tune for this task. We will use the `transformers` library to fine-tune a model from the list.

In [60]:
models = {
    "mT5-Base": "google/mt5-base", # 580M params
    "Qwen2.5-0.5B": "Qwen/Qwen2.5-0.5B",
}

model_name = models["Qwen2.5-0.5B"]

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)
# print(f"Loaded model: {model_name}\n", model)
# print(model.config)

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
# print(f"Loaded Tokenizer: \n", tokenizer)

Now we will use the tokenizer to encode the input text.

In [70]:
def preprocess_function(examples):
    inputs = [doc for doc in examples["content"]]
    targets = [doc for doc in examples["summary"]]
    
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
    labels = tokenizer(targets, max_length=512, truncation=True, padding="max_length")

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_datasets = dataset.map(preprocess_function, batched=True)

Map: 100%|██████████| 82/82 [00:00<00:00, 1341.04 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 612.73 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 645.77 examples/s]


In [71]:
print(tokenized_datasets['train'][1])

{'title': 'Larry Gelbart (Part 1)', 'content': "Larry Gelbart est un producteur, réalisateur et scénariste américain, né le 25 février 1928 à Chicago, dans l'Illinois (États-Unis) et décédé le 11 septembre 2009.\n\n\n== Filmographie ==\n\n\n=== comme scénariste ===\n1950 : Your Show of Shows (en) (série télévisée)\n1954 : Caesar's Hour (en) (série télévisée)\n1960 : Hooray for Love (TV)\n1962 : L'Inquiétante dame en noir (The Notorious Landlady)\n1963 : Judy and Her Guests, Phil Silvers and Robert Goulet (TV)\n1966 : Un mort en pleine forme (The Wrong Box)\n1966 : Deux Minets pour Juliette ! (Not with My Wife, You Don't!) de Norman Panama\n1968 : La Ceinture de chasteté (La cintura di castità)\n1969 : Un couple pas ordinaire (Ruba al prossimo tuo)\n1977 : Bon Dieu !  (Oh, God!), de Carl Reiner\n1978 : Movie Movie\n1980 : Le lion sort ses griffes (Rough Cut)\n1981 : Les Voisins (Neighbors)\n1982 : Tootsie\n1984 : C'est la faute à Rio (Blame It on Rio)\n1997 : Weapons of Mass Distraction

We will use Lora to fine-tune the model efficiently without having to adjust all the parameters.

In [61]:
# Extract target_modules
target_modules = [
    'gate_proj',
    'up_proj',
    'down_proj',
]

config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=target_modules,
)


model = get_peft_model(model, config)
model.print_trainable_parameters()

trainable params: 13,271,040 || all params: 507,303,808 || trainable%: 2.6160


In [62]:
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id
generation_config.do_sample = True

In [72]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    num_train_epochs=1,
    learning_rate=2e-4,
    bf16=True,
    save_total_limit=2,
    logging_steps=20,
    output_dir="./results",
    max_steps=5,
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [73]:
from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainer


trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["val"],
    tokenizer=tokenizer,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model),
)

model.config.use_cache = False

trainer.train()

  trainer = Seq2SeqTrainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


AttributeError: 'TrainingArguments' object has no attribute 'generation_config'

# 3. Save the fine-tuned model

In [None]:
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")

# 4. Test the fine-tuned model

In [None]:
def summarize(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    summary_ids = model.generate(**inputs, max_length=128)
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Example
test_text = "La vie est insensée, mais elle ne l'est pas moins que la mort." # By D. Brahim et AutoCompletion
print("Summary:", summarize(test_text))

# 5. Evaluate the model

### 5.1 ROUGE Score

In [None]:
rouge = load_metric("rouge")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    result = rouge.compute(predictions=decoded_preds, references=decoded_labels)
    return result

results = compute_metrics(trainer.predict(tokenized_dataset["test"]))
print(results)