# SFT Training on IMDB with GPT-2-Large (NVIDIA GPU)

**Complete self-contained notebook for Supervised Fine-Tuning**
- Dataset: IMDB (sentiment reviews)
- Model: GPT-2-Large (774M parameters)
- Hardware: NVIDIA GPU (CUDA)
- Logging: Weights & Biases

This notebook runs end-to-end on its own: it installs the Python packages it needs and only requires internet access and a Weights & Biases account.

## Setup: Install Dependencies

In [None]:
# Installation pour Colab (Python 3.12+)
!pip install --upgrade pip setuptools wheel -q
!pip install transformers[torch] datasets trl wandb -q

# V√©rification
import torch
import transformers
print("‚úÖ Installation r√©ussie!")
print(f"PyTorch: {torch.__version__}")
print(f"Transformers: {transformers.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"Device: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

## 1. Mount Google Drive and Choose the Save Location (Optional - Colab)

In [None]:
# Pick the storage root for checkpoints and datasets: Google Drive when the
# google.colab package can be imported, a local ./results folder otherwise.
import os

try:
    from google.colab import drive

    drive.mount('/content/drive')
    USE_DRIVE, SAVE_BASE_PATH = True, '/content/drive/MyDrive/dpo_ppo_training'
    os.makedirs(SAVE_BASE_PATH, exist_ok=True)
    print(f"‚úÖ Google Drive mont√©. Mod√®les sauvegard√©s sur: {SAVE_BASE_PATH}")
except ImportError:
    # google.colab could not be imported, so we are not running on Colab.
    USE_DRIVE, SAVE_BASE_PATH = False, './results'
    print(f"‚ö†Ô∏è  Pas de Google Drive d√©tect√©. Stockage local: {SAVE_BASE_PATH}")

## 1b. Import Libraries and Check the GPU

In [None]:
import torch
import numpy as np
import wandb
from datetime import datetime
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

# Report the PyTorch build and, when a CUDA device is visible, its name and
# total memory (device index 0).
print(f"PyTorch version: {torch.__version__}")
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    gpu_properties = torch.cuda.get_device_properties(0)
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
    print(f"CUDA memory: {gpu_properties.total_memory / 1e9:.2f} GB")

## 2. Enable Mixed Precision & Load Dataset

In [None]:
# Allow TF32 matrix multiplications whenever a CUDA device is present
# (intended as a speed-up on Ampere or newer GPUs).
if torch.cuda.is_available():
    torch.backends.cuda.matmul.allow_tf32 = True
    print("‚úÖ TF32 enabled for faster NVIDIA computation")

# Fetch IMDB and keep a handle on its training split.
print("Loading IMDB dataset...")
dataset = load_dataset("imdb")
train_dataset = dataset["train"]

# Show the dataset size and the first 200 characters of the first review.
first_review = train_dataset[0]['text']
print(f"‚úÖ Loaded {len(train_dataset)} training examples")
print(f"Sample: {first_review[:200]}...")

## 3. Analyze Sequence Lengths (Sample 100 examples)

In [None]:
# Load the tokenizer; if it defines no padding token, reuse EOS for padding.
print("Loading tokenizer...")
model_name = "gpt2-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Draw a random sample of reviews. The generator is seeded so that
# Restart & Run All reproduces the same statistics, and the sample size is
# capped by the dataset size.
LENGTH_SAMPLE_SIZE = 100
LENGTH_SAMPLE_SEED = 42
n_sampled = min(LENGTH_SAMPLE_SIZE, len(train_dataset))
sample_indices = np.random.default_rng(LENGTH_SAMPLE_SEED).choice(
    len(train_dataset), n_sampled, replace=False
)
sample_dataset = train_dataset.select(sample_indices)

# Token count of every sampled review, measured without truncation.
print(f"Analyzing token lengths in {n_sampled} sampled examples...")
token_lengths = np.array([
    len(tokenizer(example["text"], truncation=False, return_tensors=None)["input_ids"])
    for example in sample_dataset
])

print(f"\n{'='*60}")
print(f"Token Length Statistics (n={n_sampled})")
print(f"{'='*60}")
print(f"Min:     {token_lengths.min()} tokens")
print(f"Max:     {token_lengths.max()} tokens")
print(f"Mean:    {token_lengths.mean():.1f} tokens")
print(f"Median:  {np.median(token_lengths):.1f} tokens")
print(f"P95:     {np.percentile(token_lengths, 95):.1f} tokens")
print(f"P99:     {np.percentile(token_lengths, 99):.1f} tokens")
print(f"{'='*60}\n")

# Recommendation: the 95th percentile of the sampled lengths. This value is
# informational only; the configuration cell sets max_seq_length explicitly.
max_seq_length = int(np.percentile(token_lengths, 95))
print(f"üí° Recommended max_seq_length (covers 95%): {max_seq_length}")
print(f"   Using: 512 (standard choice for GPT-2-Large on NVIDIA GPU)")

## 4. Configure Training Parameters (NVIDIA GPU Optimized)

In [None]:
# Central training configuration for a single NVIDIA GPU run.
model_name = "gpt2-large"
max_seq_length = 512
output_dir = f"{SAVE_BASE_PATH}/sft_model"

# Hyperparameters. Author's batch-size guidance by GPU memory
# (gradient_accumulation=1 in every case):
#   16GB GPU:  batch_size=8-16
#   24GB GPU:  batch_size=16-24
#   32GB+ GPU: batch_size=32+
num_epochs = 1
batch_size = 16  # starting value; tune to the available VRAM
learning_rate = 1e-5
warmup_steps = 100
logging_steps = 100
save_steps = 100  # a checkpoint is written every save_steps optimizer steps
dataloader_num_workers = 4

# Print the whole configuration as one banner-framed summary.
banner = '=' * 60
summary_lines = (
    banner,
    "Training Configuration",
    banner,
    f"Model: {model_name}",
    f"Dataset: IMDB ({len(train_dataset)} examples)",
    f"Batch size: {batch_size}",
    f"Max sequence length: {max_seq_length}",
    f"Epochs: {num_epochs}",
    f"Learning rate: {learning_rate}",
    f"DataLoader workers: {dataloader_num_workers}",
    "Mixed precision: fp16 (enabled for NVIDIA GPU)",
    "TF32: enabled (faster on Ampere+ GPUs)",
    f"Save path: {output_dir}",
    f"Checkpoint every: {save_steps} steps",
    f"{banner}\n",
)
print("\n".join(summary_lines))

## 4b. Auto-adjust Batch Size to the Available GPU Memory

In [None]:
# Derive batch_size and gradient_accumulation_steps from the total memory of
# CUDA device 0. Every tier keeps batch_size * gradient_accumulation_steps
# at 16. Without CUDA only gradient_accumulation_steps is set (to 1).
import torch

if not torch.cuda.is_available():
    gradient_accumulation_steps = 1
else:
    gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU Memory Available: {gpu_memory_gb:.1f} GB")

    if gpu_memory_gb >= 16:
        batch_size, gradient_accumulation_steps = 16, 1
    elif gpu_memory_gb >= 8:
        batch_size, gradient_accumulation_steps = 8, 2
    else:
        # Smallest tier: shrink the batch and compensate with accumulation.
        print("‚ö†Ô∏è  Low GPU memory detected! Reducing batch size...")
        batch_size, gradient_accumulation_steps = 4, 4

    print(f"‚úÖ Optimized batch_size: {batch_size}")
    print(f"‚úÖ Gradient accumulation steps: {gradient_accumulation_steps}")

def clear_gpu_cache():
    """Empty PyTorch's CUDA cache and report it; do nothing when CUDA is unavailable."""
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    print("‚úÖ GPU cache cleared")

# Run once now, before the model is loaded and training starts.
clear_gpu_cache()

## 5. Start the Weights & Biases Run

In [None]:
# Authenticate with Weights & Biases.
wandb.login()

# Name the run after the current timestamp.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
run_name = f"sft_imdb_{timestamp}"

# Record the configuration that is actually in effect: the device is detected
# rather than assumed, and gradient accumulation is logged alongside the
# per-device batch size so the effective batch size can be read from the run.
wandb.init(
    project="dpo_ppo",
    name=run_name,
    config={
        "model": model_name,
        "dataset": "imdb",
        "batch_size": batch_size,
        "gradient_accumulation_steps": gradient_accumulation_steps,
        "effective_batch_size": batch_size * gradient_accumulation_steps,
        "learning_rate": learning_rate,
        "num_epochs": num_epochs,
        "max_seq_length": max_seq_length,
        "device": "cuda" if torch.cuda.is_available() else "cpu",
    }
)

print(f"‚úÖ W&B initialized: {run_name}")

## 6. Load Model & Configure Training

In [None]:
# Load the model: reuse an already fine-tuned model from output_dir when one
# exists, otherwise start from the base checkpoint on the Hugging Face Hub.
print(f"Loading model: {model_name}")

# Weight files that mark a finished save in output_dir. Recent transformers
# releases write safetensors by default while older ones wrote
# pytorch_model.bin, and sharded saves add an index file. Checking only for
# pytorch_model.bin would miss a safetensors save and retrain on every run.
_WEIGHT_FILE_NAMES = (
    "model.safetensors",
    "model.safetensors.index.json",
    "pytorch_model.bin",
    "pytorch_model.bin.index.json",
)
# Path of the first weight file found, or None when nothing has been saved yet.
model_on_drive = next(
    (
        candidate
        for candidate in (os.path.join(output_dir, name) for name in _WEIGHT_FILE_NAMES)
        if os.path.exists(candidate)
    ),
    None,
)

if model_on_drive is not None:
    print(f"‚úÖ Mod√®le trouv√© sur Drive! Chargement depuis: {output_dir}")
    model = AutoModelForCausalLM.from_pretrained(output_dir)
    print(f"‚úÖ Mod√®le charg√© depuis Drive ({model.num_parameters() / 1e9:.2f}B parameters)")
    SKIP_TRAINING = True
else:
    print(f"üì• T√©l√©chargement du mod√®le de base depuis Hugging Face...")

    # Optional: load in 8-bit to reduce GPU memory usage.
    # Uncomment the next two lines to enable 8-bit quantization.
    # from transformers import BitsAndBytesConfig
    # model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=BitsAndBytesConfig(load_in_8bit=True))

    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Enable gradient checkpointing to save memory.
    model.gradient_checkpointing_enable()
    print(f"‚úÖ Gradient checkpointing activ√© (√©conomise ~40% m√©moire)")

    print(f"‚úÖ Mod√®le de base charg√© ({model.num_parameters() / 1e9:.2f}B parameters)")
    print(f"   ‚ö†Ô∏è  Ce mod√®le sera affin√© dans les prochaines √©tapes")
    SKIP_TRAINING = False

# fp16 mixed precision needs a CUDA device; requesting it unconditionally makes
# TrainingArguments fail on CPU-only machines.
use_fp16 = torch.cuda.is_available()

# Training arguments (NVIDIA GPU optimized + memory efficient)
training_args = TrainingArguments(
    output_dir=output_dir,
    overwrite_output_dir=True,
    num_train_epochs=num_epochs,
    per_device_train_batch_size=batch_size,
    learning_rate=learning_rate,
    warmup_steps=warmup_steps,
    weight_decay=0.01,
    logging_steps=logging_steps,
    save_steps=save_steps,
    save_total_limit=3,
    save_strategy="steps",

    # Mixed precision
    fp16=use_fp16,
    bf16=False,

    # Memory savings
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=True,

    # DataLoader
    dataloader_num_workers=dataloader_num_workers,
    dataloader_pin_memory=True,

    # Logging
    logging_dir=f"{SAVE_BASE_PATH}/logs",
    seed=42,
    report_to=["wandb"],
    load_best_model_at_end=False,
    metric_for_best_model="loss",
)

print(f"\n‚úÖ Training configuration ready (Memory optimized)")
print(f"‚úÖ Checkpoints will be saved to: {output_dir}")
print(f"   - Every {save_steps} steps")
print(f"   - Keeping last {training_args.save_total_limit} checkpoints")
print(f"\nüîß Optimisations m√©moire:")
print(f"   - Gradient checkpointing: ON (~40% m√©moire)")
# Report the optimizer that is really configured: no 8-bit optimizer is
# requested above, so the summary must not claim one is active.
print(f"   - Optimiseur: {training_args.optim}")
print(f"   - Mixed precision fp16: {'ON' if training_args.fp16 else 'OFF'}")
print(f"   - Gradient accumulation: {gradient_accumulation_steps} steps")
print(f"\nSKIP_TRAINING: {SKIP_TRAINING}")

## 6b. Tokenize and Truncate the Training Set

In [None]:
# Tokenize the reviews up front and truncate them to the configured training
# length, so no sequence exceeds what the configuration cell announced.
print("Tokenizing and truncating dataset...")

def tokenize_function(examples):
    """Tokenize a batch of reviews, truncated to ``max_seq_length`` tokens.

    No padding is applied here: padding every review to a fixed length would
    fill ``input_ids`` with pad (= EOS) tokens and waste compute on them.
    Batches are expected to be padded to their longest member at collation
    time by the trainer's data collator.

    Args:
        examples: A batch as passed by ``Dataset.map(batched=True)``; its
            ``"text"`` entry is handed to the tokenizer.

    Returns:
        The tokenizer output for the batch.
    """
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=max_seq_length,  # same limit that is printed and logged to W&B
    )

# Apply tokenization. The cell overwrites train_dataset, so guard against a
# second run: once the "text" column has been removed, mapping again would fail.
# Note: a dataset tokenized earlier in this session is reused as-is.
if "input_ids" in train_dataset.column_names:
    tokenized_dataset = train_dataset
else:
    tokenized_dataset = train_dataset.map(
        tokenize_function,
        batched=True,
        remove_columns=["text", "label"],
        desc="Tokenizing dataset"
    )

print(f"‚úÖ Dataset tokenized and truncated")
print(f"   First 10 token ids: {tokenized_dataset[0]['input_ids'][:10]}...")

# Replace train_dataset with the tokenized version
train_dataset = tokenized_dataset
print(f"‚úÖ Ready for training with {len(train_dataset)} examples")

## 7. Initialize SFTTrainer and Train (Resuming from the Latest Checkpoint)

In [None]:
# Train only when no fine-tuned model was found, resuming from the most recent
# checkpoint in output_dir when one exists.
from pathlib import Path

RESUME_FROM_CHECKPOINT = True


def find_latest_checkpoint(checkpoint_root):
    """Return the highest-numbered ``checkpoint-<step>`` directory, or None.

    Only directories whose suffix after the last "-" is a decimal number are
    considered, so stray files or oddly named folders are never picked.

    Args:
        checkpoint_root: Directory that holds the ``checkpoint-*`` folders.

    Returns:
        The checkpoint path as a string, or ``None`` when there is none.
    """
    numbered = []
    for path in Path(checkpoint_root).glob("checkpoint-*"):
        step = path.name.rsplit("-", 1)[-1]
        if path.is_dir() and step.isdecimal():
            numbered.append((int(step), path))
    if not numbered:
        return None
    return str(max(numbered, key=lambda item: item[0])[1])


resume_checkpoint = None

if SKIP_TRAINING:
    print("‚è≠Ô∏è  Mod√®le d√©j√† entra√Æn√© trouv√©! Passage de l'entra√Ænement.")
else:
    # Look for a checkpoint only when training will actually run.
    if RESUME_FROM_CHECKPOINT:
        resume_checkpoint = find_latest_checkpoint(output_dir)
        if resume_checkpoint:
            print(f"üîÑ Reprise depuis le checkpoint: {resume_checkpoint}")
        else:
            print("‚ö†Ô∏è  Aucun checkpoint trouv√©, entra√Ænement from scratch")

    # train_dataset is already tokenized (it carries input_ids), so no
    # formatting function is needed. The notebook's tokenizer is passed
    # explicitly so the trainer uses the same padding token that was
    # configured when the tokenizer was loaded.
    print("Initializing SFTTrainer...")
    trainer = SFTTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        processing_class=tokenizer,
    )

    print("‚úÖ Trainer initialized")
    print(f"\n{'='*60}")
    print("Starting SFT Training...")
    print(f"{'='*60}\n")

    # resume_checkpoint is None for a fresh run, otherwise a checkpoint path.
    trainer.train(resume_from_checkpoint=resume_checkpoint)



## 8. Save Fine-tuned Model

In [None]:
# Save the fine-tuned model and the tokenizer, unless a saved model was reused.
if not SKIP_TRAINING:
    print(f"Saving model to {output_dir}")
    trainer.model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    print(f"‚úÖ Model saved!")
else:
    print(f"‚úÖ Model already saved on Drive at: {output_dir}")

print(f"\n{'='*60}")
print(f"‚úÖ SFT Training Complete!")
print(f"{'='*60}")
print(f"Model location: {output_dir}")
print(f"Files:")
# List what is really on disk instead of a hard-coded list: the weight file
# name depends on the serialization format used by save_pretrained.
saved_files = sorted(
    name for name in os.listdir(output_dir)
    if os.path.isfile(os.path.join(output_dir, name))
)
for name in saved_files:
    print(f"  - {name}")
print(f"\nYou can now:")
print(f"  1. Use this model for DPO training")
print(f"  2. Generate completions with the fine-tuned model")
print(f"  3. Deploy to inference servers")

# Close W&B run
wandb.finish()
print(f"\n‚úÖ W&B run closed")

## 9. Test Fine-tuned Model (Optional)

In [None]:
# Reload the fine-tuned model and its tokenizer from the save location.
print("Loading fine-tuned model from Drive...")
model_path = f"{SAVE_BASE_PATH}/sft_model"
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Move model to GPU when one is available
if torch.cuda.is_available():
    model = model.to("cuda")
    print(f"‚úÖ Model loaded on GPU")
else:
    print(f"‚ö†Ô∏è  Model loaded on CPU")
model.eval()

# Load IMDB test dataset
print("Loading IMDB test dataset...")
test_dataset = load_dataset("imdb")["test"]

# Select 10 random examples. Python's RNG drives the selection; torch is
# seeded too so the sampled generations are reproducible.
import random
random.seed(42)
torch.manual_seed(42)
test_indices = random.sample(range(len(test_dataset)), 10)
test_samples = test_dataset.select(test_indices)

print(f"‚úÖ Loaded {len(test_samples)} test samples\n")

# Generate completions from 8-word prefixes
print(f"{'='*80}")
print("Testing Model on IMDB Test Set (8-word prefixes)")
print(f"{'='*80}\n")

for idx, example in enumerate(test_samples, 1):
    # First 8 words form the prompt; the next 12 are shown for reference.
    words = example["text"].split()
    prefix = " ".join(words[:8])
    actual_continuation = " ".join(words[8:20])

    # Tokenize with an attention mask and place the tensors on the model's device.
    inputs = tokenizer(prefix, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Sample up to 50 new tokens.
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=50,
            num_beams=1,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Slicing the decoded text by
    # len(prefix) is fragile because decoding is not guaranteed to reproduce
    # the prefix character for character.
    completion = tokenizer.decode(
        output_ids[0][prompt_length:], skip_special_tokens=True
    ).strip()

    # Display
    print(f"Example {idx}")
    print(f"{'‚îÄ'*80}")
    print(f"Prefix (8 words):     {prefix}")
    print(f"Generated text:       {completion[:150]}...")
    print(f"Actual continuation:  {actual_continuation[:150]}...")
    print()

print(f"{'='*80}")
print("‚úÖ Testing complete!")

## 10. Load Model from Drive & Test on IMDB Test Set

## 11. Generate Preference Pairs for DPO Training

**Pipeline pour créer les paires de préférences :**
1. **Test set** (25K) → 1000 pour le test final + 24K restants
2. **Validation set** → 1000 extraits des 24K restants
3. **Train set** → 1000 exemples avec 4 générations chacun
4. **Génération** → 4 complétions par prompt (prompt de n = 2 à 8 tokens)
5. **Classification** → Sentiment avec siebert/sentiment-roberta-large-english
6. **Paires** → 6 paires (chosen/rejected) par prompt = 6000 paires au total

In [None]:
import json
import random
from itertools import combinations
from transformers import pipeline
from tqdm.auto import tqdm

print("="*80)
print("√âTAPE 1: Pr√©paration des datasets")
print("="*80)

# Seed Python's RNG so the splits below are reproducible.
random.seed(42)

# Load IMDB datasets
print("\nüì• Chargement des datasets IMDB...")
imdb_full = load_dataset("imdb")
imdb_test = imdb_full["test"]
imdb_train = imdb_full["train"]

print(f"‚úÖ IMDB test: {len(imdb_test)} exemples")
print(f"‚úÖ IMDB train: {len(imdb_train)} exemples")

# 1. Hold out 1000 random test examples for the final evaluation.
test_indices = random.sample(range(len(imdb_test)), 1000)
test_final = imdb_test.select(test_indices)

# 2. Everything else in the test split (all examples minus the 1000 held out).
#    Membership is checked against a set: testing against the list would scan
#    it for every one of the dataset's indices.
held_out = set(test_indices)
remaining_indices = [i for i in range(len(imdb_test)) if i not in held_out]
test_remaining = imdb_test.select(remaining_indices)

print(f"\n‚úÖ Test final: {len(test_final)} exemples")
print(f"‚úÖ Test restants: {len(test_remaining)} exemples")

# 3. Draw 1000 validation examples from the remainder, so they never overlap
#    with the final test set.
validation_indices = random.sample(range(len(test_remaining)), 1000)
validation_set = test_remaining.select(validation_indices)

print(f"‚úÖ Validation: {len(validation_set)} exemples")

# 4. Draw 1000 training examples used as prompts for preference pairs.
train_indices = random.sample(range(len(imdb_train)), 1000)
train_dpo = imdb_train.select(train_indices)

print(f"‚úÖ Train DPO: {len(train_dpo)} exemples")

# Persist the three subsets under SAVE_BASE_PATH/datasets.
print(f"\nüíæ Sauvegarde des datasets sur Drive...")
datasets_path = f"{SAVE_BASE_PATH}/datasets"
os.makedirs(datasets_path, exist_ok=True)

test_final.save_to_disk(f"{datasets_path}/test_final")
validation_set.save_to_disk(f"{datasets_path}/validation")
train_dpo.save_to_disk(f"{datasets_path}/train_dpo")

print(f"‚úÖ Datasets sauvegard√©s dans: {datasets_path}")

print("\n" + "="*80)
print("√âTAPE 2: Chargement du mod√®le SFT et du classifier")
print("="*80)

# Load the fine-tuned SFT model and its tokenizer from the save location.
sft_model_dir = f"{SAVE_BASE_PATH}/sft_model"
print(f"\nüì• Chargement du mod√®le SFT depuis: {sft_model_dir}")
sft_model = AutoModelForCausalLM.from_pretrained(sft_model_dir)
sft_tokenizer = AutoTokenizer.from_pretrained(sft_model_dir)

# Use the GPU for the SFT model when one is available.
has_cuda = torch.cuda.is_available()
if has_cuda:
    sft_model = sft_model.to("cuda")
    print("‚úÖ Mod√®le SFT charg√© sur GPU")
else:
    print("‚ö†Ô∏è  Mod√®le SFT charg√© sur CPU")

# Switch to evaluation mode for generation.
sft_model.eval()

# Sentiment classifier used to score the generations (GPU 0 if present, else CPU).
print(f"\nüì• Chargement du classifier de sentiment: siebert/sentiment-roberta-large-english")
classifier_device = 0 if has_cuda else -1
sentiment_classifier = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
    device=classifier_device,
)
print("‚úÖ Classifier charg√©")

print("\n" + "="*80)
print("√âTAPE 3: G√©n√©ration des paires de pr√©f√©rences")
print("="*80)
print(f"\nüìä Configuration:")
print(f"   - Nombre d'exemples: {len(train_dpo)}")
print(f"   - G√©n√©rations par prompt: 4")
print(f"   - Paires par prompt: 6 (combinaisons C(4,2))")
print(f"   - Total paires attendues: {len(train_dpo) * 6}")
print(f"   - Longueur prompt: 2-8 tokens (al√©atoire)")

# random.seed() only controls Python's RNG (the prompt lengths); seed torch as
# well so the sampled completions are reproducible.
torch.manual_seed(42)

# Collected {"prompt", "chosen", "rejected"} records.
preference_pairs = []

print(f"\nüöÄ G√©n√©ration en cours...")

for example in tqdm(train_dpo, desc="G√©n√©ration des paires"):
    # Tokenize the full review without special tokens.
    full_tokens = sft_tokenizer.encode(example["text"], add_special_tokens=False)

    # The prompt is the first n tokens, n drawn uniformly from 2..8 and capped
    # by the review length.
    n = min(random.randint(2, 8), len(full_tokens))
    if n == 0:
        # An empty review yields no prompt tokens; generation would fail on it.
        continue

    prompt_tokens = full_tokens[:n]
    prompt_text = sft_tokenizer.decode(prompt_tokens, skip_special_tokens=True)

    # Sample the 4 completions in a single generate() call instead of four
    # sequential ones. The prompt has no padding, so the attention mask is all
    # ones; passing it explicitly avoids relying on inference from the pad
    # token, which is the same id as EOS here.
    input_ids = torch.tensor([prompt_tokens], device=sft_model.device)
    with torch.no_grad():
        output_ids = sft_model.generate(
            input_ids,
            attention_mask=torch.ones_like(input_ids),
            max_new_tokens=50,
            do_sample=True,
            temperature=1.0,  # more diversity
            top_p=0.95,
            top_k=50,
            num_return_sequences=4,
            pad_token_id=sft_tokenizer.eos_token_id,
        )

    # Decode full texts (prompt + completion).
    # NOTE(review): "chosen"/"rejected" therefore contain the prompt as well as
    # the completion, while "prompt" is stored separately. Confirm that the
    # downstream DPO code expects this and does not prepend the prompt again.
    completions = sft_tokenizer.batch_decode(output_ids, skip_special_tokens=True)

    # Score all completions in one classifier call. Truncation is delegated to
    # the classifier's tokenizer (512 tokens) rather than cutting the string at
    # 512 characters.
    results = sentiment_classifier(completions, truncation=True, max_length=512)

    # Map each result to a positivity score in [0, 1]: the confidence for
    # POSITIVE, or one minus the confidence for any other label.
    sentiment_scores = [
        result['score'] if result['label'] == 'POSITIVE' else 1 - result['score']
        for result in results
    ]

    # Every unordered pair of completions becomes one preference pair; the
    # completion with the higher positivity score is "chosen".
    for i, j in combinations(range(len(completions)), 2):
        if sentiment_scores[i] > sentiment_scores[j]:
            chosen, rejected = completions[i], completions[j]
        else:
            chosen, rejected = completions[j], completions[i]

        preference_pairs.append({
            "prompt": prompt_text,
            "chosen": chosen,
            "rejected": rejected,
        })

print(f"\n‚úÖ G√©n√©ration termin√©e!")
print(f"   - Total paires cr√©√©es: {len(preference_pairs)}")
print(f"   - Paires par exemple: {len(preference_pairs) / len(train_dpo):.1f}")

# Write the preference pairs to a UTF-8 JSON file next to the saved datasets.
print(f"\nüíæ Sauvegarde des paires de pr√©f√©rences...")
pairs_path = f"{datasets_path}/preference_pairs.json"
with open(pairs_path, 'w', encoding='utf-8') as pairs_file:
    pairs_file.write(json.dumps(preference_pairs, indent=2, ensure_ascii=False))

print(f"‚úÖ Paires sauvegard√©es: {pairs_path}")

# Preview up to three pairs, cutting long texts at 100 characters.
sample_banner = '=' * 80
print(f"\n{sample_banner}")
print("EXEMPLE DE PAIRES G√âN√âR√âES")
print(f"{sample_banner}")
for pair_number, pair in enumerate(preference_pairs[:3], start=1):
    print(f"\nPaire {pair_number}:")
    print(f"Prompt:   {pair['prompt']}")
    print(f"Chosen:   {pair['chosen'][:100]}...")
    print(f"Rejected: {pair['rejected'][:100]}...")

# Final summary: where each artifact was written.
print(f"\n{sample_banner}")
print("‚úÖ G√âN√âRATION DES PAIRES DE PR√âF√âRENCES TERMIN√âE")
print(f"{sample_banner}")
print(f"\nüìÅ Fichiers cr√©√©s:")
print(f"   - Test final:     {datasets_path}/test_final/")
print(f"   - Validation:     {datasets_path}/validation/")
print(f"   - Train DPO:      {datasets_path}/train_dpo/")
print(f"   - Paires DPO:     {pairs_path}")
print(f"\nüéØ Pr√™t pour l'entra√Ænement DPO!")