# Mistral-7B QLora Training V2 - Overfitting Reduction

**Date:** 2025-11-19
**Objective:** Reduce overfitting from V1 (gap: 2.27 ‚Üí target: < 0.15)
**Configuration:** Optimized for better generalization

## [1] Import Libraries & Setup

In [1]:
# Core imports
import os
import sys
import json
import random
import time
from pathlib import Path
import shutil

# MLX imports
import mlx.core as mx
import mlx.nn as nn
from mlx.optimizers import AdamW
from mlx_lm import load
from mlx_lm.tuner import linear_to_lora_layers

# Utils
import pandas as pd
import numpy as np
from tqdm import tqdm
import psutil
import matplotlib.pyplot as plt

print("‚úÖ Todas as bibliotecas importadas com sucesso!")
print(f"MLX Device: {mx.default_device()}")

‚úÖ Todas as bibliotecas importadas com sucesso!
MLX Device: Device(gpu, 0)


## [2] Configuration V2 (Overfitting Reduction)

In [2]:
# Paths
BASE_DIR = Path("/Users/f.nuno/Desktop/chatbot_2.0/LLM_training")
DATA_DIR = BASE_DIR / "data"
CHECKPOINTS_DIR = BASE_DIR / "checkpoints_qlora"
OUTPUT_DIR = BASE_DIR / "output"

# Create directories
CHECKPOINTS_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Data files
TRAIN_FILE = DATA_DIR / "train_v3_final_complete.jsonl"
VALID_FILE = DATA_DIR / "valid_v3_final_complete.jsonl"

# Model configuration V2 - OPTIMIZED FOR OVERFITTING REDUCTION
model_name = str(BASE_DIR / "models/mistral-7b-4bit")

qlora_config = {
    "quantization": "int4",
    "group_size": 64,
    "num_layers": 8,
    "lora_parameters": {
        "rank": 6,              # ‚úÖ REDUCED from 8 (25% fewer parameters)
        "scale": 16,
        "dropout": 0.08,        # ‚úÖ NEW (added regularization)
        "keys": ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    },
    "bias": "none",
}

training_config = {
    "num_epochs": 3,
    "batch_size": 2,            # ‚úÖ REDUCED from 4 (more regularization)
    "gradient_accumulation": 4, # ‚úÖ INCREASED from 2 (maintain effective batch)
    "learning_rate": 2e-4,      # ‚úÖ REDUCED from 5e-4 (slower, more stable)
    "max_seq_length": 512,
    "warmup_steps": 100,
    "save_steps": 200,
    "eval_steps": 200,
    "log_steps": 10,
    "early_stopping_patience": 5,      # ‚úÖ NEW
    "early_stopping_min_delta": 0.001, # ‚úÖ NEW
}

print("\n" + "="*80)
print("üéØ CONFIGURATION V2 - OVERFITTING REDUCTION")
print("="*80)
print(f"LoRA Rank: 6 (reduced from 8)")
print(f"Dropout: 0.08 (added for regularization)")
print(f"Batch Size: 2 (reduced from 4)")
print(f"Learning Rate: 2e-4 (reduced from 5e-4)")
print(f"Early Stopping: Patience=5, Min Delta=0.001")
print("="*80)


üéØ CONFIGURATION V2 - OVERFITTING REDUCTION
LoRA Rank: 6 (reduced from 8)
Dropout: 0.08 (added for regularization)
Batch Size: 2 (reduced from 4)
Learning Rate: 2e-4 (reduced from 5e-4)
Early Stopping: Patience=5, Min Delta=0.001


## [3] Define Training Classes & Functions

In [3]:
def format_prompt(sample):
    return f"### Pergunta:\n{sample['prompt']}\n\n### Resposta:\n{sample['completion']}"

def load_dataset(file_path):
    data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            data.append(json.loads(line))
    return data

def tokenize(sample, tokenizer, max_seq_length):
    prompt_text = format_prompt(sample)
    return tokenizer.encode(prompt_text, max_length=max_seq_length, padding="max_length", truncation=True)

def calculate_memory_usage():
    process = psutil.Process(os.getpid())
    mem_info = process.memory_info()
    return mem_info.rss / (1024 ** 2)

print("‚úÖ Helper functions defined")

‚úÖ Helper functions defined


## [4] MetricsTracker Class

In [4]:
class MetricsTracker:
    def __init__(self, checkpoint_dir):
        self.checkpoint_dir = Path(checkpoint_dir)
        self.metrics_file_csv = self.checkpoint_dir / "training_metrics.csv"
        self.metrics_file_json = self.checkpoint_dir / "training_metrics.json"
        self.summary_file = self.checkpoint_dir / "training_summary.json"
        self.training_state_file = self.checkpoint_dir / "training_state.json"
        self.best_model_path = self.checkpoint_dir / "adapters" / "adapters.safetensors"
        self.best_val_loss = float('inf')
        self.metrics_data = []
        self.start_time = time.time()
        self.current_epoch = 0
        self.current_step = 0

    def log_step(self, epoch, step, loss, val_loss=None, memory_mb=None, learning_rate=None):
        current_time = time.time()
        elapsed_time = current_time - self.start_time
        
        metric = {
            "epoch": epoch,
            "step": step,
            "loss": loss.item() if hasattr(loss, 'item') else loss,
            "timestamp": current_time,
            "elapsed_time_sec": elapsed_time,
            "memory_mb": memory_mb if memory_mb is not None else calculate_memory_usage(),
            "learning_rate": learning_rate if learning_rate is not None else training_config["learning_rate"],
        }
        if val_loss is not None:
            metric["val_loss"] = val_loss.item() if hasattr(val_loss, 'item') else val_loss

        self.metrics_data.append(metric)
        
        with open(self.metrics_file_json, 'w', encoding='utf-8') as f:
            json.dump(self.metrics_data, f, indent=4, ensure_ascii=False)
        pd.DataFrame(self.metrics_data).to_csv(self.metrics_file_csv, index=False)
        self.current_epoch = epoch
        self.current_step = step

    def save_best_model(self, model, val_loss):
        if val_loss < self.best_val_loss:
            self.best_val_loss = val_loss
            adapters_dir = self.checkpoint_dir / "adapters"
            adapters_dir.mkdir(parents=True, exist_ok=True)
            model.save_weights(str(self.best_model_path))
            print(f"‚úì Melhor modelo guardado com Val Loss: {self.best_val_loss:.4f}")

    def save_summary(self, total_time, total_samples):
        summary = {
            "total_training_time_sec": total_time,
            "total_samples_processed": total_samples,
            "final_epoch": self.current_epoch,
            "final_step": self.current_step,
            "best_validation_loss": self.best_val_loss,
            "training_config": training_config,
            "qlora_config": qlora_config,
        }
        with open(self.summary_file, 'w', encoding='utf-8') as f:
            json.dump(summary, f, indent=4, ensure_ascii=False)

print("‚úÖ MetricsTracker class defined")

‚úÖ MetricsTracker class defined


## [5] EarlyStoppingMonitor Class ‚ú®

In [5]:
class EarlyStoppingMonitor:
    """Monitora overfitting e aplica early stopping autom√°tico"""

    def __init__(self, patience=5, min_delta=0.001, restore_best_weights=True):
        self.patience = patience
        self.min_delta = min_delta
        self.best_val_loss = float('inf')
        self.patience_counter = 0
        self.best_epoch = 0
        self.best_step = 0
        self.restore_best_weights = restore_best_weights
        self.should_stop = False
        self.overfitting_gap_history = []

    def check(self, val_loss, train_loss, epoch, step):
        """Verifica se deve parar o treino"""
        gap = val_loss - train_loss
        self.overfitting_gap_history.append(gap)

        if gap > 0.30:
            print(f"  ‚ö†Ô∏è  OVERFITTING SEVERO DETECTADO (gap={gap:.4f})")
        elif gap > 0.15:
            print(f"  ‚ö†Ô∏è  Overfitting moderado (gap={gap:.4f})")

        if val_loss < self.best_val_loss - self.min_delta:
            self.best_val_loss = val_loss
            self.patience_counter = 0
            self.best_epoch = epoch
            self.best_step = step
            print(f"  ‚úÖ Melhor Val Loss: {self.best_val_loss:.4f}")
            return False, True
        else:
            self.patience_counter += 1
            if self.patience_counter >= self.patience:
                self.should_stop = True
                print(f"\n‚èπÔ∏è  EARLY STOPPING ATIVADO!")
                print(f"   Sem melhoria por {self.patience} valida√ß√µes consecutivas")
                print(f"   Melhor modelo: √âpoca {self.best_epoch}, Step {self.best_step}")
                print(f"   Melhor Val Loss: {self.best_val_loss:.4f}")
                return True, False
            else:
                print(f"  ‚ÑπÔ∏è  Sem melhoria ({self.patience_counter}/{self.patience})")
                return False, False

    def get_overfitting_status(self):
        """Retorna status de overfitting"""
        if not self.overfitting_gap_history:
            return "Sem dados"

        avg_gap = sum(self.overfitting_gap_history) / len(self.overfitting_gap_history)

        if avg_gap < 0.05:
            return "‚úÖ EXCELENTE (gap < 0.05)"
        elif avg_gap < 0.15:
            return "‚úÖ BOM (gap < 0.15)"
        elif avg_gap < 0.30:
            return "‚ö†Ô∏è MODERADO (gap < 0.30)"
        else:
            return "‚ùå CR√çTICO (gap >= 0.30)"

print("‚úÖ EarlyStoppingMonitor class defined")

‚úÖ EarlyStoppingMonitor class defined


## [6] Loss Functions

In [6]:
def loss_fn(model, inputs, targets, lengths):
    mask = mx.arange(inputs.shape[1])[None, :] < lengths[:, None]
    logits = model(inputs)
    loss = nn.losses.cross_entropy(logits, targets, reduction='none')
    loss = mx.sum(loss * mask) / mx.sum(mask)
    return loss, logits

def create_step_fn(model, optimizer):
    grad_fn = nn.value_and_grad(model, loss_fn)
    def step_fn(inputs, targets, lengths):
        (loss, logits), grads = grad_fn(model, inputs, targets, lengths)
        optimizer.update(model, grads)
        return loss
    return step_fn

def create_eval_fn(model):
    def eval_fn(inputs, targets, lengths):
        loss, _ = loss_fn(model, inputs, targets, lengths)
        return loss
    return eval_fn

print("‚úÖ Loss functions defined")

‚úÖ Loss functions defined


## [7] Load Model & Data

In [7]:
print("\nüì¶ A carregar modelo e dados...")
print(f"A carregar modelo: {model_name}")

model, tokenizer = load(model_name)
tokenizer.pad_token = tokenizer.eos_token

lora_only_config = {k: v for k, v in qlora_config.items() if k not in ["quantization", "group_size"]}
linear_to_lora_layers(
    model,
    lora_only_config["num_layers"],
    lora_only_config["lora_parameters"],
)

print("\nüìä A carregar datasets...")
train_dataset = load_dataset(TRAIN_FILE)
val_dataset = load_dataset(VALID_FILE)

print(f"Amostras de treino: {len(train_dataset)}")
print(f"Amostras de valida√ß√£o: {len(val_dataset)}")

print("\nüî§ A tokenizar...")
train_tokens = [tokenize(sample, tokenizer, training_config["max_seq_length"]) for sample in train_dataset]
val_tokens = [tokenize(sample, tokenizer, training_config["max_seq_length"]) for sample in val_dataset]

train_tokens = [t for t in train_tokens if t]
val_tokens = [t for t in val_tokens if t]

print(f"‚úÖ Tokens de treino: {len(train_tokens)}")
print(f"‚úÖ Tokens de valida√ß√£o: {len(val_tokens)}")


üì¶ A carregar modelo e dados...
A carregar modelo: /Users/f.nuno/Desktop/chatbot_2.0/LLM_training/models/mistral-7b-4bit

üìä A carregar datasets...
Amostras de treino: 848
Amostras de valida√ß√£o: 95

üî§ A tokenizar...
‚úÖ Tokens de treino: 848
‚úÖ Tokens de valida√ß√£o: 95


## [8] TRAINING LOOP V2 ‚ú®

In [8]:
print("\n" + "="*80)
print("üöÄ INICIANDO TREINO V2 - OVERFITTING REDUCTION")
print("="*80)

optimizer = AdamW(learning_rate=training_config["learning_rate"])
tracker = MetricsTracker(CHECKPOINTS_DIR)
early_stopping = EarlyStoppingMonitor(
    patience=training_config["early_stopping_patience"],
    min_delta=training_config["early_stopping_min_delta"]
)

train_step_fn = create_step_fn(model, optimizer)
eval_fn = create_eval_fn(model)

total_train_steps = (len(train_tokens) // training_config["batch_size"]) * training_config["num_epochs"]
print(f"Total de passos de treino esperados: {total_train_steps}")

for epoch in range(training_config["num_epochs"]):
    random.shuffle(train_tokens)
    
    print(f"\nüìö √âpoca {epoch+1}/{training_config['num_epochs']}")
    print("-" * 80)
    
    for i in tqdm(range(len(train_tokens) // training_config["batch_size"]), desc=f"Treino"):
        batch_start = i * training_config["batch_size"]
        batch_end = (i + 1) * training_config["batch_size"]
        batch_tokens = train_tokens[batch_start:batch_end]

        max_len = max(len(t) for t in batch_tokens)
        inputs = mx.array([t + [0] * (max_len - len(t)) for t in batch_tokens])
        targets = inputs
        lengths = mx.array([len(t) for t in batch_tokens])

        loss = train_step_fn(inputs, targets, lengths)
        mx.eval(model.parameters(), optimizer.state, loss)
        
        if (i + 1) % training_config["log_steps"] == 0:
            mem_usage = calculate_memory_usage()
            tracker.log_step(epoch, i + 1, loss, memory_mb=mem_usage)

        # Avalia√ß√£o
        if (i + 1) % training_config["eval_steps"] == 0 and val_tokens:
            val_loss_sum = 0
            num_val_batches = len(val_tokens) // training_config["batch_size"]
            if num_val_batches == 0 and len(val_tokens) > 0:
                num_val_batches = 1

            for j in range(num_val_batches):
                val_batch_start = j * training_config["batch_size"]
                val_batch_end = (j + 1) * training_config["batch_size"]
                val_batch_tokens = val_tokens[val_batch_start:val_batch_end]
                
                if not val_batch_tokens:
                    continue

                val_max_len = max(len(t) for t in val_batch_tokens)
                val_inputs = mx.array([t + [0] * (val_max_len - len(t)) for t in val_batch_tokens])
                val_targets = val_inputs
                val_lengths = mx.array([len(t) for t in val_batch_tokens])

                val_loss = eval_fn(val_inputs, val_targets, val_lengths)
                val_loss_sum += val_loss.item()
            
            avg_val_loss = val_loss_sum / num_val_batches if num_val_batches > 0 else float('inf')
            print(f"\n‚úÖ Val Loss (step {i+1}): {avg_val_loss:.4f}")
            tracker.log_step(epoch, i + 1, loss, val_loss=avg_val_loss, memory_mb=calculate_memory_usage())
            tracker.save_best_model(model, avg_val_loss)

            # Early Stopping Check
            print(f"\nüìä An√°lise de Valida√ß√£o:")
            should_stop, improved = early_stopping.check(
                val_loss=avg_val_loss,
                train_loss=loss.item(),
                epoch=epoch,
                step=i + 1
            )

            if should_stop:
                print(f"\nüèÅ Treino terminado por Early Stopping")
                break

        if (i + 1) % training_config["save_steps"] == 0:
            checkpoint_path = CHECKPOINTS_DIR / f"checkpoint_epoch{epoch}_step{i+1}"
            checkpoint_path.mkdir(parents=True, exist_ok=True)
            model.save_weights(str(checkpoint_path / "adapters.safetensors"))
            print(f"‚úì Checkpoint guardado em: {checkpoint_path}")

    if early_stopping.should_stop:
        break

print("\n" + "="*80)
print("‚úÖ TREINO COMPLETO!")
print("="*80)


üöÄ INICIANDO TREINO V2 - OVERFITTING REDUCTION
Total de passos de treino esperados: 1272

üìö √âpoca 1/3
--------------------------------------------------------------------------------


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Treino:  47%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå               | 199/424 [3:52:51<5:29:38, 87.91s/it]


‚úÖ Val Loss (step 200): 1.4834
‚úì Melhor modelo guardado com Val Loss: 1.4834

üìä An√°lise de Valida√ß√£o:
  ‚úÖ Melhor Val Loss: 1.4834


Treino:  47%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã              | 200/424 [4:25:09<40:00:03, 642.87s/it]

‚úì Checkpoint guardado em: /Users/f.nuno/Desktop/chatbot_2.0/LLM_training/checkpoints_qlora/checkpoint_epoch0_step200


Treino:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 399/424 [9:08:17<33:47, 81.08s/it]


‚úÖ Val Loss (step 400): 1.3087
‚úì Melhor modelo guardado com Val Loss: 1.3087

üìä An√°lise de Valida√ß√£o:
  ‚ö†Ô∏è  Overfitting moderado (gap=0.1524)
  ‚úÖ Melhor Val Loss: 1.3087


Treino:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 400/424 [9:39:35<4:08:07, 620.32s/it]

‚úì Checkpoint guardado em: /Users/f.nuno/Desktop/chatbot_2.0/LLM_training/checkpoints_qlora/checkpoint_epoch0_step400


Treino: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 424/424 [10:14:07<00:00, 86.90s/it]



üìö √âpoca 2/3
--------------------------------------------------------------------------------


Treino:  47%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå               | 199/424 [4:56:01<5:53:48, 94.35s/it]


‚úÖ Val Loss (step 200): 1.3034
‚úì Melhor modelo guardado com Val Loss: 1.3034

üìä An√°lise de Valida√ß√£o:
  ‚úÖ Melhor Val Loss: 1.3034


Treino:  47%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã              | 200/424 [5:31:04<43:21:03, 696.71s/it]

‚úì Checkpoint guardado em: /Users/f.nuno/Desktop/chatbot_2.0/LLM_training/checkpoints_qlora/checkpoint_epoch1_step200


Treino:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 399/424 [9:39:30<40:37, 97.52s/it]


‚úÖ Val Loss (step 400): 1.2482
‚úì Melhor modelo guardado com Val Loss: 1.2482

üìä An√°lise de Valida√ß√£o:
  ‚úÖ Melhor Val Loss: 1.2482


Treino:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 399/424 [10:15:58<38:35, 92.63s/it]


RuntimeError: [write] Unable to write 29360128 bytes to file.

## [9] Final Analysis & Save Model

In [None]:
# An√°lise Final de Overfitting
print("\n" + "="*80)
print("üîç AN√ÅLISE FINAL DE OVERFITTING")
print("="*80)
overfitting_status = early_stopping.get_overfitting_status()
print(f"Status: {overfitting_status}")
if early_stopping.overfitting_gap_history:
    avg_gap = sum(early_stopping.overfitting_gap_history) / len(early_stopping.overfitting_gap_history)
    max_gap = max(early_stopping.overfitting_gap_history)
    min_gap = min(early_stopping.overfitting_gap_history)
    print(f"Gap m√©dio: {avg_gap:.4f}")
    print(f"Gap m√°ximo: {max_gap:.4f}")
    print(f"Gap m√≠nimo: {min_gap:.4f}")
print("="*80 + "\n")

# Salvar modelo final
print("\n--- Guardando modelo final ---")
final_model_path = OUTPUT_DIR / "mistral-7b-farense-qlora-v2"
final_model_path.mkdir(parents=True, exist_ok=True)

shutil.copy(tracker.best_model_path, final_model_path / "adapters.safetensors")

with open(final_model_path / "adapter_config.json", 'w', encoding='utf-8') as f:
    json.dump(qlora_config, f, indent=4, ensure_ascii=False)

tracker.save_summary(time.time() - tracker.start_time, len(train_dataset))

print(f"\n‚úÖ Modelo guardado em: {final_model_path}")
print(f"‚úÖ M√©tricas guardadas em: {CHECKPOINTS_DIR}")

## [10] Results Visualization

In [None]:
# Load metrics
with open(CHECKPOINTS_DIR / "training_metrics.json") as f:
    metrics = json.load(f)

df = pd.DataFrame(metrics)

# Create visualizations
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Loss trajectory
ax = axes[0, 0]
ax.plot(df['step'], df['loss'], 'b-', label='Training Loss', alpha=0.7)
if 'val_loss' in df.columns:
    val_steps = df[df['val_loss'].notna()]['step']
    val_losses = df[df['val_loss'].notna()]['val_loss']
    ax.scatter(val_steps, val_losses, color='r', s=50, label='Validation Loss')
ax.set_xlabel('Step')
ax.set_ylabel('Loss')
ax.set_title('Training & Validation Loss')
ax.legend()
ax.grid(True, alpha=0.3)

# Loss per epoch
ax = axes[0, 1]
for epoch in df['epoch'].unique():
    epoch_data = df[df['epoch'] == epoch]
    ax.plot(epoch_data['step'], epoch_data['loss'], label=f'Epoch {int(epoch)+1}')
ax.set_xlabel('Step')
ax.set_ylabel('Loss')
ax.set_title('Loss by Epoch')
ax.legend()
ax.grid(True, alpha=0.3)

# Memory usage
ax = axes[1, 0]
ax.plot(df['step'], df['memory_mb'], 'g-', alpha=0.7)
ax.set_xlabel('Step')
ax.set_ylabel('Memory (MB)')
ax.set_title('Memory Usage')
ax.grid(True, alpha=0.3)

# Overfitting gap
ax = axes[1, 1]
if early_stopping.overfitting_gap_history:
    ax.plot(range(len(early_stopping.overfitting_gap_history)), early_stopping.overfitting_gap_history, 'o-', color='purple')
    ax.axhline(y=0.15, color='r', linestyle='--', label='Warning Threshold (0.15)')
    ax.axhline(y=0.30, color='darkred', linestyle='--', label='Critical Threshold (0.30)')
    ax.set_xlabel('Validation Check')
    ax.set_ylabel('Overfitting Gap (Val - Train)')
    ax.set_title('Overfitting Gap Detection')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(CHECKPOINTS_DIR / 'training_v2_results.png', dpi=150, bbox_inches='tight')
plt.show()

print("‚úÖ Visualization saved")

## [11] Comparison V1 vs V2

In [None]:
print("\n" + "="*80)
print("üìä COMPARISON: V1 vs V2")
print("="*80)

comparison_data = {
    'Metric': ['F-1 Score', 'Train-Val Gap', 'Overfitting Status', 'Training Duration'],
    'V1': ['0.9602', '2.27 (HIGH)', 'HIGH', '~4 hours'],
    'V2 (This Run)': [
        'TBD',
        f'{avg_gap:.4f}' if 'avg_gap' in locals() else 'TBD',
        overfitting_status,
        f'{(time.time() - tracker.start_time) / 3600:.1f} hours'
    ],
    'Target': ['‚â•0.93', '<0.15', 'GOOD/EXCELENTE', 'Similar']
}

df_comparison = pd.DataFrame(comparison_data)
print(df_comparison.to_string(index=False))
print("="*80)