# MLP Regressor

**Objective**: Train multi-output regressors on text embeddings + explanatory variables.
- Scenario 1: IID data with random split (60/20/20)
- Scenario 2: IID train/val, OOD test

In [15]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Check GPU availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Set random seed
np.random.seed(42)
torch.manual_seed(42)

Using device: cuda


<torch._C.Generator at 0x709a21934110>

## 1. Load Data from Previous Pipeline

In [16]:
# Load merged data and embeddings
df_merged = pd.read_csv('data_merged.csv')
embeddings_pca_optimal = np.load('embeddings_pca_optimal.npy')

print(f"✓ Data loaded")
print(f"  - df_merged shape: {df_merged.shape}")
print(f"  - embeddings_pca_optimal shape: {embeddings_pca_optimal.shape}")
print(f"  - IID samples: {(df_merged['OOD'] == 0).sum()}")
print(f"  - OOD samples: {(df_merged['OOD'] == 1).sum()}")

# Define column lists (same as pipeline)
vars_expl = [
    "MS % brut", "PB % brut", "CB % brut", "MGR % brut", "MM % brut",
    "NDF % brut", "ADF % brut", "Lignine % brut", "Amidon % brut", "Sucres % brut"
]

vars_cibles = [
    "EB (kcal) kcal/kg brut", "ED porc croissance (kcal) kcal/kg brut", "EM porc croissance (kcal) kcal/kg brut",
    "EN porc croissance (kcal) kcal/kg brut", "EMAn coq (kcal) kcal/kg brut", "EMAn poulet (kcal) kcal/kg brut",
    "UFL 2018 par kg brut", "UFV 2018 par kg brut", "PDIA 2018 g/kg brut", "PDI 2018 g/kg brut", "BalProRu 2018 g/kg brut"
]

# Prepare features
X_embeddings = embeddings_pca_optimal  # (6574, 43)
X_vars = df_merged[vars_expl].fillna(0).values
X_combined = np.hstack([X_embeddings, X_vars])  # (6574, 53)

# Prepare targets
y_combined = df_merged[vars_cibles].fillna(0).values  # (6574, 11)

print(f"\n✓ Features prepared: {X_combined.shape}")
print(f"✓ Targets prepared: {y_combined.shape}")

✓ Data loaded
  - df_merged shape: (6574, 28)
  - embeddings_pca_optimal shape: (6574, 43)
  - IID samples: 6352
  - OOD samples: 222

✓ Features prepared: (6574, 53)
✓ Targets prepared: (6574, 11)


## 2. Define PyTorch MLP Model

In [17]:
class MLPRegressor(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size, dropout_rate=0.2):
        super(MLPRegressor, self).__init__()
        
        layers = []
        prev_size = input_size
        
        # Hidden layers
        for hidden_size in hidden_sizes:
            layers.append(nn.Linear(prev_size, hidden_size))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
            prev_size = hidden_size
        
        # Output layer
        layers.append(nn.Linear(prev_size, output_size))
        
        self.network = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.network(x)

# Test model initialization
model_test = MLPRegressor(input_size=53, hidden_sizes=[256, 128, 64], output_size=11, dropout_rate=0.2)
print(f"✓ Model architecture:")
print(model_test)
print(f"\n✓ Total parameters: {sum(p.numel() for p in model_test.parameters()):,}")

✓ Model architecture:
MLPRegressor(
  (network): Sequential(
    (0): Linear(in_features=53, out_features=256, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=256, out_features=128, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
    (6): Linear(in_features=128, out_features=64, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.2, inplace=False)
    (9): Linear(in_features=64, out_features=11, bias=True)
  )
)

✓ Total parameters: 55,691


## 3. Training Utilities

In [23]:
def create_dataloaders(X, y, batch_size=32, train_ratio=0.6, val_ratio=0.2, test_ratio=0.2):
    """Create train/val/test dataloaders from data"""
    n_samples = len(X)
    indices = np.random.permutation(n_samples)
    
    # Split indices
    train_size = int(n_samples * train_ratio)
    val_size = int(n_samples * val_ratio)
    
    train_idx = indices[:train_size]
    val_idx = indices[train_size:train_size + val_size]
    test_idx = indices[train_size + val_size:]
    
    # Create datasets
    train_dataset = TensorDataset(
        torch.FloatTensor(X[train_idx]), torch.FloatTensor(y[train_idx])
    )
    val_dataset = TensorDataset(
        torch.FloatTensor(X[val_idx]), torch.FloatTensor(y[val_idx])
    )
    test_dataset = TensorDataset(
        torch.FloatTensor(X[test_idx]), torch.FloatTensor(y[test_idx])
    )
    
    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    return train_loader, val_loader, test_loader, (X[train_idx], y[train_idx]), (X[val_idx], y[val_idx]), (X[test_idx], y[test_idx])

def train_epoch(model, train_loader, optimizer, criterion, device):
    """Train for one epoch"""
    model.train()
    total_loss = 0.0
    
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        
        optimizer.zero_grad()
        predictions = model(X_batch)
        loss = criterion(predictions, y_batch)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item() * X_batch.size(0)
    
    return total_loss / len(train_loader.dataset)

def evaluate(model, loader, criterion, device):
    """Evaluate model on a dataset"""
    model.eval()
    total_loss = 0.0
    
    with torch.no_grad():
        for X_batch, y_batch in loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            total_loss += loss.item() * X_batch.size(0)
    
    return total_loss / len(loader.dataset)

def get_predictions(model, X, device, batch_size=32):
    """Get predictions for dataset"""
    model.eval()
    dataset = TensorDataset(torch.FloatTensor(X))
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    
    predictions = []
    with torch.no_grad():
        for X_batch, in loader:
            X_batch = X_batch.to(device)
            pred = model(X_batch).cpu().numpy()
            predictions.append(pred)
    
    return np.vstack(predictions)

print("✓ Training utilities defined")

✓ Training utilities defined


## 4. Scenario 1: IID Data with Random Split

In [24]:
print("\n" + "="*70)
print("SCENARIO 1: In-Distribution (IID) Data with Random Split (60/20/20)")
print("="*70)

# Filter IID data only
iid_mask = df_merged['OOD'] == 0
X_iid = X_combined[iid_mask]
y_iid = y_combined[iid_mask]

print(f"\n✓ IID dataset: {X_iid.shape[0]} samples")

# Split FIRST (60/20/20) to avoid data leakage
from sklearn.model_selection import train_test_split
X_train, X_temp, y_train, y_temp = train_test_split(
    X_iid, y_iid, test_size=0.4, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Fit StandardScaler ONLY on training data (for features)
scaler_X_s1 = StandardScaler()
X_train_s1 = scaler_X_s1.fit_transform(X_train)
X_val_s1 = scaler_X_s1.transform(X_val)
X_test_s1 = scaler_X_s1.transform(X_test)

# Fit StandardScaler ONLY on training data (for targets)
scaler_y_s1 = StandardScaler()
y_train_s1_scaled = scaler_y_s1.fit_transform(y_train)
y_val_s1_scaled = scaler_y_s1.transform(y_val)
y_test_s1_scaled = scaler_y_s1.transform(y_test)

# Keep original targets for evaluation
y_train_s1 = y_train
y_val_s1 = y_val
y_test_s1 = y_test

print(f"  - Train: {len(X_train_s1)} samples")
print(f"  - Val:   {len(X_val_s1)} samples")
print(f"  - Test:  {len(X_test_s1)} samples")

# Create dataloaders (using scaled targets)
train_dataset_s1 = TensorDataset(torch.FloatTensor(X_train_s1), torch.FloatTensor(y_train_s1_scaled))
val_dataset_s1 = TensorDataset(torch.FloatTensor(X_val_s1), torch.FloatTensor(y_val_s1_scaled))
test_dataset_s1 = TensorDataset(torch.FloatTensor(X_test_s1), torch.FloatTensor(y_test_s1_scaled))

train_loader_s1 = DataLoader(train_dataset_s1, batch_size=32, shuffle=True)
val_loader_s1 = DataLoader(val_dataset_s1, batch_size=32, shuffle=False)
test_loader_s1 = DataLoader(test_dataset_s1, batch_size=32, shuffle=False)

# Initialize model
model_s1 = MLPRegressor(input_size=53, hidden_sizes=[256, 128, 64], output_size=11, dropout_rate=0.2).to(device)
optimizer_s1 = optim.Adam(model_s1.parameters(), lr=0.001, weight_decay=1e-5)
criterion = nn.MSELoss()

# Training loop
num_epochs = 500
patience = 50
best_val_loss = float('inf')
patience_counter = 0

print(f"\nTraining MLP (max {num_epochs} epochs, patience {patience})...")

train_losses_s1 = []
val_losses_s1 = []

for epoch in range(num_epochs):
    train_loss = train_epoch(model_s1, train_loader_s1, optimizer_s1, criterion, device)
    val_loss = evaluate(model_s1, val_loader_s1, criterion, device)
    
    train_losses_s1.append(train_loss)
    val_losses_s1.append(val_loss)
    
    if (epoch + 1) % 10 == 0:
        print(f"  Epoch {epoch+1:3d}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}")
    
    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        best_model_s1 = model_s1.state_dict().copy()
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"\nEarly stopping at epoch {epoch+1}")
            model_s1.load_state_dict(best_model_s1)
            break

print(f"✓ Training complete (epoch {epoch+1})")


SCENARIO 1: In-Distribution (IID) Data with Random Split (60/20/20)

✓ IID dataset: 6352 samples
  - Train: 3811 samples
  - Val:   1270 samples
  - Test:  1271 samples

Training MLP (max 500 epochs, patience 50)...
  Epoch  10: Train Loss=0.0652, Val Loss=0.0256
  Epoch  20: Train Loss=0.0571, Val Loss=0.0232
  Epoch  30: Train Loss=0.0538, Val Loss=0.0122
  Epoch  40: Train Loss=0.0506, Val Loss=0.0168
  Epoch  50: Train Loss=0.0543, Val Loss=0.0134
  Epoch  60: Train Loss=0.0495, Val Loss=0.0130
  Epoch  70: Train Loss=0.0458, Val Loss=0.0142
  Epoch  80: Train Loss=0.0469, Val Loss=0.0104
  Epoch  90: Train Loss=0.0456, Val Loss=0.0148
  Epoch 100: Train Loss=0.0448, Val Loss=0.0178
  Epoch 110: Train Loss=0.0435, Val Loss=0.0113
  Epoch 120: Train Loss=0.0431, Val Loss=0.0115
  Epoch 130: Train Loss=0.0426, Val Loss=0.0115
  Epoch 140: Train Loss=0.0441, Val Loss=0.0185
  Epoch 150: Train Loss=0.0429, Val Loss=0.0102
  Epoch 160: Train Loss=0.0411, Val Loss=0.0098
  Epoch 170: Tr

### Scenario 1: Evaluation

In [25]:
# Get predictions (in scaled space)
y_train_pred_s1_scaled = get_predictions(model_s1, X_train_s1, device)
y_val_pred_s1_scaled = get_predictions(model_s1, X_val_s1, device)
y_test_pred_s1_scaled = get_predictions(model_s1, X_test_s1, device)

# Inverse-transform predictions to original scale
y_train_pred_s1 = scaler_y_s1.inverse_transform(y_train_pred_s1_scaled)
y_val_pred_s1 = scaler_y_s1.inverse_transform(y_val_pred_s1_scaled)
y_test_pred_s1 = scaler_y_s1.inverse_transform(y_test_pred_s1_scaled)

# Evaluate
print(f"\nScenario 1 - Performance per target variable:")
print(f"{'Target':<35} {'R² (test)':<15} {'RMSE':<15} {'MAE':<15}")
print("-" * 80)

results_s1 = []
for i, target in enumerate(vars_cibles):
    r2_test = r2_score(y_test_s1[:, i], y_test_pred_s1[:, i])
    rmse_test = np.sqrt(mean_squared_error(y_test_s1[:, i], y_test_pred_s1[:, i]))
    mae_test = mean_absolute_error(y_test_s1[:, i], y_test_pred_s1[:, i])
    
    results_s1.append({
        'target': target,
        'R2_test': r2_test,
        'RMSE_test': rmse_test,
        'MAE_test': mae_test
    })
    
    print(f"{target[:33]:<35} {r2_test:>6.3f}        {rmse_test:>10.2f}       {mae_test:>10.2f}")

df_results_s1 = pd.DataFrame(results_s1)
print(f"\nOverall Statistics (Scenario 1):")
print(f"  - Mean R² (test):   {df_results_s1['R2_test'].mean():.3f} ± {df_results_s1['R2_test'].std():.3f}")
print(f"  - Mean RMSE (test): {df_results_s1['RMSE_test'].mean():.2f} ± {df_results_s1['RMSE_test'].std():.2f}")
print(f"  - Mean MAE (test):  {df_results_s1['MAE_test'].mean():.2f} ± {df_results_s1['MAE_test'].std():.2f}")


Scenario 1 - Performance per target variable:
Target                              R² (test)       RMSE            MAE            
--------------------------------------------------------------------------------
EB (kcal) kcal/kg brut               0.989             65.83            51.54
ED porc croissance (kcal) kcal/kg    0.986            105.22            73.87
EM porc croissance (kcal) kcal/kg    0.986             98.27            68.13
EN porc croissance (kcal) kcal/kg    0.989             78.30            59.78
EMAn coq (kcal) kcal/kg brut         0.989             88.61            68.58
EMAn poulet (kcal) kcal/kg brut      0.989             89.23            67.44
UFL 2018 par kg brut                 0.987              0.03             0.02
UFV 2018 par kg brut                 0.986              0.04             0.03
PDIA 2018 g/kg brut                  0.990              7.99             4.94
PDI 2018 g/kg brut                   0.989              8.26             5.25
BalProRu

## 5. Scenario 2: IID Train/Val, OOD Test

In [26]:
print("\n" + "="*70)
print("SCENARIO 2: IID Train/Val, OOD Test (Out-of-Distribution Detection)")
print("="*70)

# Prepare data
iid_mask = df_merged['OOD'] == 0
ood_mask = df_merged['OOD'] == 1

X_iid_s2 = X_combined[iid_mask]
y_iid_s2 = y_combined[iid_mask]

X_ood_s2 = X_combined[ood_mask]
y_ood_s2 = y_combined[ood_mask]

print(f"\n✓ Data split:")
print(f"  - IID (train/val): {X_iid_s2.shape[0]} samples")
print(f"  - OOD (test):      {X_ood_s2.shape[0]} samples")

# Split IID into train/val FIRST (75/25) to avoid data leakage
from sklearn.model_selection import train_test_split
X_train_s2, X_val_s2, y_train_s2, y_val_s2 = train_test_split(
    X_iid_s2, y_iid_s2, test_size=0.25, random_state=42
)

# Fit StandardScaler ONLY on training data (for features)
scaler_X_s2 = StandardScaler()
X_train_s2 = scaler_X_s2.fit_transform(X_train_s2)
X_val_s2 = scaler_X_s2.transform(X_val_s2)
X_ood_scaled_s2 = scaler_X_s2.transform(X_ood_s2)

# Fit StandardScaler ONLY on training data (for targets)
scaler_y_s2 = StandardScaler()
y_train_s2_scaled = scaler_y_s2.fit_transform(y_train_s2)
y_val_s2_scaled = scaler_y_s2.transform(y_val_s2)
y_ood_s2_scaled = scaler_y_s2.transform(y_ood_s2)

print(f"  - Train: {len(X_train_s2)} samples")
print(f"  - Val:   {len(X_val_s2)} samples")
print(f"  - Test (OOD): {len(X_ood_scaled_s2)} samples")

# Create dataloaders (using scaled targets)
train_dataset_s2 = TensorDataset(torch.FloatTensor(X_train_s2), torch.FloatTensor(y_train_s2_scaled))
val_dataset_s2 = TensorDataset(torch.FloatTensor(X_val_s2), torch.FloatTensor(y_val_s2_scaled))
test_dataset_s2 = TensorDataset(torch.FloatTensor(X_ood_scaled_s2), torch.FloatTensor(y_ood_s2_scaled))

train_loader_s2 = DataLoader(train_dataset_s2, batch_size=32, shuffle=True)
val_loader_s2 = DataLoader(val_dataset_s2, batch_size=32, shuffle=False)
test_loader_s2 = DataLoader(test_dataset_s2, batch_size=32, shuffle=False)

# Initialize model
model_s2 = MLPRegressor(input_size=53, hidden_sizes=[256, 128, 64], output_size=11, dropout_rate=0.2).to(device)
optimizer_s2 = optim.Adam(model_s2.parameters(), lr=0.001, weight_decay=1e-5)

# Training loop
print(f"\nTraining MLP (max {num_epochs} epochs, patience {patience})...")

train_losses_s2 = []
val_losses_s2 = []

best_val_loss = float('inf')
patience_counter = 0

for epoch in range(num_epochs):
    train_loss = train_epoch(model_s2, train_loader_s2, optimizer_s2, criterion, device)
    val_loss = evaluate(model_s2, val_loader_s2, criterion, device)
    
    train_losses_s2.append(train_loss)
    val_losses_s2.append(val_loss)
    
    if (epoch + 1) % 10 == 0:
        print(f"  Epoch {epoch+1:3d}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}")
    
    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        best_model_s2 = model_s2.state_dict().copy()
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"\nEarly stopping at epoch {epoch+1}")
            model_s2.load_state_dict(best_model_s2)
            break

print(f"✓ Training complete (epoch {epoch+1})")


SCENARIO 2: IID Train/Val, OOD Test (Out-of-Distribution Detection)

✓ Data split:
  - IID (train/val): 6352 samples
  - OOD (test):      222 samples
  - Train: 4764 samples
  - Val:   1588 samples
  - Test (OOD): 222 samples

Training MLP (max 500 epochs, patience 50)...
  Epoch  10: Train Loss=0.0630, Val Loss=0.0197
  Epoch  20: Train Loss=0.0522, Val Loss=0.0136
  Epoch  30: Train Loss=0.0499, Val Loss=0.0169
  Epoch  40: Train Loss=0.0505, Val Loss=0.0130
  Epoch  50: Train Loss=0.0477, Val Loss=0.0137
  Epoch  60: Train Loss=0.0450, Val Loss=0.0128
  Epoch  70: Train Loss=0.0453, Val Loss=0.0106
  Epoch  80: Train Loss=0.0478, Val Loss=0.0116
  Epoch  90: Train Loss=0.0469, Val Loss=0.0154
  Epoch 100: Train Loss=0.0452, Val Loss=0.0106
  Epoch 110: Train Loss=0.0432, Val Loss=0.0111
  Epoch 120: Train Loss=0.0450, Val Loss=0.0146
  Epoch 130: Train Loss=0.0441, Val Loss=0.0079
  Epoch 140: Train Loss=0.0429, Val Loss=0.0111
  Epoch 150: Train Loss=0.0433, Val Loss=0.0108
  Epoc

### Scenario 2: Evaluation

In [31]:
print("\n" + "="*70)
print("SCENARIO 2: Evaluation (OOD generalization)")
print("="*70)

# Get predictions in scaled space (pass arrays, not dataloaders)
y_train_pred_s2_scaled = get_predictions(model_s2, X_train_s2, device)
y_val_pred_s2_scaled = get_predictions(model_s2, X_val_s2, device)
y_test_pred_s2_scaled = get_predictions(model_s2, X_ood_scaled_s2, device)

# Inverse-transform predictions back to original scale
y_train_pred_s2 = scaler_y_s2.inverse_transform(y_train_pred_s2_scaled)
y_val_pred_s2 = scaler_y_s2.inverse_transform(y_val_pred_s2_scaled)
y_test_pred_s2 = scaler_y_s2.inverse_transform(y_test_pred_s2_scaled)

# Calculate metrics on original scale
print("\nSCENARIO 2 Results:")
print("\nTrain Set:")
r2_train_s2 = r2_score(y_train_s2, y_train_pred_s2, multioutput='raw_values')
rmse_train_s2 = np.sqrt(mean_squared_error(y_train_s2, y_train_pred_s2, multioutput='raw_values'))
mae_train_s2 = mean_absolute_error(y_train_s2, y_train_pred_s2, multioutput='raw_values')

print(f"  R²:   {r2_train_s2.mean():.4f} (±{r2_train_s2.std():.4f})")
print(f"  RMSE: {rmse_train_s2.mean():.4f} (±{rmse_train_s2.std():.4f})")
print(f"  MAE:  {mae_train_s2.mean():.4f} (±{mae_train_s2.std():.4f})")

print("\nValidation Set:")
r2_val_s2 = r2_score(y_val_s2, y_val_pred_s2, multioutput='raw_values')
rmse_val_s2 = np.sqrt(mean_squared_error(y_val_s2, y_val_pred_s2, multioutput='raw_values'))
mae_val_s2 = mean_absolute_error(y_val_s2, y_val_pred_s2, multioutput='raw_values')

print(f"  R²:   {r2_val_s2.mean():.4f} (±{r2_val_s2.std():.4f})")
print(f"  RMSE: {rmse_val_s2.mean():.4f} (±{rmse_val_s2.std():.4f})")
print(f"  MAE:  {mae_val_s2.mean():.4f} (±{mae_val_s2.std():.4f})")

print("\nTest Set (OOD - Feedtables):")
r2_test_s2 = r2_score(y_ood_s2, y_test_pred_s2, multioutput='raw_values')
rmse_test_s2 = np.sqrt(mean_squared_error(y_ood_s2, y_test_pred_s2, multioutput='raw_values'))
mae_test_s2 = mean_absolute_error(y_ood_s2, y_test_pred_s2, multioutput='raw_values')

print(f"  R²:   {r2_test_s2.mean():.4f} (±{r2_test_s2.std():.4f})")
print(f"  RMSE: {rmse_test_s2.mean():.4f} (±{rmse_test_s2.std():.4f})")
print(f"  MAE:  {mae_test_s2.mean():.4f} (±{mae_test_s2.std():.4f})")

# Per-variable breakdown
print("\nPer-variable R² (Test Set):")
for i, var in enumerate(vars_cibles):
    print(f"  {var:20s}: R² = {r2_test_s2[i]:7.4f}")

print(f"\nOOD Performance Drop: {((r2_val_s2.mean() - r2_test_s2.mean()) / r2_val_s2.mean() * 100):.1f}%")


SCENARIO 2: Evaluation (OOD generalization)

SCENARIO 2 Results:

Train Set:
  R²:   0.9911 (±0.0020)
  RMSE: 41.5157 (±33.4487)
  MAE:  31.1294 (±25.3044)

Validation Set:
  R²:   0.9903 (±0.0018)
  RMSE: 43.1104 (±34.8587)
  MAE:  32.5211 (±26.5978)

Test Set (OOD - Feedtables):
  R²:   0.7467 (±0.2615)
  RMSE: 281.6015 (±243.7589)
  MAE:  140.8519 (±124.8781)

Per-variable R² (Test Set):
  EB (kcal) kcal/kg brut: R² =  0.9666
  ED porc croissance (kcal) kcal/kg brut: R² =  0.9336
  EM porc croissance (kcal) kcal/kg brut: R² =  0.9366
  EN porc croissance (kcal) kcal/kg brut: R² =  0.9526
  EMAn coq (kcal) kcal/kg brut: R² =  0.8839
  EMAn poulet (kcal) kcal/kg brut: R² =  0.8670
  UFL 2018 par kg brut: R² =  0.8386
  UFV 2018 par kg brut: R² =  0.8538
  PDIA 2018 g/kg brut : R² =  0.3906
  PDI 2018 g/kg brut  : R² =  0.3324
  BalProRu 2018 g/kg brut: R² =  0.2579

OOD Performance Drop: 24.6%


## 6. Comparison & Visualization

In [32]:
print("\n" + "="*70)
print("SCENARIO COMPARISON")
print("="*70)

print(f"\nIID vs OOD Performance Drop:")
for i, target in enumerate(vars_cibles):
    r2_iid = df_results_s1.iloc[i]['R2_test']
    r2_ood = df_results_s2.iloc[i]['R2_test']
    drop = r2_iid - r2_ood
    
    if i < 3:
        print(f"  {target[:30]:30s}: IID R²={r2_iid:6.3f} → OOD R²={r2_ood:6.3f} (Δ={drop:+.3f})")
    elif i == 3:
        print(f"  {'...':30s}")

print(f"\nAverage Performance:")
print(f"  Scenario 1 (IID Random):  Mean R² = {df_results_s1['R2_test'].mean():.3f}")
print(f"  Scenario 2 (OOD Test):    Mean R² = {df_results_s2['R2_test'].mean():.3f}")
print(f"  Performance Drop: {(df_results_s1['R2_test'].mean() - df_results_s2['R2_test'].mean()):.3f}")


SCENARIO COMPARISON

IID vs OOD Performance Drop:
  EB (kcal) kcal/kg brut        : IID R²= 0.989 → OOD R²= 0.940 (Δ=+0.050)
  ED porc croissance (kcal) kcal: IID R²= 0.986 → OOD R²= 0.910 (Δ=+0.076)
  EM porc croissance (kcal) kcal: IID R²= 0.986 → OOD R²= 0.920 (Δ=+0.067)
  ...                           

Average Performance:
  Scenario 1 (IID Random):  Mean R² = 0.988
  Scenario 2 (OOD Test):    Mean R² = 0.726
  Performance Drop: 0.261
