In [None]:
# Cell 1: Import libraries
import os
import numpy as np
from PIL import Image
import torch
import torchvision.transforms as T
import random
import re
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_absolute_error, r2_score
from tqdm import tqdm

In [None]:
# Cell 2: Settings
INPUT_FOLDER = "path/to/your/images"  # CHANGE THIS: folder holding the raw input images
OUTPUT_FOLDER = "output_balanced"     # CHANGE THIS: root folder for every generated artifact
IMAGE_SIZE = 224    # side length (pixels) of the square images produced by make_square
TARGET_COUNT = 30   # Target images per class after augmentation
SEED = 42           # single seed shared by every random number generator below

# Seed every RNG the notebook relies on. torch has to be seeded too: DataLoader
# shuffling, dropout and the initialisation of the new regression head draw from
# torch's generator, not from `random` or NumPy.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Cell: Clear output folders
import shutil

# Working directories that must be empty at the start of a run
folders_to_clear = [
    f"{OUTPUT_FOLDER}/augmented",
    f"{OUTPUT_FOLDER}/resized",
    f"{OUTPUT_FOLDER}/final",
]

print("Clearing output folders...")
for folder in folders_to_clear:
    already_there = os.path.exists(folder)
    if already_there:
        # Drop the directory together with everything inside it
        shutil.rmtree(folder)
        message = f"  ✓ Cleared: {folder}"
    else:
        message = f"  ✓ Created: {folder}"
    # Either way the folder is (re)created empty before reporting
    os.makedirs(folder)
    print(message)

print("\nAll output folders cleared and ready!")

In [None]:
# Cell 3: Create folders
# Ensure each pipeline stage has its output directory (no error if it already exists)
for stage_dir in (
    f"{OUTPUT_FOLDER}/augmented",
    f"{OUTPUT_FOLDER}/resized",
    f"{OUTPUT_FOLDER}/final",
):
    os.makedirs(stage_dir, exist_ok=True)

In [None]:
# Cell 4: Basic functions
def get_class_label(filename):
    """Return the class tag embedded in a filename (e.g., '0p', '5p', '100p').

    The tag is the first run of digits immediately followed by 'p' or 'P'.
    It is always returned with a lowercase 'p'. Filenames without such a run
    are labelled "unknown".
    """
    found = re.search(r'(\d+)[pP]', filename)
    if found is None:
        return "unknown"
    digits = found.group(1)
    return f"{digits}p"

def get_numeric_label(filename):
    """Return the regression target encoded in a filename as a float.

    Uses the same '<digits>p' pattern as get_class_label. When the filename
    carries no such pattern the function falls back to 0.0, so unlabeled files
    are indistinguishable from a genuine 0 target.
    """
    found = re.search(r'(\d+)[pP]', filename)
    return float(found.group(1)) if found else 0.0

def make_square(image, size):
    """Scale an image to fit a size x size canvas and pad it to a square.

    The image is converted to RGB if needed, resized so its longer side is
    (at most) `size` pixels while keeping the aspect ratio, and pasted in the
    centre of a white square.

    Args:
        image: PIL image to process.
        size: Side length, in pixels, of the square result.

    Returns:
        A new RGB PIL image of dimensions (size, size).
    """
    # Convert to RGB if not already
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # Scale factor that maps the longer side onto `size`
    w, h = image.size
    scale = size / max(w, h)

    # int() truncates, so a very elongated image (e.g. 1000x2 at size 224)
    # would otherwise get a 0-pixel side, which Image.resize rejects.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))

    resized = image.resize((new_w, new_h), Image.BILINEAR)

    # White canvas; the resized image is centred on it
    square = Image.new('RGB', (size, size), (255, 255, 255))
    x = (size - new_w) // 2
    y = (size - new_h) // 2
    square.paste(resized, (x, y))

    return square

In [None]:
# Cell 5: Define augmentations
def aug1(img):
    """Rotate the image by 90 degrees (expand=True)."""
    return img.rotate(90, expand=True)


def aug2(img):
    """Rotate the image by 180 degrees (expand=True)."""
    return img.rotate(180, expand=True)


def aug3(img):
    """Rotate the image by 270 degrees (expand=True)."""
    return img.rotate(270, expand=True)


def aug4(img):
    """Mirror the image left-to-right."""
    return img.transpose(Image.FLIP_LEFT_RIGHT)


def aug5(img):
    """Mirror the image top-to-bottom."""
    return img.transpose(Image.FLIP_TOP_BOTTOM)


# Pool the augmentation step samples from
augmentation_list = [aug1, aug2, aug3, aug4, aug5]

In [None]:
# Cell 6: Group images by class
# Collect the input images. os.listdir returns entries in arbitrary,
# platform-dependent order, so the list is sorted: the seeded random.choice
# calls in the augmentation step only repeat the same picks if the order of
# these filenames is stable from run to run.
image_files = sorted(
    f for f in os.listdir(INPUT_FOLDER)
    if f.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp'))
)

# Group filenames by the class label parsed from each name
classes = {}
for filename in image_files:
    classes.setdefault(get_class_label(filename), []).append(filename)

# Show counts
print("Images per class:")
for label, images in sorted(classes.items()):
    print(f"  {label}: {len(images)} images")

# Files without a '<digits>p' tag would later be given a regression target of
# 0.0 by get_numeric_label, so make them visible instead of training on them silently.
if "unknown" in classes:
    print(f"  WARNING: {len(classes['unknown'])} file(s) have no '<digits>p' label in their name")

In [None]:
# Cell 7: Copy originals and create augmentations
print("\nProcessing images...")


def _load_rgb(path):
    """Open an image file and return it as a fully loaded RGB image.

    The file is opened in a ``with`` block so its handle is released as soon
    as the pixels have been read; ``convert`` returns a separate image object
    that stays usable after the file is closed.
    """
    with Image.open(path) as opened:
        return opened.convert('RGB')


for class_label, filenames in sorted(classes.items()):
    print(f"\nClass {class_label}:")

    # Copy all originals first (kept as RGB, no binary conversion)
    for filename in filenames:
        img = _load_rgb(os.path.join(INPUT_FOLDER, filename))
        base = os.path.splitext(filename)[0]
        img.save(f"{OUTPUT_FOLDER}/augmented/{base}_original.jpg")

    # Top the class up to TARGET_COUNT with augmented copies
    current_count = len(filenames)
    if current_count < TARGET_COUNT:
        needed = TARGET_COUNT - current_count
        print(f"  Creating {needed} augmentations...")

        for i in range(needed):
            # Pick random original
            source_file = random.choice(filenames)
            img = _load_rgb(os.path.join(INPUT_FOLDER, source_file))

            # Apply 3-4 distinct random augmentations, in the sampled order.
            # NOTE(review): some combinations cancel out (left-right flip +
            # top-bottom flip equals a 180-degree rotation, so aug2+aug4+aug5
            # reproduces the source image), and rotations/flips can only yield
            # a handful of distinct variants per original - confirm this is
            # acceptable for the balancing goal.
            num_augs = random.randint(3, 4)
            selected_augs = random.sample(augmentation_list, num_augs)

            augmented = img
            for aug_func in selected_augs:
                augmented = aug_func(augmented)

            # `i` is unique within the class, so augmented names never collide
            base = os.path.splitext(source_file)[0]
            augmented.save(f"{OUTPUT_FOLDER}/augmented/{base}_aug{i}.jpg")

In [None]:
# Cell 8: Resize all images
print("\nResizing all images...")

# Every image written by the augmentation step
augmented_files = []
for entry in os.listdir(f"{OUTPUT_FOLDER}/augmented"):
    if entry.lower().endswith(('.png', '.jpg', '.jpeg')):
        augmented_files.append(entry)

for i in range(len(augmented_files)):
    filename = augmented_files[i]

    # Progress line every 50 images
    if not i % 50:
        print(f"  {i}/{len(augmented_files)}")

    img = Image.open(f"{OUTPUT_FOLDER}/augmented/{filename}")
    resized = make_square(img, IMAGE_SIZE)

    # Same stem, always written as JPG to maintain RGB
    base = os.path.splitext(filename)[0]
    resized.save(f"{OUTPUT_FOLDER}/resized/{base}.jpg")

In [None]:
# Cell 9: Normalize for ResNet (ImageNet normalization)
print("\nNormalizing images for ResNet...")

resized_files = [
    name
    for name in os.listdir(f"{OUTPUT_FOLDER}/resized")
    if name.lower().endswith(('.jpg', '.jpeg', '.png'))
]

# Per-channel ImageNet statistics used to normalise inputs for ResNet
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]
normalize = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=imagenet_mean, std=imagenet_std),
])

for i, filename in enumerate(resized_files):
    if not i % 50:
        print(f"  {i}/{len(resized_files)}")

    # Load the resized RGB image and derive the output stem
    img = Image.open(f"{OUTPUT_FOLDER}/resized/{filename}")
    base = os.path.splitext(filename)[0]

    # Normalised tensor, stored as a NumPy array for the training Dataset
    tensor = normalize(img)
    np.save(f"{OUTPUT_FOLDER}/final/{base}.npy", tensor.numpy())

    # Un-normalised copy next to it for viewing
    img.save(f"{OUTPUT_FOLDER}/final/{base}.jpg")

In [None]:
# Cell 10: Check final counts
print("\nFinal image count per class:")

# Class label of every viewable image in the final folder (the .npy files are skipped)
final_labels = [
    get_class_label(name)
    for name in os.listdir(f"{OUTPUT_FOLDER}/final")
    if name.lower().endswith(('.jpg', '.jpeg', '.png'))
]

# Tally labels
final_counts = {}
for label in final_labels:
    if label not in final_counts:
        final_counts[label] = 0
    final_counts[label] += 1

for label, count in sorted(final_counts.items()):
    print(f"  {label}: {count} images")

In [None]:
# Cell 11: Simple Model Setup with Dropout
import torch.nn as nn
import torchvision.models as models

# ImageNet-pretrained ResNet-18 backbone.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=` - confirm against the installed version.
# NOTE(review): in the visible notebook the K-fold cell builds its own models
# via create_model(), so `model`, `criterion` and `optimizer` defined here do
# not appear to be used afterwards; only `device` is.
model = models.resnet18(pretrained=True)

# Replace the classifier with dropout followed by a single-output linear layer
head_inputs = model.fc.in_features
regression_head = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(head_inputs, 1),
)
model.fc = regression_head

# Run on the GPU when one is available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = model.to(device)

# Mean-squared-error objective; Adam with weight decay for regularisation
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), weight_decay=0.01, lr=0.001)

print(f"Model ready on {device} with dropout and weight decay")

In [None]:
# Cell 12: Dataset with Original/Augmented Split
from torch.utils.data import Dataset, DataLoader

class ImageDataset(Dataset):
    """Dataset over image tensors stored as .npy files in one folder.

    Each item is a (tensor, label) pair: the array loaded from disk as a float
    tensor, and the numeric label parsed from the filename by
    get_numeric_label.
    """

    def __init__(self, folder, file_list=None):
        """Remember the folder and which files to serve.

        When `file_list` is None every '.npy' entry of `folder` is used;
        otherwise the given list is used as-is.
        """
        self.folder = folder
        if file_list is not None:
            self.files = file_list
        else:
            self.files = [name for name in os.listdir(folder) if name.endswith('.npy')]

    def __len__(self):
        """Number of files served by this dataset."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load item `idx` and return (float32 tensor, float32 scalar label)."""
        name = self.files[idx]
        array = np.load(f"{self.folder}/{name}")
        tensor = torch.from_numpy(array).float()
        target = get_numeric_label(name)
        return tensor, torch.tensor(target, dtype=torch.float32)

# Get all .npy files. Sorted because os.listdir order is arbitrary: without a
# stable input order the split below would differ between runs/machines even
# though random_state is fixed.
all_files = sorted(f for f in os.listdir(f"{OUTPUT_FOLDER}/final") if f.endswith('.npy'))

# Separate original and augmented files by the exact suffix the pipeline
# writes ('<base>_original.npy' / '<base>_aug<N>.npy'). Matching the suffix
# rather than a substring keeps a base name that merely contains '_aug' or
# '_original' from being misclassified.
original_files = [f for f in all_files if f.endswith('_original.npy')]
augmented_files = [f for f in all_files if re.search(r'_aug\d+\.npy$', f)]

print(f"Found {len(original_files)} original files, {len(augmented_files)} augmented files")

# Split original files for validation (20% of originals)
orig_train_files, orig_val_files = train_test_split(original_files, test_size=0.2, random_state=SEED)

print(f"Validation will use {len(orig_val_files)} original images")
print(f"Training will start with {len(orig_train_files)} original images")

# Base names (filename without the '_original.npy' suffix) of the validation
# originals; augmentations derived from these must not be trained on.
val_base_names = {val_file[:-len('_original.npy')] for val_file in orig_val_files}

print(f"Will remove augmentations for {len(val_base_names)} base images from training")

# Filter out augmentations that correspond to validation originals
filtered_aug_files = []
removed_count = 0
for aug_file in augmented_files:
    # Base name of the source image: drop the trailing '_aug<N>.npy'
    base_name = re.sub(r'_aug\d+\.npy$', '', aug_file)

    if base_name in val_base_names:
        removed_count += 1
    else:
        filtered_aug_files.append(aug_file)

print(f"Removed {removed_count} augmented files to prevent data leakage")
print(f"Training will use {len(orig_train_files)} originals + {len(filtered_aug_files)} augmentations = {len(orig_train_files) + len(filtered_aug_files)} total")

# Create final train and validation file lists
train_files = orig_train_files + filtered_aug_files
val_files = orig_val_files

# Create datasets
train_dataset = ImageDataset(f"{OUTPUT_FOLDER}/final", train_files)
val_dataset = ImageDataset(f"{OUTPUT_FOLDER}/final", val_files)

# Create data loaders (only the training loader is shuffled)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)

print(f"\nFinal dataset split:")
print(f"  Training: {len(train_dataset)} samples (originals + clean augmentations)")
print(f"  Validation: {len(val_dataset)} samples (originals only)")
print(f"  No data leakage: validation originals and their augmentations are separate")

In [None]:
# Cell 13: K-Fold Cross Validation with Early Stopping
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import KFold
from tqdm import tqdm
import torch.nn as nn
import torchvision.models as models

def round_to_interval(values, interval=5):
    """Snap each value to the nearest multiple of `interval` (e.g., 5, 10, 15, etc.).

    Returns a NumPy array; exact halves are rounded to the even multiple.
    """
    steps = np.asarray(values) / interval
    return np.rint(steps) * interval

def create_model():
    """Build a new pretrained ResNet-18 regressor and move it to `device`.

    The classifier is replaced by dropout (p=0.5) followed by a linear layer
    with a single output.
    """
    net = models.resnet18(pretrained=True)
    n_features = net.fc.in_features
    net.fc = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(n_features, 1),
    )
    net = net.to(device)
    return net

def train_fold(train_loader, val_loader, fold_num, k_folds, max_epochs=50, patience=10):
    """Train a fresh model on one cross-validation fold with early stopping.

    Args:
        train_loader: Iterable of (data, target) training batches.
        val_loader: Iterable of (data, target) validation batches for this fold.
        fold_num: 1-based index of the fold (used in progress-bar text only).
        k_folds: Total number of folds (used in progress-bar text only).
        max_epochs: Upper bound on the number of training epochs.
        patience: Number of consecutive epochs without a new best validation
            loss after which training stops early.

    Returns:
        dict with keys:
            'model': the trained model, restored to the weights of the epoch
                with the lowest validation loss.
            'history': per-epoch lists under 'train_loss', 'val_loss',
                'train_mae', 'val_mae', 'train_r2', 'val_r2'.
            'final_mae', 'final_r2': validation metrics of the restored model.
            'final_rounded_mae': validation MAE after rounding predictions to
                the nearest multiple of 5.
            'best_val_loss': lowest mean validation loss observed.

    Note:
        The final metrics are computed on the same validation data that drove
        early stopping, so they are optimistic estimates.
    """
    import copy  # local import: only needed to snapshot the best weights

    # Create fresh model and optimizer for each fold
    model = create_model()
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)

    # Early stopping state
    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None

    # Training history for this fold
    fold_history = {'train_loss': [], 'val_loss': [], 'train_mae': [], 'val_mae': [], 'train_r2': [], 'val_r2': []}

    # Progress bar for epochs
    epoch_pbar = tqdm(range(max_epochs), desc=f'Fold {fold_num}/{k_folds}', leave=False)

    for epoch in epoch_pbar:
        # Train
        model.train()
        train_loss, train_preds, train_targets = 0, [], []

        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            # Flatten (batch, 1) outputs so they line up with the 1-D targets
            output = model(data).view(-1)
            target = target.view(-1)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_preds.extend(output.detach().cpu().numpy())
            train_targets.extend(target.cpu().numpy())

        # Validate
        model.eval()
        val_loss, val_preds, val_targets = 0, [], []
        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data).view(-1)
                target = target.view(-1)
                val_loss += criterion(output, target).item()
                val_preds.extend(output.cpu().numpy())
                val_targets.extend(target.cpu().numpy())

        # Calculate metrics
        train_mae = mean_absolute_error(train_targets, train_preds)
        val_mae = mean_absolute_error(val_targets, val_preds)
        train_r2 = r2_score(train_targets, train_preds)
        val_r2 = r2_score(val_targets, val_preds)

        # Losses are averaged over batches (not samples)
        avg_val_loss = val_loss/len(val_loader)
        fold_history['train_loss'].append(train_loss/len(train_loader))
        fold_history['val_loss'].append(avg_val_loss)
        fold_history['train_mae'].append(train_mae)
        fold_history['val_mae'].append(val_mae)
        fold_history['train_r2'].append(train_r2)
        fold_history['val_r2'].append(val_r2)

        # Update progress bar
        epoch_pbar.set_postfix({
            'Val Loss': f'{avg_val_loss:.3f}',
            'Val MAE': f'{val_mae:.1f}',
            'Val R²': f'{val_r2:.3f}'
        })

        # Early stopping check
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
            # state_dict() hands out references to the live parameter tensors,
            # and dict.copy() is shallow: without a deep copy the "best"
            # snapshot would be overwritten in place by every later
            # optimizer step and restoring it would be a no-op.
            best_model_state = copy.deepcopy(model.state_dict())
        else:
            patience_counter += 1
            if patience_counter >= patience:
                epoch_pbar.set_description(f'Fold {fold_num}/{k_folds} (Early Stop)')
                break

    # Restore the weights of the best epoch
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # Final validation metrics with the restored weights
    model.eval()
    final_preds, final_targets = [], []
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data).view(-1)
            target = target.view(-1)
            final_preds.extend(output.cpu().numpy())
            final_targets.extend(target.cpu().numpy())

    # Calculate final metrics, raw and with predictions rounded to multiples of 5
    final_preds_rounded = round_to_interval(final_preds, 5)
    final_mae = mean_absolute_error(final_targets, final_preds)
    final_r2 = r2_score(final_targets, final_preds)
    final_rounded_mae = mean_absolute_error(final_targets, final_preds_rounded)

    return {
        'model': model,
        'history': fold_history,
        'final_mae': final_mae,
        'final_r2': final_r2,
        'final_rounded_mae': final_rounded_mae,
        'best_val_loss': best_val_loss
    }

# K-Fold Cross Validation Setup
k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=SEED)

# Get all training files (combine original train files and clean augmentations)
all_train_files = orig_train_files + filtered_aug_files
fold_results = []


def _source_image_key(npy_name):
    """Return the name of the source image a processed file was derived from.

    Strips the '.npy' extension and the trailing '_original' / '_aug<N>'
    marker, so an original and all of its augmentations share one key.
    """
    stem = npy_name[:-len('.npy')] if npy_name.endswith('.npy') else npy_name
    return re.sub(r'_(?:original|aug\d+)$', '', stem)


# Fold over source images rather than individual files. Splitting the files
# directly would let an original land in the training part of a fold while
# its rotated/flipped copies sit in the validation part (or vice versa),
# which leaks information and inflates the cross-validation scores.
file_keys = [_source_image_key(f) for f in all_train_files]
source_keys = sorted(set(file_keys))

print(f"Starting {k_folds}-Fold Cross Validation")
print(f"Total training samples: {len(all_train_files)}")
print(f"Source images (cross-validation groups): {len(source_keys)}")
print(f"Validation samples (held out): {len(orig_val_files)}")
print("="*60)

# Progress bar for folds
fold_pbar = tqdm(enumerate(kfold.split(source_keys)), total=k_folds, desc='Cross Validation')

for fold, (train_idx, val_idx) in fold_pbar:
    fold_num = fold + 1
    fold_pbar.set_description(f'Preparing Fold {fold_num}/{k_folds}')

    # Every file of a source image goes to the same side of the split
    val_keys = {source_keys[i] for i in val_idx}
    fold_train_files = [f for f, key in zip(all_train_files, file_keys) if key not in val_keys]
    fold_val_files = [f for f, key in zip(all_train_files, file_keys) if key in val_keys]

    # Create datasets for this fold
    fold_train_dataset = ImageDataset(f"{OUTPUT_FOLDER}/final", fold_train_files)
    fold_val_dataset = ImageDataset(f"{OUTPUT_FOLDER}/final", fold_val_files)

    fold_train_loader = DataLoader(fold_train_dataset, batch_size=16, shuffle=True)
    fold_val_loader = DataLoader(fold_val_dataset, batch_size=16)

    # Train this fold
    fold_result = train_fold(fold_train_loader, fold_val_loader, fold_num, k_folds)
    fold_results.append(fold_result)

    # Update main progress bar
    fold_pbar.set_postfix({
        'MAE': f'{fold_result["final_mae"]:.2f}',
        'R²': f'{fold_result["final_r2"]:.3f}',
        'Rounded MAE': f'{fold_result["final_rounded_mae"]:.2f}'
    })

print("\n" + "="*60)
print("K-FOLD CROSS VALIDATION RESULTS")
print("="*60)

# Display individual fold results
mae_scores = []
r2_scores = []
rounded_mae_scores = []

for i, result in enumerate(fold_results):
    mae_scores.append(result['final_mae'])
    r2_scores.append(result['final_r2'])
    rounded_mae_scores.append(result['final_rounded_mae'])

    print(f"Fold {i+1}:")
    print(f"  MAE (Raw):      {result['final_mae']:.3f}")
    print(f"  R²:             {result['final_r2']:.3f}")
    print(f"  MAE (Rounded):  {result['final_rounded_mae']:.3f}")
    print(f"  Best Val Loss:  {result['best_val_loss']:.3f}")
    print()

# Calculate and display averages
print("AVERAGE PERFORMANCE ACROSS ALL FOLDS:")
print("="*60)
print(f"Mean MAE (Raw):      {np.mean(mae_scores):.3f} ± {np.std(mae_scores):.3f}")
print(f"Mean R²:             {np.mean(r2_scores):.3f} ± {np.std(r2_scores):.3f}")
print(f"Mean MAE (Rounded):  {np.mean(rounded_mae_scores):.3f} ± {np.std(rounded_mae_scores):.3f}")
print("="*60)

# Save the best model (highest R²)
best_fold_idx = int(np.argmax(r2_scores))
best_model = fold_results[best_fold_idx]['model']
torch.save(best_model.state_dict(), f'{OUTPUT_FOLDER}/best_kfold_model.pth')
print(f"Best model (Fold {best_fold_idx+1}) saved to {OUTPUT_FOLDER}/best_kfold_model.pth")

# Store results for plotting
cv_results = {
    'fold_results': fold_results,
    'mae_scores': mae_scores,
    'r2_scores': r2_scores,
    'rounded_mae_scores': rounded_mae_scores,
    'best_model': best_model,
    'best_fold': best_fold_idx + 1
}

In [None]:
# Cell 14: K-Fold Cross Validation Results Visualization
import matplotlib.pyplot as plt

# 2x2 overview: one bar chart per metric plus the loss curves of the best fold
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

# Fold numbers for the x axis (1-based)
n_folds = len(cv_results['mae_scores'])
folds = [fold_no for fold_no in range(1, n_folds + 1)]

# (axis, cv_results key, bar colour, title, y label) for the three bar charts
bar_panels = [
    (ax1, 'mae_scores', 'skyblue', 'MAE (Raw) by Fold', 'MAE'),
    (ax2, 'r2_scores', 'lightgreen', 'R² by Fold', 'R²'),
    (ax3, 'rounded_mae_scores', 'coral', 'MAE (Rounded to 5s) by Fold', 'MAE'),
]
for panel, score_key, bar_color, panel_title, y_label in bar_panels:
    scores = cv_results[score_key]
    mean_score = np.mean(scores)
    panel.bar(folds, scores, alpha=0.7, color=bar_color, edgecolor='black')
    # Dashed red line marks the mean across folds
    panel.axhline(y=mean_score, color='red', linestyle='--', label=f'Mean: {mean_score:.3f}')
    panel.set_title(panel_title)
    panel.set_xlabel('Fold')
    panel.set_ylabel(y_label)
    panel.legend()
    panel.grid(True, alpha=0.3)

# Training curves for best fold ('best_fold' is 1-based)
best_fold_history = cv_results['fold_results'][cv_results['best_fold']-1]['history']
epochs = range(1, len(best_fold_history['train_loss']) + 1)

for curve_key, curve_label in (('train_loss', 'Train Loss'), ('val_loss', 'Val Loss')):
    ax4.plot(epochs, best_fold_history[curve_key], label=curve_label, alpha=0.8)
ax4.set_title(f'Training Curves - Best Fold ({cv_results["best_fold"]})')
ax4.set_xlabel('Epoch')
ax4.set_ylabel('Loss')
ax4.legend()
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Second figure: distribution and mean/std of the three metrics across folds
plt.figure(figsize=(12, 6))

metric_names = ['MAE (Raw)', 'R²', 'MAE (Rounded to 5s)']
metrics = [cv_results['mae_scores'], cv_results['r2_scores'], cv_results['rounded_mae_scores']]
colors = ['skyblue', 'lightgreen', 'coral']

# Left panel: box plot per metric, boxes tinted with the metric's colour
plt.subplot(1, 2, 1)
box_plot = plt.boxplot(metrics, labels=metric_names, patch_artist=True)
for box_idx, patch in enumerate(box_plot['boxes']):
    patch.set_facecolor(colors[box_idx])
    patch.set_alpha(0.7)

plt.title('K-Fold Performance Distribution')
plt.ylabel('Score')
plt.grid(True, alpha=0.3)

# Right panel: mean of each metric with its standard deviation as error bar
plt.subplot(1, 2, 2)
means = []
stds = []
for scores in metrics:
    means.append(np.mean(scores))
    stds.append(np.std(scores))

bars = plt.bar(metric_names, means, yerr=stds, alpha=0.7, color=colors, capsize=5, edgecolor='black')
plt.title('Mean Performance ± Std Dev')
plt.ylabel('Score')
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)

# Add value labels just above the top of each error bar
for bar_idx in range(len(means)):
    bar = bars[bar_idx]
    mean = means[bar_idx]
    std = stds[bar_idx]
    label_x = bar.get_x() + bar.get_width()/2.
    label_y = bar.get_height() + std + 0.01
    plt.text(label_x, label_y, f'{mean:.3f}±{std:.3f}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

# Recap of the fold whose model was selected ('best_fold' is 1-based)
best_fold_no = cv_results['best_fold']
best_slot = best_fold_no - 1
print(f"\nBest performing model: Fold {best_fold_no}")
print(f"Best R²: {max(cv_results['r2_scores']):.3f}")
print(f"Corresponding MAE: {cv_results['mae_scores'][best_slot]:.3f}")
print(f"Corresponding Rounded MAE: {cv_results['rounded_mae_scores'][best_slot]:.3f}")

# Test the best model on held-out validation set
print(f"\nTesting best model on held-out validation set...")
best_model = cv_results['best_model']
best_model.eval()

final_test_preds, final_test_targets = [], []
with torch.no_grad():
    # val_loader is the hold-out loader built from the original (non-augmented) validation files
    for batch_inputs, batch_targets in val_loader:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device).view(-1)
        batch_preds = best_model(batch_inputs).view(-1)

        final_test_preds.extend(batch_preds.cpu().numpy())
        final_test_targets.extend(batch_targets.cpu().numpy())

# Final test metrics, raw and with predictions rounded to multiples of 5
final_test_preds = np.array(final_test_preds)
final_test_targets = np.array(final_test_targets)
final_test_preds_rounded = round_to_interval(final_test_preds, 5)

test_mae = mean_absolute_error(final_test_targets, final_test_preds)
test_r2 = r2_score(final_test_targets, final_test_preds)
test_rounded_mae = mean_absolute_error(final_test_targets, final_test_preds_rounded)

banner = "="*50
print(banner)
print("FINAL TEST SET PERFORMANCE (HELD-OUT VALIDATION)")
print(banner)
print(f"Test MAE (Raw):      {test_mae:.3f}")
print(f"Test R²:             {test_r2:.3f}")
print(f"Test MAE (Rounded):  {test_rounded_mae:.3f}")
print(banner)

In [None]:
# Cell 15: Final Test Set Error Analysis
import matplotlib.pyplot as plt
import numpy as np

# Use the best model's predictions on held-out test set
print("Analyzing final test set predictions...")
all_preds = final_test_preds
all_targets = final_test_targets
all_preds_rounded = final_test_preds_rounded

print(f"Analyzing {len(all_preds)} test predictions...")

# Absolute error of the rounded predictions
abs_errors = np.abs(all_preds_rounded - all_targets)

# Percentage error where the target is non-zero; for zero targets the absolute
# error is kept as a stand-in. The division is applied only to the non-zero
# entries: np.where would evaluate it for every element first and emit
# divide-by-zero / invalid-value RuntimeWarnings whenever a target is 0.
nonzero_target = all_targets != 0
pct_errors = abs_errors.copy()
pct_errors[nonzero_target] = abs_errors[nonzero_target] / all_targets[nonzero_target] * 100

# 2x2 figure: percentage errors, predicted-vs-actual, absolute errors, summary stats
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
ax_pct, ax_scatter = axes[0, 0], axes[0, 1]
ax_abs, ax_stats = axes[1, 0], axes[1, 1]

# Histogram of percentage errors
ax_pct.hist(pct_errors, bins=20, alpha=0.7, color='skyblue', edgecolor='black')
ax_pct.set_title('Test Set Prediction Errors (%)')
ax_pct.set_xlabel('Percentage Error')
ax_pct.grid(True, alpha=0.3)

# Rounded predictions against targets, with the identity line as reference
ax_scatter.scatter(all_targets, all_preds_rounded, alpha=0.6, color='coral')
min_val = min(all_targets)
max_val = max(all_targets)
ax_scatter.plot([min_val, max_val], [min_val, max_val], 'r--', label='Perfect')
ax_scatter.set_xlabel('Actual')
ax_scatter.set_ylabel('Predicted')
ax_scatter.set_title('Test Set: Predictions vs Actual')
ax_scatter.legend()
ax_scatter.grid(True, alpha=0.3)

# Histogram of absolute errors
ax_abs.hist(abs_errors, bins=15, alpha=0.7, color='lightgreen', edgecolor='black')
ax_abs.set_title('Test Set Absolute Errors')
ax_abs.set_xlabel('Error Magnitude')
ax_abs.grid(True, alpha=0.3)

# Summary statistics ('Perfect' is a count, the others are percentages)
stats = {
    'Mean %': np.mean(pct_errors),
    'Median %': np.median(pct_errors),
    '90th %tile': np.percentile(pct_errors, 90),
    'Perfect': np.sum(abs_errors == 0),
    'Max %': np.max(pct_errors)
}

colors = ['lightcoral', 'lightblue', 'gold', 'plum', 'lightgreen']
bars = ax_stats.bar(stats.keys(), stats.values(), color=colors, alpha=0.7)
ax_stats.set_title('Test Set Error Statistics')
ax_stats.tick_params(axis='x', rotation=45)

# Print each value slightly above its bar
for bar, val in zip(bars, stats.values()):
    text_x = bar.get_x() + bar.get_width()/2
    text_y = bar.get_height() + 0.5
    ax_stats.text(text_x, text_y, f'{val:.1f}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

# Text summary of the test-set errors
rule = '='*50
n_total = len(abs_errors)
n_perfect = np.sum(abs_errors == 0)

print(f"\n{rule}")
print(f"FINAL TEST SET ERROR ANALYSIS")
print(f"{rule}")
print(f"Total test samples: {len(all_preds)}")
print(f"Mean error: {np.mean(pct_errors):.1f}%")
print(f"Median error: {np.median(pct_errors):.1f}%")
print(f"90% within: {np.percentile(pct_errors, 90):.1f}% error")
print(f"Perfect: {n_perfect}/{n_total} ({n_perfect/n_total*100:.1f}%)")
print(f"Worst error: {np.max(pct_errors):.1f}%")

# First few predictions (at most 8): raw output, rounded value and target
print(f"\nSample Test Predictions:")
for raw, snapped, actual in zip(all_preds[:8], all_preds_rounded[:8], all_targets[:8]):
    print(f"Raw: {raw:.1f} → Rounded: {snapped:.0f}, Actual: {actual:.0f}")

print(f"{rule}")