# Multi-Attribute Facial Recognition Training Pipeline

This notebook trains multiple backbone architectures for facial attribute prediction on the ISGD dataset (33 attributes, 320x320 images).

In [1]:
# Import Required Libraries
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import timm
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    roc_auc_score, accuracy_score, f1_score, 
    precision_score, recall_score, confusion_matrix
)
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")

Using device: cuda
GPU: NVIDIA GeForce RTX 5050 Laptop GPU
Memory: 7.96 GB


## Configuration - Select Your Backbone

**Available Backbones:**
- `convnext_tiny` - ConvNeXt Tiny (Efficient, modern CNN)
- `convnext_base` - ConvNeXt Base (Larger ConvNeXt)
- `resnet34` - ResNet34 (Lightweight classic)
- `resnet50` - ResNet50 (Standard benchmark)
- `resnext50_32x4d` - ResNeXt50 (Cardinality-based)
- `resnext101_32x8d` - ResNeXt101 (Higher capacity)
- `efficientnet_b0` - EfficientNet B0 (Efficient scaling)
- `efficientnet_b2` - EfficientNet B2 (Better accuracy)
- `mobilenetv2` - MobileNetV2 (Lightweight mobile)
- `vit_base_patch16_224` - Vision Transformer Base (Transformer-based)
- `swin_tiny_patch4_window7_224` - Swin Transformer Tiny (Hierarchical transformer)
- `swin_base_patch4_window7_224` - Swin Transformer Base (Larger version)
- `arcface_resnet50` - ArcFace with ResNet50 backbone (Face recognition specialized)
- `adaface_resnet50` - AdaFace with ResNet50 backbone (Adaptive face recognition)
- `regnety_016` / `regnetx_032` etc. - RegNet family (Design space optimized)
- `replknet_31b` - RepLKNet (Large kernel CNN)
- `inception_next_small` - InceptionNext (Hybrid inception-style)
- `focalnet_tiny_srf` - FocalNet (Focal modulation)
- `focalnet_base_lrf` - FocalNet Base

 > Tip: For RegNet you can choose any timm name starting with `regnety_` or `regnetx_`. For InceptionNext use names like `inception_next_tiny`, `inception_next_small`, `inception_next_base`. For RepLKNet, use available timm variants like `replknet_31b`. For FocalNet, use variants such as `focalnet_tiny_srf`, `focalnet_small_srf`, `focalnet_base_lrf`.

In [2]:
# ============================================
# CONFIGURATION - CHANGE THIS TO SELECT BACKBONE
# ============================================
BACKBONE = 'efficientnet_b0'  # Change this to any backbone listed above

# Training hyperparameters
BATCH_SIZE = 10
NUM_EPOCHS = 3
LEARNING_RATE = 0.0001
NUM_WORKERS = 0
IMAGE_SIZE = 320
VAL_SPLIT = 0.2
RANDOM_SEED = 42

# Paths
DATA_DIR = './ISGD'
CSV_PATH = os.path.join(DATA_DIR, 'Attributes.csv')
IMAGES_DIR = os.path.join(DATA_DIR, 'Images')
MODEL_SAVE_DIR = './Models'
RESULTS_DIR = './Results'

# Create directories
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)

print(f"Selected Backbone: {BACKBONE}")
print(f"Training Configuration:")
print(f"  - Batch Size: {BATCH_SIZE}")
print(f"  - Epochs: {NUM_EPOCHS}")
print(f"  - Learning Rate: {LEARNING_RATE}")
print(f"  - Image Size: {IMAGE_SIZE}x{IMAGE_SIZE}")

Selected Backbone: efficientnet_b0
Training Configuration:
  - Batch Size: 10
  - Epochs: 3
  - Learning Rate: 0.0001
  - Image Size: 320x320


## Dataset Loading and Preprocessing

In [3]:
# Custom Dataset Class
class ISGDDataset(Dataset):
    def __init__(self, df, images_dir, transform=None):
        self.df = df.reset_index(drop=True)
        self.images_dir = images_dir
        self.transform = transform
        
        # Get attribute columns (all except image_id)
        self.attributes = [col for col in df.columns if col != 'image_id']
        self.num_attributes = len(self.attributes)
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_name = row['image_id']
        img_path = os.path.join(self.images_dir, img_name)
        
        # Load image
        try:
            image = Image.open(img_path).convert('RGB')
        except:
            # Handle different extensions
            base_name = os.path.splitext(img_name)[0]
            for ext in ['.jpg', '.jpeg', '.png']:
                alt_path = os.path.join(self.images_dir, base_name + ext)
                if os.path.exists(alt_path):
                    image = Image.open(alt_path).convert('RGB')
                    break
        
        if self.transform:
            image = self.transform(image)
        
        # Get labels for all attributes
        # Convert to numeric and handle any remaining non-numeric values
        label_values = pd.to_numeric(row[self.attributes], errors='coerce').fillna(0).astype(np.float32)
        labels = torch.from_numpy(label_values.values)
        
        return image, labels

# Load dataset
print("Loading dataset...")
df = pd.read_csv(CSV_PATH)
print(f"Dataset shape: {df.shape}")
print(f"Number of attributes: {len(df.columns) - 1}")

# Get attribute columns (all except image_id)
attribute_cols = [col for col in df.columns if col != 'image_id']

# Convert all attribute columns to numeric, handling any errors
for col in attribute_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Fill any NaN values with 0
df[attribute_cols] = df[attribute_cols].fillna(0)

# Convert to integers (0 or 1 for binary classification)
df[attribute_cols] = df[attribute_cols].astype(int)

print(f"\nAttributes: {', '.join(attribute_cols)}")

# Check for missing values
print(f"\nMissing values after cleaning: {df.isnull().sum().sum()}")

# Verify data types
print(f"\nData types check:")
print(f"  All attributes are numeric: {df[attribute_cols].dtypes.apply(lambda x: x in ['int32', 'int64', 'float32', 'float64']).all()}")

# Split dataset
train_df, val_df = train_test_split(df, test_size=VAL_SPLIT, random_state=RANDOM_SEED)
print(f"\nTrain set: {len(train_df)} samples")
print(f"Validation set: {len(val_df)} samples")

Loading dataset...
Dataset shape: (30141, 34)
Number of attributes: 33

Attributes: attractive, blurry_image, sharp_jawline, high_cheekbones, smiling, bald, receeding_hairline, long_hair, curly_hair, grey_hair, black_hair, has_beard, patchy_beard, has_mustache, well_groomed, has_makeup, wearing_glasses, wearing_hat, clear_skin, dark_circles, oily_skin, thick_eyebrow, big_eyes, big_lips, sharp_nose, adult, old, mouth_open, male, double_chin, veil, wrinkle, chubby

Missing values after cleaning: 0

Data types check:
  All attributes are numeric: True

Train set: 24112 samples
Validation set: 6029 samples


In [4]:
# Data Augmentation and Transforms
train_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomHorizontalFlip(p=1.0),  # compulsory horizontal flip
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Frequency-based balancing and augmentation
MIN_POSITIVES = 3000
print("\nComputing attribute frequencies in training set...")
pos_counts = train_df[attribute_cols].sum(axis=0)
print("Positive counts per attribute before augmentation:")
print(pos_counts)

augmented_rows = []
for attr in attribute_cols:
    current_pos = int(pos_counts[attr])
    if current_pos >= MIN_POSITIVES or current_pos == 0:
        continue  # already enough or no positives to augment
    needed = MIN_POSITIVES - current_pos
    pos_samples = train_df[train_df[attr] == 1]
    if len(pos_samples) == 0:
        continue
    print(f"Augmenting attribute '{attr}' from {current_pos} to {MIN_POSITIVES} using {len(pos_samples)} base images...")
    # Repeat positive samples to reach at least `needed` new ones
    reps = int(np.ceil(needed / len(pos_samples)))
    pos_repeated = pd.concat([pos_samples] * reps, ignore_index=True).iloc[:needed]
    # Mark that these will be augmented (images will be modified on-the-fly via transform)
    augmented_rows.append(pos_repeated)

if augmented_rows:
    augmented_df = pd.concat(augmented_rows, ignore_index=True)
    # Optionally shuffle to mix with originals
    augmented_df = augmented_df.sample(frac=1.0, random_state=RANDOM_SEED).reset_index(drop=True)
    print(f"Created {len(augmented_df)} augmented metadata rows.")
    # Combine original training data with augmented copies
    balanced_train_df = pd.concat([train_df, augmented_df], ignore_index=True)
    balanced_train_df = balanced_train_df.sample(frac=1.0, random_state=RANDOM_SEED).reset_index(drop=True)
else:
    print("No augmentation needed based on MIN_POSITIVES setting.")
    balanced_train_df = train_df.copy()

print(f"Balanced train set size: {len(balanced_train_df)} samples")

# Create datasets
train_dataset = ISGDDataset(balanced_train_df, IMAGES_DIR, transform=train_transform)
val_dataset = ISGDDataset(val_df, IMAGES_DIR, transform=val_transform)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, 
                          shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, 
                        shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)

print(f"Train batches: {len(train_loader)}")
print(f"Validation batches: {len(val_loader)}")

# Get number of attributes
num_attributes = train_dataset.num_attributes
print(f"\nTraining for {num_attributes} attributes")


Computing attribute frequencies in training set...
Positive counts per attribute before augmentation:
attractive            11751
blurry_image           1376
sharp_jawline          6532
high_cheekbones        9046
smiling               11943
bald                   1262
receeding_hairline     1532
long_hair              8102
curly_hair              220
grey_hair              1414
black_hair            20416
has_beard              6573
patchy_beard            814
has_mustache           8044
well_groomed          17090
has_makeup             7222
wearing_glasses        2577
wearing_hat            1935
clear_skin            18470
dark_circles           9728
oily_skin              8297
thick_eyebrow         15880
big_eyes               5572
big_lips                695
sharp_nose             1631
adult                 21534
old                    1913
mouth_open             9723
male                  14189
double_chin             381
veil                    465
wrinkle                5153
c

## Model Architecture

In [5]:
# ArcFace and AdaFace implementations
class ArcFaceBackbone(nn.Module):
    """ArcFace backbone using ResNet50"""
    def __init__(self, pretrained=True):
        super(ArcFaceBackbone, self).__init__()
        resnet = models.resnet50(pretrained=pretrained)
        # Remove FC layer
        self.features = nn.Sequential(*list(resnet.children())[:-1])
        self.embedding_size = 512
        
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        return x

class AdaFaceBackbone(nn.Module):
    """AdaFace backbone using ResNet50 with adaptive margin"""
    def __init__(self, pretrained=True):
        super(AdaFaceBackbone, self).__init__()
        resnet = models.resnet50(pretrained=pretrained)
        # Remove FC layer and add adaptive feature extraction
        self.features = nn.Sequential(*list(resnet.children())[:-1])
        self.embedding_size = 512
        
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        # Normalize features (characteristic of AdaFace)
        x = nn.functional.normalize(x, p=2, dim=1)
        return x

# Multi-Attribute Classification Model
class MultiAttributeModel(nn.Module):
    def __init__(self, backbone_name, num_attributes, pretrained=True):
        super(MultiAttributeModel, self).__init__()
        self.backbone_name = backbone_name
        
        # Load backbone based on selection
        if 'convnext' in backbone_name:
            self.backbone = timm.create_model(backbone_name, pretrained=pretrained)
            in_features = self.backbone.head.fc.in_features
            self.backbone.head.fc = nn.Identity()
        
        elif 'arcface' in backbone_name:
            self.backbone = ArcFaceBackbone(pretrained=pretrained)
            in_features = 2048  # ResNet50 feature dimension
        
        elif 'adaface' in backbone_name:
            self.backbone = AdaFaceBackbone(pretrained=pretrained)
            in_features = 2048  # ResNet50 feature dimension
        
        elif 'resnext' in backbone_name:
            # Torchvision ResNeXt variants (e.g., resnext50_32x4d)
            if hasattr(models, backbone_name):
                self.backbone = getattr(models, backbone_name)(pretrained=pretrained)
            else:
                raise ValueError(f"Unsupported ResNeXt variant: {backbone_name}")
            in_features = self.backbone.fc.in_features
            self.backbone.fc = nn.Identity()
        
        elif 'resnet' in backbone_name:
            if backbone_name == 'resnet34':
                self.backbone = models.resnet34(pretrained=pretrained)
            elif backbone_name == 'resnet50':
                self.backbone = models.resnet50(pretrained=pretrained)
            else:
                raise ValueError(f"Unsupported ResNet variant: {backbone_name}")
            in_features = self.backbone.fc.in_features
            self.backbone.fc = nn.Identity()
        
        elif 'efficientnet' in backbone_name:
            self.backbone = timm.create_model(backbone_name, pretrained=pretrained)
            in_features = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Identity()
        
        elif 'mobilenetv2' in backbone_name:
            self.backbone = models.mobilenet_v2(pretrained=pretrained)
            in_features = self.backbone.classifier[1].in_features
            self.backbone.classifier = nn.Identity()
        
        elif 'vit' in backbone_name:
            self.backbone = timm.create_model(backbone_name, pretrained=pretrained, img_size=IMAGE_SIZE)
            in_features = self.backbone.head.in_features
            self.backbone.head = nn.Identity()
        
        elif 'swin' in backbone_name:
            self.backbone = timm.create_model(backbone_name, pretrained=pretrained, img_size=IMAGE_SIZE)
            in_features = self.backbone.head.in_features
            self.backbone.head = nn.Identity()
        
        elif ('regnet' in backbone_name
              or 'replknet' in backbone_name
              or 'inception_next' in backbone_name
              or 'focalnet' in backbone_name):
            # Handle timm models with reset_classifier available
            self.backbone = timm.create_model(backbone_name, pretrained=pretrained)
            
            # Reset classifier first to ensure we get the correct feature dimension
            if hasattr(self.backbone, 'reset_classifier'):
                self.backbone.reset_classifier(0)
            elif hasattr(self.backbone, 'classifier'):
                self.backbone.classifier = nn.Identity()
            elif hasattr(self.backbone, 'head'):
                self.backbone.head = nn.Identity()
            
            # Now get the actual feature dimension by doing a forward pass
            with torch.no_grad():
                dummy_input = torch.randn(1, 3, IMAGE_SIZE, IMAGE_SIZE)
                if torch.cuda.is_available():
                    dummy_input = dummy_input.cuda()
                    self.backbone = self.backbone.cuda()
                dummy_output = self.backbone(dummy_input)
                in_features = dummy_output.shape[1]
            
            print(f"Detected {in_features} features from {backbone_name}")
        
        else:
            raise ValueError(f"Unsupported backbone: {backbone_name}")
        
        # Classification head for multi-attribute prediction
        self.classifier = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, num_attributes)
        )
        
    def forward(self, x):
        features = self.backbone(x)
        output = self.classifier(features)
        return output

# Create model
print(f"\nCreating {BACKBONE} model...")
model = MultiAttributeModel(BACKBONE, num_attributes, pretrained=True)
model = model.to(device)

# Count parameters
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")


Creating efficientnet_b0 model...
Total parameters: 4,680,349
Trainable parameters: 4,680,349
Total parameters: 4,680,349
Trainable parameters: 4,680,349


## Training Setup

In [6]:
# Loss function and optimizer
criterion = nn.BCEWithLogitsLoss()  # Binary Cross Entropy for multi-label classification
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)

# Learning rate scheduler
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=3
)

# Training history
history = {
    'train_loss': [],
    'val_loss': [],
    'val_acc': []
}

print("Training setup complete!")

Training setup complete!


## Training Loop

In [7]:
# Training function
def train_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    
    pbar = tqdm(dataloader, desc='Training')
    for images, labels in pbar:
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        pbar.set_postfix({'loss': f'{loss.item():.4f}'})
    
    return running_loss / len(dataloader)

# Validation function
def validate_epoch(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        pbar = tqdm(dataloader, desc='Validation')
        for images, labels in pbar:
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            
            # Store predictions and labels
            preds = torch.sigmoid(outputs) > 0.5
            all_preds.append(preds.cpu().numpy())
            all_labels.append(labels.cpu().numpy())
            
            pbar.set_postfix({'loss': f'{loss.item():.4f}'})
    
    # Calculate accuracy
    all_preds = np.vstack(all_preds)
    all_labels = np.vstack(all_labels)
    accuracy = (all_preds == all_labels).mean()
    
    return running_loss / len(dataloader), accuracy

print("Training functions defined!")

Training functions defined!


In [8]:
# # Test data loading first
# print("Testing data loading...")
# try:
#     test_batch = next(iter(train_loader))
#     print(f"✓ Successfully loaded test batch: {test_batch[0].shape}, {test_batch[1].shape}")
# except Exception as e:
#     print(f"✗ Error loading data: {e}")
#     import traceback
#     traceback.print_exc()
#     raise

# # Main training loop
# print(f"\n{'='*60}")
# print(f"Starting Training: {BACKBONE}")
# print(f"{'='*60}\n")

# best_val_loss = float('inf')
# best_epoch = 0

# for epoch in range(NUM_EPOCHS):
#     print(f"\nEpoch {epoch+1}/{NUM_EPOCHS}")
#     print("-" * 60)
    
#     # Train
#     train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
    
#     # Validate
#     val_loss, val_acc = validate_epoch(model, val_loader, criterion, device)
    
#     # Update learning rate
#     scheduler.step(val_loss)
    
#     # Store history
#     history['train_loss'].append(train_loss)
#     history['val_loss'].append(val_loss)
#     history['val_acc'].append(val_acc)
    
#     # Print metrics
#     print(f"\nEpoch {epoch+1} Summary:")
#     print(f"  Train Loss: {train_loss:.4f}")
#     print(f"  Val Loss: {val_loss:.4f}")
#     print(f"  Val Accuracy: {val_acc:.4f}")
#     print(f"  Learning Rate: {optimizer.param_groups[0]['lr']:.6f}")
    
#     # Save best model
#     if val_loss < best_val_loss:
#         best_val_loss = val_loss
#         best_epoch = epoch + 1
#         model_path = os.path.join(MODEL_SAVE_DIR, f'{BACKBONE}_best.pth')
#         torch.save({
#             'epoch': epoch,
#             'model_state_dict': model.state_dict(),
#             'optimizer_state_dict': optimizer.state_dict(),
#             'val_loss': val_loss,
#             'val_acc': val_acc,
#         }, model_path)
#         print(f"  ✓ Best model saved! (Val Loss: {val_loss:.4f})")

# print(f"\n{'='*60}")
# print(f"Training Complete!")
# print(f"Best Epoch: {best_epoch}")
# print(f"Best Val Loss: {best_val_loss:.4f}")
# print(f"{'='*60}\n")

## Load Best Model and Evaluate

In [9]:
# Safe and compatible checkpoint loading for PyTorch >= 2.6
import os
import torch
from torch.serialization import add_safe_globals, safe_globals

print("Loading best model for evaluation...")
model_path = os.path.join(MODEL_SAVE_DIR, f"{BACKBONE}_best.pth")

# 1) Prefer safe loading with weights_only=True, but allow numpy scalar class
add_safe_globals(["numpy._core.multiarray.scalar"])  # allowlisted for trusted checkpoint

checkpoint = None
try:
    # Use the safe globals context for extra safety
    with safe_globals(["numpy._core.multiarray.scalar"]):
        checkpoint = torch.load(model_path, map_location=device, weights_only=True)
    print("Checkpoint loaded with weights_only=True.")
except Exception as e:
    print(f"Safe weights-only load failed: {e}")
    print("Falling back to weights_only=False (only do this if the checkpoint is trusted).")
    # 2) Fallback for older checkpoints saved with arbitrary pickled objects
    checkpoint = torch.load(model_path, map_location=device, weights_only=False)
    print("Checkpoint loaded with weights_only=False.")

# Support both plain state_dict and wrapped dict formats
state_dict = checkpoint
if isinstance(checkpoint, dict) and "model_state_dict" in checkpoint:
    state_dict = checkpoint["model_state_dict"]

model.load_state_dict(state_dict)
model.eval()
print("Model ready for evaluation.")

Loading best model for evaluation...
Safe weights-only load failed: 'str' object has no attribute '__module__'
Falling back to weights_only=False (only do this if the checkpoint is trusted).
Checkpoint loaded with weights_only=False.
Model ready for evaluation.
Checkpoint loaded with weights_only=False.
Model ready for evaluation.


## Per-Attribute Metrics Calculation

In [10]:
# Get predictions on validation set
print("\nCalculating per-attribute metrics...")
all_preds = []
all_probs = []
all_labels = []

with torch.no_grad():
    for images, labels in tqdm(val_loader, desc='Predicting'):
        images = images.to(device)
        outputs = model(images)
        
        probs = torch.sigmoid(outputs)
        preds = probs > 0.5
        
        all_probs.append(probs.cpu().numpy())
        all_preds.append(preds.cpu().numpy())
        all_labels.append(labels.cpu().numpy())

# Concatenate all batches
all_probs = np.vstack(all_probs)
all_preds = np.vstack(all_preds)
all_labels = np.vstack(all_labels)

print(f"Predictions shape: {all_preds.shape}")
print(f"Labels shape: {all_labels.shape}")


Calculating per-attribute metrics...


Predicting: 100%|██████████| 603/603 [00:36<00:00, 16.57it/s]

Predictions shape: (6029, 33)
Labels shape: (6029, 33)





In [11]:
# Calculate metrics for each attribute
attribute_names = train_dataset.attributes
results = []

print("\nCalculating metrics for each attribute...")
for i, attr_name in enumerate(tqdm(attribute_names, desc='Processing attributes')):
    y_true = all_labels[:, i]
    y_pred = all_preds[:, i]
    y_prob = all_probs[:, i]
    
    # Skip if only one class present
    if len(np.unique(y_true)) < 2:
        print(f"Warning: Attribute '{attr_name}' has only one class in validation set")
        continue
    
    # Calculate metrics
    try:
        auc = roc_auc_score(y_true, y_prob)
    except:
        auc = 0.0
    
    accuracy = accuracy_score(y_true, y_pred)
    
    # F1 scores
    macro_f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
    micro_f1 = f1_score(y_true, y_pred, average='micro', zero_division=0)
    
    # Precision and Recall
    precision = precision_score(y_true, y_pred, average='binary', zero_division=0)
    recall = recall_score(y_true, y_pred, average='binary', zero_division=0)
    
    results.append({
        'attribute': attr_name,
        'auc': auc,
        'accuracy': accuracy,
        'macro_f1': macro_f1,
        'micro_f1': micro_f1,
        'precision': precision,
        'recall': recall
    })

# Create results DataFrame
results_df = pd.DataFrame(results)

# Calculate mean metrics
mean_metrics = results_df[['auc', 'accuracy', 'macro_f1', 'micro_f1', 'precision', 'recall']].mean()

print("\n" + "="*60)
print("OVERALL METRICS (Mean across all attributes)")
print("="*60)
for metric, value in mean_metrics.items():
    print(f"{metric.upper():20s}: {value:.4f}")
print("="*60)


Calculating metrics for each attribute...


Processing attributes: 100%|██████████| 33/33 [00:00<00:00, 152.09it/s]


OVERALL METRICS (Mean across all attributes)
AUC                 : 0.9652
ACCURACY            : 0.9379
MACRO_F1            : 0.8720
MICRO_F1            : 0.9379
PRECISION           : 0.8231
RECALL              : 0.8128





## Save Results

In [12]:
# Save results CSV
csv_filename = f'{BACKBONE}_metrics.csv'
csv_path = os.path.join(RESULTS_DIR, csv_filename)
results_df.to_csv(csv_path, index=False)

print(f"\n✓ Results saved to: {csv_path}")
print(f"\nResults preview:")
print(results_df.head(10))

# Sort by F1 score and display top/bottom performing attributes
print("\n" + "="*60)
print("TOP 10 ATTRIBUTES (by Macro F1)")
print("="*60)
top_attrs = results_df.nlargest(10, 'macro_f1')[['attribute', 'macro_f1', 'accuracy', 'auc']]
print(top_attrs.to_string(index=False))

print("\n" + "="*60)
print("BOTTOM 10 ATTRIBUTES (by Macro F1)")
print("="*60)
bottom_attrs = results_df.nsmallest(10, 'macro_f1')[['attribute', 'macro_f1', 'accuracy', 'auc']]
print(bottom_attrs.to_string(index=False))


✓ Results saved to: ./Results\efficientnet_b0_metrics.csv

Results preview:
            attribute       auc  accuracy  macro_f1  micro_f1  precision  \
0          attractive  0.954082  0.884060  0.884060  0.884060   0.874137   
1        blurry_image  0.974979  0.967822  0.856349  0.967822   0.696809   
2       sharp_jawline  0.908104  0.849726  0.812892  0.849726   0.733373   
3     high_cheekbones  0.983949  0.937303  0.931642  0.937303   0.906481   
4             smiling  0.963976  0.899320  0.899124  0.899320   0.902977   
5                bald  0.981734  0.977774  0.878646  0.977774   0.753378   
6  receeding_hairline  0.962267  0.957373  0.804328  0.957373   0.658683   
7           long_hair  0.984594  0.941450  0.934948  0.941450   0.904945   
8          curly_hair  0.942071  0.991707  0.805596  0.991707   0.677966   
9           grey_hair  0.991416  0.978769  0.905466  0.978769   0.804348   

     recall  
0  0.893817  
1  0.766082  
2  0.726409  
3  0.917526  
4  0.886520  
5 

## Training Summary

In [13]:
# Final summary
print("\n" + "="*60)
print("TRAINING COMPLETE - SUMMARY")
print("="*60)
print(f"Backbone Model: {BACKBONE}")
print(f"Total Parameters: {total_params:,}")
print(f"Training Samples: {len(train_dataset)}")
print(f"Validation Samples: {len(val_dataset)}")
print(f"Number of Attributes: {num_attributes}")
print(f"Number of Epochs: {NUM_EPOCHS}")
print(f"Best Epoch: {3}")
print(f"\nModel saved at: {model_path}")
print(f"Results saved at: {csv_path}")
print("\n" + "="*60)
print("MEAN METRICS")
print("="*60)
for metric, value in mean_metrics.items():
    print(f"{metric.upper():20s}: {value:.4f}")
print("="*60)

# Display attribute distribution
print("\n" + "="*60)
print("ATTRIBUTE STATISTICS")
print("="*60)
print(f"Total attributes evaluated: {len(results_df)}")
print(f"\nMetrics Range:")
print(f"  AUC: {results_df['auc'].min():.4f} - {results_df['auc'].max():.4f}")
print(f"  Accuracy: {results_df['accuracy'].min():.4f} - {results_df['accuracy'].max():.4f}")
print(f"  Macro F1: {results_df['macro_f1'].min():.4f} - {results_df['macro_f1'].max():.4f}")
print("="*60)


TRAINING COMPLETE - SUMMARY
Backbone Model: efficientnet_b0
Total Parameters: 4,680,349
Training Samples: 46897
Validation Samples: 6029
Number of Attributes: 33
Number of Epochs: 3
Best Epoch: 3

Model saved at: ./Models\efficientnet_b0_best.pth
Results saved at: ./Results\efficientnet_b0_metrics.csv

MEAN METRICS
AUC                 : 0.9652
ACCURACY            : 0.9379
MACRO_F1            : 0.8720
MICRO_F1            : 0.9379
PRECISION           : 0.8231
RECALL              : 0.8128

ATTRIBUTE STATISTICS
Total attributes evaluated: 33

Metrics Range:
  AUC: 0.9017 - 0.9967
  Accuracy: 0.8295 - 0.9917
  Macro F1: 0.7385 - 0.9768
