In [3]:
import torch
import psutil
import os
import sys
sys.path.append(os.path.abspath(".."))
import torch
from models.change_classifier import ChangeClassifier

In [6]:
def calculate_memory_correct(
    batch_size: int = 24,
    image_size: int = 256,
    *,
    efficientnet_params: int = 19_000_000,
    custom_params: int = 285_803,
    activations_mb_per_sample: float = 15.0,
    optimizer_state_factor: float = 2.0,
) -> float:
    """
    Estimate training memory (in MB) for the backbone plus custom layers.

    The estimate is a back-of-the-envelope sum of five float32 components:
    model parameters, one batch of input data (two RGB images and one
    single-channel mask per sample), stored activations, gradients and
    optimizer state. A breakdown is printed to stdout.

    Args:
        batch_size: Number of samples per batch (must be >= 0).
        image_size: Side length in pixels of the square inputs (must be >= 0).
        efficientnet_params: Approximate backbone parameter count.
        custom_params: Parameter count of the layers added on top.
        activations_mb_per_sample: Assumed activation footprint, in MB, of one
            sample at the 256-pixel reference resolution. This is a rough
            heuristic, not a measured value.
        optimizer_state_factor: Optimizer state size as a multiple of the
            parameter memory (2 = two extra values kept per parameter).

    Returns:
        The total estimated memory in MB.

    Raises:
        ValueError: If ``batch_size`` or ``image_size`` is negative.
    """
    if batch_size < 0 or image_size < 0:
        raise ValueError(
            f"batch_size and image_size must be non-negative, "
            f"got batch_size={batch_size}, image_size={image_size}"
        )

    print("=== CORRECT MEMORY CALCULATION ===")

    bytes_per_float = 4  # float32
    bytes_per_mb = 1024 * 1024
    # Resolution at which activations_mb_per_sample is calibrated (the
    # feature-map sizes listed further down are those of a 256x256 input).
    reference_image_size = 256

    # 1. Total parameters: backbone + custom layers
    total_params = efficientnet_params + custom_params

    print(f"EfficientNet-B4 parameters: {efficientnet_params:,}")
    print(f"Your custom layers: {custom_params:,}")
    print(f"TOTAL parameters: {total_params:,}")

    # 2. Static components that depend only on the parameter count
    params_memory_mb = (total_params * bytes_per_float) / bytes_per_mb
    gradients_memory_mb = params_memory_mb  # one gradient value per parameter
    # Optimizer state, by default 2x the parameter memory
    # (original note: "FAdam uses 2x parameters").
    optimizer_memory_mb = optimizer_state_factor * params_memory_mb

    # 3. Input batch: two 3-channel images plus one 1-channel mask per sample
    single_image_memory = (image_size * image_size * 3 * bytes_per_float) / bytes_per_mb
    single_mask_memory = (image_size * image_size * 1 * bytes_per_float) / bytes_per_mb
    single_sample_memory = (2 * single_image_memory) + single_mask_memory
    input_data_memory_mb = batch_size * single_sample_memory

    # 4. Activations.
    # Approximate float32 feature-map sizes for ONE sample at 256x256:
    #   128 x 128 x  48  -> 3.00 MB
    #    64 x  64 x  64  -> 1.00 MB
    #    32 x  32 x 112  -> 0.44 MB
    #    16 x  16 x 192  -> 0.19 MB
    #     8 x   8 x 320  -> 0.08 MB
    # That is ~5 MB per sample; the default of 15 MB per sample applies a ~3x
    # allowance for the intermediate tensors retained for the backward pass.
    # Feature maps grow with the input area, so the per-sample figure is
    # scaled by (image_size / 256)^2 rather than being treated as constant.
    activation_scale = (image_size / reference_image_size) ** 2
    activations_memory_mb = batch_size * activations_mb_per_sample * activation_scale

    # 5. Total memory usage
    total_memory_mb = (
        params_memory_mb +
        input_data_memory_mb +
        activations_memory_mb +
        gradients_memory_mb +
        optimizer_memory_mb
    )

    # 6. Print results
    print(f"\n--- CORRECT Memory Breakdown for Batch Size {batch_size} ---")
    print(f"Model parameters: {params_memory_mb:.2f} MB")
    print(f"Input data: {input_data_memory_mb:.2f} MB")
    print(f"Activations (EfficientNet): {activations_memory_mb:.2f} MB")
    print(f"Gradients: {gradients_memory_mb:.2f} MB")
    print(f"Optimizer states: {optimizer_memory_mb:.2f} MB")
    print(f"TOTAL ESTIMATED: {total_memory_mb:.2f} MB")
    print(f"TOTAL ESTIMATED GB: {total_memory_mb/1024:.2f} GB")

    return total_memory_mb

# Print the memory estimate for a batch of 24 samples at 256x256.
# The guard only fires when this code runs as the top-level module.
if __name__ == "__main__":
    calculate_memory_correct(image_size=256, batch_size=24)

=== CORRECT MEMORY CALCULATION ===
EfficientNet-B4 parameters: 19,000,000
Your custom layers: 285,803
TOTAL parameters: 19,285,803

--- CORRECT Memory Breakdown for Batch Size 24 ---
Model parameters: 73.57 MB
Input data: 42.00 MB
Activations (EfficientNet): 360.00 MB
Gradients: 73.57 MB
Optimizer states: 147.14 MB
TOTAL ESTIMATED: 696.28 MB
TOTAL ESTIMATED GB: 0.68 GB
