In [1]:
# ============================================================================
# Cell 1: Setup
# ============================================================================
import sys
import os
project_root = os.path.abspath('..')
sys.path.insert(0, project_root)

import torch
import gc
from src.models import LeNet5, ResNet50, DenseNet121, VisionTransformer

print("‚úÖ Imports successful!")

# Check GPU
print("\n" + "=" * 70)
print("üîç DEVICE INFORMATION")
print("=" * 70)
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU count: {torch.cuda.device_count()}")
    print(f"GPU name: {torch.cuda.get_device_name(0)}")
    print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
else:
    print("‚ö†Ô∏è  Running on CPU")

print("=" * 70)

‚úÖ Imports successful!

üîç DEVICE INFORMATION
PyTorch version: 2.8.0+cu129
CUDA available: True
CUDA version: 12.9
GPU count: 2
GPU name: NVIDIA GeForce RTX 5070 Ti
GPU memory: 15.9 GB


In [2]:
# ============================================================================
# Cell 2: Helper functions
# ============================================================================
def test_model(model_class, model_name, num_classes=4, batch_size=2):
    """
    Test a model with dummy input
    
    Args:
        model_class: Model class to test
        model_name: Name for display
        num_classes: Number of output classes
        batch_size: Batch size for testing
    """
    print("\n" + "=" * 70)
    print(f"{model_name}")
    print("=" * 70)
    
    # Create model
    if 'pretrained' in model_class.__init__.__code__.co_varnames:
        model = model_class(num_classes=num_classes, pretrained=False)
    else:
        model = model_class(num_classes=num_classes)
    
    # Test input
    dummy_input = torch.randn(batch_size, 3, 224, 224)
    
    # CPU forward pass
    print("\nüìä Model Info:")
    try:
        with torch.no_grad():
            output = model(dummy_input)
        
        total_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        
        print(f"  Input shape:  {dummy_input.shape}")
        print(f"  Output shape: {output.shape}")
        print(f"  Total params: {total_params:,}")
        print(f"  Trainable:    {trainable_params:,}")
        print(f"  Model size:   ~{total_params * 4 / 1024**2:.1f} MB")
        
        print(f"\n‚úÖ CPU test passed")
    except Exception as e:
        print(f"\n‚ùå CPU test failed: {e}")
        return False
    
    # GPU test if available
    if torch.cuda.is_available():
        try:
            # Clear cache first
            torch.cuda.empty_cache()
            
            # Move to GPU
            model_gpu = model.cuda()
            input_gpu = dummy_input.cuda()
            
            # Warmup
            with torch.no_grad():
                _ = model_gpu(input_gpu)
            
            torch.cuda.synchronize()
            
            # Measure memory
            torch.cuda.reset_peak_memory_stats()
            
            with torch.no_grad():
                output_gpu = model_gpu(input_gpu)
            
            torch.cuda.synchronize()
            
            peak_memory = torch.cuda.max_memory_allocated(0) / 1024**2
            
            print(f"\nüî• GPU test:")
            print(f"  Peak memory: {peak_memory:.1f} MB")
            print(f"  Per sample:  {peak_memory / batch_size:.1f} MB")
            print(f"  ‚úÖ GPU test passed")
            
            # Cleanup
            del model_gpu, input_gpu, output_gpu
            torch.cuda.empty_cache()
            
        except RuntimeError as e:
            print(f"\n‚ö†Ô∏è  GPU test failed: {e}")
            if "out of memory" in str(e):
                print(f"   ‚Üí Try reducing batch size")
            torch.cuda.empty_cache()
    
    print("=" * 70)
    
    # Cleanup
    del model, dummy_input
    gc.collect()
    
    return True

print("‚úÖ Helper functions defined")

‚úÖ Helper functions defined


In [3]:
# ============================================================================
# Cell 3: Test LeNet-5
# ============================================================================
test_model(LeNet5, "1. LeNet-5 (1998)", num_classes=4, batch_size=4)


1. LeNet-5 (1998)

üìä Model Info:
  Input shape:  torch.Size([4, 3, 224, 224])
  Output shape: torch.Size([4, 4])
  Total params: 5,612,216
  Trainable:    5,612,216
  Model size:   ~21.4 MB

‚úÖ CPU test passed

üî• GPU test:
  Peak memory: 42.1 MB
  Per sample:  10.5 MB
  ‚úÖ GPU test passed


True

In [4]:
# ============================================================================
# Cell 4: Test ResNet-50
# ============================================================================
test_model(ResNet50, "2. ResNet-50 (2015)", num_classes=4, batch_size=4)


2. ResNet-50 (2015)

üìä Model Info:




  Input shape:  torch.Size([4, 3, 224, 224])
  Output shape: torch.Size([4, 4])
  Total params: 23,516,228
  Trainable:    23,516,228
  Model size:   ~89.7 MB

‚úÖ CPU test passed

üî• GPU test:
  Peak memory: 141.6 MB
  Per sample:  35.4 MB
  ‚úÖ GPU test passed


True

In [5]:
# ============================================================================
# Cell 5: Test DenseNet-121
# ============================================================================
test_model(DenseNet121, "3. DenseNet-121 (2017)", num_classes=4, batch_size=4)


3. DenseNet-121 (2017)

üìä Model Info:
  Input shape:  torch.Size([4, 3, 224, 224])
  Output shape: torch.Size([4, 4])
  Total params: 6,957,956
  Trainable:    6,957,956
  Model size:   ~26.5 MB

‚úÖ CPU test passed

üî• GPU test:
  Peak memory: 76.9 MB
  Per sample:  19.2 MB
  ‚úÖ GPU test passed


True

In [6]:
# ============================================================================
# Cell 6: Test Vision Transformer
# ============================================================================
# ViT requires smaller batch due to memory
test_model(VisionTransformer, "4. Vision Transformer (2020)", num_classes=4, batch_size=2)


4. Vision Transformer (2020)

üìä Model Info:
  Input shape:  torch.Size([2, 3, 224, 224])
  Output shape: torch.Size([2, 4])
  Total params: 85,801,732
  Trainable:    85,801,732
  Model size:   ~327.3 MB

‚úÖ CPU test passed

üî• GPU test:
  Peak memory: 355.3 MB
  Per sample:  177.6 MB
  ‚úÖ GPU test passed


True

In [7]:
# ============================================================================
# Cell 7: Model Comparison Summary
# ============================================================================
import pandas as pd

print("\n" + "=" * 70)
print("üìä MODEL COMPARISON SUMMARY")
print("=" * 70)

models_info = {
    'LeNet-5': (LeNet5, 1998, False),
    'ResNet-50': (ResNet50, 2015, True),
    'DenseNet-121': (DenseNet121, 2017, True),
    'ViT-Base': (VisionTransformer, 2020, True),
}

comparison_data = []

for name, (model_class, year, pretrained) in models_info.items():
    try:
        # Create model without pretrained weights for fair comparison
        if pretrained:
            model = model_class(num_classes=4, pretrained=False)
        else:
            model = model_class(num_classes=4)
        
        total_params = sum(p.numel() for p in model.parameters())
        model_size = total_params * 4 / 1024**2  # MB
        
        comparison_data.append({
            'Model': name,
            'Year': year,
            'Parameters': f"{total_params:,}",
            'Size (MB)': f"{model_size:.1f}",
            'Pretrained': '‚úÖ' if pretrained else '‚ùå'
        })
        
        del model
        gc.collect()
        
    except Exception as e:
        print(f"‚ö†Ô∏è  Error with {name}: {e}")

# Create DataFrame
df = pd.DataFrame(comparison_data)
print("\n" + df.to_string(index=False))

print("\n" + "=" * 70)


üìä MODEL COMPARISON SUMMARY

       Model  Year Parameters Size (MB) Pretrained
     LeNet-5  1998  5,612,216      21.4          ‚ùå
   ResNet-50  2015 23,516,228      89.7          ‚úÖ
DenseNet-121  2017  6,957,956      26.5          ‚úÖ
    ViT-Base  2020 85,801,732     327.3          ‚úÖ



In [8]:
# ============================================================================
# Cell 8: Architecture Visualization
# ============================================================================
print("\n" + "=" * 70)
print("üèóÔ∏è  MODEL ARCHITECTURES")
print("=" * 70)

print("\n1Ô∏è‚É£  LeNet-5:")
model = LeNet5(num_classes=4)
print(model)

print("\n" + "-" * 70)
print("\n2Ô∏è‚É£  ResNet-50 (simplified):")
print("""
ResNet50(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
    (bn1): BatchNorm2d(64)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1)
    (layer1): Bottleneck blocks x3
    (layer2): Bottleneck blocks x4
    (layer3): Bottleneck blocks x6
    (layer4): Bottleneck blocks x3
    (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
    (fc): Linear(in_features=2048, out_features=4)
  )
)
""")

print("-" * 70)
print("\n3Ô∏è‚É£  DenseNet-121 (simplified):")
print("""
DenseNet121(
  (model): DenseNet(
    (features): Sequential(
      (conv0): Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
      (norm0): BatchNorm2d(64)
      (relu0): ReLU(inplace=True)
      (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1)
      (denseblock1-4): Dense blocks with growth_rate=32
      (transition1-3): Transition layers
    )
    (classifier): Linear(in_features=1024, out_features=4)
  )
)
""")

print("-" * 70)
print("\n4Ô∏è‚É£  Vision Transformer (simplified):")
print("""
VisionTransformer(
  (model): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=16, stride=16)
    )
    (cls_token): Parameter[1, 1, 768]
    (pos_embed): Parameter[1, 197, 768]
    (blocks): ModuleList(
      (0-11): 12 x TransformerBlock(
        (attn): MultiheadAttention
        (mlp): MLP
        (norm1, norm2): LayerNorm
      )
    )
    (head): Linear(in_features=768, out_features=4)
  )
)
""")

print("=" * 70)

del model
gc.collect()


üèóÔ∏è  MODEL ARCHITECTURES

1Ô∏è‚É£  LeNet-5:
LeNet5(
  (features): Sequential(
    (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=46656, out_features=120, bias=True)
    (2): ReLU()
    (3): Linear(in_features=120, out_features=84, bias=True)
    (4): ReLU()
    (5): Linear(in_features=84, out_features=4, bias=True)
  )
)

----------------------------------------------------------------------

2Ô∏è‚É£  ResNet-50 (simplified):

ResNet50(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
    (bn1): BatchNorm2d(64)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_s

0

In [9]:
# ============================================================================
# Cell 9: Memory Requirements Estimation
# ============================================================================
print("\n" + "=" * 70)
print("üíæ MEMORY REQUIREMENTS ESTIMATION")
print("=" * 70)

batch_sizes = [8, 16, 32, 64]
models_to_test = {
    'LeNet-5': LeNet5,
    'ResNet-50': ResNet50,
    'DenseNet-121': DenseNet121,
    'ViT-Base': VisionTransformer
}

print("\nEstimated GPU memory per batch (MB):")
print(f"{'Model':<20} {'Batch=8':<12} {'Batch=16':<12} {'Batch=32':<12} {'Batch=64':<12}")
print("-" * 70)

for model_name, model_class in models_to_test.items():
    if torch.cuda.is_available():
        try:
            memory_usage = []
            
            for bs in batch_sizes:
                torch.cuda.empty_cache()
                torch.cuda.reset_peak_memory_stats()
                
                # Test with smaller batch if needed
                test_bs = min(bs, 4)
                
                if 'pretrained' in model_class.__init__.__code__.co_varnames:
                    model = model_class(num_classes=4, pretrained=False).cuda()
                else:
                    model = model_class(num_classes=4).cuda()
                
                dummy = torch.randn(test_bs, 3, 224, 224).cuda()
                
                with torch.no_grad():
                    _ = model(dummy)
                
                torch.cuda.synchronize()
                
                peak = torch.cuda.max_memory_allocated(0) / 1024**2
                # Extrapolate for actual batch size
                estimated = peak * (bs / test_bs)
                memory_usage.append(f"{estimated:.0f}")
                
                del model, dummy
                torch.cuda.empty_cache()
            
            print(f"{model_name:<20} {memory_usage[0]:<12} {memory_usage[1]:<12} {memory_usage[2]:<12} {memory_usage[3]:<12}")
            
        except Exception as e:
            print(f"{model_name:<20} Error: {str(e)[:40]}")
    else:
        print(f"{model_name:<20} GPU not available")

print("\n" + "=" * 70)
print("üí° Tips:")
print("  - Start with smaller batch sizes for larger models")
print("  - Use gradient accumulation if batch size is limited")
print("  - Monitor GPU memory during training")
print("=" * 70)


üíæ MEMORY REQUIREMENTS ESTIMATION

Estimated GPU memory per batch (MB):
Model                Batch=8      Batch=16     Batch=32     Batch=64    
----------------------------------------------------------------------
LeNet-5              84           168          336          673         




ResNet-50            284          569          1138         2276        
DenseNet-121         154          307          615          1230        
ViT-Base             732          1464         2928         5857        

üí° Tips:
  - Start with smaller batch sizes for larger models
  - Use gradient accumulation if batch size is limited
  - Monitor GPU memory during training


In [10]:
# ============================================================================
# Cell 10: Speed Benchmark (Optional)
# ============================================================================
import time

if torch.cuda.is_available():
    print("\n" + "=" * 70)
    print("‚ö° SPEED BENCHMARK")
    print("=" * 70)
    
    batch_size = 16
    num_iterations = 50
    
    print(f"\nSettings: batch_size={batch_size}, iterations={num_iterations}")
    print(f"\n{'Model':<20} {'Time/batch (ms)':<20} {'Throughput (img/s)':<20}")
    print("-" * 70)
    
    for model_name, model_class in models_to_test.items():
        try:
            torch.cuda.empty_cache()
            
            if 'pretrained' in model_class.__init__.__code__.co_varnames:
                model = model_class(num_classes=4, pretrained=False).cuda()
            else:
                model = model_class(num_classes=4).cuda()
            
            model.eval()
            dummy = torch.randn(batch_size, 3, 224, 224).cuda()
            
            # Warmup
            with torch.no_grad():
                for _ in range(10):
                    _ = model(dummy)
            
            torch.cuda.synchronize()
            
            # Benchmark
            start = time.time()
            with torch.no_grad():
                for _ in range(num_iterations):
                    _ = model(dummy)
            
            torch.cuda.synchronize()
            elapsed = time.time() - start
            
            time_per_batch = (elapsed / num_iterations) * 1000  # ms
            throughput = (batch_size * num_iterations) / elapsed  # img/s
            
            print(f"{model_name:<20} {time_per_batch:<20.2f} {throughput:<20.1f}")
            
            del model, dummy
            torch.cuda.empty_cache()
            
        except Exception as e:
            print(f"{model_name:<20} Error: {str(e)[:40]}")
    
    print("=" * 70)
else:
    print("\n‚ö†Ô∏è  GPU not available - skipping speed benchmark")


‚ö° SPEED BENCHMARK

Settings: batch_size=16, iterations=50

Model                Time/batch (ms)      Throughput (img/s)  
----------------------------------------------------------------------
LeNet-5              1.41                 11312.6             
ResNet-50            9.81                 1630.4              
DenseNet-121         16.42                974.1               
ViT-Base             29.95                534.2               


In [11]:
# ============================================================================
# Cell 11: Final Summary
# ============================================================================
print("\n" + "=" * 70)
print("‚úÖ MODEL TESTING COMPLETE")
print("=" * 70)

print("\nüìù Summary:")
print("  ‚úÖ All 4 models tested successfully")
print("  ‚úÖ CPU forward pass verified")

if torch.cuda.is_available():
    print("  ‚úÖ GPU compatibility verified")
    print(f"  ‚úÖ GPU: {torch.cuda.get_device_name(0)}")
else:
    print("  ‚ö†Ô∏è  GPU not available (CPU only)")

print("\nüéØ Next steps:")
print("  1. Run 02b_balance_data.ipynb (if not done)")
print("  2. Run 03_test_dataloader.ipynb to verify data pipeline")
print("  3. Start training with 05_train_lenet.ipynb")

print("\n" + "=" * 70)

# Final cleanup
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    
print("üßπ Memory cleaned up")


‚úÖ MODEL TESTING COMPLETE

üìù Summary:
  ‚úÖ All 4 models tested successfully
  ‚úÖ CPU forward pass verified
  ‚úÖ GPU compatibility verified
  ‚úÖ GPU: NVIDIA GeForce RTX 5070 Ti

üéØ Next steps:
  1. Run 02b_balance_data.ipynb (if not done)
  2. Run 03_test_dataloader.ipynb to verify data pipeline
  3. Start training with 05_train_lenet.ipynb

üßπ Memory cleaned up
