In [None]:
# Core PyTorch libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# Computer vision utilities
import torchvision
import torchvision.transforms as transforms
from torchvision import models

# Data handling and visualization
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
import copy

# Machine learning utilities
from sklearn.metrics import confusion_matrix, classification_report
import warnings
# Silence library warnings so cell output stays readable.
# NOTE(review): this blanket filter also hides deprecation warnings; narrow it
# with `category=` if those should stay visible.
warnings.filterwarnings('ignore')

# Seed NumPy and PyTorch so a fresh Restart & Run All starts from the same
# random state.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Configure matplotlib for high-quality plots.
# The style sheet is applied first because style.use('default') resets
# rcParams; setting the overrides afterwards keeps them in effect.
plt.style.use('default')
plt.rcParams['figure.dpi'] = 100
plt.rcParams['font.size'] = 10

print("✅ Libraries imported successfully!")
print(f"📦 PyTorch version: {torch.__version__}")
print(f"🖼️ Torchvision version: {torchvision.__version__}")
print(f"🔥 CUDA available: {torch.cuda.is_available()}")
# torch.backends.mps does not exist on older PyTorch builds; report False
# there instead of raising AttributeError.
mps_backend = getattr(torch.backends, 'mps', None)
mps_available = mps_backend is not None and mps_backend.is_available()
print(f"🍎 MPS available: {mps_available}")

# Check EfficientNet-B3 availability
try:
    # Called without arguments on purpose: the legacy `pretrained=` and the
    # newer `weights=` signatures both default to a randomly initialised model,
    # so this probe works across torchvision versions and downloads nothing.
    test_model = models.efficientnet_b3()
    print("✅ EfficientNet-B3 available!")
    del test_model  # Clean up
except Exception as e:
    print(f"❌ EfficientNet-B3 not available: {e}")
    print("💡 Please update PyTorch/torchvision to latest version")

# Memory optimization
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("🧹 GPU memory cleared")
    total_gpu_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"💾 GPU memory available: {total_gpu_bytes / 1024**3:.1f}GB")


In [None]:
# Define data transformations for EfficientNet-B3 (300×300 input)
# Geometry and normalisation values are named once so the training and
# validation pipelines cannot drift apart.
IMAGE_SIZE = 300    # B3 native resolution
RESIZE_SIZE = 320   # Slightly larger than IMAGE_SIZE to leave room for the random crop
# Per-channel statistics; presumably the standard ImageNet values that match
# the pre-trained backbone — verify if the weights are ever swapped.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, then random crop + horizontal flip as augmentation.
train_transforms = transforms.Compose([
    transforms.Resize((RESIZE_SIZE, RESIZE_SIZE)),
    transforms.RandomCrop(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD)
])

# Validation/test pipeline: deterministic direct resize, no augmentation.
val_transforms = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD)
])

# Load datasets
print("📁 Loading Flowers102 dataset...")
try:
    # Only the first constructor is allowed to download; the other two splits
    # are read from the same root directory.
    train_dataset = torchvision.datasets.Flowers102(
        root='./data',
        split='train',
        transform=train_transforms,
        download=True
    )

    val_dataset = torchvision.datasets.Flowers102(
        root='./data',
        split='val',
        transform=val_transforms,
        download=False
    )

    test_dataset = torchvision.datasets.Flowers102(
        root='./data',
        split='test',
        transform=val_transforms,
        download=False
    )
except Exception as e:
    print(f"❌ Error loading dataset: {e}")
    print("💡 Make sure you have internet connection for first download")
    # Re-raise so the notebook stops here with the real cause instead of
    # failing later with a NameError on the undefined dataset variables.
    raise
else:
    print("✅ Dataset loaded successfully!")
    print(f"📊 Training images: {len(train_dataset)}")
    print(f"📊 Validation images: {len(val_dataset)}")
    print(f"📊 Test images: {len(test_dataset)}")

# Create data loaders with adjusted batch size for B3
BATCH_SIZE = 24  # Reduced from 32 due to higher resolution

# Settings shared by all three loaders; only shuffling differs between them.
loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=2,
    pin_memory=torch.cuda.is_available(),
)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

print(f"🔄 Data loaders created with batch size: {BATCH_SIZE}")
print(f"📦 Training batches: {len(train_loader)}")
print(f"📦 Validation batches: {len(val_loader)}")
print(f"📦 Test batches: {len(test_loader)}")


def _input_batch_megabytes(batch_size, side, channels=3, bytes_per_value=4):
    """Return the size in MiB of one float32 input batch of square images.

    The element count is multiplied by ``bytes_per_value`` (4 for float32);
    without that factor the figure is a count of values, not megabytes.
    """
    return batch_size * channels * side * side * bytes_per_value / 1024**2


print(f"\n💾 Memory usage comparison:")
print(f"   B0 (224×224): {_input_batch_megabytes(32, 224):.1f}MB per batch")
print(f"   B3 (300×300): {_input_batch_megabytes(BATCH_SIZE, 300):.1f}MB per batch")


In [None]:
# Device configuration: prefer CUDA, then Apple MPS, then CPU.
# torch.backends.mps is looked up defensively because it does not exist on
# older PyTorch builds.
mps_backend = getattr(torch.backends, 'mps', None)
if torch.cuda.is_available():
    device = torch.device('cuda')
elif mps_backend is not None and mps_backend.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print(f"🔧 Using device: {device}")

# Initialize EfficientNet-B3 model
print("🏗️ Initializing EfficientNet-B3 model...")
if hasattr(models, 'EfficientNet_B3_Weights'):
    # Current torchvision API: select the pre-trained weights explicitly.
    model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.IMAGENET1K_V1)
else:
    # Older torchvision releases only know the legacy boolean flag.
    model = models.efficientnet_b3(pretrained=True)

# Print model information
classifier_in_features = model.classifier[1].in_features
print(f"📊 Model loaded with pre-trained ImageNet weights")
print(f"🔢 Original classifier input features: {classifier_in_features}")
print(f"🔢 Original classifier output classes: {model.classifier[1].out_features}")

# Replace classifier for 102 flower classes
NUM_CLASSES = 102
model.classifier = nn.Sequential(
    nn.Dropout(0.3),  # Higher dropout for B3 (0.3 vs 0.2 for B0)
    nn.Linear(classifier_in_features, NUM_CLASSES)
)

# Move model to device
model = model.to(device)

# Count parameters
def count_parameters(model, trainable_only=True):
    """Count the scalar parameters of ``model``.

    Args:
        model: Any object exposing ``parameters()`` that yields tensors
            (e.g. an ``nn.Module``).
        trainable_only: When True (the default, matching the original
            behaviour) only parameters with ``requires_grad=True`` are
            counted. Pass False to include frozen parameters as well.

    Returns:
        int: Sum of ``numel()`` over the selected parameters; 0 if there are none.
    """
    return sum(
        p.numel()
        for p in model.parameters()
        if p.requires_grad or not trainable_only
    )

# Parameter count of the freshly built model (reused by later cells)
total_params = count_parameters(model)

# Assemble the whole report first, then emit it with a single print call.
summary_lines = [
    # Size of the network
    f"\n📈 EfficientNet-B3 Architecture Summary:",
    f"   Total parameters: {total_params:,}",
    f"   Model size: ~{total_params * 4 / 1024 / 1024:.1f}MB",
    # Backbone vs. classification head
    f"\n🏗️ Model Structure:",
    f"   Features: {sum(p.numel() for p in model.features.parameters()):,} parameters",
    f"   Classifier: {sum(p.numel() for p in model.classifier.parameters()):,} parameters",
    # Reference figures for the other architectures in this series
    f"\n⚡ Model Comparison:",
    f"   ResNet18: ~11.7M params, ~85% expected accuracy",
    f"   ResNet50: ~25.6M params, ~88% expected accuracy",
    f"   EfficientNet-B0: ~5.3M params, ~90% expected accuracy",
    f"   EfficientNet-B3: ~12.2M params, ~92% expected accuracy",
    # Expected accuracy per million parameters
    f"\n📊 Efficiency Analysis:",
    f"   B0 efficiency: {90/5.3:.1f}% per M params",
    f"   B3 efficiency: {92/12.2:.1f}% per M params",
    f"   Trade-off: B3 has {12.2/5.3:.1f}× more params for +{92-90}% accuracy",
]

# The GPU memory estimate is only shown when a CUDA device is present.
if torch.cuda.is_available():
    summary_lines += [
        f"\n💾 Memory Usage Estimation:",
        f"   B0 (224×224, batch=32): ~1.5GB",
        f"   B3 (300×300, batch=24): ~2.5GB",
        f"   B3 requires {2.5/1.5:.1f}× more GPU memory",
    ]

print("\n".join(summary_lines))


In [None]:
# Hyperparameters for the EfficientNet-B3 run
NUM_EPOCHS = 50
LEARNING_RATE = 0.0008  # Slightly lower than B0 for stability
WEIGHT_DECAY = 0.01

# Phase 1: feature extraction — backbone frozen, classifier head trainable
print("🔒 Phase 1: Feature Extraction Setup")
print("   Freezing feature layers...")

# One pass over both sub-modules: features are frozen, the classifier stays trainable.
for submodule, is_trainable in ((model.features, False), (model.classifier, True)):
    for weight in submodule.parameters():
        weight.requires_grad = is_trainable

# How many scalars the Phase 1 optimizer will actually update
trainable_params_phase1 = sum(
    weight.numel() for weight in model.parameters() if weight.requires_grad
)
print(f"   Trainable parameters in Phase 1: {trainable_params_phase1:,}")

# Loss and Phase 1 optimizer (classifier parameters only)
criterion = nn.CrossEntropyLoss()
optimizer_phase1 = optim.AdamW(
    model.classifier.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

# Recap of Phase 1 and preview of Phase 2, emitted in one call
print("\n".join([
    f"✅ Phase 1 configuration complete",
    f"   Loss function: CrossEntropyLoss",
    f"   Optimizer: AdamW (lr={LEARNING_RATE}, weight_decay={WEIGHT_DECAY})",
    f"   Training epochs: 1-20",
    f"\n🔓 Phase 2: Fine-tuning Setup (will be activated at epoch 21)",
    f"   Will unfreeze all layers",
    f"   Will train entire model with same learning rate",
    f"   Training epochs: 21-50",
]))

# Per-epoch histories filled in by the training loop
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []
epoch_times = []

# What will be tracked and what results are anticipated
print("\n".join([
    f"\n📊 Training tracking initialized",
    f"   Metrics: loss, accuracy, training time",
    f"   Total epochs: {NUM_EPOCHS}",
    f"   Expected training time: ~20-25 minutes",
    f"   Expected Phase 1 accuracy: ~82%",
    f"   Expected Phase 2 accuracy: ~92%",
]))

# On CUDA machines: print the memory notes, then release cached GPU blocks
if torch.cuda.is_available():
    print("\n".join([
        f"\n💾 Memory Management:",
        f"   GPU memory cleared before training",
        f"   Reduced batch size to 24 for higher resolution",
        f"   Monitor memory usage during training",
    ]))
    torch.cuda.empty_cache()


In [None]:
def train_epoch(model, train_loader, criterion, optimizer, device):
    """Train ``model`` for one full pass over ``train_loader``.

    Args:
        model: Network to optimise; switched to training mode here.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable ``criterion(outputs, labels)``. The epoch
            loss is weighted by batch size, which assumes the criterion
            returns the mean over the batch (the ``nn.CrossEntropyLoss``
            default).
        optimizer: Optimizer whose ``zero_grad()``/``step()`` are called once
            per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple[float, float]: ``(epoch_loss, epoch_acc)`` — mean loss per
        sample and accuracy in percent. ``(0.0, 0.0)`` if the loader yields
        no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    # Progress bar
    progress_bar = tqdm(train_loader, desc="Training", leave=False)

    for inputs, labels in progress_bar:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Statistics. The batch loss is weighted by the batch size so that a
        # smaller final batch does not get the same weight as a full one.
        batch_size = labels.size(0)
        batch_loss = loss.item()
        loss_sum += batch_loss * batch_size
        _, predicted = torch.max(outputs.detach(), 1)
        total += batch_size
        correct += (predicted == labels).sum().item()

        # Update progress bar
        progress_bar.set_postfix({
            'Loss': f'{batch_loss:.4f}',
            'Acc': f'{100.*correct/total:.2f}%'
        })

    # Empty loader: nothing was seen, avoid dividing by zero.
    if total == 0:
        return 0.0, 0.0

    epoch_loss = loss_sum / total
    epoch_acc = 100. * correct / total

    return epoch_loss, epoch_acc

def evaluate(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without updating any weights.

    Args:
        model: Network to evaluate; switched to eval mode here.
        val_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable ``criterion(outputs, labels)``. The epoch
            loss is weighted by batch size, which assumes the criterion
            returns the mean over the batch (the ``nn.CrossEntropyLoss``
            default).
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple[float, float]: ``(epoch_loss, epoch_acc)`` — mean loss per
        sample and accuracy in percent. ``(0.0, 0.0)`` if the loader yields
        no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        progress_bar = tqdm(val_loader, desc="Validation", leave=False)

        for inputs, labels in progress_bar:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Weight the batch loss by the batch size so a smaller final
            # batch does not count as much as a full one.
            batch_size = labels.size(0)
            batch_loss = loss.item()
            loss_sum += batch_loss * batch_size
            _, predicted = torch.max(outputs, 1)
            total += batch_size
            correct += (predicted == labels).sum().item()

            # Update progress bar
            progress_bar.set_postfix({
                'Loss': f'{batch_loss:.4f}',
                'Acc': f'{100.*correct/total:.2f}%'
            })

    # Empty loader: nothing was seen, avoid dividing by zero.
    if total == 0:
        return 0.0, 0.0

    epoch_loss = loss_sum / total
    epoch_acc = 100. * correct / total

    return epoch_loss, epoch_acc

# Confirmation banner listing the helpers defined in this cell
for banner_line in (
    "✅ Training and evaluation functions defined",
    "   train_epoch(): Trains model for one epoch with progress tracking",
    "   evaluate(): Evaluates model on validation set with accuracy calculation",
    "   Memory-optimized for EfficientNet-B3's higher resolution",
):
    print(banner_line)


In [None]:
# Announce the EfficientNet-B3 run and its targets
print("\n".join([
    "🚀 Starting EfficientNet-B3 Transfer Learning Training...",
    f"📊 Total epochs: {NUM_EPOCHS}",
    f"⏱️ Expected training time: ~20-25 minutes",
    f"🎯 Target: ~92% accuracy (best yet!)",
    "\n" + "="*60,
]))

# Loop state: begin in Phase 1 with the classifier-only optimizer
phase = 1
current_optimizer = optimizer_phase1
best_val_acc = 0.0

# Main training loop
PHASE1_EPOCHS = 20  # classifier-only epochs before the backbone is unfrozen

# Empty the histories in place so re-running this cell does not append to the
# metrics of a previous run (the plotting cell expects one entry per epoch).
for history in (train_losses, val_losses, train_accuracies, val_accuracies, epoch_times):
    history.clear()

# Snapshot the starting weights so best_model_state is always defined, even if
# validation accuracy never rises above the initial best_val_acc.
best_model_state = copy.deepcopy(model.state_dict())

for epoch in range(1, NUM_EPOCHS + 1):
    # Phase transition on the first epoch after Phase 1
    if epoch == PHASE1_EPOCHS + 1:
        print(f"\n🔄 Switching to Phase 2: Fine-tuning")
        print("   Unfreezing all layers...")

        # Unfreeze all layers
        for param in model.parameters():
            param.requires_grad = True

        # Create new optimizer for all parameters; the Phase 1 optimizer only
        # holds the classifier parameters.
        current_optimizer = optim.AdamW(model.parameters(),
                                      lr=LEARNING_RATE,
                                      weight_decay=WEIGHT_DECAY)

        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        print(f"   Trainable parameters: {trainable_params:,}")
        phase = 2

        # Memory management for Phase 2
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            print("   GPU memory cleared for Phase 2")

    # Training (the timer covers both the training and the validation pass)
    epoch_start = time.time()
    train_loss, train_acc = train_epoch(model, train_loader, criterion, current_optimizer, device)
    val_loss, val_acc = evaluate(model, val_loader, criterion, device)
    epoch_time = time.time() - epoch_start

    # Record metrics
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accuracies.append(train_acc)
    val_accuracies.append(val_acc)
    epoch_times.append(epoch_time)

    # Save best model (deep copy so later updates do not alter the snapshot)
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model_state = copy.deepcopy(model.state_dict())

    # Print progress with memory info
    memory_info = ""
    if torch.cuda.is_available():
        memory_used = torch.cuda.memory_allocated() / 1024**3
        memory_info = f" | GPU: {memory_used:.1f}GB"

    print(f"Epoch {epoch:2d}/{NUM_EPOCHS} [Phase {phase}] | "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}% | "
          f"Time: {epoch_time:.1f}s{memory_info}")

    # Phase milestones
    if epoch == PHASE1_EPOCHS:
        print(f"\n✅ Phase 1 Complete! Best validation accuracy: {best_val_acc:.2f}%")
        print(f"📈 Expected ~82%, Achieved: {best_val_acc:.2f}%")
        print(f"🔄 Preparing for Phase 2...")

# Reference figures for the B0 comparison below.
B0_BASELINE_ACC = 90        # B0 accuracy (%) quoted elsewhere in this notebook
B0_TRAINING_SECONDS = 600   # presumably the B0 run's training time (10 min) — TODO confirm
total_training_seconds = sum(epoch_times)

print(f"\n🎉 EfficientNet-B3 Training Complete!")
print(f"⭐ Best validation accuracy: {best_val_acc:.2f}%")
print(f"📊 Expected ~92%, Achieved: {best_val_acc:.2f}%")
print(f"⏱️ Total training time: {total_training_seconds:.1f} seconds ({total_training_seconds/60:.1f} minutes)")

# Performance comparison
print(f"\n📈 Performance Comparison:")
# The `+` format flag prints the sign itself, so a regression shows as "-3.0%"
# rather than the malformed "+-3.0%".
print(f"   B0 → B3 improvement: {best_val_acc - B0_BASELINE_ACC:+.1f}% accuracy")
print(f"   Parameter cost: {12.2/5.3:.1f}× more parameters")
print(f"   Training time cost: {total_training_seconds/B0_TRAINING_SECONDS:.1f}× longer (estimated)")

# Load best model: restore the weights from the best validation epoch
model.load_state_dict(best_model_state)
print("✅ Best model loaded for evaluation")


In [None]:
# Plot training results for EfficientNet-B3
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
# One x value per recorded epoch, so the curves still plot after a partial run
epochs = range(1, len(train_losses) + 1)
PHASE_BOUNDARY_EPOCH = 20  # last epoch of Phase 1 (feature extraction)


def _finish_curve_axis(ax, title, ylabel):
    """Add the phase-transition marker, title, labels, legend and grid to ``ax``.

    Must be called after the curves are plotted so the legend lists the
    curves first and the transition marker last.
    """
    ax.axvline(x=PHASE_BOUNDARY_EPOCH, color='gray', linestyle='--', alpha=0.7,
               label='Phase Transition')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True, alpha=0.3)


# Loss curves
axes[0, 0].plot(epochs, train_losses, 'b-', label='Training Loss', linewidth=2)
axes[0, 0].plot(epochs, val_losses, 'r-', label='Validation Loss', linewidth=2)
_finish_curve_axis(axes[0, 0], 'Training and Validation Loss (EfficientNet-B3)', 'Loss')

# Accuracy curves
axes[0, 1].plot(epochs, train_accuracies, 'b-', label='Training Accuracy', linewidth=2)
axes[0, 1].plot(epochs, val_accuracies, 'r-', label='Validation Accuracy', linewidth=2)
_finish_curve_axis(axes[0, 1], 'Training and Validation Accuracy (EfficientNet-B3)', 'Accuracy (%)')

# Training time per epoch
axes[1, 0].plot(epochs, epoch_times, 'g-', linewidth=2)
_finish_curve_axis(axes[1, 0], 'Training Time per Epoch (EfficientNet-B3)', 'Time (seconds)')

# Model comparison - efficiency analysis
# NOTE: the list is called `model_names` and the loop variable `name` on
# purpose. Naming them `models` / `model` would shadow the torchvision
# `models` module and replace the trained network with a string.
model_names = ['ResNet18', 'ResNet50', 'EfficientNet-B0', 'EfficientNet-B3']
params = [11.7, 25.6, 5.3, 12.2]  # Million parameters
accuracies = [85, 88, 90, best_val_acc]  # Best validation accuracies
colors = ['skyblue', 'lightcoral', 'lightgreen', 'gold']

# Create scatter plot: parameters vs accuracy
axes[1, 1].scatter(params, accuracies, c=colors, s=200, alpha=0.7, edgecolors='black')
axes[1, 1].set_title('Model Efficiency: Parameters vs Accuracy', fontsize=14, fontweight='bold')
axes[1, 1].set_xlabel('Parameters (Millions)')
axes[1, 1].set_ylabel('Accuracy (%)')
axes[1, 1].set_xlim(0, 30)
# Keep the usual 80–95 window, but widen it if a point would fall outside.
axes[1, 1].set_ylim(min(80, min(accuracies) - 2), max(95, max(accuracies) + 2))

# Add model labels
for name, n_params, acc in zip(model_names, params, accuracies):
    axes[1, 1].annotate(name, (n_params, acc),
                       xytext=(5, 5), textcoords='offset points', fontsize=9)

axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Print comprehensive results
PHASE1_EPOCHS = 20      # number of feature-extraction epochs before fine-tuning
B0_BASELINE_ACC = 90    # B0 accuracy (%) quoted elsewhere in this notebook
total_training_seconds = sum(epoch_times)
accuracy_gain_over_b0 = best_val_acc - B0_BASELINE_ACC

print("\n" + "="*70)
print("EFFICIENTNET-B3 TRANSFER LEARNING RESULTS")
print("="*70)
print(f"📊 Final Validation Accuracy: {best_val_acc:.2f}%")
print(f"📈 Expected: ~92%, Achieved: {best_val_acc:.2f}%")
print(f"⏱️ Total Training Time: {total_training_seconds:.1f} seconds ({total_training_seconds/60:.1f} minutes)")
print(f"🔧 Model Parameters: {total_params:,}")

# Phase analysis
# Accuracy at the end of Phase 1; falls back to the last recorded epoch when
# training stopped before Phase 1 finished.
if val_accuracies:
    phase1_acc = val_accuracies[min(PHASE1_EPOCHS, len(val_accuracies)) - 1]
else:
    phase1_acc = best_val_acc
phase2_acc = best_val_acc
print(f"\n📊 Phase Analysis:")
print(f"   Phase 1 (Feature Extraction): {phase1_acc:.2f}%")
print(f"   Phase 2 (Fine-tuning): {phase2_acc:.2f}%")
print(f"   Improvement: +{phase2_acc - phase1_acc:.2f}%")

# Compound scaling analysis
print(f"\n🔬 Compound Scaling Analysis (B0 → B3):")
print(f"   Depth scaling: 18 → 26 layers (1.44×)")
print(f"   Width scaling: 1.0 → 1.33× channels")
print(f"   Resolution scaling: 224×224 → 300×300 (1.34×)")
print(f"   Parameter scaling: 5.3M → 12.2M (2.30×)")
# The `+` format flag prints the sign itself, so a regression is not shown as "+-x".
print(f"   Accuracy improvement: {accuracy_gain_over_b0:+.1f}% over B0")

# Rankings are computed from the numbers instead of being hard-coded, so they
# stay correct when the measured B3 accuracy differs from the expected ~92%.
# Each entry: (name, parameters in millions, accuracy %, accuracy label).
ranking_entries = [
    ("EfficientNet-B3", 12.2, best_val_acc, f"{best_val_acc:.1f}%"),
    ("EfficientNet-B0", 5.3, 90, "~90%"),
    ("ResNet50", 25.6, 88, "~88%"),
    ("ResNet18", 11.7, 85, "~85%"),
]

# Final model ranking
print(f"\n🏆 Final Model Ranking by Accuracy:")
by_accuracy = sorted(ranking_entries, key=lambda entry: entry[2], reverse=True)
for rank, (name, n_params_m, _, acc_label) in enumerate(by_accuracy, start=1):
    print(f"   {rank}. {name}: {acc_label} ({n_params_m}M params)")

print(f"\n🏆 Model Ranking by Efficiency (Accuracy per M params):")
by_efficiency = sorted(ranking_entries, key=lambda entry: entry[2] / entry[1], reverse=True)
for rank, (name, n_params_m, acc, _) in enumerate(by_efficiency, start=1):
    print(f"   {rank}. {name}: {acc / n_params_m:.1f}% per M params")

# Key insights
print(f"\n💡 Key Insights:")
print(f"   • Compound scaling provides systematic performance improvements")
print(f"   • B3 achieves best absolute accuracy with reasonable efficiency")
print(f"   • Higher resolution (300×300) helps capture flower details")
print(f"   • 2.3× parameter increase yields {accuracy_gain_over_b0:+.1f}% accuracy gain")
print(f"   • Trade-off: B3 slower than B0 but more accurate than all others")

print("="*70)
