# Memory-Efficient Binary Neural Network (BNN) for Plant Disease Classification - 300x300 Images

This notebook implements a Binary Neural Network using PyTorch for multiclass plant disease classification, optimized for handling 300x300 images on GPUs with limited memory.

## Memory Management Guide

This notebook has been tuned to run on GPUs with limited memory (under 4 GB). The `aggressive` memory-optimization mode is selected by default to prevent CUDA out-of-memory errors.

### Current Memory-Optimized Settings (the `aggressive` preset):
- Image resolution: 300x300 (higher resolution)
- Batch size: 2 (reduced for larger images)
- Hidden size: 256 (compact model architecture)
- Progressive dimensionality reduction (multiple embedding steps)
- Gradient accumulation: 12 steps (effective batch size of 24)
- Mixed precision training (FP16)
- Sample limiting: 50 per class (for faster training with high-res images)

### Memory Monitoring
You can monitor GPU and system memory usage with the `print_gpu_memory_stats()` and `print_system_memory()` helpers defined in the "Memory Monitoring and Optimization Utilities" cell of this notebook.

# Binary Neural Network (BNN) for Plant Disease Classification

This notebook implements a Binary Neural Network using PyTorch for multiclass plant disease classification. The BNN uses binary weights and activations to reduce model size and computational requirements while maintaining reasonable accuracy.

## Features:
- Binary weights and activations using the sign function
- Processes 300x300 RGB images (higher resolution)
- Progressive dimensionality reduction for memory efficiency
- Binary hidden layers with binary weights
- Batch normalization for improved stability
- Dropout for regularization
- Learning rate scheduling for better convergence
- Multiclass output with softmax activation
- Mixed precision training
- Gradient accumulation

In [None]:
# Import necessary libraries
import os
import time
import gc
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# PyTorch imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split, Subset
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

# Check for CUDA availability
# `device` is a notebook-level global: it is "cuda" when torch reports a usable
# CUDA runtime and "cpu" otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Log the selected device and the installed PyTorch version for reproducibility.
print(f"Using device: {device}")
print(f"PyTorch version: {torch.__version__}")


# Seed every random number generator used by the notebook
def set_seed(seed=42):
    """Seed Python, NumPy and PyTorch RNGs so that runs are repeatable.

    Args:
        seed: Value handed to each seeding function (default 42).

    Side effects:
        - Seeds ``random``, ``numpy.random`` and ``torch``.
        - When CUDA is available, also seeds the CUDA generators and switches
          cuDNN to deterministic mode with benchmarking disabled.
        - Stores ``str(seed)`` in the ``PYTHONHASHSEED`` environment variable.
          NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so
          this presumably only affects processes launched afterwards - confirm.
    """
    # CPU-side generators first: stdlib, NumPy, then torch.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        # Current CUDA device, then every visible CUDA device.
        for cuda_seed_fn in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            cuda_seed_fn(seed)

        # Trade cuDNN autotuning for repeatable kernel selection.
        cudnn = torch.backends.cudnn
        cudnn.deterministic = True
        cudnn.benchmark = False

    os.environ['PYTHONHASHSEED'] = str(seed)

# Seed all RNGs with the default seed (42) before anything random happens.
set_seed()

# Memory configuration - optimized for large images (300x300)
# NOTE(review): the markdown guide at the top of the notebook lists batch size 2
# with 12 accumulation steps (effective batch 24), while this cell uses 12 x 4
# (effective batch 48) - confirm which values are intended.
memory_config = {
    'batch_size': 12,  # Smaller batch size for larger images
    'gradient_accumulation_steps': 4,  # Accumulate gradients over multiple batches
    'gc_frequency': 5,  # Garbage collection frequency
    'memory_efficient': True,  # Use memory-efficient techniques
    'use_mixed_precision': True  # Use mixed precision training if available
}

# Use memory configuration
# Unpack the dict into notebook-level globals.
batch_size = memory_config['batch_size']
gradient_accumulation_steps = memory_config['gradient_accumulation_steps'] 
gc_frequency = memory_config['gc_frequency']
memory_efficient = memory_config['memory_efficient']
use_mixed_precision = memory_config['use_mixed_precision']
# Descriptive label used only in the printout below.
memory_mode = 'High Memory Optimization (300x300)'

# Echo the effective configuration; the effective batch size is the per-step
# batch size multiplied by the number of accumulation steps.
print(f"Memory Configuration Mode: {memory_mode}")
print(f"Batch Size: {batch_size}")
print(f"Gradient Accumulation Steps: {gradient_accumulation_steps}")
print(f"Effective Batch Size: {batch_size * gradient_accumulation_steps}")
print(f"GC Frequency: {gc_frequency}")
print(f"Memory Efficient: {memory_efficient}")
print(f"Mixed Precision: {use_mixed_precision}")

In [None]:
# Additional imports for enhanced visualization and data export
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.metrics import precision_recall_fscore_support
import time
import datetime
import os

# Set style for better plots
# NOTE(review): the 'seaborn-v0_8' style name presumably requires a recent
# matplotlib release - confirm against the installed version.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Create results directory
# exist_ok=True makes this safe to re-run when the directory already exists.
os.makedirs('results', exist_ok=True)
print("Results directory created: ./results/")

In [None]:
def evaluate_bnn_memory_efficient(model, test_loader, criterion, device, class_names, batch_limit=None, use_mixed_precision=False):
    """
    Memory-efficient evaluation function for BNN model

    Runs the model in eval mode without gradients, accumulates a confusion
    matrix batch by batch and derives every reported metric from it.

    Args:
        model: The trained BNN model
        test_loader: Iterable of (inputs, targets) batches; ``len()`` is only
            used for the progress message when no batch limit is given
        criterion: Loss function called as ``criterion(outputs, targets)``
        device: Device to run evaluation on (``torch.device`` or device string)
        class_names: List of class names; its length defines the number of classes
        batch_limit: Limit the number of batches to evaluate (for debugging)
        use_mixed_precision: Whether to run the forward pass under ``torch.autocast``

    Returns:
        Dictionary with evaluation metrics:
            loss: mean loss over the batches that were actually evaluated
            accuracy: overall accuracy in percent
            class_accuracy / precision / recall / f1_score: per-class dicts keyed
                by class name (class_accuracy in percent, the others in [0, 1])
            macro_precision / macro_recall / macro_f1: unweighted class averages
            confusion_matrix: float array, rows = true class, columns = predicted
            confusion_percentage: per-class fraction (0-1, despite the name) of
                correctly classified samples; 0 for classes with no samples
            class_distribution: number of evaluated samples per true class
            evaluation_time: wall-clock seconds spent in the evaluation loop
            samples: up to 10 input tensors with their true/predicted labels
            roc_data: all targets and softmax probabilities
    """
    # Only contextlib is imported locally. `torch` must never be imported inside
    # this function: a function-scope `import torch` makes the name local for the
    # whole body and every earlier use raises UnboundLocalError.
    import contextlib

    device = torch.device(device)
    model.eval()

    num_classes = len(class_names)
    # Integer counts; rows index the true class, columns the predicted class.
    confusion = torch.zeros(num_classes, num_classes, dtype=torch.long)

    test_loss = 0.0
    batches_seen = 0

    # Store some sample images for visualization
    sample_images = []
    sample_labels = []
    sample_preds = []
    max_samples = 10  # Maximum number of samples to collect

    # For ROC curve
    all_targets = []
    all_probs = []

    # torch.autocast takes the device type directly, so no version sniffing or
    # torch.cuda.amp fallback is required.
    mixed_precision_available = bool(use_mixed_precision) and hasattr(torch, 'autocast')
    if mixed_precision_available:
        def autocast_context():
            return torch.autocast(device_type=device.type)
    else:
        autocast_context = contextlib.nullcontext

    # Announce the run once, with the message that matches the actual mode.
    if batch_limit is not None:
        print(f"Evaluating model on {batch_limit} batches (limited)...")
    else:
        print(f"Evaluating model on {len(test_loader)} batches (all)...")

    # Track processing time
    start_time = time.time()

    # Disable gradient computation for evaluation
    with torch.no_grad():
        for batch_idx, (data, targets) in enumerate(test_loader):
            # Respect batch limit if specified
            if batch_limit is not None and batch_idx >= batch_limit:
                break

            data, targets = data.to(device), targets.to(device)

            with autocast_context():
                outputs = model(data)
                loss = criterion(outputs, targets)

            test_loss += loss.item()
            batches_seen += 1

            # Autocast may yield fp16/bf16 logits; promote to float32 so softmax
            # is accurate and the result can be converted to numpy.
            outputs = outputs.float()
            _, predicted = outputs.max(1)

            # Store probabilities for ROC curve (using softmax)
            probs = F.softmax(outputs, dim=1)
            all_targets.extend(targets.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())

            # One vectorized scatter-add per batch instead of a Python loop with
            # a device sync per sample. Out-of-range labels still raise.
            true_idx = targets.view(-1).long().cpu()
            pred_idx = predicted.view(-1).long().cpu()
            confusion.index_put_((true_idx, pred_idx), torch.ones_like(true_idx), accumulate=True)

            # Collect sample images for visualization
            remaining = max_samples - len(sample_images)
            if remaining > 0:
                num_to_collect = min(remaining, data.size(0))
                sample_images.extend(data[:num_to_collect].cpu())
                sample_labels.extend(targets[:num_to_collect].cpu().numpy())
                sample_preds.extend(predicted[:num_to_collect].cpu().numpy())

            # Clean up memory
            del data, targets, outputs, predicted, probs, loss

    # Average over the batches that were actually processed, which differs from
    # len(test_loader) whenever batch_limit cuts the run short.
    test_loss /= max(1, batches_seen)

    # Everything below is derived from the confusion matrix.
    diagonal = confusion.diag()
    true_counts = confusion.sum(dim=1)       # samples per true class
    predicted_counts = confusion.sum(dim=0)  # samples per predicted class

    total = int(confusion.sum())
    correct = int(diagonal.sum())
    accuracy = 100. * correct / max(1, total)

    true_positives = diagonal.tolist()
    false_positives = (predicted_counts - diagonal).tolist()
    false_negatives = (true_counts - diagonal).tolist()
    class_total = true_counts.tolist()

    # Compute per-class accuracy
    class_accuracy = [100. * true_positives[i] / max(1, class_total[i]) for i in range(num_classes)]

    # Compute precision, recall, F1 (zero when a denominator would be zero)
    precision = [true_positives[i] / max(1, true_positives[i] + false_positives[i]) for i in range(num_classes)]
    recall = [true_positives[i] / max(1, true_positives[i] + false_negatives[i]) for i in range(num_classes)]
    f1_score = [2 * precision[i] * recall[i] / max(1e-6, precision[i] + recall[i]) for i in range(num_classes)]

    # Calculate macro averages
    macro_precision = sum(precision) / max(1, num_classes)
    macro_recall = sum(recall) / max(1, num_classes)
    macro_f1 = sum(f1_score) / max(1, num_classes)

    # Per-class fraction of correct predictions; clamp avoids NaN for classes
    # that never appear in the evaluated data.
    confusion_percentage = diagonal.float() / true_counts.clamp(min=1).float()

    # Track total processing time
    eval_time = time.time() - start_time

    # Return a comprehensive metrics dictionary
    metrics = {
        'loss': test_loss,
        'accuracy': accuracy,
        'class_accuracy': dict(zip(class_names, class_accuracy)),
        'precision': dict(zip(class_names, precision)),
        'recall': dict(zip(class_names, recall)),
        'f1_score': dict(zip(class_names, f1_score)),
        'macro_precision': macro_precision,
        'macro_recall': macro_recall,
        'macro_f1': macro_f1,
        'confusion_matrix': confusion.float().cpu().numpy(),
        'confusion_percentage': confusion_percentage.cpu().numpy(),
        'class_distribution': dict(zip(class_names, class_total)),
        'evaluation_time': eval_time,
        'samples': {
            'images': sample_images,
            'true_labels': sample_labels,
            'predicted_labels': sample_preds,
        },
        'roc_data': {
            'targets': all_targets,
            'probs': all_probs,
        }
    }

    return metrics

In [None]:
# Memory-Efficient Training Function with Version-Compatible Autocast
def train_memory_efficient(model, train_loader, criterion, optimizer, num_epochs, device,
                          scheduler=None, gradient_accumulation_steps=1, memory_efficient=True,
                          gc_frequency=10, use_mixed_precision=False,
                          early_stopping_patience=None):
    """
    Memory-efficient training function for BNN model

    Trains with gradient accumulation and optional mixed precision, tracking
    loss/accuracy/time per epoch.

    Args:
        model: The BNN model
        train_loader: Iterable of (inputs, labels) batches, re-iterated each epoch
        criterion: Loss function called as ``criterion(outputs, labels)``
        optimizer: Optimizer for training
        num_epochs: Number of training epochs
        device: Device to train on (``torch.device`` or device string)
        scheduler: Learning rate scheduler (optional); ``ReduceLROnPlateau`` is
            stepped with the epoch loss, any other scheduler without arguments
        gradient_accumulation_steps: Number of batches whose gradients are
            accumulated before one optimizer step (must be >= 1)
        memory_efficient: Whether to run garbage collection / CUDA cache cleanup
        gc_frequency: Run the cleanup every this many batches (0/None disables
            the per-batch cleanup)
        use_mixed_precision: Whether to run the forward pass under ``torch.autocast``
        early_stopping_patience: Stop after this many consecutive epochs without
            an improvement of the training loss (optional)

    Returns:
        Dictionary with training metrics: ``history`` (one dict per epoch),
        ``train_losses``, ``train_accuracies`` (percent), ``epoch_times``,
        ``total_time``, ``final_loss`` and ``final_accuracy`` (both ``None``
        when no epoch was run).

    Raises:
        ValueError: If ``gradient_accumulation_steps`` is smaller than 1.
    """
    # Only contextlib is imported locally. `torch` must never be imported inside
    # this function: a function-scope `import torch` makes the name local for the
    # whole body and every earlier use raises UnboundLocalError.
    import contextlib

    if gradient_accumulation_steps < 1:
        raise ValueError("gradient_accumulation_steps must be >= 1")

    device = torch.device(device)

    # Training history
    history = []
    train_losses = []
    train_accuracies = []
    epoch_times = []

    # For early stopping
    best_loss = float('inf')
    patience_counter = 0

    # torch.autocast takes the device type directly, so no version sniffing or
    # torch.cuda.amp fallback is required.
    mixed_precision_available = bool(use_mixed_precision) and hasattr(torch, 'autocast')
    if mixed_precision_available:
        def autocast_context():
            return torch.autocast(device_type=device.type)
    else:
        autocast_context = contextlib.nullcontext

    # Loss scaling is only set up for CUDA autocast; on other devices the
    # optimizer is stepped directly.
    scaler = None
    if mixed_precision_available and device.type == 'cuda':
        amp_module = getattr(torch, 'amp', None)
        if hasattr(amp_module, 'GradScaler'):
            scaler = amp_module.GradScaler('cuda')
        else:
            # Older releases only ship the CUDA-specific scaler.
            scaler = torch.cuda.amp.GradScaler()

    def apply_optimizer_step():
        """Apply the accumulated gradients and clear them."""
        if scaler is not None:
            scaler.step(optimizer)
            scaler.update()
        else:
            optimizer.step()
        optimizer.zero_grad()

    # Main training loop
    print(f"Starting training for {num_epochs} epochs with mixed precision: {mixed_precision_available}")

    for epoch in range(num_epochs):
        epoch_start_time = time.time()

        model.train()
        running_loss = 0.0
        running_corrects = 0
        total_samples = 0
        num_batches = 0
        has_pending_gradients = False

        # Reset gradients at the start of each epoch for consistent behavior
        optimizer.zero_grad()

        for batch_idx, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            with autocast_context():
                outputs = model(inputs)
                # Scale so the accumulated gradient matches one large batch.
                loss = criterion(outputs, labels) / gradient_accumulation_steps

            if scaler is not None:
                scaler.scale(loss).backward()
            else:
                loss.backward()
            has_pending_gradients = True

            # Step with gradient accumulation
            if (batch_idx + 1) % gradient_accumulation_steps == 0:
                apply_optimizer_step()
                has_pending_gradients = False

            # Calculate metrics
            _, preds = torch.max(outputs, 1)
            running_loss += loss.item() * gradient_accumulation_steps  # Rescale loss for reporting
            running_corrects += torch.sum(preds == labels.data).item()
            total_samples += labels.size(0)
            num_batches += 1

            # Memory cleanup
            if memory_efficient and gc_frequency and (batch_idx + 1) % gc_frequency == 0:
                del inputs, labels, outputs, preds, loss
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                gc.collect()

        # Flush gradients left over when the batch count is not a multiple of
        # gradient_accumulation_steps (tracked directly, no len() needed).
        if has_pending_gradients:
            apply_optimizer_step()

        # Calculate epoch metrics (guards keep an empty loader from dividing by 0)
        epoch_loss = running_loss / max(1, num_batches)
        epoch_acc = running_corrects / max(1, total_samples) * 100.0

        # Step scheduler if provided
        if scheduler is not None:
            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(epoch_loss)
            else:
                scheduler.step()

        # Record metrics
        train_losses.append(epoch_loss)
        train_accuracies.append(epoch_acc)

        # Record epoch time
        epoch_time = time.time() - epoch_start_time
        epoch_times.append(epoch_time)

        # Print epoch results
        print(f'Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.2f}%, Time: {epoch_time:.2f}s')

        # Save epoch history
        history.append({
            'epoch': epoch + 1,
            'loss': epoch_loss,
            'accuracy': epoch_acc,
            'time': epoch_time,
            'learning_rate': optimizer.param_groups[0]['lr']
        })

        # Memory cleanup at the end of epoch
        if memory_efficient:
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()

        # Early stopping check (based on the training loss)
        if early_stopping_patience is not None:
            if epoch_loss < best_loss:
                best_loss = epoch_loss
                patience_counter = 0
            else:
                patience_counter += 1

            if patience_counter >= early_stopping_patience:
                print(f'Early stopping at epoch {epoch+1}')
                break

    # Return training metrics; the "final" entries are None if no epoch ran.
    training_summary = {
        'history': history,
        'train_losses': train_losses,
        'train_accuracies': train_accuracies,
        'epoch_times': epoch_times,
        'total_time': sum(epoch_times),
        'final_loss': train_losses[-1] if train_losses else None,
        'final_accuracy': train_accuracies[-1] if train_accuracies else None
    }

    return training_summary

In [None]:
# Memory Monitoring and Optimization Utilities
import gc
import psutil

def print_gpu_memory_stats():
    """Print detailed GPU memory statistics for the current CUDA device.

    Prints the device name, the allocated / reserved / peak-allocated memory
    in GB and, when the installed torch provides it, the abbreviated
    allocator summary. Prints "CUDA not available" when no CUDA device can
    be used. Returns None.
    """
    if not torch.cuda.is_available():
        print("CUDA not available")
        return

    # Report name and counters for the same device; the name used to be read
    # from device 0 while the counters came from the current device.
    device_index = torch.cuda.current_device()
    bytes_per_gb = 1024 ** 3

    print(f"GPU: {torch.cuda.get_device_name(device_index)}")
    allocated = torch.cuda.memory_allocated(device_index) / bytes_per_gb
    reserved = torch.cuda.memory_reserved(device_index) / bytes_per_gb
    max_allocated = torch.cuda.max_memory_allocated(device_index) / bytes_per_gb

    print(f"Memory allocated: {allocated:.2f} GB")
    print(f"Memory reserved: {reserved:.2f} GB")
    print(f"Max memory allocated: {max_allocated:.2f} GB")

    if hasattr(torch.cuda, 'memory_summary'):
        print("\nMemory Summary:")
        print(torch.cuda.memory_summary(device=device_index, abbreviated=True))

def print_system_memory():
    """Print total and available system RAM in GB plus the used percentage.

    Values come from ``psutil.virtual_memory()``.
    """
    bytes_per_gb = 1024**3
    stats = psutil.virtual_memory()

    total_gb = stats.total / bytes_per_gb
    available_gb = stats.available / bytes_per_gb

    report = (
        f"System memory: {total_gb:.1f} GB total, "
        f"{available_gb:.1f} GB available, "
        f"{stats.percent}% used"
    )
    print(report)

def optimize_memory(mode='aggressive'):
    """Apply memory optimization settings based on selected mode

    Args:
        mode: 'aggressive', 'moderate', or anything else, which is treated as
            the 'performance' preset.

    Returns:
        A new dict of recommended settings for the chosen preset (image size,
        batch size, hidden/embedding sizes, number of hidden layers, gradient
        accumulation steps and per-class sample cap).

    Side effects:
        Sets the ``PYTORCH_CUDA_ALLOC_CONF`` environment variable and
        ``torch.backends.cudnn.benchmark``; the 'aggressive' preset also turns
        on ``torch.backends.cudnn.deterministic``.
        NOTE(review): the allocator presumably reads PYTORCH_CUDA_ALLOC_CONF
        when CUDA is first initialised, so calling this afterwards may have no
        effect on it - confirm.
    """
    # Each entry: (mode name, allocator config, cudnn.benchmark, settings).
    named_presets = (
        (
            'aggressive',  # most aggressive memory saving for 300x300 images
            'expandable_segments:True,max_split_size_mb:32',
            False,
            {
                'image_size': 300,  # 300x300 for this notebook
                'batch_size': 2,  # Very small batch size for larger images
                'hidden_size': 256,
                'embedding_size': 512,
                'num_hidden_layers': 1,
                'gradient_accumulation': 12,
                'max_samples_per_class': 50,  # Limit samples for 300x300 images
            },
        ),
        (
            'moderate',  # balanced memory saving
            'expandable_segments:True,max_split_size_mb:64',
            True,
            {
                'image_size': 300,  # 300x300 for this notebook
                'batch_size': 3,
                'hidden_size': 320,
                'embedding_size': 640,
                'num_hidden_layers': 1,
                'gradient_accumulation': 8,
                'max_samples_per_class': 100,
            },
        ),
    )
    # Fallback ('performance'): optimized for speed, higher memory usage.
    performance_preset = (
        'performance',
        'expandable_segments:True',
        True,
        {
            'image_size': 300,  # 300x300 for this notebook
            'batch_size': 4,
            'hidden_size': 512,
            'embedding_size': 1024,
            'num_hidden_layers': 2,
            'gradient_accumulation': 4,
            'max_samples_per_class': None,
        },
    )

    chosen = next(
        (preset for preset in named_presets if mode == preset[0]),
        performance_preset,
    )
    preset_name, allocator_conf, cudnn_benchmark, settings = chosen

    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = allocator_conf
    torch.backends.cudnn.benchmark = cudnn_benchmark
    # Only the aggressive preset touches the deterministic flag.
    if preset_name == 'aggressive':
        torch.backends.cudnn.deterministic = True

    return settings

# Check current memory status
print("Initial memory status:")
print_system_memory()
print_gpu_memory_stats()

# Memory optimization mode - set this to 'performance', 'moderate', or 'aggressive'
memory_mode = 'aggressive'  # Using aggressive mode to avoid CUDA OOM errors
print(f"\nUsing {memory_mode} memory optimization settings")
# NOTE(review): this rebinds the notebook-level `memory_config` to the preset
# dict, whose keys (e.g. 'gradient_accumulation', 'hidden_size') differ from the
# configuration cell's keys (e.g. 'gradient_accumulation_steps'). In this cell
# the preset is only printed, not applied to `batch_size` and friends - confirm
# that later cells read the keys they expect.
memory_config = optimize_memory(memory_mode)
print(f"Recommended settings: {memory_config}")

# Create results directory
# exist_ok=True makes this safe to re-run when the directory already exists.
os.makedirs('results', exist_ok=True)
print("\nResults directory created: ./results/")