In [1]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
import json
from collections import defaultdict

In [2]:
class ResidualBlock(nn.Module):
    """Residual block component for building ResNet architecture"""
    expansion = 1
    
    def __init__(self, input_channels, output_channels, stride=1, downsample_layer=None):
        super(BasicBlock, self).__init__()
        
        # First convolution layer
        self.conv1 = nn.Conv2d(input_channels, output_channels, kernel_size=3, 
                              stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(output_channels)
        
        # Second convolution layer
        self.conv2 = nn.Conv2d(output_channels, output_channels, kernel_size=3,
                              stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(output_channels)
        
        self.relu = nn.ReLU(inplace=True)
        self.downsample_layer = downsample_layer
        
    def forward(self, x):
        residual_connection = x
        
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        
        out = self.conv2(out)
        out = self.bn2(out)
        
        if self.downsample_layer is not None:
            residual_connection = self.downsample_layer(x)
            
        out += residual_connection
        out = self.relu(out)
        
        return out


In [3]:
class CustomResNet(nn.Module):
    """Custom ResNet implementation from scratch for CIFAR-100 classification"""
    
    def __init__(self, residual_block, layer_config, num_classes=100, dropout_rate=0.0):
        super(CustomResNet, self).__init__()
        
        self.current_channels = 32
        self.dropout_rate = dropout_rate
        
        # Initial convolution layer - optimized for CIFAR-100 (32x32 images)
        self.initial_conv = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.initial_bn = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)
        
        # Residual layer groups
        self.conv2_group = self._build_residual_layer(residual_block, 32, layer_config[0], stride=1)   # conv2_x
        self.conv3_group = self._build_residual_layer(residual_block, 64, layer_config[1], stride=2)   # conv3_x
        self.conv4_group = self._build_residual_layer(residual_block, 128, layer_config[2], stride=2)  # conv4_x
        self.conv5_group = self._build_residual_layer(residual_block, 256, layer_config[3], stride=2)  # conv5_x
        
        # Global average pooling and final classifier
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        if dropout_rate > 0:
            self.dropout_layer = nn.Dropout(dropout_rate)
        self.final_classifier = nn.Linear(256 * residual_block.expansion, num_classes)
        
        # Initialize network weights
        self._initialize_network_weights()
        
    def _build_residual_layer(self, residual_block, output_channels, num_blocks, stride=1):
        downsample_layer = None
        
        # Create downsample layer if dimensions change
        if stride != 1 or self.current_channels != output_channels * residual_block.expansion:
            downsample_layer = nn.Sequential(
                nn.Conv2d(self.current_channels, output_channels * residual_block.expansion,
                         kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(output_channels * residual_block.expansion),
            )
        
        block_layers = []
        block_layers.append(residual_block(self.current_channels, output_channels, stride, downsample_layer))
        self.current_channels = output_channels * residual_block.expansion
        
        for _ in range(1, num_blocks):
            block_layers.append(residual_block(self.current_channels, output_channels))
            
        return nn.Sequential(*block_layers)
    
    def _initialize_network_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
    
    def forward(self, x):
        # Initial convolution and normalization
        x = self.initial_conv(x)
        x = self.initial_bn(x)
        x = self.relu(x)
        
        # Pass through residual layer groups
        x = self.conv2_group(x)  # 32x32 -> 32x32
        x = self.conv3_group(x)  # 32x32 -> 16x16
        x = self.conv4_group(x)  # 16x16 -> 8x8
        x = self.conv5_group(x)  # 8x8 -> 4x4
        
        # Global average pooling
        x = self.global_avg_pool(x)  # 4x4 -> 1x1
        x = torch.flatten(x, 1)
        
        # Apply dropout if specified
        if self.dropout_rate > 0:
            x = self.dropout_layer(x)
            
        # Final classification
        x = self.final_classifier(x)
        
        return x

In [4]:
def build_custom_resnet(layer_structure=[2, 4, 4, 2], num_classes=100, dropout_rate=0.0):
    """Build custom ResNet with specified layer configuration"""
    return CustomResNet(ResidualBlock, layer_structure, num_classes, dropout_rate)

In [5]:
def calculate_layer_output_dimensions():
    """Calculate and return output dimensions for each convolution group"""
    # For CIFAR-100 input images (32x32 pixels)
    layer_dimensions = {
        'input_image': (3, 32, 32),
        'initial_conv': (32, 32, 32),
        'conv2_group': (32, 32, 32),  # conv2_x
        'conv3_group': (64, 16, 16),  # conv3_x
        'conv4_group': (128, 8, 8),   # conv4_x  
        'conv5_group': (256, 4, 4),   # conv5_x
        'global_avg_pool': (256, 1, 1),
        'final_classifier': (100,)
    }
    return layer_dimensions

In [6]:
class NetworkTrainer:
    """Main trainer class for CIFAR-100 ResNet experiments"""
    def __init__(self, device=None):
        self.device = device or torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {self.device}")
        
    def prepare_cifar100_data(self, batch_size=128, validation_ratio=0.1):
        """Prepare CIFAR-100 data loaders with augmentation and train/val/test splits"""
        
        # Data augmentation transforms for training
        training_transforms = transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomCrop(32, padding=4),
            transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5071, 0.4867, 0.4408], 
                               std=[0.2675, 0.2565, 0.2761])
        ])
        
        # Simple transforms for validation/testing
        evaluation_transforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5071, 0.4867, 0.4408], 
                               std=[0.2675, 0.2565, 0.2761])
        ])
        
        # Load CIFAR-100 datasets
        complete_training_set = torchvision.datasets.CIFAR100(
            root='./data', train=True, download=True, transform=training_transforms)
        testing_set = torchvision.datasets.CIFAR100(
            root='./data', train=False, download=True, transform=evaluation_transforms)
        
        # Split training data into train/validation
        train_size = int((1 - validation_ratio) * len(complete_training_set))
        val_size = len(complete_training_set) - train_size
        training_subset, validation_subset = random_split(complete_training_set, [train_size, val_size])
        
        # Create validation set with evaluation transforms
        validation_subset.dataset = torchvision.datasets.CIFAR100(
            root='./data', train=True, download=False, transform=evaluation_transforms)
        val_indices = validation_subset.indices
        validation_subset = torch.utils.data.Subset(validation_subset.dataset, val_indices)
        
        # Create data loaders
        self.train_loader = DataLoader(train_dataset, batch_size=batch_size, 
                                     shuffle=True, num_workers=2, pin_memory=True)
        self.val_loader = DataLoader(val_dataset, batch_size=batch_size, 
                                   shuffle=False, num_workers=2, pin_memory=True)
        self.test_loader = DataLoader(test_dataset, batch_size=batch_size, 
                                    shuffle=False, num_workers=2, pin_memory=True)
        
        print(f"Train samples: {len(train_dataset)}")
        print(f"Validation samples: {len(val_dataset)}")
        print(f"Test samples: {len(test_dataset)}")
        
        return self.train_loader, self.val_loader, self.test_loader
    
    def train_epoch(self, model, optimizer, criterion):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        pbar = tqdm(self.train_loader, desc='Training')
        for inputs, labels in pbar:
            inputs, labels = inputs.to(self.device), labels.to(self.device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            pbar.set_postfix({
                'Loss': f'{running_loss/len(pbar):.4f}',
                'Acc': f'{100.*correct/total:.2f}%'
            })
        
        epoch_loss = running_loss / len(self.train_loader)
        epoch_acc = 100. * correct / total
        return epoch_loss, epoch_acc
    
    def validate_epoch(self, model, criterion):
        model.eval()
        running_loss = 0.0
        correct = 0
        total = 0
        
        with torch.no_grad():
            for inputs, labels in self.val_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                running_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        epoch_loss = running_loss / len(self.val_loader)
        epoch_acc = 100. * correct / total
        return epoch_loss, epoch_acc
    
    def test_model(self, model):
        """Evaluate trained model performance on test dataset"""
        model.eval()
        correct_predictions = 0
        total_samples = 0
        total_test_loss = 0.0
        loss_function = nn.CrossEntropyLoss()
        
        with torch.no_grad():
            for batch_inputs, batch_labels in self.testing_loader:
                batch_inputs, batch_labels = batch_inputs.to(self.device), batch_labels.to(self.device)
                model_outputs = model(batch_inputs)
                loss = loss_function(model_outputs, batch_labels)
                
                total_test_loss += loss.item()
                _, predicted_classes = torch.max(model_outputs, 1)
                total_samples += batch_labels.size(0)
                correct_predictions += (predicted_classes == batch_labels).sum().item()
        
        average_test_loss = total_test_loss / len(self.testing_loader)
        test_accuracy = 100. * correct_predictions / total_samples
        return average_test_loss, test_accuracy
    
    def train_model(self, config, epochs=200, patience=20):
        """Train model with given hyperparameter configuration"""
        
        # Create model
        model = create_resnet(
            layers=config['layers'],
            num_classes=100,
            dropout_rate=config.get('dropout', 0.0)
        ).to(self.device)
        
        # Setup training components
        criterion = nn.CrossEntropyLoss()
        
        if config['optimizer'] == 'sgd':
            optimizer = optim.SGD(model.parameters(), lr=config['lr'], 
                                momentum=config.get('momentum', 0.9),
                                weight_decay=config.get('weight_decay', 1e-4))
        elif config['optimizer'] == 'adam':
            optimizer = optim.Adam(model.parameters(), lr=config['lr'],
                                 weight_decay=config.get('weight_decay', 1e-4))
        
        # Learning rate scheduler
        if config.get('scheduler') == 'step':
            scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=60, gamma=0.1)
        elif config.get('scheduler') == 'cosine':
            scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
        else:
            scheduler = None
        
        # Training tracking
        train_losses, train_accs = [], []
        val_losses, val_accs = [], []
        best_val_acc = 0.0
        patience_counter = 0
        
        print(f"\nTraining with config: {config}")
        print("-" * 60)
        
        for epoch in range(epochs):
            start_time = time.time()
            
            # Train
            train_loss, train_acc = self.train_epoch(model, optimizer, criterion)
            
            # Validate
            val_loss, val_acc = self.validate_epoch(model, criterion)
            
            # Update scheduler
            if scheduler:
                scheduler.step()
            
            # Save metrics
            train_losses.append(train_loss)
            train_accs.append(train_acc)
            val_losses.append(val_loss)
            val_accs.append(val_acc)
            
            epoch_time = time.time() - start_time
            
            if (epoch + 1) % 10 == 0 or epoch < 5:
                print(f'Epoch {epoch+1}/{epochs} ({epoch_time:.1f}s)')
                print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
                print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
                if scheduler:
                    print(f'LR: {optimizer.param_groups[0]["lr"]:.2e}')
                print("-" * 40)
            
            # Early stopping check
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                patience_counter = 0
                # Save best model
                torch.save(model.state_dict(), f'best_model_{hash(str(config))}.pth')
            else:
                patience_counter += 1
            
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch+1}')
                break
        
        # Load best model for final evaluation
        model.load_state_dict(torch.load(f'best_model_{hash(str(config))}.pth'))
        
        # Test on test set
        test_loss, test_acc = self.test_model(model)
        
        results = {
            'config': config,
            'train_losses': train_losses,
            'train_accs': train_accs,
            'val_losses': val_losses,
            'val_accs': val_accs,
            'best_val_acc': best_val_acc,
            'final_test_loss': test_loss,
            'final_test_acc': test_acc,
            'epochs_trained': len(train_losses)
        }
        
        print(f'Best validation accuracy: {best_val_acc:.2f}%')
        print(f'Final test accuracy: {test_acc:.2f}%')
        
        return results, model



In [7]:
def hyperparameter_search():
    """Perform hyperparameter search"""
    
    # Define hyperparameter configurations to test
    configs = [
        {
            'name': 'Baseline SGD',
            'layers': [2, 4, 4, 2],
            'optimizer': 'sgd',
            'lr': 0.1,
            'momentum': 0.9,
            'weight_decay': 1e-4,
            'scheduler': 'step',
            'dropout': 0.0
        },
        {
            'name': 'SGD with Dropout',
            'layers': [2, 4, 4, 2],
            'optimizer': 'sgd',
            'lr': 0.1,
            'momentum': 0.9,
            'weight_decay': 1e-4,
            'scheduler': 'step',
            'dropout': 0.2
        },
        {
            'name': 'Adam Optimizer',
            'layers': [2, 4, 4, 2],
            'optimizer': 'adam',
            'lr': 0.001,
            'weight_decay': 1e-4,
            'scheduler': 'cosine',
            'dropout': 0.1
        },
        {
            'name': 'Higher Learning Rate',
            'layers': [2, 4, 4, 2],
            'optimizer': 'sgd',
            'lr': 0.2,
            'momentum': 0.9,
            'weight_decay': 5e-4,
            'scheduler': 'step',
            'dropout': 0.1
        },
        {
            'name': 'Deeper Network',
            'layers': [3, 6, 6, 3],
            'optimizer': 'sgd',
            'lr': 0.1,
            'momentum': 0.9,
            'weight_decay': 1e-4,
            'scheduler': 'step',
            'dropout': 0.1
        }
    ]
    
    trainer = CIFAR100Trainer()
    trainer.load_data(batch_size=128)
    
    all_results = []
    
    for config in configs:
        print(f"\n{'='*60}")
        print(f"Testing configuration: {config['name']}")
        print(f"{'='*60}")
        
        results, model = trainer.train_model(config, epochs=200, patience=20)
        all_results.append(results)
        
        # Clean up GPU memory
        del model
        torch.cuda.empty_cache() if torch.cuda.is_available() else None
    
    return all_results

In [8]:
def plot_results(results_list):
    """Plot training curves for multiple configurations"""
    
    # Create subplots
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    fig.suptitle('ResNet Training Results on CIFAR-100', fontsize=16)
    
    # Plot training accuracy
    ax1 = axes[0, 0]
    for results in results_list:
        epochs = range(1, len(results['train_accs']) + 1)
        ax1.plot(epochs, results['train_accs'], 
                label=f"{results['config']['name']}", linewidth=2)
    ax1.set_title('Training Accuracy')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Accuracy (%)')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # Plot validation accuracy
    ax2 = axes[0, 1]
    for results in results_list:
        epochs = range(1, len(results['val_accs']) + 1)
        ax2.plot(epochs, results['val_accs'], 
                label=f"{results['config']['name']}", linewidth=2)
    ax2.set_title('Validation Accuracy')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy (%)')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    # Plot training loss
    ax3 = axes[1, 0]
    for results in results_list:
        epochs = range(1, len(results['train_losses']) + 1)
        ax3.plot(epochs, results['train_losses'], 
                label=f"{results['config']['name']}", linewidth=2)
    ax3.set_title('Training Loss')
    ax3.set_xlabel('Epoch')
    ax3.set_ylabel('Loss')
    ax3.legend()
    ax3.grid(True, alpha=0.3)
    
    # Plot validation loss
    ax4 = axes[1, 1]
    for results in results_list:
        epochs = range(1, len(results['val_losses']) + 1)
        ax4.plot(epochs, results['val_losses'], 
                label=f"{results['config']['name']}", linewidth=2)
    ax4.set_title('Validation Loss')
    ax4.set_xlabel('Epoch')
    ax4.set_ylabel('Loss')
    ax4.legend()
    ax4.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()

In [9]:
def print_results_summary(results_list):
    """Print summary of all hyperparameter configurations"""
    
    print("\n" + "="*80)
    print("HYPERPARAMETER SEARCH RESULTS SUMMARY")
    print("="*80)
    
    # Print table header
    print(f"{'Configuration':<20} {'Best Val Acc':<12} {'Test Acc':<10} {'Test Loss':<10} {'Epochs':<8}")
    print("-" * 80)
    
    # Sort by test accuracy
    sorted_results = sorted(results_list, key=lambda x: x['final_test_acc'], reverse=True)
    
    for results in sorted_results:
        config_name = results['config']['name']
        best_val = results['best_val_acc']
        test_acc = results['final_test_acc']
        test_loss = results['final_test_loss']
        epochs = results['epochs_trained']
        
        print(f"{config_name:<20} {best_val:<12.2f} {test_acc:<10.2f} {test_loss:<10.4f} {epochs:<8}")
    
    print("-" * 80)
    
    # Best configuration details
    best_config = sorted_results[0]
    print(f"\nBEST CONFIGURATION: {best_config['config']['name']}")
    print(f"Test Accuracy: {best_config['final_test_acc']:.2f}%")
    print(f"Test Loss: {best_config['final_test_loss']:.4f}")
    print(f"Configuration details:")
    for key, value in best_config['config'].items():
        if key != 'name':
            print(f"  {key}: {value}")

In [12]:
def print_output_sizes():
    """Print output sizes for each layer"""
    sizes = calculate_layer_output_dimensions()
    
    print("\n" + "="*50)
    print("LAYER OUTPUT SIZES")
    print("="*50)
    print(f"{'Layer Name':<15} {'Output Size':<20} {'Description'}")
    print("-" * 50)
    
    descriptions = {
        'input': 'Input images',
        'conv1': 'Initial convolution',
        'conv2_x': 'Residual blocks (no downsampling)',
        'conv3_x': 'Residual blocks (downsample 2x)',
        'conv4_x': 'Residual blocks (downsample 2x)',
        'conv5_x': 'Residual blocks (downsample 2x)',
        'avgpool': 'Global average pooling',
        'fc': 'Final classification layer'
    }
    
    for layer, size in sizes.items():
        if len(size) == 3:
            size_str = f"({size[0]}, {size[1]}, {size[2]})"
        else:
            size_str = f"({size[0]},)"
        print(f"{layer:<15} {size_str:<20} {descriptions[layer]}")

In [13]:
# Main execution
if __name__ == "__main__":
    # Print network architecture details
    print_output_sizes()
    
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    np.random.seed(42)
    
    # Run hyperparameter search
    print("\nStarting hyperparameter search...")
    all_results = hyperparameter_search()
    
    # Print summary
    print_results_summary(all_results)
    
    # Plot results
    plot_results(all_results[:3])  # Plot first 3 configurations
    
    # Save results
    with open('resnet_cifar100_results.json', 'w') as f:
        # Convert to serializable format
        serializable_results = []
        for result in all_results:
            serializable_result = result.copy()
            # Convert numpy arrays to lists if any
            for key in ['train_losses', 'train_accs', 'val_losses', 'val_accs']:
                if key in serializable_result:
                    serializable_result[key] = [float(x) for x in serializable_result[key]]
            serializable_results.append(serializable_result)
        
        json.dump(serializable_results, f, indent=2)
    
    print(f"\nResults saved to 'resnet_cifar100_results.json'")

# Quick test to verify model construction
def test_model_construction():
    """Test that the model can be constructed and forward pass works"""
    print("\nTesting model construction...")
    
    model = create_resnet(layers=[2, 4, 4, 2], num_classes=100)
    
    # Test forward pass
    x = torch.randn(1, 3, 32, 32)
    output = model(x)
    
    print(f"Input shape: {x.shape}")
    print(f"Output shape: {output.shape}")
    print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    print("✓ Model construction successful!")
    
    return model

# Uncomment to test model construction
# test_model_construction()


LAYER OUTPUT SIZES
Layer Name      Output Size          Description
--------------------------------------------------


KeyError: 'input_image'

In [None]:
def train_with_hyperparameters(self, hyperparameter_config, max_epochs=200, early_stop_patience=20):
        """Train model using specified hyperparameter configuration"""
        
        # Build model with configuration
        model = build_custom_resnet(
            layer_structure=hyperparameter_config['layers'],
            num_classes=100,
            dropout_rate=hyperparameter_config.get('dropout', 0.0)
        ).to(self.device)
        
        # Setup loss function
        loss_function = nn.CrossEntropyLoss()
        
        # Setup optimizer based on configuration
        if hyperparameter_config['optimizer'] == 'sgd':
            optimizer = optim.SGD(model.parameters(), lr=hyperparameter_config['lr'], 
                                momentum=hyperparameter_config.get('momentum', 0.9),
                                weight_decay=hyperparameter_config.get('weight_decay', 1e-4))
        elif hyperparameter_config['optimizer'] == 'adam':
            optimizer = optim.Adam(model.parameters(), lr=hyperparameter_config['lr'],
                                 weight_decay=hyperparameter_config.get('weight_decay', 1e-4))
        
        # Setup learning rate scheduler
        if hyperparameter_config.get('scheduler') == 'step':
            lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=60, gamma=0.1)
        elif hyperparameter_config.get('scheduler') ==import torch


















# Main execution
if __name__ == "__main__":
    # Print network architecture details
    print_output_sizes()
    
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    np.random.seed(42)
    
    # Run hyperparameter search
    print("\nStarting hyperparameter search...")
    all_results = hyperparameter_search()
    
    # Print summary
    print_results_summary(all_results)
    
    # Plot results
    plot_results(all_results[:3])  # Plot first 3 configurations
    
    # Save results
    with open('resnet_cifar100_results.json', 'w') as f:
        # Convert to serializable format
        serializable_results = []
        for result in all_results:
            serializable_result = result.copy()
            # Convert numpy arrays to lists if any
            for key in ['train_losses', 'train_accs', 'val_losses', 'val_accs']:
                if key in serializable_result:
                    serializable_result[key] = [float(x) for x in serializable_result[key]]
            serializable_results.append(serializable_result)
        
        json.dump(serializable_results, f, indent=2)
    
    print(f"\nResults saved to 'resnet_cifar100_results.json'")

# Quick test to verify model construction
def test_model_construction():
    """Test that the model can be constructed and forward pass works"""
    print("\nTesting model construction...")
    
    model = create_resnet(layers=[2, 4, 4, 2], num_classes=100)
    
    # Test forward pass
    x = torch.randn(1, 3, 32, 32)
    output = model(x)
    
    print(f"Input shape: {x.shape}")
    print(f"Output shape: {output.shape}")
    print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    print("✓ Model construction successful!")
    
    return model

# Uncomment to test model construction
# test_model_construction()