# CIFAR-10 GAP Sweep (GoogLeNet, ResNet50, VGG19)

Colab-ready notebook for probing how different output sizes of the final Global Average Pooling layer affect performance on CIFAR-10. Each model is trained briefly so you can compare validation accuracy across pooling configurations.


## Setup
Run the next cell to import packages and configure lightweight hyperparameters. If you're in Colab, enable GPU acceleration via *Runtime → Change runtime type → GPU* first.


In [1]:
import time
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, Iterable, List, Optional

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms

print(f"PyTorch version: {torch.__version__}")


PyTorch version: 2.8.0+cu126


In [2]:
def get_device() -> torch.device:
    """Return the CUDA device when one is available, otherwise the CPU device."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

In [3]:
@dataclass
class ExperimentConfig:
    """Settings describing one training run (one model with one pooling choice)."""

    # Architecture key that build_model dispatches on (e.g. "resnet50").
    model_name: str
    # Output side length for the adaptive average pool; None swaps the pool
    # for an identity layer in build_model.
    pool_size: Optional[int]
    # Number of train/validate passes performed by run_experiment.
    epochs: int = 2
    # Learning rate handed to the SGD optimizer.
    lr: float = 0.01
    # Weight decay handed to the SGD optimizer.
    weight_decay: float = 5e-4


In [4]:
def _infer_flatten_dim(model: nn.Module, classifier_attr: str, input_size: int = 224) -> int:
    """Measure the feature width that reaches the model's classifier head.

    The module stored under ``classifier_attr`` is temporarily replaced with
    ``nn.Identity`` so that a forward pass returns the tensor the head would
    have received. The original head and the original train/eval mode are
    restored afterwards, even if the probe forward pass raises.

    Args:
        model: Network whose head input width should be measured.
        classifier_attr: Attribute name of the head module on ``model``.
        input_size: Height and width of the square, 3-channel probe image.

    Returns:
        Size of dimension 1 of the model output with the head bypassed.
    """
    classifier = getattr(model, classifier_attr)
    was_training = model.training

    # Build the probe on the same device as the model so the helper also works
    # for a model that has already been moved off the CPU. A parameter-free
    # model falls back to the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    setattr(model, classifier_attr, nn.Identity())
    model.eval()
    try:
        with torch.no_grad():
            dummy_input = torch.zeros(1, 3, input_size, input_size, device=device)
            features = model(dummy_input)
    finally:
        # Always undo the temporary surgery; otherwise a failed probe would
        # leave the caller with a head-less model stuck in eval mode.
        setattr(model, classifier_attr, classifier)
        if was_training:
            model.train()
    return features.shape[1]


def build_model(name: str, pool_size: Optional[int], num_classes: int) -> nn.Module:
    """Create an untrained backbone with a customised pooling layer and head.

    Args:
        name: Case-insensitive architecture key: ``"resnet50"``, ``"vgg19"``,
            or one of ``"googlenet"`` / ``"inceptionv1"`` / ``"inception_v1"``.
        pool_size: Output side length for ``nn.AdaptiveAvgPool2d``. ``None``
            replaces the pooling layer with ``nn.Identity``; the head width is
            then measured with a probe forward pass.
        num_classes: Number of output logits.

    Returns:
        The torchvision model with ``avgpool`` and its head replaced.

    Raises:
        ValueError: If ``name`` is not one of the supported keys.
    """
    key = name.lower()

    # Step 1: instantiate the backbone and remember which attribute is its head.
    if key == "resnet50":
        net = models.resnet50(weights=None)
        head_attr = "fc"
    elif key in ("googlenet", "inceptionv1", "inception_v1"):
        net = models.googlenet(weights=None, aux_logits=False)
        head_attr = "fc"
    elif key == "vgg19":
        net = models.vgg19(weights=None)
        head_attr = "classifier"
    else:
        raise ValueError(f"Unsupported model name: {key}")

    # Step 2: swap the pooling layer and work out how wide the head input is.
    if pool_size is None:
        net.avgpool = nn.Identity()
        flat_dim = _infer_flatten_dim(net, head_attr)
    else:
        net.avgpool = nn.AdaptiveAvgPool2d((pool_size, pool_size))
        # VGG19 uses a fixed 512 channels here; the fc-based models expose
        # their channel count via the stock head's in_features.
        channels = 512 if head_attr == "classifier" else net.fc.in_features
        flat_dim = channels * pool_size * pool_size

    # Step 3: attach a fresh head sized for the new feature width.
    if head_attr == "fc":
        net.fc = nn.Linear(flat_dim, num_classes)
    else:
        net.classifier = nn.Sequential(
            nn.Linear(flat_dim, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.3),
            nn.Linear(1024, num_classes),
        )

    return net


In [5]:
def get_dataloaders(batch_size: int = 128, num_workers: int = 2) -> Dict[str, DataLoader]:
    """Build the CIFAR-10 train and validation loaders.

    Images are resized to 224, converted to tensors and normalised; the
    training split additionally gets a random horizontal flip and is shuffled.
    The dataset is downloaded into ``./data`` when missing.

    Args:
        batch_size: Batch size used for both loaders.
        num_workers: Worker process count used for both loaders.

    Returns:
        A dict with the keys ``"train"`` and ``"val"``.
    """
    # Per-channel statistics; presumably the CIFAR-10 training-set mean/std.
    normalize = transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2470, 0.2435, 0.2616))

    loaders: Dict[str, DataLoader] = {}
    for split, is_train in (("train", True), ("val", False)):
        steps = [transforms.Resize(224)]
        if is_train:
            # Augmentation is applied to the training split only.
            steps.append(transforms.RandomHorizontalFlip())
        steps.extend([transforms.ToTensor(), normalize])

        dataset = datasets.CIFAR10(
            root="./data",
            train=is_train,
            transform=transforms.Compose(steps),
            download=True,
        )
        loaders[split] = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=is_train,
            num_workers=num_workers,
        )
    return loaders


In [6]:
def train_one_epoch(
    model: nn.Module,
    loader: DataLoader,
    criterion: nn.Module,
    optimizer: optim.Optimizer,
    device: torch.device,
) -> Dict[str, float]:
    """Run one optimisation pass over ``loader`` and summarise it.

    Args:
        model: Network to update; switched to training mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss function called as ``criterion(outputs, targets)``.
        optimizer: Optimizer stepped once per batch.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``{"loss": ..., "acc": ...}`` averaged over all samples seen.
    """
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch_x, batch_y in loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so the final division yields a
        # per-sample figure (assuming the criterion returns a batch mean).
        loss_sum += batch_loss.item() * batch_x.size(0)
        predicted = logits.max(1)[1]
        n_correct += (predicted == batch_y).sum().item()
        n_seen += batch_y.size(0)

    return {"loss": loss_sum / n_seen, "acc": n_correct / n_seen}


def evaluate(
    model: nn.Module,
    loader: DataLoader,
    criterion: nn.Module,
    device: torch.device,
) -> Dict[str, float]:
    """Score ``model`` on ``loader`` without tracking gradients.

    Args:
        model: Network to score; switched to evaluation mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss function called as ``criterion(outputs, targets)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``{"loss": ..., "acc": ...}`` averaged over all samples seen.
    """
    with torch.no_grad():
        model.eval()
        loss_sum, n_correct, n_seen = 0.0, 0, 0

        for batch_x, batch_y in loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            logits = model(batch_x)
            batch_loss = criterion(logits, batch_y)

            # Same per-sample weighting as in training so the numbers are
            # directly comparable.
            loss_sum += batch_loss.item() * batch_x.size(0)
            predicted = logits.max(1)[1]
            n_correct += (predicted == batch_y).sum().item()
            n_seen += batch_y.size(0)

        return {"loss": loss_sum / n_seen, "acc": n_correct / n_seen}


In [7]:
def run_experiment(config: ExperimentConfig, loaders: Dict[str, DataLoader]) -> Dict[str, float]:
    """Train and validate one model configuration, logging every epoch.

    Args:
        config: Model name, pooling size and optimisation settings.
        loaders: Mapping providing the ``"train"`` and ``"val"`` loaders.

    Returns:
        The last epoch's ``train_loss``, ``train_acc``, ``val_loss`` and
        ``val_acc``.
    """
    device = get_device()
    model = build_model(config.model_name, config.pool_size, num_classes=10)
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        model.parameters(),
        lr=config.lr,
        momentum=0.9,
        weight_decay=config.weight_decay,
    )

    # The label shown in the log line does not change between epochs.
    pool_label = "none" if config.pool_size is None else config.pool_size

    history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}
    for epoch_number in range(1, config.epochs + 1):
        tic = time.time()
        train_stats = train_one_epoch(model, loaders["train"], criterion, optimizer, device)
        val_stats = evaluate(model, loaders["val"], criterion, device)
        elapsed = time.time() - tic

        for split, stats in (("train", train_stats), ("val", val_stats)):
            for metric in ("loss", "acc"):
                history[f"{split}_{metric}"].append(stats[metric])

        log_line = (
            f"[{config.model_name} | pool={pool_label}] "
            f"Epoch {epoch_number}/{config.epochs} "
            f"train_loss={train_stats['loss']:.4f} "
            f"train_acc={train_stats['acc']*100:.2f}% "
            f"val_loss={val_stats['loss']:.4f} "
            f"val_acc={val_stats['acc']*100:.2f}% "
            f"time={elapsed:.1f}s"
        )
        print(log_line)

    # Only the final epoch's figures are reported to the caller.
    return {key: series[-1] for key, series in history.items()}


def grid_search(
    models_to_try: Iterable[str], pool_sizes: Iterable[Optional[int]], epochs: int = 2
) -> List[Dict[str, object]]:
    """Run every model / pooling-size combination and collect final metrics.

    Args:
        models_to_try: Architecture names accepted by ``build_model``.
        pool_sizes: Pooling sizes to try for each model; ``None`` entries are
            reported as ``"none"``. Any iterable is accepted, including
            one-shot iterators and generators.
        epochs: Number of epochs for each run.

    Returns:
        One dict per run with the keys ``model``, ``pool_size``, ``val_acc``,
        ``val_loss``, ``train_acc`` and ``train_loss``.
    """
    # pool_sizes is walked once per model. Materialise it so that a generator
    # or other one-shot iterator is not exhausted after the first model, which
    # would silently skip every remaining model.
    pool_size_options = list(pool_sizes)

    loaders = get_dataloaders()
    results = []
    for name in models_to_try:
        for pool_size in pool_size_options:
            cfg = ExperimentConfig(model_name=name, pool_size=pool_size, epochs=epochs)
            metrics = run_experiment(cfg, loaders)
            results.append(
                {
                    "model": name,
                    "pool_size": pool_size if pool_size is not None else "none",
                    "val_acc": metrics["val_acc"],
                    "val_loss": metrics["val_loss"],
                    "train_acc": metrics["train_acc"],
                    "train_loss": metrics["train_loss"],
                }
            )
    return results


## Run the sweep

In [None]:
# Architectures to sweep; each name is dispatched on by build_model.
MODELS = ["googlenet", "resnet50", "vgg19"]
POOL_SIZES = [1]  # Use None to remove the GAP layer
EPOCHS = 5

# Run every model / pool-size combination; the trailing bare expression makes
# the notebook display the collected results.
summary = grid_search(MODELS, POOL_SIZES, epochs=EPOCHS)
summary


### Experiment 1 - GAP removed (pool_size=None)

```
[googlenet | pool=none] Epoch 1/5 train_loss=4.3566 train_acc=17.85% val_loss=1.9445 val_acc=31.10% time=55.2s
[googlenet | pool=none] Epoch 2/5 train_loss=1.8909 train_acc=37.74% val_loss=1.6542 val_acc=43.88% time=54.6s
[googlenet | pool=none] Epoch 3/5 train_loss=1.5450 train_acc=45.46% val_loss=1.4195 val_acc=50.15% time=54.7s
[googlenet | pool=none] Epoch 4/5 train_loss=1.3953 train_acc=50.32% val_loss=1.3109 val_acc=53.19% time=54.8s
[googlenet | pool=none] Epoch 5/5 train_loss=1.2819 train_acc=54.06% val_loss=1.2942 val_acc=53.61% time=54.9s
[resnet50 | pool=none] Epoch 1/5 train_loss=7.2307 train_acc=19.30% val_loss=1.9333 val_acc=30.84% time=77.2s
[resnet50 | pool=none] Epoch 2/5 train_loss=1.7646 train_acc=36.91% val_loss=1.6628 val_acc=43.54% time=77.1s
[resnet50 | pool=none] Epoch 3/5 train_loss=1.5644 train_acc=43.76% val_loss=1.4947 val_acc=45.80% time=77.1s
[resnet50 | pool=none] Epoch 4/5 train_loss=1.4672 train_acc=47.34% val_loss=1.4114 val_acc=49.24% time=77.1s
[resnet50 | pool=none] Epoch 5/5 train_loss=1.3683 train_acc=50.91% val_loss=1.7889 val_acc=50.62% time=77.1s
[vgg19 | pool=none] Epoch 1/5 train_loss=1.7766 train_acc=34.40% val_loss=1.3715 val_acc=50.26% time=115.6s
[vgg19 | pool=none] Epoch 2/5 train_loss=1.2519 train_acc=55.40% val_loss=1.1114 val_acc=60.45% time=115.7s
[vgg19 | pool=none] Epoch 3/5 train_loss=0.9521 train_acc=66.99% val_loss=0.9195 val_acc=68.14% time=115.7s
[vgg19 | pool=none] Epoch 4/5 train_loss=0.7523 train_acc=74.11% val_loss=0.7343 val_acc=74.30% time=115.7s
[vgg19 | pool=none] Epoch 5/5 train_loss=0.6300 train_acc=78.14% val_loss=0.6748 val_acc=77.27% time=115.8s
```

### Experiment 2 - GAP (pool_size=1)

```
[googlenet | pool=1] Epoch 1/5 train_loss=1.4601 train_acc=46.31% val_loss=1.2282 val_acc=55.69% time=53.1s
[googlenet | pool=1] Epoch 2/5 train_loss=0.9547 train_acc=66.11% val_loss=1.2028 val_acc=59.15% time=52.9s
[googlenet | pool=1] Epoch 3/5 train_loss=0.7161 train_acc=74.77% val_loss=0.7434 val_acc=73.68% time=52.9s
[googlenet | pool=1] Epoch 4/5 train_loss=0.5712 train_acc=80.15% val_loss=0.6181 val_acc=78.16% time=52.8s
[googlenet | pool=1] Epoch 5/5 train_loss=0.4804 train_acc=83.29% val_loss=0.6487 val_acc=78.60% time=53.5s
[resnet50 | pool=1] Epoch 1/5 train_loss=1.9458 train_acc=30.72% val_loss=1.8685 val_acc=41.23% time=77.5s
[resnet50 | pool=1] Epoch 2/5 train_loss=1.3987 train_acc=48.98% val_loss=1.7754 val_acc=44.00% time=77.4s
[resnet50 | pool=1] Epoch 3/5 train_loss=1.1520 train_acc=58.29% val_loss=1.0552 val_acc=61.25% time=77.3s
[resnet50 | pool=1] Epoch 4/5 train_loss=0.9158 train_acc=67.53% val_loss=0.9634 val_acc=66.03% time=77.5s
[resnet50 | pool=1] Epoch 5/5 train_loss=0.7634 train_acc=73.05% val_loss=0.8352 val_acc=72.08% time=77.5s
[vgg19 | pool=1] Epoch 1/5 train_loss=2.2234 train_acc=15.02% val_loss=2.0774 val_acc=20.49% time=114.8s
[vgg19 | pool=1] Epoch 2/5 train_loss=1.9098 train_acc=27.21% val_loss=1.7287 val_acc=34.79% time=114.8s
[vgg19 | pool=1] Epoch 3/5 train_loss=1.6704 train_acc=36.96% val_loss=1.5463 val_acc=41.73% time=114.9s
[vgg19 | pool=1] Epoch 4/5 train_loss=1.4952 train_acc=44.35% val_loss=1.4130 val_acc=47.12% time=114.8s
[vgg19 | pool=1] Epoch 5/5 train_loss=1.3083 train_acc=52.27% val_loss=1.1218 val_acc=59.15% time=114.8s
```

