# CSE144 Code Exercise SENet

This example implements the [Squeeze-and-Excitation Networks (SENet)](https://arxiv.org/abs/1709.01507)
model for image classification,
and demonstrates it on the CIFAR-100 dataset.

## Setup

In [4]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR100
import matplotlib.pyplot as plt

# Seed PyTorch's global random number generator so repeated runs start
# from the same state.
torch.manual_seed(42)

# Pick the compute device once (GPU when CUDA is usable, CPU otherwise)
# and report the outcome of that check.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("cuda availability:", device.type == "cuda")

cuda availability: False


## Configure the hyperparameters

In [None]:
# --- optimiser settings ---
learning_rate = 1e-3
weight_decay = 1e-4

# --- data settings ---
batch_size = 256
image_size = 32  # side length (pixels) passed to the Resize transform
image_channels = 3

# --- schedule ---
num_epochs = 10  # short training for demo

## Prepare the data

In [None]:
# Prepare the data
num_classes = 100

# Per-channel normalisation statistics. They are defined once and shared by
# both pipelines so the train and test preprocessing cannot drift apart.
cifar100_mean = [0.507, 0.487, 0.441]
cifar100_std = [0.267, 0.256, 0.276]

# Training pipeline: resize, light augmentation (random flip + small rotation),
# then tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),  # resize to image_size x image_size
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=7),
    transforms.ToTensor(),
    transforms.Normalize(mean=cifar100_mean, std=cifar100_std),
])

# Evaluation pipeline: same resize and normalisation, but no random augmentation.
test_transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),  # resize to image_size x image_size
    transforms.ToTensor(),
    transforms.Normalize(mean=cifar100_mean, std=cifar100_std),
])

# download=True fetches the archive into ./data when it is not already there.
train_dataset = CIFAR100(root="./data", train=True, transform=train_transform, download=True)
test_dataset = CIFAR100(root="./data", train=False, transform=test_transform, download=True)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:01<00:00, 104958761.38it/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified




## Define SE Block

In [None]:
def conv3x3(in_planes, out_planes, stride=1):
    """Create a 3x3 2-D convolution with padding 1 and no bias term.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (defaults to 1).

    Returns:
        The configured ``nn.Conv2d`` module.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)

class SELayer(nn.Module):
    """Squeeze-and-Excitation gate that rescales each channel of a feature map.

    The input is globally average-pooled to one value per channel, passed
    through a two-layer bias-free bottleneck MLP ending in a sigmoid, and the
    resulting per-channel weights multiply the original input.

    Args:
        channel: number of channels of the input feature map.
        reduction: divisor applied to ``channel`` to obtain the bottleneck
            width. The width is clamped to at least 1.
    """

    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        # Without the clamp, channel < reduction yields a zero-width bottleneck:
        # the second Linear then outputs all zeros and the sigmoid gates every
        # channel by a constant 0.5, so nothing is learned.
        hidden = max(1, channel // reduction)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the learned gate.

        Args:
            x: 4-D tensor; the first two dimensions are read as (batch, channel).

        Returns:
            Tensor with the same shape as ``x``.
        """
        b, c, _, _ = x.size()
        # Squeeze: one scalar per (sample, channel).
        y = self.avg_pool(x).view(b, c)
        # Excite: per-channel weights in (0, 1), shaped to broadcast over H and W.
        y = self.fc(y).view(b, c, 1, 1)
        return x * y

In [None]:
class CifarSEBasicBlock(nn.Module):
    """Residual basic block with a Squeeze-and-Excitation gate.

    Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN -> SE. The shortcut is
    added to the SE output and a final ReLU is applied.

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        stride: stride of the first convolution (and of the shortcut projection).
        reduction: reduction ratio forwarded to ``SELayer``.
    """

    def __init__(self, inplanes, planes, stride=1, reduction=16):
        super(CifarSEBasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.se = SELayer(planes, reduction)
        # The shortcut must be projected whenever the main path changes the
        # tensor shape: a channel change OR a spatial change (stride != 1).
        # Checking only the channel count makes the residual add fail for a
        # strided block whose input and output widths are equal.
        if stride != 1 or inplanes != planes:
            self.downsample = nn.Sequential(nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False),
                                            nn.BatchNorm2d(planes))
        else:
            # nn.Identity is a registered, parameter-free module; unlike a
            # lambda it can be pickled together with the rest of the model.
            self.downsample = nn.Identity()
        self.stride = stride

    def forward(self, x):
        """Run the block on ``x`` and return the activated residual sum."""
        residual = self.downsample(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.se(out)

        out += residual
        out = self.relu(out)

        return out

## Define SE ResNet

In [None]:
class CifarSEResNet(nn.Module):
    """Three-stage residual network assembled from a caller-supplied block class.

    Layout: a 3->16 channel 3x3 stem convolution with BN and ReLU, followed by
    three stages of ``n_size`` blocks each (widths 16, 32, 64; the first block
    of stages two and three uses stride 2), global average pooling and a
    linear classifier.

    Args:
        block: callable invoked as ``block(inplanes, planes, stride, reduction)``
            that returns the module for one residual unit.
        n_size: number of blocks in each stage.
        num_classes: size of the classifier output.
        reduction: value passed through to every block.
    """

    def __init__(self, block, n_size, num_classes=10, reduction=16):
        super().__init__()
        # Running channel count; _make_layer reads and updates it.
        self.inplane = 16

        # Stem
        self.conv1 = nn.Conv2d(3, self.inplane, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplane)
        self.relu = nn.ReLU(inplace=True)

        # Residual stages
        self.layer1 = self._make_layer(block, 16, blocks=n_size, stride=1, reduction=reduction)
        self.layer2 = self._make_layer(block, 32, blocks=n_size, stride=2, reduction=reduction)
        self.layer3 = self._make_layer(block, 64, blocks=n_size, stride=2, reduction=reduction)

        # Classification head
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(64, num_classes)

        self.initialize()

    def initialize(self):
        """Re-initialise conv weights (Kaiming normal) and reset BN to weight 1, bias 0."""
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight)

    def _make_layer(self, block, planes, blocks, stride, reduction):
        """Build one stage; only its first block receives ``stride``, the rest use 1."""
        per_block_strides = [stride, *([1] * (blocks - 1))]
        stage = []
        for block_stride in per_block_strides:
            stage.append(block(self.inplane, planes, block_stride, reduction))
            # Every block after the first one in a stage sees `planes` input channels.
            self.inplane = planes
        return nn.Sequential(*stage)

    def forward(self, x):
        """Map an image batch to class logits of shape (batch, num_classes)."""
        stem = self.relu(self.bn1(self.conv1(x)))
        features = self.layer3(self.layer2(self.layer1(stem)))
        pooled = self.avgpool(features)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

In [None]:
def se_resnet20(num_classes=10):
    """Build a ``CifarSEResNet`` made of ``CifarSEBasicBlock`` units, three per stage.

    Args:
        num_classes: size of the classifier output (defaults to 10).

    Returns:
        The freshly constructed network.
    """
    return CifarSEResNet(CifarSEBasicBlock, 3, num_classes=num_classes)

## Compile, train, and evaluate the model

In [None]:
# Create and train the PyTorch model

def run_experiment(model, train_loader, test_loader, num_epochs=10, learning_rate=0.001, weight_decay=0.0001, device=None):
    """Train ``model`` with AdamW and cross-entropy, evaluating after every epoch.

    Args:
        model: the ``nn.Module`` to train; it is moved to ``device``.
        train_loader: iterable of ``(inputs, labels)`` batches used for training.
        test_loader: iterable of ``(inputs, labels)`` batches used for evaluation.
        num_epochs: number of passes over ``train_loader``.
        learning_rate: AdamW learning rate.
        weight_decay: AdamW weight decay.
        device: ``torch.device`` to run on. When ``None``, CUDA is used if it is
            available and the CPU otherwise.

    Returns:
        A list with one test accuracy (fraction in [0, 1]) per epoch. An epoch
        whose ``test_loader`` yields no samples records ``nan`` instead of
        raising ``ZeroDivisionError``.
    """
    # Resolve the device locally so the function does not rely on a global
    # that happens to be defined in another notebook cell.
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    history = []
    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # ---- evaluation pass (no gradients, eval-mode layers) ----
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                # argmax over the class dimension; no need for the legacy `.data`.
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Accuracy is undefined when nothing was evaluated.
        accuracy = correct / total if total else float("nan")
        history.append(accuracy)
        print(f'Epoch [{epoch + 1}/{num_epochs}], Test Accuracy: {accuracy * 100:.2f}%')

    return history

# Build the network with the class count configured in the data-preparation
# cell instead of repeating the literal, then train and evaluate it.
model = se_resnet20(num_classes=num_classes)
run_experiment(model, train_loader, test_loader, num_epochs=num_epochs, learning_rate=learning_rate, weight_decay=weight_decay)

Epoch [1/10], Test Accuracy: 13.28%
Epoch [2/10], Test Accuracy: 20.35%
Epoch [3/10], Test Accuracy: 25.38%
Epoch [4/10], Test Accuracy: 27.92%
Epoch [5/10], Test Accuracy: 30.30%
Epoch [6/10], Test Accuracy: 33.36%
Epoch [7/10], Test Accuracy: 35.35%
Epoch [8/10], Test Accuracy: 37.92%
Epoch [9/10], Test Accuracy: 37.28%
Epoch [10/10], Test Accuracy: 39.16%
