In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torch.utils.data import DataLoader


In [6]:
mean = [0.5071, 0.4867, 0.4408]
std  = [0.2675, 0.2565, 0.2761]

transform = transforms.Compose([
  
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])


In [7]:
train_data = datasets.CIFAR100(root='./data', train=True, download=False, transform=transform)
test_data = datasets.CIFAR100(root='./data', train=False, download=False, transform=transform)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)


In [8]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, downsample=False):
        super(ResidualBlock, self).__init__()
        stride = 2 if downsample else 1

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.downsample = None
        if downsample or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        identity = x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        if self.downsample:
            identity = self.downsample(identity)

        out = out + identity
        out = self.relu(out)
        return out


In [9]:
class ClassifierHead(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(ClassifierHead, self).__init__()
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_dim, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        return self.classifier(x)

In [10]:
class BlockCNN(nn.Module):
    def __init__(self, num_classes=100):
        super(BlockCNN, self).__init__()
        self.layer0 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )

        self.layer1 = ResidualBlock(64, 64)
        self.layer2 = ResidualBlock(64, 128, downsample=True)
        self.layer3 = ResidualBlock(128, 256, downsample=True)
        self.layer4 = ResidualBlock(256, 512, downsample=True)

        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = ClassifierHead(input_dim=512, num_classes=num_classes)

    def forward(self, x):
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BlockCNN(num_classes=100).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, weight_decay=5e-4, momentum=0.9)


In [12]:
epochs = 100

for epoch in range(epochs):
    # Training
    model.train()
    train_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_accuracy = 100. * correct / total
    avg_train_loss = train_loss / len(train_loader)

    # Evaluation
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, predicted = outputs.max(1)
            val_total += labels.size(0)
            val_correct += predicted.eq(labels).sum().item()

    val_accuracy = 100. * val_correct / val_total
    avg_val_loss = val_loss / len(test_loader)

    # Print epoch summary
    print(f"Epoch [{epoch+1}/{epochs}]")
    print(f"Train Loss: {avg_train_loss:.4f}, Train Acc: {train_accuracy:.2f}%")
    print(f"Val   Loss: {avg_val_loss:.4f}, Val   Acc: {val_accuracy:.2f}%\n")


Epoch [1/100]
Train Loss: 4.3345, Train Acc: 5.03%
Val   Loss: 4.0135, Val   Acc: 9.74%

Epoch [2/100]
Train Loss: 3.9018, Train Acc: 9.83%
Val   Loss: 3.6776, Val   Acc: 13.31%

Epoch [3/100]
Train Loss: 3.6182, Train Acc: 13.66%
Val   Loss: 3.4001, Val   Acc: 17.48%

Epoch [4/100]
Train Loss: 3.3769, Train Acc: 17.74%
Val   Loss: 3.2217, Val   Acc: 20.69%

Epoch [5/100]
Train Loss: 3.1697, Train Acc: 21.06%
Val   Loss: 3.0230, Val   Acc: 24.08%

Epoch [6/100]
Train Loss: 2.9924, Train Acc: 24.57%
Val   Loss: 2.8640, Val   Acc: 26.27%

Epoch [7/100]
Train Loss: 2.8435, Train Acc: 27.27%
Val   Loss: 2.7312, Val   Acc: 29.78%

Epoch [8/100]
Train Loss: 2.7168, Train Acc: 29.82%
Val   Loss: 2.5870, Val   Acc: 33.14%

Epoch [9/100]
Train Loss: 2.6047, Train Acc: 31.92%
Val   Loss: 2.4294, Val   Acc: 35.77%

Epoch [10/100]
Train Loss: 2.4865, Train Acc: 34.55%
Val   Loss: 2.3743, Val   Acc: 36.96%

Epoch [11/100]
Train Loss: 2.3848, Train Acc: 36.82%
Val   Loss: 2.2695, Val   Acc: 39.87%

