In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torch.utils.data import DataLoader


In [2]:
mean = [0.5071, 0.4867, 0.4408]
std  = [0.2675, 0.2565, 0.2761]

transform = transforms.Compose([
  
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])


In [3]:
train_data = datasets.CIFAR100(root='./data', train=True, download=False, transform=transform)
test_data = datasets.CIFAR100(root='./data', train=False, download=False, transform=transform)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)


In [4]:
class DenseLayer(nn.Module):
    def __init__(self, in_channels, growth_rate):
        super(DenseLayer, self).__init__()
        self.layer = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, growth_rate, kernel_size=3, padding=1, bias=False)
        )

    def forward(self, x):
        out = self.layer(x)
        return torch.cat([x, out], dim=1)  # Concatenate input and output


In [5]:
class TwoLayerDenseBlock(nn.Module):
    def __init__(self, in_channels, growth_rate, use_dropout=False):
        super(TwoLayerDenseBlock, self).__init__()
        self.layer1 = DenseLayer(in_channels, growth_rate)
        self.layer2 = DenseLayer(in_channels + growth_rate, growth_rate)  # concat adds channels

        total_channels = in_channels + 2 * growth_rate
        self.post = nn.Sequential(
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        if use_dropout:
            self.post.add_module("dropout", nn.Dropout(p=0.2))

        self.out_channels = total_channels  # helpful for tracking channels later

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        return self.post(x)


In [6]:
class DenseNet(nn.Module):
    def __init__(self, num_classes=100):
        super(DenseNet, self).__init__()
        growth = 32
        self.block1 = TwoLayerDenseBlock(3, growth)
        self.block2 = TwoLayerDenseBlock(self.block1.out_channels, growth)
        self.block3 = TwoLayerDenseBlock(self.block2.out_channels, growth, use_dropout=True)
        self.block4 = TwoLayerDenseBlock(self.block3.out_channels, growth, use_dropout=True)

        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(self.block4.out_channels, num_classes)
        )

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.classifier(x)
        return x


In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DenseNet(num_classes=100).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, weight_decay=5e-4, momentum=0.9)


In [None]:
epochs = 100

for epoch in range(epochs):
    # Training
    model.train()
    train_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_accuracy = 100. * correct / total
    avg_train_loss = train_loss / len(train_loader)

    # Evaluation
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, predicted = outputs.max(1)
            val_total += labels.size(0)
            val_correct += predicted.eq(labels).sum().item()

    val_accuracy = 100. * val_correct / val_total
    avg_val_loss = val_loss / len(test_loader)

    # Print epoch summary
    print(f"Epoch [{epoch+1}/{epochs}]")
    print(f"Train Loss: {avg_train_loss:.4f}, Train Acc: {train_accuracy:.2f}%")
    print(f"Val   Loss: {avg_val_loss:.4f}, Val   Acc: {val_accuracy:.2f}%\n")


Epoch [1/100]
Train Loss: 4.3586, Train Acc: 4.22%
Val   Loss: 4.1549, Val   Acc: 8.23%

Epoch [2/100]
Train Loss: 3.9945, Train Acc: 9.45%
Val   Loss: 3.8549, Val   Acc: 12.40%

Epoch [3/100]
Train Loss: 3.7204, Train Acc: 13.08%
Val   Loss: 3.6294, Val   Acc: 15.80%

Epoch [4/100]
Train Loss: 3.5164, Train Acc: 16.19%
Val   Loss: 3.4065, Val   Acc: 19.03%

Epoch [5/100]
Train Loss: 3.3486, Train Acc: 19.07%
Val   Loss: 3.2871, Val   Acc: 20.74%

Epoch [6/100]
Train Loss: 3.1878, Train Acc: 21.66%
Val   Loss: 3.1232, Val   Acc: 24.06%

Epoch [7/100]
Train Loss: 3.0485, Train Acc: 24.16%
Val   Loss: 3.0496, Val   Acc: 24.49%

Epoch [8/100]
Train Loss: 2.9315, Train Acc: 26.27%
Val   Loss: 2.9404, Val   Acc: 26.70%

Epoch [9/100]
Train Loss: 2.8317, Train Acc: 28.22%
Val   Loss: 2.8892, Val   Acc: 27.58%

Epoch [10/100]
Train Loss: 2.7520, Train Acc: 29.76%
Val   Loss: 2.7387, Val   Acc: 30.13%

Epoch [11/100]
Train Loss: 2.6775, Train Acc: 31.12%
Val   Loss: 2.6510, Val   Acc: 31.38%

