<a href="https://colab.research.google.com/github/AnuruddhaPaul/INCEPTION_NET_From_Scratch/blob/main/INCEPTION_NET_V1(GOOGLE_NET)_From_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# ==========================================
# 1. MODEL ARCHITECTURE (From previous step)
# ==========================================

class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(ConvBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))

class InceptionBlock(nn.Module):
    def __init__(self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_pool):
        super(InceptionBlock, self).__init__()
        self.branch1 = ConvBlock(in_channels, out_1x1, kernel_size=1)
        self.branch2 = nn.Sequential(
            ConvBlock(in_channels, red_3x3, kernel_size=1),
            ConvBlock(red_3x3, out_3x3, kernel_size=3, padding=1)
        )
        self.branch3 = nn.Sequential(
            ConvBlock(in_channels, red_5x5, kernel_size=1),
            ConvBlock(red_5x5, out_5x5, kernel_size=5, padding=2)
        )
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            ConvBlock(in_channels, out_pool, kernel_size=1)
        )

    def forward(self, x):
        return torch.cat([self.branch1(x), self.branch2(x), self.branch3(x), self.branch4(x)], dim=1)

class GoogLeNet(nn.Module):
    def __init__(self, num_classes=10, in_channels=1): # Defaults changed for MNIST
        super(GoogLeNet, self).__init__()

        # Stem
        self.conv1 = ConvBlock(in_channels, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = ConvBlock(64, 64, kernel_size=1)
        self.conv3 = ConvBlock(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Inception Blocks
        self.inception3a = InceptionBlock(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = InceptionBlock(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception4a = InceptionBlock(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = InceptionBlock(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = InceptionBlock(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = InceptionBlock(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = InceptionBlock(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception5a = InceptionBlock(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = InceptionBlock(832, 384, 192, 384, 48, 128, 128)

        # Classifier
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(p=0.4)
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.maxpool2(x)
        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.maxpool3(x)
        x = self.inception4a(x)
        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)
        x = self.inception4e(x)
        x = self.maxpool4(x)
        x = self.inception5a(x)
        x = self.inception5b(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)
        return x

# ==========================================
# 2. TRAINING SETUP (MNIST)
# ==========================================

# Configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

BATCH_SIZE = 64
LEARNING_RATE = 0.001
NUM_EPOCHS = 5 # Kept low for demonstration

# Transforms
# IMPORTANT: Resizing to 96x96 to ensure valid feature map sizes after pooling
transform = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load Data
train_dataset = torchvision.datasets.MNIST(root='./data', train=True,
                                           transform=transform, download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False,
                                          transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Initialize Model
# in_channels=1 for Grayscale MNIST
# num_classes=10 for Digits 0-9
model = GoogLeNet(num_classes=10, in_channels=1).to(device)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# ==========================================
# 3. TRAINING LOOP
# ==========================================

print(f"Starting training on {len(train_dataset)} images...")

for epoch in range(NUM_EPOCHS):
    model.train() # Set to training mode
    running_loss = 0.0

    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Step [{i+1}/{len(train_loader)}], '
                  f'Loss: {loss.item():.4f}')

# ==========================================
# 4. EVALUATION
# ==========================================

print("Starting Evaluation...")
model.eval() # Set to evaluation mode
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the 10,000 test images: {100 * correct / total:.2f}%')

# Save the model
torch.save(model.state_dict(), 'googlenet_mnist.pth')
print("Model saved to googlenet_mnist.pth")

Using device: cuda


100%|██████████| 9.91M/9.91M [00:02<00:00, 4.92MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 126kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.20MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 11.9MB/s]


Starting training on 60000 images...
Epoch [1/5], Step [100/938], Loss: 0.1700
Epoch [1/5], Step [200/938], Loss: 0.1396
Epoch [1/5], Step [300/938], Loss: 0.0404
Epoch [1/5], Step [400/938], Loss: 0.0275
Epoch [1/5], Step [500/938], Loss: 0.0411
Epoch [1/5], Step [600/938], Loss: 0.0421
Epoch [1/5], Step [700/938], Loss: 0.0940
Epoch [1/5], Step [800/938], Loss: 0.0234
Epoch [1/5], Step [900/938], Loss: 0.0643
Epoch [2/5], Step [100/938], Loss: 0.0135
Epoch [2/5], Step [200/938], Loss: 0.0395
Epoch [2/5], Step [300/938], Loss: 0.0493
Epoch [2/5], Step [400/938], Loss: 0.0338
Epoch [2/5], Step [500/938], Loss: 0.1584
Epoch [2/5], Step [600/938], Loss: 0.0368
Epoch [2/5], Step [700/938], Loss: 0.1040
Epoch [2/5], Step [800/938], Loss: 0.0181
Epoch [2/5], Step [900/938], Loss: 0.0173
Epoch [3/5], Step [100/938], Loss: 0.0018
Epoch [3/5], Step [200/938], Loss: 0.0641
Epoch [3/5], Step [300/938], Loss: 0.0675
Epoch [3/5], Step [400/938], Loss: 0.0301
Epoch [3/5], Step [500/938], Loss: 0.06