In [1]:
from helper_functions import train, predict
from dataset import train_cats_dogs, test_cats_dogs
import torch
from torch import nn, optim
import torch.nn.functional as F

In [18]:
class VGGNet(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.conv1_block = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2_block = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv3_block = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv4_block = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv5_block = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.classification_head = nn.Sequential(
            nn.Linear(in_features=7*7*512, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=2),
            nn.Softmax(dim=1)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.conv1_block(x)
        x = self.conv2_block(x)
        x = self.conv3_block(x)
        x = self.conv4_block(x)
        x = self.conv5_block(x)
        x = x.unsqueeze(dim=0) if x.dim() != 4 else x
        x = x.view(x.size(0), -1)
        return self.classification_head(x)
        

image, labels = next(iter(test_cats_dogs))
model = VGGNet()
print(f"Image Shape: {image.shape}")
print(f"Prediction: {predict(model, image)}")

Image Shape: torch.Size([3, 224, 224])
Prediction: tensor([[0.5042, 0.4958]])


In [19]:
from torchinfo import summary

summary(model, input_size=(3, 224, 224), col_names=["input_size", "output_size", "num_params", "trainable"], row_settings=["var_names"])

Layer (type (var_name))                  Input Shape               Output Shape              Param #                   Trainable
VGGNet (VGGNet)                          [3, 224, 224]             [1, 2]                    --                        True
├─Sequential (conv1_block)               [3, 224, 224]             [64, 112, 112]            --                        True
│    └─Conv2d (0)                        [3, 224, 224]             [64, 224, 224]            1,792                     True
│    └─ReLU (1)                          [64, 224, 224]            [64, 224, 224]            --                        --
│    └─Conv2d (2)                        [64, 224, 224]            [64, 224, 224]            36,928                    True
│    └─ReLU (3)                          [64, 224, 224]            [64, 224, 224]            --                        --
│    └─MaxPool2d (4)                     [64, 224, 224]            [64, 112, 112]            --                        --
├─Sequent