<a href="https://colab.research.google.com/github/Redcoder815/Deep_Learning_PyTorch/blob/main/18VGG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torch.utils import data
from torchvision import datasets

The * before layers in return nn.Sequential(*layers) is the Python "unpacking" or "splat" operator. Here's what it does:

layers is a list: In the vgg_block function, layers is built up as a Python list containing nn.LazyConv2d and nn.ReLU modules, followed by nn.MaxPool2d.
nn.Sequential expects individual arguments: The nn.Sequential module in PyTorch expects its components as separate, individual arguments, not as a single list containing all components. For example, you would normally define it like nn.Sequential(layer1, layer2, layer3).
*layers unpacks the list: When you use *layers, Python unpacks the elements of the layers list and passes each element as a separate argument to the nn.Sequential constructor. So, if layers contained [conv1, relu1, pool1], *layers would effectively become conv1, relu1, pool1 when passed to nn.Sequential.

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [3]:
def vgg_block(num_convs, out_channels):
    layers = []
    for _ in range(num_convs):
        layers.append(nn.LazyConv2d(out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
    layers.append(nn.MaxPool2d(kernel_size=2,stride=2))
    return nn.Sequential(*layers)

In [4]:
class VGG(nn.Module):
    def __init__(self, arch, lr=0.1, num_classes=10):
        super().__init__()
        conv_blks = []
        for (num_convs, out_channels) in arch:
            conv_blks.append(vgg_block(num_convs, out_channels))
        self.net = nn.Sequential(
            *conv_blks, nn.Flatten(),
            nn.LazyLinear(4096), nn.ReLU(), nn.Dropout(0.5),
            nn.LazyLinear(4096), nn.ReLU(), nn.Dropout(0.5),
            nn.LazyLinear(num_classes))
    def forward(self, X):
      return self.net(X)

This line (0): LazyConv2d(0, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) defines the first convolutional layer within a VGG block. Let me break it down:

LazyConv2d: This is a PyTorch convolutional layer that automatically infers the in_channels (input channels) dimension when the model is first passed data. The 0 you see as the first argument indicates that the in_channels are not explicitly defined yet and will be determined dynamically.
16: This is the out_channels argument, meaning this convolutional layer will produce 16 output channels.
kernel_size=(3, 3): This specifies that the convolutional filter (kernel) has a size of 3x3 pixels.
stride=(1, 1): This indicates that the kernel moves 1 pixel at a time horizontally and vertically across the input.
padding=(1, 1): This adds a border of 1 pixel of zeros around the input image. For a 3x3 kernel and a stride of 1, padding of 1 typically helps to maintain the spatial dimensions of the output feature map the same as the input.

In [5]:
model = VGG(arch=((1, 16), (1, 32), (2, 64), (2, 128), (2, 128)), lr=0.01)
model.to(device)
model

VGG(
  (net): Sequential(
    (0): Sequential(
      (0): LazyConv2d(0, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (1): Sequential(
      (0): LazyConv2d(0, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (2): Sequential(
      (0): LazyConv2d(0, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): LazyConv2d(0, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU()
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (3): Sequential(
      (0): LazyConv2d(0, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): LazyConv2d(0, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU()
      (4): MaxPool2d(ker

In [6]:
def load_cifar10_dataset(batch_size=32):
    # Define data transformations
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize images to 224x224
        transforms.ToTensor(),  # Convert images to tensors
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
    ])

    # Load CIFAR-10 dataset
    train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
    test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

In [7]:
train_loader, test_loader = load_cifar10_dataset(batch_size=32)

learning_rate = 0.001

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [8]:
num_epochs = 3
for epoch in range(num_epochs):
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()  # Zero the gradients
        loss.backward()  # Backpropagate
        optimizer.step()  # Update weights

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/3], Loss: 2.3072
Epoch [2/3], Loss: 1.5964
Epoch [3/3], Loss: 1.3876


In [9]:
model.eval()
test_accuracy_sum = 0
test_n = 0
with torch.no_grad():
  for images, labels in test_loader:
    images, labels = images.to(device), labels.to(device)
    outputs = model(images)
    pred_outputs = torch.argmax(outputs, dim=1)
    test_accuracy_sum += (pred_outputs == labels).float().sum()
    test_n += labels.numel()
  test_accuracy = test_accuracy_sum/test_n
  print('test accuracy ', test_accuracy)

test accuracy  tensor(0.4788, device='cuda:0')


Using Fashion Mnist images

In [10]:
batch_size = 256

In [11]:
Transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

In [12]:
mnist_train = datasets.FashionMNIST(root="../data", train=True, transform=Transform, download=True)
mnist_val = datasets.FashionMNIST(root="../data", train=False, transform=Transform, download=True)

train_iter = data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=4)
val_iter = data.DataLoader(mnist_val, batch_size, shuffle=False, num_workers=4)



In [13]:
modelFashion = VGG(arch=((1, 16), (1, 32), (2, 64), (2, 128), (2, 128)), lr=0.01)
modelFashion.to(device)

VGG(
  (net): Sequential(
    (0): Sequential(
      (0): LazyConv2d(0, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (1): Sequential(
      (0): LazyConv2d(0, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (2): Sequential(
      (0): LazyConv2d(0, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): LazyConv2d(0, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU()
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (3): Sequential(
      (0): LazyConv2d(0, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): LazyConv2d(0, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU()
      (4): MaxPool2d(ker

In [14]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(modelFashion.parameters(), lr=learning_rate)

In [15]:
num_epochs = 3
for epoch in range(num_epochs):
    for images, labels in train_iter:
        images, labels = images.to(device), labels.to(device)
        # Forward pass
        outputs = modelFashion(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()  # Zero the gradients
        loss.backward()  # Backpropagate
        optimizer.step()  # Update weights

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/3], Loss: 0.2405
Epoch [2/3], Loss: 0.4039
Epoch [3/3], Loss: 0.2140


In [16]:
model.eval()
test_accuracy_sum = 0
test_n = 0
with torch.no_grad():
  for images, labels in val_iter:
    images, labels = images.to(device), labels.to(device)
    outputs = modelFashion(images)
    pred_outputs = torch.argmax(outputs, dim=1)
    test_accuracy_sum += (pred_outputs == labels).float().sum()
    test_n += labels.numel()
  test_accuracy = test_accuracy_sum/test_n
  print('test accuracy ', test_accuracy)

test accuracy  tensor(0.9070, device='cuda:0')
