In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
def vgg_block(in_channels, out_channels, num_convs):
    """Build one VGG stage: ``num_convs`` conv+ReLU pairs, then a 2x2 max-pool.

    Args:
        in_channels: Channel count fed to the first convolution.
        out_channels: Channel count produced by every convolution in the stage.
        num_convs: How many 3x3 convolutions (each followed by ReLU) to stack.

    Returns:
        An ``nn.Sequential`` holding the conv/ReLU pairs and the final pooling layer.
    """
    # Only the first conv changes the channel count; the rest map
    # out_channels -> out_channels.
    widths = [in_channels] + [out_channels] * num_convs

    stage = []
    for src, dst in zip(widths[:-1], widths[1:]):
        # padding=1 with a 3x3 kernel keeps the spatial size unchanged.
        stage += [
            nn.Conv2d(src, dst, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        ]

    # The 2x2 pool with stride 2 halves height and width.
    stage.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*stage)

In [3]:
class VGGNet(nn.Module):
    """VGG-style CNN: stacked conv blocks, adaptive pooling and a 3-layer MLP head.

    Args:
        architecture: Iterable of ``(num_convs, out_channels)`` entries, one per
            block. Each entry becomes a ``vgg_block``; the first block is given
            3 input channels.
        num_classes: Output size of the final linear layer (default 14).
    """

    def __init__(self, architecture, num_classes=14):  # Adjusted for 14 classes
        super().__init__()
        # Materialise once so a one-shot iterable can be both walked and indexed.
        architecture = list(architecture)
        self.features = self._make_layers(architecture)
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        # The head's input width follows the last block's channel count instead
        # of a fixed 512, so configs ending in another width no longer hit a
        # shape mismatch in forward(). With no blocks, the 3 input channels
        # pass straight through to the pooling layer.
        final_channels = architecture[-1][1] if architecture else 3
        self.classifier = nn.Sequential(
            nn.Linear(final_channels * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def _make_layers(self, architecture):
        """Chain one ``vgg_block`` per architecture entry into an ``nn.Sequential``."""
        layers = []
        in_channels = 3
        for spec in architecture:
            # spec[0] is the number of convs, spec[1] the block's output channels.
            layers.append(vgg_block(in_channels, spec[1], spec[0]))
            in_channels = spec[1]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run features -> 7x7 adaptive average pool -> flatten -> classifier."""
        x = self.features(x)
        x = self.avgpool(x)
        # Keep dim 0 (batch) and collapse the remaining dims for the linear layers.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [5]:
# One (num_convs, out_channels) entry per VGG block.
vgg16_config = [
    (2, 64),   # 2 conv layers with 64 filters
    (2, 128),  # 2 conv layers with 128 filters
    (3, 256),  # 3 conv layers with 256 filters
    (3, 512),  # 3 conv layers with 512 filters
    (3, 512)   # 3 conv layers with 512 filters
]

# Hyperparameters
batch_size = 128
learning_rate = 0.01
num_epochs = 20
seed = 42

# Seed torch's global RNG before the model is built so weight initialisation
# and data shuffling start from a known state on a fresh kernel run.
torch.manual_seed(seed)

# Data preparation
transform = transforms.Compose([
    # An int size would only rescale the shorter edge and leave non-square
    # images at differing sizes; an explicit (h, w) yields 224x224 for every image.
    transforms.Resize((224, 224)),  # Standard VGG input size is 224x224
    transforms.ToTensor(),
    # Per-channel mean / std applied after ToTensor.
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

# Load your custom dataset
train_dataset = datasets.ImageFolder(root='../data/Train', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.ImageFolder(root='../data/Test', transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Initialize model, loss, and optimizer
model = VGGNet(vgg16_config, num_classes=14)  # 14 classes
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Move the model before creating the optimizer so it tracks the on-device parameters.
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

In [None]:
# Train for num_epochs passes over train_loader, printing the per-sample
# average loss after each pass.
for epoch in range(num_epochs):
    model.train()  # training mode (dropout active)
    running_loss = 0.0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so a smaller final batch
        # is averaged correctly below.
        running_loss += loss.item() * images.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

In [None]:
# Measure top-1 accuracy over test_loader.
model.eval()  # evaluation mode (dropout disabled)
correct, total = 0, 0
with torch.no_grad():  # no gradient tracking during inference
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # The index of the largest logit along dim 1 is the predicted class.
        _, predicted = outputs.max(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

accuracy = 100 * correct / total
print(f'Test Accuracy: {accuracy:.2f}%')

In [None]:
# Save only the parameters (state_dict), not the whole module object.
weights = model.state_dict()
torch.save(weights, 'vgg_model.pth')
print("Model saved as vgg_model.pth")