In [14]:
import torchvision.transforms as transforms
import torch.optim as optim
import time
from torch import nn
from torch.utils.data import DataLoader
from torchvision.datasets import FashionMNIST
from tqdm.notebook import tqdm

device = 'cpu'

In [15]:
def vgg_block(num_convs, in_channels, out_channels):
    layers = []
    for _ in range(num_convs):
        if len(layers) < 2:
            layers.append(nn.Conv2d(in_channels, out_channels, 3, 1, 1))
            layers.append(nn.ReLU())
        else:
            layers.append(nn.Conv2d(out_channels, out_channels, 3, 1, 1))
            layers.append(nn.ReLU())

    layers.append(nn.MaxPool2d(2, 2))
    return nn.Sequential(*layers)

In [32]:
class VGG(nn.Module):
    def __init__(self, arch: tuple[tuple[int, int, int], ...], num_classes=10):
        super().__init__()
        conv_blks = []
        for num_convs, in_channels, out_channels in arch:
            conv_blks.append(vgg_block(num_convs, in_channels, out_channels))
        self.net = nn.Sequential(
            *conv_blks, nn.Flatten(), # 128x25088
            nn.Linear(512 * 7 * 7, 4096), nn.ReLU(), nn.Dropout(0.5),
            nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        x = self.net(x)
        return x

In [33]:
import torch
torch.log2(torch.tensor(224))

tensor(7.8074)

In [34]:
# Define the transformation to resize images to 224x224
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])
batch_size = 128

# Download the Fashion MNIST dataset
train_dataset = FashionMNIST(root='./data', train=True, download=True, transform=transform)

# Create a DataLoader for training data
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Create a DataLoader for validation data
val_dataset = FashionMNIST(root='./data', train=False, download=True, transform=transform)

val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [42]:
model = VGG(arch=((1, 1, 64), (1, 64, 128), (2, 128, 256), (2, 256, 512), (2, 512, 512))).to(device)

optimizer = optim.SGD(model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()

# Train the model
num_epochs = 5
total_steps = len(train_loader)
for epoch in range(num_epochs):
    running_loss = 0.0

    # Training
    model.train()  # Set the model in training mode
    for i, (images, labels) in tqdm(enumerate(train_loader, 0), total=total_steps, desc=f'epoch = {epoch}'):
        inputs = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Print statistics every 10 mini-batches
        if i % 10 == 0:
            print(f'Training: Epoch [{epoch+1}/{num_epochs}], Mini-batch: [{i+1}/{len(train_loader)}], Loss: {running_loss/10:.3f}')
            running_loss = 0.0#%%

epoch = 0:   0%|          | 0/469 [00:00<?, ?it/s]

Training: Epoch [1/5], Mini-batch: [1/469], Loss: 0.231
Training: Epoch [1/5], Mini-batch: [11/469], Loss: 2.303


KeyboardInterrupt: 