In [1]:
import torch
from torchvision.transforms import ToTensor, Resize, Compose
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

transforms = Compose([Resize((224, 224)), ToTensor()])
dataset = ImageFolder('../data/train/', transform=transforms)
train_dataloader = DataLoader(dataset, 64, True)

In [2]:
# device = 'cpu'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [3]:
from torch import nn
from typing import List, Tuple

class ConvNet(nn.Module):
    """
    Implements a simple convolutional neural network, with 5 convolutional layers, each followed by maxpooling and optionally batchnorm, and two fully-connected layers.

    Args:
        filters (List[Tuple[int, int]]): Details of all the convolutional layers, each given by a tuple (num_filters, kernel_size), where num_filters is the number of convolutional filters and kernel_size is the size of the filter.
        width_dense (int): The number of units in the hidden fully-connected/dense layer.
        input_size (Tuple[int, int], optional): The size of the input images (images are assumed RGB, i.e., 3 channels).
            Defaults to (224, 224).
        activation_conv (torch.nn.Module, optional): The activation/non-linearity to use for the convolutional layers.
            Defaults to torch.nn.ReLU.
        activation_conv (torch.nn.Module, optional): The activation/non-linearity to use for the hidden dense layer.
            Defaults to torch.nn.ReLU.
    """
    def __init__(
        self,
        filters: List[Tuple[int, int]],
        width_dense: int,
        input_size: Tuple[int, int] = (224, 224),
        activation_conv: nn.Module = nn.ReLU,
        activation_dense: nn.Module = nn.ReLU,
        batch_norm: bool = True,
        dropout: float = None
    ) -> None:
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=filters[0][0], kernel_size=filters[0][1], padding='same'),
            activation_conv(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(num_features=filters[0][0]) if batch_norm else nn.Identity(),
            nn.Conv2d(in_channels=filters[0][0], out_channels=filters[1][0], kernel_size=filters[1][1], padding='same'),
            activation_conv(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(num_features=filters[1][0]) if batch_norm else nn.Identity(),
            nn.Conv2d(in_channels=filters[1][0], out_channels=filters[2][0], kernel_size=filters[2][1], padding='same'),
            activation_conv(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(num_features=filters[2][0]) if batch_norm else nn.Identity(),
            nn.Conv2d(in_channels=filters[2][0], out_channels=filters[3][0], kernel_size=filters[3][1], padding='same'),
            activation_conv(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(num_features=filters[3][0]) if batch_norm else nn.Identity(),
            nn.Conv2d(in_channels=filters[3][0], out_channels=filters[4][0], kernel_size=filters[4][1], padding='same'),
            activation_conv(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(num_features=filters[4][0]) if batch_norm else nn.Identity(),
        )
        self.dense = nn.Sequential(
            nn.Flatten(),
            nn.Identity() if dropout is None else nn.Dropout(p=dropout),
            nn.Linear(in_features=int(filters[4][0]*(input_size[0] * input_size[1])/32**2), out_features=width_dense),
            activation_dense(),
            nn.Identity() if dropout is None else nn.Dropout(p=dropout),
            nn.Linear(in_features=width_dense, out_features=10),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        return self.dense(self.conv(x))

In [15]:
model = ConvNet(filters=[(16, 3), (32, 3), (64, 3), (128, 3), (64, 3)], width_dense=256, dropout=0.2).to(device, non_blocking=True)

In [9]:
class CategoricalAccuracy(nn.Module):
    def __init__(self) -> None:
        super().__init__()
    
    def forward(self, y_pred, y_true):
        return (y_pred.detach().cpu().argmax(dim=1) == y_true.cpu()).sum() / len(y_true.cpu())

In [16]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)
metric = CategoricalAccuracy()

In [17]:
EPOCHS = 1
for epoch in range(EPOCHS):
    # loss = torch.zeros(EPOCHS)
    # score = torch.zeros(EPOCHS)
    for batch, (X, y) in enumerate(train_dataloader):
        X, y = X.to(device, non_blocking=True), y.to(device, non_blocking=True)
        y_pred = model(X)
        loss_curr = loss_fn(y_pred, y)
        score_curr = metric(y_pred, y)
        optimizer.zero_grad()
        loss_curr.backward()
        optimizer.step()
        print(f'[{batch+1}/{len(train_dataloader)}]\tLoss: {loss_curr.cpu().detach():.6f}, Score: {score_curr.cpu():.4f}')
        # loss[epoch] += loss_curr.cpu().detach()
        # score[epoch] += score_curr.cpu()
    # loss[epoch] /= len(train_dataloader)
    # score[epoch] /= len(train_dataloader)
    # print(f'[{epoch+1}/{EPOCHS}]\tLoss: {loss[epoch]:.6f}, Score: {score[epoch]:.4f}')

[1/157]	Loss: 2.304157, Score: 0.0781
[2/157]	Loss: 2.297452, Score: 0.1250
[3/157]	Loss: 2.296964, Score: 0.1094
[4/157]	Loss: 2.296141, Score: 0.1250
[5/157]	Loss: 2.305525, Score: 0.1094
[6/157]	Loss: 2.298108, Score: 0.1406
[7/157]	Loss: 2.294183, Score: 0.1094
[8/157]	Loss: 2.294595, Score: 0.1562
[9/157]	Loss: 2.293852, Score: 0.1250
[10/157]	Loss: 2.298033, Score: 0.0781
[11/157]	Loss: 2.289092, Score: 0.1406
[12/157]	Loss: 2.289972, Score: 0.2031
[13/157]	Loss: 2.282388, Score: 0.2031
[14/157]	Loss: 2.289270, Score: 0.1094
[15/157]	Loss: 2.264889, Score: 0.2344
[16/157]	Loss: 2.283979, Score: 0.1719
[17/157]	Loss: 2.269090, Score: 0.1562
[18/157]	Loss: 2.259615, Score: 0.2344
[19/157]	Loss: 2.258291, Score: 0.1875
[20/157]	Loss: 2.228948, Score: 0.2969
[21/157]	Loss: 2.306071, Score: 0.1406
[22/157]	Loss: 2.229174, Score: 0.2500
[23/157]	Loss: 2.231511, Score: 0.2344
[24/157]	Loss: 2.225268, Score: 0.1875
[25/157]	Loss: 2.257912, Score: 0.2031
[26/157]	Loss: 2.255679, Score: 0.