In [1]:
import torch
from torchvision.transforms import ToTensor, Resize, Compose
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

transforms = Compose([Resize((224, 224)), ToTensor()])
dataset = ImageFolder('../data/train/', transform=transforms)
train_dataloader = DataLoader(dataset, 128, True)

In [2]:
device = 'cpu'
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [3]:
from torch import nn
from typing import List, Tuple

class ConvNet(nn.Module):
    """
    Implements a simple convolutional neural network, with 5 convolutional layers, each followed by maxpooling and optionally batchnorm, and two fully-connected layers.

    Args:
        filters (List[Tuple[int, int]]): Details of all the convolutional layers, each given by a tuple (num_filters, kernel_size), where num_filters is the number of convolutional filters and kernel_size is the size of the filter.
        width_dense (int): The number of units in the hidden fully-connected/dense layer.
        input_size (Tuple[int, int], optional): The size of the input images (images are assumed RGB, i.e., 3 channels).
            Defaults to (224, 224).
        activation_conv (torch.nn.Module, optional): The activation/non-linearity to use for the convolutional layers.
            Defaults to torch.nn.ReLU.
        activation_conv (torch.nn.Module, optional): The activation/non-linearity to use for the hidden dense layer.
            Defaults to torch.nn.ReLU.
    """
    def __init__(
        self,
        filters: List[Tuple[int, int]],
        width_dense: int,
        input_size: Tuple[int, int] = (224, 224),
        activation_conv: nn.Module = nn.ReLU,
        activation_dense: nn.Module = nn.ReLU,
        batch_norm: bool = True,
        dropout: float = None
    ) -> None:
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=filters[0][0], kernel_size=filters[0][1], padding='same'),
            activation_conv(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(num_features=filters[0][0]) if batch_norm else nn.Identity(),
            nn.Conv2d(in_channels=filters[0][0], out_channels=filters[1][0], kernel_size=filters[1][1], padding='same'),
            activation_conv(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(num_features=filters[1][0]) if batch_norm else nn.Identity(),
            nn.Conv2d(in_channels=filters[1][0], out_channels=filters[2][0], kernel_size=filters[2][1], padding='same'),
            activation_conv(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(num_features=filters[2][0]) if batch_norm else nn.Identity(),
            nn.Conv2d(in_channels=filters[2][0], out_channels=filters[3][0], kernel_size=filters[3][1], padding='same'),
            activation_conv(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(num_features=filters[3][0]) if batch_norm else nn.Identity(),
            nn.Conv2d(in_channels=filters[3][0], out_channels=filters[4][0], kernel_size=filters[4][1], padding='same'),
            activation_conv(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(num_features=filters[4][0]) if batch_norm else nn.Identity(),
        )
        self.dense = nn.Sequential(
            nn.Flatten(),
            nn.Identity() if dropout is None else nn.Dropout(p=dropout),
            nn.Linear(in_features=int(filters[4][0]*(input_size[0] * input_size[1])/32**2), out_features=width_dense),
            activation_dense(),
            nn.Identity() if dropout is None else nn.Dropout(p=dropout),
            nn.Linear(in_features=width_dense, out_features=10),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        return self.dense(self.conv(x))

In [10]:
from model import ConvNet
from metrics import CategoricalAccuracy

model = ConvNet(filters=[(16, 3), (32, 3), (64, 3), (128, 3), (64, 3)], width_dense=64, dropout=0.2).to(device, non_blocking=True)

In [11]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
metric = CategoricalAccuracy()

In [12]:
from torch.utils.data import random_split

train_dataset, val_dataset = random_split(dataset, lengths=[0.8, 0.2])
train_dataloader, val_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True), DataLoader(val_dataset, batch_size=64, shuffle=True)

In [None]:
from model import train

train(model, train_dataloader, val_dataloader, loss_fn, optimizer, metric, 10, device)

In [8]:
from model import ConvNet

model = ConvNet(filters=[(128, 3)]*5, width_dense=128, activation_conv=torch.nn.SiLU).to(device)
model.load_state_dict(torch.load('./models/[128, 128, 128, 128, 128]_filters_128_width__silu_2_poolsize_0.0001_lr_0.001_wd_bnorm_aug'))

<All keys matched successfully>

In [4]:
dataset_test = ImageFolder('../data/val/', transform=transforms)

In [5]:
full_dataloader = DataLoader(dataset, batch_size=len(dataset_test))
X_test, Y_test = next(iter(full_dataloader))

In [11]:
# preds = torch.tensor([])
# batch_size = 128

# from math import ceil
# batches = range(int(ceil(len(X_test / batch_size))))

# model.eval()
# with torch.inference_mode():
#     for batch in batches:
#         X_sub = X_test[batch * batch_size : min((batch+1) * batch_size, len(X_test))].to(device, non_blocking=True)
#         preds_batch = model(X_sub)
#         preds = torch.cat([preds, preds_batch])

from model import predict

preds = predict(model, X_test, device=device)

In [12]:
preds.shape

torch.Size([2000, 10])

In [13]:
from metrics import CategoricalAccuracy

metric = CategoricalAccuracy()
score = metric(preds, Y_test)

In [14]:
score

tensor(0.4825)