# GACNN Model Class

In [1]:
import torch.nn as nn
import torch.nn.functional as F

from typing import List

# Default genotype: 13 tuples, one per network block, each in the form
# (conv_output_channels, pool_layer_present, conv_kernel_size).
# GACNN._make_layers turns a tuple whose pool_layer_present is 1 into a single 2x2 max-pooling layer and
# ignores its other two values; every other tuple becomes a Conv2d -> BatchNorm2d -> ReLU block.
DEFAULT_GENOTYPE = [
    (57, 0, 3),
    (96, 0, 1),
    (36, 0, 1),
    (104, 0, 1),
    (97, 0, 3),
    (18, 0, 1),
    (60, 0, 3),
    (66, 1, 3),
    (82, 0, 3),
    (94, 0, 3),
    (210, 1, 3),
    (17, 1, 1),
    (100, 0, 3)]


class GACNN(nn.Module):
    """
    Convolutional network whose body is generated from a genotype found using genetic algorithm evolution.

    A genotype is a sequence of ``(channels, pool, kernel_size)`` tuples (13 in the default genotype). Every tuple
    produces exactly one block: a 2x2 max-pooling layer when ``pool == 1`` (``channels`` and ``kernel_size`` are
    then ignored), otherwise a Conv2d -> BatchNorm2d -> ReLU stack with ``channels`` output channels.
    """
    def __init__(
            self,
            genotype=None,
            num_classes: int = 10,
            in_chans: int = 1,
    ):
        """Instantiates a GACNN model; each block is defined by number of convolutional output channels, presence
        of a pooling layer, and kernel size for the convolution.

        Args:
            genotype (list, optional): list of 3-tuples defining the model architecture. Defaults to
                DEFAULT_GENOTYPE when None.
            num_classes (int, optional): number of classes. Defaults to 10.
            in_chans (int, optional): number of input channels. Defaults to 1.
        """
        super().__init__()

        if genotype is None:
            genotype = DEFAULT_GENOTYPE

        self.num_classes = num_classes
        self.in_chans = in_chans

        self.features = self._make_layers(genotype)

        # The classifier receives one value per channel of the final feature map. Pooling blocks do not
        # change the channel count, so the width must come from the last *convolutional* block rather than
        # from the last tuple (which may describe a pooling block). With no convolution at all the input
        # channels pass through unchanged.
        conv_widths = [round(channels) for channels, pool, _ in genotype if pool != 1]
        feature_channels = conv_widths[-1] if conv_widths else in_chans
        self.classifier = nn.Linear(feature_channels, num_classes)

    def forward(self, x):
        """
        The input is processed by the model body, then a global max-pooling operation reduces every feature map
        to 1x1; the results are flattened into a feature vector which enters the linear classifier.

        :param x: model input
        :return: output of the linear classifier, one row per input sample
        """
        out = self.features(x)
        # Kernel equal to the full spatial extent == global max pooling.
        out = F.max_pool2d(out, kernel_size=out.size()[2:])
        out = out.view(out.size(0), -1)
        return self.classifier(out)

    def _make_layers(self, genotype):
        """
        Makes the model body according to the provided genotype.

        :param genotype: model genotype - sequence of (channels, pool, kernel_size) 3-tuples
        :return: nn.Sequential holding the generated layers, with Xavier-initialised convolution weights
        """
        layers: List[nn.Module] = []
        input_channel = self.in_chans
        for channels, pool, kernel_size in genotype:
            if pool == 1:
                # Pooling block: the channel count and kernel size of this tuple are not used.
                layers.append(nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)))
                continue

            filters = round(channels)
            # NOTE(review): padding is fixed at 1 for every kernel size, so a 1x1 convolution enlarges the
            # feature map by 2 in each spatial dimension. Kept as-is so existing behaviour is unchanged;
            # confirm this is intended.
            layers += [
                nn.Conv2d(input_channel, filters, kernel_size=kernel_size, stride=1, padding=1),
                nn.BatchNorm2d(filters, eps=1e-05, momentum=0.05, affine=True),
                nn.ReLU(inplace=True),
            ]
            input_channel = filters

        model = nn.Sequential(*layers)
        relu_gain = nn.init.calculate_gain("relu")
        for m in model.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight.data, gain=relu_gain)
        return model

# Train function

In [2]:
import time

import torch
import torch.nn as nn
import torchvision
from torch import optim
from torch.utils.data import DataLoader, random_split
from torchvision.transforms import transforms
from tqdm import tqdm


def train(genotype=None, batch_size=32, epoch_limit=1000):
    """
    Loads MNIST dataset. Trains the model defined by genotype on MNIST for a number of epochs.
    Training will stop after epoch limit is reached or after the validation accuracy stops rising
    (no new best validation accuracy for 5 consecutive epochs).

    The official MNIST training set is split 80/20 into training and validation subsets; the official test
    set is only used for the final report. Per-epoch and final metrics are printed.

    :param genotype: Genotype for GACNN model
    :param batch_size: Batch size for data loader
    :param epoch_limit: Maximum number of epochs to train model
    :return: the model as it is after the last completed epoch (not a checkpoint of the best epoch),
        left in evaluation mode
    """

    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
    trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
    trainset, validationset = random_split(trainset, [0.8, 0.2])
    testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
    num_classes = 10
    in_chans = 1

    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
    validationloader = DataLoader(validationset, batch_size=batch_size, shuffle=False, num_workers=2)
    testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = GACNN(genotype=genotype, num_classes=num_classes, in_chans=in_chans)
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    acc_best, epoch_best, epoch, acc, loss = (0, 0, 0, 0, 0)

    t0 = time.time()
    # Keep going while the last epoch matched/improved the best accuracy, or while fewer than 5 epochs
    # have passed since the best one - always bounded by epoch_limit.
    while (acc >= acc_best or epoch - epoch_best < 5) and epoch < epoch_limit:

        # Evaluation below switches the model to eval mode, so re-enable training behaviour
        # (BatchNorm batch statistics) at the start of every epoch.
        model.train()
        for data in tqdm(trainloader, total=len(trainloader)):
            inputs, labels = data[0].to(device), data[1].to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Evaluate in eval mode so BatchNorm uses its running statistics and validation data
        # does not leak into them.
        model.eval()
        acc = evaluate_model(model, validationloader)

        if acc > acc_best:
            acc_best, epoch_best = (acc, epoch)

        epoch += 1

        # `loss` is the loss of the last training batch of the epoch, not an epoch average.
        print(f"Epoch {epoch} - loss = {loss} | validation accuracy = {acc}")

    t1 = time.time()

    model.eval()
    train_acc = evaluate_model(model, trainloader)
    test_acc = evaluate_model(model, testloader)
    print(f"Epoch {epoch} - train accuracy = {train_acc} | test accuracy = {test_acc}")
    print(f"Training time: {t1 - t0}")

    return model


def evaluate_model(model, testloader):
    """
    Evaluates model on given dataloader.

    The model is switched to evaluation mode for the duration of the call (so layers such as BatchNorm use
    their running statistics and are not updated by the evaluation data) and its previous mode is restored
    afterwards. Batches are moved to the device the model's parameters live on.

    :param model: PyTorch model to evaluate
    :param testloader: DataLoader object with evaluation dataset; each batch is indexed as
        (inputs, labels)
    :return: accuracy in percent (0-100); 0.0 when the loader yields no samples
    """
    # Follow the model's own device instead of guessing from CUDA availability, so a CPU model
    # also works on a machine that has a GPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    correct = 0.0
    total = 0.0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for data in testloader:
                images, labels = data[0].to(device), data[1].to(device)

                outputs = model(images)

                # Predicted class = index of the largest output along the class dimension.
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    if total == 0:
        return 0.0
    return 100 * correct / total


# Training and saving the model

In [None]:
# Train with the default genotype, batch size and epoch limit, then persist only the learned
# parameters (the state_dict, not the whole module object) to 'default.pth'.
model = train()
torch.save(model.state_dict(), 'default.pth')

100%|██████████| 1500/1500 [00:27<00:00, 53.87it/s]


Epoch 1 - loss = 0.02643842063844204 | validation accuracy = 98.975


100%|██████████| 1500/1500 [00:28<00:00, 53.04it/s]


Epoch 2 - loss = 0.003444208763539791 | validation accuracy = 98.725


100%|██████████| 1500/1500 [00:27<00:00, 54.53it/s]


Epoch 3 - loss = 0.016698498278856277 | validation accuracy = 98.94166666666666


100%|██████████| 1500/1500 [00:28<00:00, 52.81it/s]


Epoch 4 - loss = 0.01131428312510252 | validation accuracy = 98.9


100%|██████████| 1500/1500 [00:27<00:00, 53.80it/s]


Epoch 5 - loss = 0.008561301045119762 | validation accuracy = 99.06666666666666


100%|██████████| 1500/1500 [00:27<00:00, 53.69it/s]


Epoch 6 - loss = 0.10798192024230957 | validation accuracy = 99.2


100%|██████████| 1500/1500 [00:27<00:00, 54.45it/s]


Epoch 7 - loss = 0.00010499104973860085 | validation accuracy = 99.25833333333334


100%|██████████| 1500/1500 [00:27<00:00, 54.61it/s]


Epoch 8 - loss = 0.00042891831253655255 | validation accuracy = 99.19166666666666


100%|██████████| 1500/1500 [00:27<00:00, 54.70it/s]


Epoch 9 - loss = 0.0015849834308028221 | validation accuracy = 99.475


100%|██████████| 1500/1500 [00:27<00:00, 54.60it/s]


Epoch 10 - loss = 0.0001247662876266986 | validation accuracy = 99.03333333333333


100%|██████████| 1500/1500 [00:27<00:00, 54.55it/s]


Epoch 11 - loss = 0.00028661233955062926 | validation accuracy = 99.125


100%|██████████| 1500/1500 [00:27<00:00, 54.61it/s]


Epoch 12 - loss = 0.06826110184192657 | validation accuracy = 99.2


100%|██████████| 1500/1500 [00:27<00:00, 54.62it/s]


Epoch 13 - loss = 0.0008903602720238268 | validation accuracy = 99.35
Epoch 13 - train accuracy = 99.86875 | test accuracy = 99.5
Training time: 414.4551966190338
