In [None]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter

# Pick the compute device: the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Preprocessing pipelines: convert each image to a tensor, then normalize
# every channel with mean 0.5 and standard deviation 0.5
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=0.5, std=0.5),
])
cifar_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Hyper-parameters shared by every training run
batch_size = 4
num_epochs = 20
learning_rate = 0.01
# Per-data-set input geometry handed to the network constructors:
# number of image channels and side length of the (square) images
mnist_input_size, mnist_dimensions = 1, 28
cifar_input_size, cifar_dimensions = 3, 32

# MNIST: fetch the train and test splits (downloading into ./mnist_data when
# they are not already there) and apply the MNIST transform to every sample
mnist_trainset = torchvision.datasets.MNIST(
    root='./mnist_data', train=True, download=True, transform=mnist_transform)
mnist_testset = torchvision.datasets.MNIST(
    root='./mnist_data', train=False, download=True, transform=mnist_transform)

# Batched loaders with two worker processes each; only the training loader
# shuffles, the test loader keeps the data set order
mnist_trainloader = torch.utils.data.DataLoader(
    mnist_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
mnist_testloader = torch.utils.data.DataLoader(
    mnist_testset, batch_size=batch_size, shuffle=False, num_workers=2)

# CIFAR-10: fetch the train and test splits (downloading into ./cifar_data
# when they are not already there) and apply the CIFAR transform to every sample
cifar_trainset = torchvision.datasets.CIFAR10(
    root='./cifar_data', train=True, download=True, transform=cifar_transform)
cifar_testset = torchvision.datasets.CIFAR10(
    root='./cifar_data', train=False, download=True, transform=cifar_transform)

# Batched loaders with two worker processes each; only the training loader
# shuffles, the test loader keeps the data set order
cifar_trainloader = torch.utils.data.DataLoader(
    dataset=cifar_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
cifar_testloader = torch.utils.data.DataLoader(
    dataset=cifar_testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Convolutional neural networks
class NeuralNet1(nn.Module):
    """Smallest classifier: one 5x5 convolution followed by one linear layer.

    Args:
        input_size: Number of channels in the input images.
        dimensions: Side length of the (square) input images. The 'same'
            padding keeps the feature map at this size, so the linear layer
            receives 16 * dimensions**2 features.
    """

    def __init__(self, input_size, dimensions):
        super().__init__()
        flat_features = 16 * dimensions ** 2
        self.conv = nn.Conv2d(input_size, 16, kernel_size=5, padding='same')
        self.fulc = nn.Linear(flat_features, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the 10 raw class scores for every image in the batch `x`."""
        feature_maps = self.relu(self.conv(x))
        # Keep the batch dimension and collapse everything else into one vector
        flattened = feature_maps.flatten(1)
        return self.fulc(flattened)
class NeuralNet2(nn.Module):
    """Mid-sized classifier: two 5x5 convolutions followed by two linear layers.

    Args:
        input_size: Number of channels in the input images.
        dimensions: Side length of the (square) input images. Both
            convolutions use 'same' padding, so the first linear layer
            receives 16 * dimensions**2 features.
    """

    def __init__(self, input_size, dimensions):
        super().__init__()
        flat_features = 16 * dimensions ** 2
        self.conv1 = nn.Conv2d(input_size, 6, kernel_size=5, padding='same')
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, padding='same')
        self.fulc1 = nn.Linear(flat_features, 84)
        self.fulc2 = nn.Linear(84, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the 10 raw class scores for every image in the batch `x`."""
        # Convolutional feature extractor, ReLU after each convolution
        features = x
        for conv_layer in (self.conv1, self.conv2):
            features = self.relu(conv_layer(features))
        # Keep the batch dimension and collapse everything else into one vector
        flattened = features.flatten(1)
        hidden = self.relu(self.fulc1(flattened))
        return self.fulc2(hidden)
class NeuralNet3(nn.Module):
    """Largest classifier: two conv/batch-norm/ReLU/max-pool stages, then three linear layers.

    Args:
        input_size: Number of channels in the input images.
        dimensions: Side length of the (square) input images. Each stage ends
            in a 2x2 max-pool that halves the side length, so the first linear
            layer receives 16 * (dimensions // 4)**2 features.
    """

    def __init__(self, input_size, dimensions):
        super().__init__()
        pooled_side = dimensions // 4
        self.conv1 = nn.Conv2d(input_size, 6, kernel_size=5, padding='same')
        self.norm1 = nn.BatchNorm2d(6)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, padding='same')
        self.norm2 = nn.BatchNorm2d(16)
        self.fulc1 = nn.Linear(16 * pooled_side ** 2, 120)
        self.fulc2 = nn.Linear(120, 84)
        self.fulc3 = nn.Linear(84, 10)
        self.pool = nn.MaxPool2d(2)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the 10 raw class scores for every image in the batch `x`."""
        # Two feature-extraction stages: convolution -> batch norm -> ReLU -> pool
        features = x
        for conv_layer, norm_layer in ((self.conv1, self.norm1),
                                       (self.conv2, self.norm2)):
            features = self.pool(self.relu(norm_layer(conv_layer(features))))
        # Keep the batch dimension and collapse everything else into one vector
        hidden = features.flatten(1)
        # Two hidden linear layers with ReLU, then the un-activated output layer
        for linear_layer in (self.fulc1, self.fulc2):
            hidden = self.relu(linear_layer(hidden))
        return self.fulc3(hidden)

# Build one instance of every architecture per data set, sized for that data
# set's images, and move it to the selected device. Construction order
# (net 1, 2, 3 for MNIST, then net 1, 2, 3 for CIFAR) is kept as-is.
mnistnets = [
    net_class(mnist_input_size, mnist_dimensions).to(device)
    for net_class in (NeuralNet1, NeuralNet2, NeuralNet3)
]
mnistnet1, mnistnet2, mnistnet3 = mnistnets
cifarnets = [
    net_class(cifar_input_size, cifar_dimensions).to(device)
    for net_class in (NeuralNet1, NeuralNet2, NeuralNet3)
]
cifarnet1, cifarnet2, cifarnet3 = cifarnets

# Train and test each MNIST net first.
# For every network: train with SGD for `num_epochs` epochs, report the
# training time, measure accuracy on the test set, and save a checkpoint.
print('Initiating MNIST Training for', len(mnistnets), 'networks')
for net_number, mnistnet in enumerate(mnistnets, start=1):
    # Initiate timer, TensorBoard writer, loss criterion, and optimizer
    time_start = time.time()
    writer = SummaryWriter()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(mnistnet.parameters(), lr=learning_rate)
    batches_per_epoch = len(mnist_trainloader)

    # Training mode: batch-norm layers normalize with per-batch statistics
    # and update their running estimates
    mnistnet.train()
    # Train model by looping over the dataset per epoch per batch
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(mnist_trainloader, 0):
            # Extract data tuple to device in inputs and labels, respectively
            inputs, labels = data[0].to(device), data[1].to(device)
            # Zero out parameter gradients
            optimizer.zero_grad()
            # Forward pass then backpropagate and optimize
            outputs = mnistnet(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Report progress once every 2500 batches
            running_loss += loss.item()
            if i % 2500 == 2499:
                # Log the same per-batch average that is printed, so the
                # console and TensorBoard agree
                average_loss = running_loss / 2500
                print(f'[{epoch + 1}/{num_epochs}, {i + 1:5d}/{batches_per_epoch}] loss: {average_loss:.3f}')
                writer.add_scalar("Loss/train", average_loss, epoch * batches_per_epoch + i)
                running_loss = 0.0
    # Calculate time it took to train
    time_total = (time.time() - time_start)
    minutes, seconds = divmod(int(time_total), 60)
    print(f'Finished MNIST Training {net_number}/{len(mnistnets)} in {minutes}m{seconds}s')

    # Evaluation mode: batch-norm layers switch to their running statistics, so
    # predictions do not depend on the other images in the batch and the test
    # data cannot alter the statistics stored in the checkpoint saved below
    mnistnet.eval()
    # Model testing (gradients are not computed for memory efficiency)
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in mnist_testloader:
            # Parse data to our device, whether NVIDIA CUDA or the CPU
            images = images.to(device)
            labels = labels.to(device)
            # Run the images through the network to get our outputs
            outputs = mnistnet(images)
            # Predict the class with the highest score
            _, predicted = torch.max(outputs, 1)
            # Adjust prediction counters
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        # Report `total` (images seen); len(mnist_testloader) would be the
        # number of batches, not images
        print(f'Accuracy of the network on the {total} test images: {100 * correct / total} %')
    print(f'Finished MNIST Testing {net_number}/{len(mnistnets)}\n')

    # Flush and close TensorBoard writer then save the model checkpoint
    writer.flush()
    writer.close()
    torch.save(mnistnet.state_dict(), f'mnistnet{net_number}.ckpt')

# Repeat train and test with the CIFAR nets.
# For every network: train with SGD for `num_epochs` epochs, report the
# training time, measure accuracy on the test set, and save a checkpoint.
print('\n\nInitiating CIFAR Training for', len(cifarnets), 'networks')
for net_number, cifarnet in enumerate(cifarnets, start=1):
    # Initiate timer, TensorBoard writer, loss criterion, and optimizer
    time_start = time.time()
    writer = SummaryWriter()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(cifarnet.parameters(), lr=learning_rate)
    batches_per_epoch = len(cifar_trainloader)

    # Training mode: batch-norm layers normalize with per-batch statistics
    # and update their running estimates
    cifarnet.train()
    # Train model by looping over the dataset per epoch per batch
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(cifar_trainloader, 0):
            # Extract data tuple to device in inputs and labels, respectively
            inputs, labels = data[0].to(device), data[1].to(device)
            # Zero out parameter gradients
            optimizer.zero_grad()
            # Forward pass then backpropagate and optimize
            outputs = cifarnet(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Report progress once every 2500 batches
            running_loss += loss.item()
            if i % 2500 == 2499:
                # Log the same per-batch average that is printed, so the
                # console and TensorBoard agree
                average_loss = running_loss / 2500
                print(f'[{epoch + 1}/{num_epochs}, {i + 1:5d}/{batches_per_epoch}] loss: {average_loss:.3f}')
                writer.add_scalar("Loss/train", average_loss, epoch * batches_per_epoch + i)
                running_loss = 0.0
    # Calculate time it took to train
    time_total = (time.time() - time_start)
    minutes, seconds = divmod(int(time_total), 60)
    print(f'Finished CIFAR Training {net_number}/{len(cifarnets)} in {minutes}m{seconds}s')

    # Evaluation mode: batch-norm layers switch to their running statistics, so
    # predictions do not depend on the other images in the batch and the test
    # data cannot alter the statistics stored in the checkpoint saved below
    cifarnet.eval()
    # Model testing (gradients are not computed for memory efficiency)
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in cifar_testloader:
            # Parse data to our device, whether NVIDIA CUDA or the CPU
            images = images.to(device)
            labels = labels.to(device)
            # Run the images through the network to get our outputs
            outputs = cifarnet(images)
            # Predict the class with the highest score
            _, predicted = torch.max(outputs, 1)
            # Adjust prediction counters
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        # Report `total` (images seen); len(cifar_testloader) would be the
        # number of batches, not images
        print(f'Accuracy of the network on the {total} test images: {100 * correct / total} %')
    print(f'Finished CIFAR Testing {net_number}/{len(cifarnets)}\n')

    # Flush and close TensorBoard writer then save the model checkpoint
    writer.flush()
    writer.close()
    torch.save(cifarnet.state_dict(), f'cifarnet{net_number}.ckpt')

Judged by their error on the held-out test images, the MNIST models were far more accurate than the CIFAR models. The MNIST results were tightly clustered, spanning only 0.67 percentage points (98.52% to 99.19%), and their near-perfect accuracy improved with each successive network. Conversely, the CIFAR models performed worse and varied more, spanning 9.96 percentage points (53.25% to 63.21%). Additionally, their accuracy was 63.21% at best. It should be noted that their accuracy did increase with each successive model. This is especially the case with the last neural network, as it used batch normalization. It should also be noted that the CIFAR models trained faster than the MNIST models.

MNIST TIME AND ACCURACY
*   Net 1 - 18m45s (98.52%)
*   Net 2 - 22m09s (99.01%)
*   Net 3 - 25m53s (99.19%)

CIFAR TIME AND ACCURACY
*   Net 1 - 15m49s (53.25%)
*   Net 2 - 18m27s (55.37%)
*   Net 3 - 21m58s (63.21%)