<a href="https://colab.research.google.com/github/TheGreymanShow/pytorch-batch-norm-experiments/blob/master/batch_norm_experiments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Import Libraries

In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

import torchvision
import torchvision.transforms as transforms

2. Define CNN Architecture

In [0]:
class CNN(nn.Module):
    """
    Convolutional classifier: three conv blocks followed by a fully connected head.

    Each conv block is Conv -> ReLU -> Conv -> ReLU -> MaxPool(2). Every 3x3
    convolution uses padding=1, so only the pooling layers shrink the spatial
    size (by 2x each). The head's first Linear layer expects 4096 flattened
    features, i.e. 256 channels x 4 x 4, which is what a 32x32 input produces
    after the three poolings.

    Args:
        use_batch_norm: When True, a BatchNorm2d layer is inserted after the
            first convolution of each block (32, 128 and 256 channels).
            Defaults to False, which builds the plain network layer-for-layer
            (same Sequential indices, hence the same state_dict keys).
    """

    def __init__(self, use_batch_norm=False):
        """CNN Builder."""
        super(CNN, self).__init__()

        self.use_batch_norm = use_batch_norm

        def batch_norm(channels):
            """Return [BatchNorm2d(channels)] when enabled, else an empty list."""
            return [nn.BatchNorm2d(channels)] if use_batch_norm else []

        self.conv_layer = nn.Sequential(

            # Conv Layer block 1
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            *batch_norm(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Conv Layer block 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            *batch_norm(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=0.05),

            # Conv Layer block 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            *batch_norm(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.fc_layer = nn.Sequential(
            nn.Dropout(p=0.1),
            nn.Linear(4096, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        """
        Run the conv stack, flatten per sample, then apply the dense head.

        Args:
            x: Image batch with 3 channels; the spatial size must flatten to
                4096 features after the conv stack (32x32 inputs do).

        Returns:
            Tensor of shape (batch, 10) holding the unnormalised class scores
            (no softmax is applied here).
        """

        # conv layers
        x = self.conv_layer(x)

        # flatten: keep the batch dimension, collapse channels and spatial dims
        x = x.view(x.size(0), -1)

        # fc layer
        x = self.fc_layer(x)

        return x

3. Load data into train and test

In [0]:
# Convert images to tensors, then shift/scale every RGB channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 training split, reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=32,
    shuffle=True,
    num_workers=2,
)

# CIFAR-10 test split, iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


4. Load the model, loss function and optimizer

In [0]:
# Model: build the CNN on the GPU and wrap it so that batches are split
# across every CUDA device PyTorch can see.
gpu_ids = range(torch.cuda.device_count())
net = nn.DataParallel(CNN().cuda(), device_ids=gpu_ids)

# Allow cuDNN to benchmark convolution algorithms and pick the fastest one.
torch.backends.cudnn.benchmark = True

# Loss: cross-entropy over the 10 class scores produced by the network.
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum over all model parameters.
optimizer = optim.SGD(params=net.parameters(), lr=0.1, momentum=0.5)

 5. Train the model

In [0]:
# Number of full passes over the training set.
NUM_EPOCHS = 20

# Per-epoch history: one entry is appended to each list at the end of every epoch.
train_loss = []
test_loss = []

train_acc = []
test_acc = []

print("Starting Training...\n")
for epoch in range(NUM_EPOCHS):  # loop over the dataset multiple times

    # ---- training pass ----
    # Switch to training mode once per epoch (the evaluation pass below leaves
    # the model in eval mode, which disables dropout).
    net.train()

    running_loss = 0.0
    train_correct = 0
    train_total = 0
    for inputs, labels in trainloader:
        inputs = inputs.cuda()
        labels = labels.cuda()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # accumulate the training loss (mean loss of this batch)
        running_loss += loss.item()

        # accumulate the training accuracy; these predictions come from the
        # training-mode forward pass, i.e. with dropout active
        _, predicted = torch.max(outputs.detach(), 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    train_acc.append(100 * train_correct/train_total)
    train_loss.append(running_loss / len(trainloader))

    # ---- evaluation pass on the test set ----
    net.eval()

    correct = 0
    total = 0
    running_loss_test = 0.0
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in testloader:
            images = images.cuda()
            labels = labels.cuda()

            outputs = net(images)

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            loss = criterion(outputs, labels)
            running_loss_test += loss.item()

    test_acc.append(100 * correct/total)
    test_loss.append(running_loss_test / len(testloader))

    print(f"Epoch : {epoch}")
    print(f"Train accuracy: {100 * train_correct/train_total} ---- Test accuracy : {100 * correct/total}")
    print(f"Train loss : {running_loss / len(trainloader)} ------ Test Loss: {running_loss_test / len(testloader)}\n")

print('Finished Training')

Starting Training...

Epoch : 0
Train accuracy: 25.726 ---- Test accuracy : 44.97
Train loss : 1.9628937603644812 ------ Test Loss: 1.4990527043327355

Epoch : 1
Train accuracy: 54.072 ---- Test accuracy : 64.13
Train loss : 1.2692378385282066 ------ Test Loss: 1.0228394506076655

Epoch : 2
Train accuracy: 67.888 ---- Test accuracy : 71.76
Train loss : 0.9162798480238582 ------ Test Loss: 0.8215796382853779

Epoch : 3
Train accuracy: 74.952 ---- Test accuracy : 74.41
Train loss : 0.723680854339441 ------ Test Loss: 0.7446356900393392

Epoch : 4
Train accuracy: 79.37 ---- Test accuracy : 77.79
Train loss : 0.5954080391258135 ------ Test Loss: 0.6610594987393187

Epoch : 5
Train accuracy: 82.242 ---- Test accuracy : 76.56
Train loss : 0.508671383325175 ------ Test Loss: 0.7076823455266679

Epoch : 6
Train accuracy: 84.918 ---- Test accuracy : 77.49
Train loss : 0.43595426319428005 ------ Test Loss: 0.7103372829409834

Epoch : 7
Train accuracy: 86.358 ---- Test accuracy : 75.66
Train loss

6. Save the trained model

In [0]:
# Checkpoint location; the loading cell reads the same PATH.
PATH = './cifar_net.pth'

# Store only the parameters (state dict) of `net`, not the module object itself.
print('==> Saving model ...')
torch.save(obj=net.state_dict(), f=PATH)

==> Saving model ...


7. Load the saved model

In [0]:
# Rebuild the architecture with the same GPU + DataParallel wrapping that was
# used when the weights were saved, so the checkpoint keys line up.
net2 = torch.nn.DataParallel(
    CNN().cuda(),
    device_ids=range(torch.cuda.device_count()),
)
torch.backends.cudnn.benchmark = True

# Restore the saved parameters; the result of load_state_dict is left as the
# cell's last expression so the key-matching report is displayed.
saved_weights = torch.load(PATH)
net2.load_state_dict(saved_weights)

<All keys matched successfully>

8. Evaluate Model accuracy

In [0]:
# Per-sample predictions and ground-truth labels, collected in test-set order.
# Each entry is a 0-dim tensor on the GPU (one per test image).
y_pred = []
y_true = []

correct = 0
total = 0

# Evaluation mode disables dropout; set it once rather than on every batch.
net2.eval()

with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in testloader:
        images = images.cuda()
        labels = labels.cuda()

        outputs = net2(images)

        # the predicted class is the index of the highest score
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        y_pred.extend(predicted)
        y_true.extend(labels)

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 83 %


9. Evaluate other performance metrics

In [0]:
# NOTE(review): the wildcard import puts every public sklearn.metrics name into
# the notebook namespace; consider importing only the metrics actually used and
# moving the import to the imports cell at the top.
from sklearn.metrics import *
# Preview the first ten predictions collected by the evaluation cell.
y_pred[:10]

[tensor(3, device='cuda:0'),
 tensor(8, device='cuda:0'),
 tensor(8, device='cuda:0'),
 tensor(8, device='cuda:0'),
 tensor(6, device='cuda:0'),
 tensor(6, device='cuda:0'),
 tensor(1, device='cuda:0'),
 tensor(6, device='cuda:0'),
 tensor(3, device='cuda:0'),
 tensor(1, device='cuda:0')]

In [0]:
# Preview the first ten ground-truth labels, for comparison with y_pred[:10].
y_true[:10]

[tensor(3, device='cuda:0'),
 tensor(8, device='cuda:0'),
 tensor(8, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(6, device='cuda:0'),
 tensor(6, device='cuda:0'),
 tensor(1, device='cuda:0'),
 tensor(6, device='cuda:0'),
 tensor(3, device='cuda:0'),
 tensor(1, device='cuda:0')]