## Srushti Nayak

### Assignment 4: CNN

#### Importing required libraries and loading CIFAR10 data

In [3]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Evaluation pipeline: tensor conversion followed by per-channel normalization
# with mean 0.5 and std 0.5. No augmentation is applied to test images.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Training pipeline: random 32x32 crop (after 4-pixel padding) and random
# horizontal flip as augmentation, then the same conversion/normalization.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 splits are stored under ./data (downloaded there if missing).
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)

# Mini-batches of 64; only the training data is reshuffled each epoch.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=64,
    shuffle=False,
    num_workers=2,
)


Files already downloaded and verified
Files already downloaded and verified


#### CNN with SGD + Momentum

In [6]:
class CNN_SGD(nn.Module):
    """Small CIFAR-10-style CNN trained with SGD plus momentum.

    Layout: three 3x3 convolutions with padding 1 (3 -> 32 -> 64 -> 128
    channels), each followed by ReLU and 2x2 max pooling, then two fully
    connected layers (128*4*4 -> 512 -> 10). The 128*4*4 flatten size
    corresponds to 32x32 inputs, since each pooling step halves height and
    width.
    """

    def __init__(self):
        super(CNN_SGD, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for each image in ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def train(self, trainloader=True, epochs=10):
        """Fit the model on ``trainloader``, or set the mode when given a bool.

        This method shadows ``nn.Module.train(mode)``, which PyTorch itself
        calls with a bool (``eval()`` is ``train(False)``). A bool argument is
        therefore forwarded to the base class so ``eval()`` keeps working;
        anything else is treated as an iterable of ``(inputs, labels)``
        batches and optimized with SGD (lr=0.001, momentum=0.9) on a
        cross-entropy loss.

        Args:
            trainloader: Iterable of ``(inputs, labels)`` batches, or a bool
                mode flag as accepted by ``nn.Module.train``.
            epochs: Number of passes over ``trainloader`` (default 10).

        Returns:
            ``self`` when called with a bool (the ``nn.Module`` convention),
            otherwise ``None``.
        """
        if isinstance(trainloader, bool):
            return super(CNN_SGD, self).train(trainloader)

        # A previous test() or eval() call may have left the model in eval mode.
        super(CNN_SGD, self).train(True)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(self.parameters(), lr=0.001, momentum=0.9)
        log_every = 200  # report the mean loss once per this many batches

        for epoch in range(epochs):
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(trainloader):
                optimizer.zero_grad()

                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if i % log_every == log_every - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / log_every))
                    running_loss = 0.0

        print('Finished Training')

    def test(self, testloader):
        """Print top-1 accuracy (truncated to a whole percent) on ``testloader``.

        Evaluation runs in eval mode with gradients disabled; the mode the
        model was in beforehand is restored afterwards.

        Args:
            testloader: Iterable of ``(images, labels)`` batches.

        Raises:
            ZeroDivisionError: If ``testloader`` yields no samples.
        """
        was_training = self.training
        self.eval()
        correct = 0
        total = 0
        try:
            with torch.no_grad():
                for images, labels in testloader:
                    outputs = self(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            self.train(was_training)

        print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))


In [8]:
# SGD + momentum run: build a fresh model, fit it, then print test accuracy.
CNN_SGD_obj = CNN_SGD()
CNN_SGD_obj.train(trainloader=trainloader)
CNN_SGD_obj.test(testloader=testloader)

[1,   200] loss: 2.302
[1,   400] loss: 2.297
[1,   600] loss: 2.288
[2,   200] loss: 2.159
[2,   400] loss: 2.063
[2,   600] loss: 2.000
[3,   200] loss: 1.923
[3,   400] loss: 1.882
[3,   600] loss: 1.842
[4,   200] loss: 1.770
[4,   400] loss: 1.725
[4,   600] loss: 1.704
[5,   200] loss: 1.635
[5,   400] loss: 1.608
[5,   600] loss: 1.586
[6,   200] loss: 1.547
[6,   400] loss: 1.539
[6,   600] loss: 1.508
[7,   200] loss: 1.486
[7,   400] loss: 1.478
[7,   600] loss: 1.455
[8,   200] loss: 1.412
[8,   400] loss: 1.427
[8,   600] loss: 1.409
[9,   200] loss: 1.377
[9,   400] loss: 1.357
[9,   600] loss: 1.366
[10,   200] loss: 1.316
[10,   400] loss: 1.321
[10,   600] loss: 1.294
Finished Training
Accuracy of the network on the test images: 56 %


#### CNN with SGD without momentum

In [15]:
class CNN_SGD_NO_MOM(nn.Module):
    """Small CIFAR-10-style CNN trained with plain SGD (no momentum).

    Layout: three 3x3 convolutions with padding 1 (3 -> 32 -> 64 -> 128
    channels), each followed by ReLU and 2x2 max pooling, then two fully
    connected layers (128*4*4 -> 512 -> 10). The 128*4*4 flatten size
    corresponds to 32x32 inputs, since each pooling step halves height and
    width.
    """

    def __init__(self):
        super(CNN_SGD_NO_MOM, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for each image in ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def train(self, trainloader=True, epochs=10):
        """Fit the model on ``trainloader``, or set the mode when given a bool.

        This method shadows ``nn.Module.train(mode)``, which PyTorch itself
        calls with a bool (``eval()`` is ``train(False)``). A bool argument is
        therefore forwarded to the base class so ``eval()`` keeps working;
        anything else is treated as an iterable of ``(inputs, labels)``
        batches and optimized with SGD (lr=0.001, no momentum) on a
        cross-entropy loss.

        Args:
            trainloader: Iterable of ``(inputs, labels)`` batches, or a bool
                mode flag as accepted by ``nn.Module.train``.
            epochs: Number of passes over ``trainloader`` (default 10).

        Returns:
            ``self`` when called with a bool (the ``nn.Module`` convention),
            otherwise ``None``.
        """
        if isinstance(trainloader, bool):
            return super(CNN_SGD_NO_MOM, self).train(trainloader)

        # A previous test() or eval() call may have left the model in eval mode.
        super(CNN_SGD_NO_MOM, self).train(True)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(self.parameters(), lr=0.001)
        log_every = 200  # report the mean loss once per this many batches

        for epoch in range(epochs):
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(trainloader):
                optimizer.zero_grad()

                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if i % log_every == log_every - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / log_every))
                    running_loss = 0.0

        print('Finished Training')

    def test(self, testloader):
        """Print top-1 accuracy (truncated to a whole percent) on ``testloader``.

        Evaluation runs in eval mode with gradients disabled; the mode the
        model was in beforehand is restored afterwards.

        Args:
            testloader: Iterable of ``(images, labels)`` batches.

        Raises:
            ZeroDivisionError: If ``testloader`` yields no samples.
        """
        was_training = self.training
        self.eval()
        correct = 0
        total = 0
        try:
            with torch.no_grad():
                for images, labels in testloader:
                    outputs = self(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            self.train(was_training)

        print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))


In [16]:
# Plain-SGD run: build a fresh model, fit it, then print test accuracy.
CNN_SGD__NO_MOM_obj = CNN_SGD_NO_MOM()
CNN_SGD__NO_MOM_obj.train(trainloader=trainloader)
CNN_SGD__NO_MOM_obj.test(testloader=testloader)

[1,   200] loss: 2.302
[1,   400] loss: 2.301
[1,   600] loss: 2.301
[2,   200] loss: 2.300
[2,   400] loss: 2.299
[2,   600] loss: 2.298
[3,   200] loss: 2.297
[3,   400] loss: 2.296
[3,   600] loss: 2.295
[4,   200] loss: 2.294
[4,   400] loss: 2.292
[4,   600] loss: 2.292
[5,   200] loss: 2.289
[5,   400] loss: 2.287
[5,   600] loss: 2.286
[6,   200] loss: 2.281
[6,   400] loss: 2.280
[6,   600] loss: 2.277
[7,   200] loss: 2.270
[7,   400] loss: 2.266
[7,   600] loss: 2.260
[8,   200] loss: 2.248
[8,   400] loss: 2.238
[8,   600] loss: 2.229
[9,   200] loss: 2.204
[9,   400] loss: 2.187
[9,   600] loss: 2.173
[10,   200] loss: 2.144
[10,   400] loss: 2.127
[10,   600] loss: 2.115
Finished Training
Accuracy of the network on the test images: 26 %


#### CNN with Adagrad

In [17]:
class CNN_ADA(nn.Module):
    """Small CIFAR-10-style CNN trained with the Adagrad optimizer.

    Layout: three 3x3 convolutions with padding 1 (3 -> 32 -> 64 -> 128
    channels), each followed by ReLU and 2x2 max pooling, then two fully
    connected layers (128*4*4 -> 512 -> 10). The 128*4*4 flatten size
    corresponds to 32x32 inputs, since each pooling step halves height and
    width.
    """

    def __init__(self):
        super(CNN_ADA, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for each image in ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def train(self, trainloader=True, epochs=10):
        """Fit the model on ``trainloader``, or set the mode when given a bool.

        This method shadows ``nn.Module.train(mode)``, which PyTorch itself
        calls with a bool (``eval()`` is ``train(False)``). A bool argument is
        therefore forwarded to the base class so ``eval()`` keeps working;
        anything else is treated as an iterable of ``(inputs, labels)``
        batches and optimized with Adagrad (lr=0.001) on a cross-entropy loss.

        Args:
            trainloader: Iterable of ``(inputs, labels)`` batches, or a bool
                mode flag as accepted by ``nn.Module.train``.
            epochs: Number of passes over ``trainloader`` (default 10).

        Returns:
            ``self`` when called with a bool (the ``nn.Module`` convention),
            otherwise ``None``.
        """
        if isinstance(trainloader, bool):
            return super(CNN_ADA, self).train(trainloader)

        # A previous test() or eval() call may have left the model in eval mode.
        super(CNN_ADA, self).train(True)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adagrad(self.parameters(), lr=0.001)
        log_every = 200  # report the mean loss once per this many batches

        for epoch in range(epochs):
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(trainloader):
                optimizer.zero_grad()

                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if i % log_every == log_every - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / log_every))
                    running_loss = 0.0

        print('Finished Training')

    def test(self, testloader):
        """Print top-1 accuracy (truncated to a whole percent) on ``testloader``.

        Evaluation runs in eval mode with gradients disabled; the mode the
        model was in beforehand is restored afterwards.

        Args:
            testloader: Iterable of ``(images, labels)`` batches.

        Raises:
            ZeroDivisionError: If ``testloader`` yields no samples.
        """
        was_training = self.training
        self.eval()
        correct = 0
        total = 0
        try:
            with torch.no_grad():
                for images, labels in testloader:
                    outputs = self(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            self.train(was_training)

        print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))


In [18]:
# Adagrad run: build a fresh model, fit it, then print test accuracy.
CNN_ADA_obj = CNN_ADA()
CNN_ADA_obj.train(trainloader=trainloader)
CNN_ADA_obj.test(testloader=testloader)

[1,   200] loss: 1.968
[1,   400] loss: 1.777
[1,   600] loss: 1.710
[2,   200] loss: 1.638
[2,   400] loss: 1.617
[2,   600] loss: 1.581
[3,   200] loss: 1.567
[3,   400] loss: 1.534
[3,   600] loss: 1.540
[4,   200] loss: 1.524
[4,   400] loss: 1.502
[4,   600] loss: 1.505
[5,   200] loss: 1.498
[5,   400] loss: 1.493
[5,   600] loss: 1.481
[6,   200] loss: 1.487
[6,   400] loss: 1.483
[6,   600] loss: 1.460
[7,   200] loss: 1.456
[7,   400] loss: 1.459
[7,   600] loss: 1.450
[8,   200] loss: 1.448
[8,   400] loss: 1.452
[8,   600] loss: 1.439
[9,   200] loss: 1.430
[9,   400] loss: 1.439
[9,   600] loss: 1.430
[10,   200] loss: 1.428
[10,   400] loss: 1.412
[10,   600] loss: 1.423
Finished Training
Accuracy of the network on the test images: 51 %


#### CNN with RMSProp

In [19]:
class CNN_RMS(nn.Module):
    """Small CIFAR-10-style CNN trained with the RMSprop optimizer.

    Layout: three 3x3 convolutions with padding 1 (3 -> 32 -> 64 -> 128
    channels), each followed by ReLU and 2x2 max pooling, then two fully
    connected layers (128*4*4 -> 512 -> 10). The 128*4*4 flatten size
    corresponds to 32x32 inputs, since each pooling step halves height and
    width.
    """

    def __init__(self):
        super(CNN_RMS, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for each image in ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def train(self, trainloader=True, epochs=10):
        """Fit the model on ``trainloader``, or set the mode when given a bool.

        This method shadows ``nn.Module.train(mode)``, which PyTorch itself
        calls with a bool (``eval()`` is ``train(False)``). A bool argument is
        therefore forwarded to the base class so ``eval()`` keeps working;
        anything else is treated as an iterable of ``(inputs, labels)``
        batches and optimized with RMSprop (lr=0.001) on a cross-entropy loss.

        Args:
            trainloader: Iterable of ``(inputs, labels)`` batches, or a bool
                mode flag as accepted by ``nn.Module.train``.
            epochs: Number of passes over ``trainloader`` (default 10).

        Returns:
            ``self`` when called with a bool (the ``nn.Module`` convention),
            otherwise ``None``.
        """
        if isinstance(trainloader, bool):
            return super(CNN_RMS, self).train(trainloader)

        # A previous test() or eval() call may have left the model in eval mode.
        super(CNN_RMS, self).train(True)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.RMSprop(self.parameters(), lr=0.001)
        log_every = 200  # report the mean loss once per this many batches

        for epoch in range(epochs):
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(trainloader):
                optimizer.zero_grad()

                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if i % log_every == log_every - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / log_every))
                    running_loss = 0.0

        print('Finished Training')

    def test(self, testloader):
        """Print top-1 accuracy (truncated to a whole percent) on ``testloader``.

        Evaluation runs in eval mode with gradients disabled; the mode the
        model was in beforehand is restored afterwards.

        Args:
            testloader: Iterable of ``(images, labels)`` batches.

        Raises:
            ZeroDivisionError: If ``testloader`` yields no samples.
        """
        was_training = self.training
        self.eval()
        correct = 0
        total = 0
        try:
            with torch.no_grad():
                for images, labels in testloader:
                    outputs = self(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            self.train(was_training)

        print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))


In [20]:
# RMSprop run: build a fresh model, fit it, then print test accuracy.
CNN_RMSProp_obj = CNN_RMS()
CNN_RMSProp_obj.train(trainloader=trainloader)
CNN_RMSProp_obj.test(testloader=testloader)

[1,   200] loss: 2.020
[1,   400] loss: 1.659
[1,   600] loss: 1.532
[2,   200] loss: 1.336
[2,   400] loss: 1.274
[2,   600] loss: 1.206
[3,   200] loss: 1.078
[3,   400] loss: 1.044
[3,   600] loss: 1.013
[4,   200] loss: 0.926
[4,   400] loss: 0.919
[4,   600] loss: 0.890
[5,   200] loss: 0.840
[5,   400] loss: 0.824
[5,   600] loss: 0.820
[6,   200] loss: 0.757
[6,   400] loss: 0.769
[6,   600] loss: 0.756
[7,   200] loss: 0.729
[7,   400] loss: 0.713
[7,   600] loss: 0.701
[8,   200] loss: 0.685
[8,   400] loss: 0.665
[8,   600] loss: 0.667
[9,   200] loss: 0.643
[9,   400] loss: 0.649
[9,   600] loss: 0.651
[10,   200] loss: 0.619
[10,   400] loss: 0.615
[10,   600] loss: 0.618
Finished Training
Accuracy of the network on the test images: 65 %


#### CNN with Adam

In [21]:
class CNN_ADAM(nn.Module):
    """Small CIFAR-10-style CNN trained with the Adam optimizer.

    Layout: three 3x3 convolutions with padding 1 (3 -> 32 -> 64 -> 128
    channels), each followed by ReLU and 2x2 max pooling, then two fully
    connected layers (128*4*4 -> 512 -> 10). The 128*4*4 flatten size
    corresponds to 32x32 inputs, since each pooling step halves height and
    width.
    """

    def __init__(self):
        super(CNN_ADAM, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for each image in ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def train(self, trainloader=True, epochs=10):
        """Fit the model on ``trainloader``, or set the mode when given a bool.

        This method shadows ``nn.Module.train(mode)``, which PyTorch itself
        calls with a bool (``eval()`` is ``train(False)``). A bool argument is
        therefore forwarded to the base class so ``eval()`` keeps working;
        anything else is treated as an iterable of ``(inputs, labels)``
        batches and optimized with Adam (lr=0.001) on a cross-entropy loss.

        Args:
            trainloader: Iterable of ``(inputs, labels)`` batches, or a bool
                mode flag as accepted by ``nn.Module.train``.
            epochs: Number of passes over ``trainloader`` (default 10).

        Returns:
            ``self`` when called with a bool (the ``nn.Module`` convention),
            otherwise ``None``.
        """
        if isinstance(trainloader, bool):
            return super(CNN_ADAM, self).train(trainloader)

        # A previous test() or eval() call may have left the model in eval mode.
        super(CNN_ADAM, self).train(True)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.parameters(), lr=0.001)
        log_every = 200  # report the mean loss once per this many batches

        for epoch in range(epochs):
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(trainloader):
                optimizer.zero_grad()

                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if i % log_every == log_every - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / log_every))
                    running_loss = 0.0

        print('Finished Training')

    def test(self, testloader):
        """Print top-1 accuracy (truncated to a whole percent) on ``testloader``.

        Evaluation runs in eval mode with gradients disabled; the mode the
        model was in beforehand is restored afterwards.

        Args:
            testloader: Iterable of ``(images, labels)`` batches.

        Raises:
            ZeroDivisionError: If ``testloader`` yields no samples.
        """
        was_training = self.training
        self.eval()
        correct = 0
        total = 0
        try:
            with torch.no_grad():
                for images, labels in testloader:
                    outputs = self(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            self.train(was_training)

        print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))


In [22]:
# Adam run: build a fresh model, fit it, then print test accuracy.
CNN_Adem_obj = CNN_ADAM()
CNN_Adem_obj.train(trainloader=trainloader)
CNN_Adem_obj.test(testloader=testloader)

[1,   200] loss: 1.856
[1,   400] loss: 1.525
[1,   600] loss: 1.388
[2,   200] loss: 1.231
[2,   400] loss: 1.149
[2,   600] loss: 1.094
[3,   200] loss: 0.991
[3,   400] loss: 0.966
[3,   600] loss: 0.934
[4,   200] loss: 0.874
[4,   400] loss: 0.864
[4,   600] loss: 0.854
[5,   200] loss: 0.787
[5,   400] loss: 0.771
[5,   600] loss: 0.786
[6,   200] loss: 0.739
[6,   400] loss: 0.728
[6,   600] loss: 0.723
[7,   200] loss: 0.685
[7,   400] loss: 0.689
[7,   600] loss: 0.697
[8,   200] loss: 0.644
[8,   400] loss: 0.646
[8,   600] loss: 0.643
[9,   200] loss: 0.638
[9,   400] loss: 0.626
[9,   600] loss: 0.614
[10,   200] loss: 0.588
[10,   400] loss: 0.588
[10,   600] loss: 0.597
Finished Training
Accuracy of the network on the test images: 78 %


#### RMSProp with L2 regularization

In [1]:
class CNN_RMS_Reg(nn.Module):
    """Small CIFAR-10-style CNN trained with RMSprop plus L2 weight decay.

    Layout: three 3x3 convolutions with padding 1 (3 -> 32 -> 64 -> 128
    channels), each followed by ReLU and 2x2 max pooling, then two fully
    connected layers (128*4*4 -> 512 -> 10). The 128*4*4 flatten size
    corresponds to 32x32 inputs, since each pooling step halves height and
    width.
    """

    def __init__(self):
        super(CNN_RMS_Reg, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for each image in ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def train(self, trainloader=True, epochs=10):
        """Fit the model on ``trainloader``, or set the mode when given a bool.

        This method shadows ``nn.Module.train(mode)``, which PyTorch itself
        calls with a bool (``eval()`` is ``train(False)``). A bool argument is
        therefore forwarded to the base class so ``eval()`` keeps working;
        anything else is treated as an iterable of ``(inputs, labels)``
        batches and optimized with RMSprop (lr=0.001, weight_decay=0.001) on a
        cross-entropy loss.

        Args:
            trainloader: Iterable of ``(inputs, labels)`` batches, or a bool
                mode flag as accepted by ``nn.Module.train``.
            epochs: Number of passes over ``trainloader`` (default 10).

        Returns:
            ``self`` when called with a bool (the ``nn.Module`` convention),
            otherwise ``None``.
        """
        if isinstance(trainloader, bool):
            return super(CNN_RMS_Reg, self).train(trainloader)

        # A previous test() or eval() call may have left the model in eval mode.
        super(CNN_RMS_Reg, self).train(True)
        criterion = nn.CrossEntropyLoss()
        # weight_decay supplies the L2 penalty this variant is meant to study.
        optimizer = optim.RMSprop(self.parameters(), lr=0.001, weight_decay=0.001)
        log_every = 200  # report the mean loss once per this many batches

        for epoch in range(epochs):
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(trainloader):
                optimizer.zero_grad()

                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if i % log_every == log_every - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / log_every))
                    running_loss = 0.0

        print('Finished Training')

    def test(self, testloader):
        """Print top-1 accuracy (truncated to a whole percent) on ``testloader``.

        Evaluation runs in eval mode with gradients disabled; the mode the
        model was in beforehand is restored afterwards.

        Args:
            testloader: Iterable of ``(images, labels)`` batches.

        Raises:
            ZeroDivisionError: If ``testloader`` yields no samples.
        """
        was_training = self.training
        self.eval()
        correct = 0
        total = 0
        try:
            with torch.no_grad():
                for images, labels in testloader:
                    outputs = self(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            self.train(was_training)

        print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))


In [4]:
# RMSprop + weight decay run: build a fresh model, fit it, then print test accuracy.
CNN_RMS_Reg_obj = CNN_RMS_Reg()
CNN_RMS_Reg_obj.train(trainloader=trainloader)
CNN_RMS_Reg_obj.test(testloader=testloader)

[1,   200] loss: 2.004
[1,   400] loss: 1.676
[1,   600] loss: 1.554
[2,   200] loss: 1.394
[2,   400] loss: 1.341
[2,   600] loss: 1.292
[3,   200] loss: 1.214
[3,   400] loss: 1.173
[3,   600] loss: 1.142
[4,   200] loss: 1.075
[4,   400] loss: 1.072
[4,   600] loss: 1.051
[5,   200] loss: 1.006
[5,   400] loss: 0.986
[5,   600] loss: 0.977
[6,   200] loss: 0.939
[6,   400] loss: 0.941
[6,   600] loss: 0.908
[7,   200] loss: 0.876
[7,   400] loss: 0.883
[7,   600] loss: 0.882
[8,   200] loss: 0.846
[8,   400] loss: 0.844
[8,   600] loss: 0.849
[9,   200] loss: 0.815
[9,   400] loss: 0.810
[9,   600] loss: 0.820
[10,   200] loss: 0.802
[10,   400] loss: 0.786
[10,   600] loss: 0.777
Finished Training
Accuracy of the network on the test images: 65 %


In [5]:
class CNN_ADAM_Reg(nn.Module):
    """Small CIFAR-10-style CNN trained with Adam plus L2 weight decay.

    Layout: three 3x3 convolutions with padding 1 (3 -> 32 -> 64 -> 128
    channels), each followed by ReLU and 2x2 max pooling, then two fully
    connected layers (128*4*4 -> 512 -> 10). The 128*4*4 flatten size
    corresponds to 32x32 inputs, since each pooling step halves height and
    width.
    """

    def __init__(self):
        super(CNN_ADAM_Reg, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for each image in ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def train(self, trainloader=True, epochs=10):
        """Fit the model on ``trainloader``, or set the mode when given a bool.

        This method shadows ``nn.Module.train(mode)``, which PyTorch itself
        calls with a bool (``eval()`` is ``train(False)``). A bool argument is
        therefore forwarded to the base class so ``eval()`` keeps working;
        anything else is treated as an iterable of ``(inputs, labels)``
        batches and optimized with Adam (lr=0.001, weight_decay=0.001) on a
        cross-entropy loss.

        Args:
            trainloader: Iterable of ``(inputs, labels)`` batches, or a bool
                mode flag as accepted by ``nn.Module.train``.
            epochs: Number of passes over ``trainloader`` (default 10).

        Returns:
            ``self`` when called with a bool (the ``nn.Module`` convention),
            otherwise ``None``.
        """
        if isinstance(trainloader, bool):
            return super(CNN_ADAM_Reg, self).train(trainloader)

        # A previous test() or eval() call may have left the model in eval mode.
        super(CNN_ADAM_Reg, self).train(True)
        criterion = nn.CrossEntropyLoss()
        # weight_decay supplies the L2 penalty this variant is meant to study.
        optimizer = optim.Adam(self.parameters(), lr=0.001, weight_decay=0.001)
        log_every = 200  # report the mean loss once per this many batches

        for epoch in range(epochs):
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(trainloader):
                optimizer.zero_grad()

                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if i % log_every == log_every - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / log_every))
                    running_loss = 0.0

        print('Finished Training')

    def test(self, testloader):
        """Print top-1 accuracy (truncated to a whole percent) on ``testloader``.

        Evaluation runs in eval mode with gradients disabled; the mode the
        model was in beforehand is restored afterwards.

        Args:
            testloader: Iterable of ``(images, labels)`` batches.

        Raises:
            ZeroDivisionError: If ``testloader`` yields no samples.
        """
        was_training = self.training
        self.eval()
        correct = 0
        total = 0
        try:
            with torch.no_grad():
                for images, labels in testloader:
                    outputs = self(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            self.train(was_training)

        print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))


In [6]:
# Adam + weight decay run: build a fresh model, fit it, then print test accuracy.
CNN_ADAM_Reg_obj = CNN_ADAM_Reg()
CNN_ADAM_Reg_obj.train(trainloader=trainloader)
CNN_ADAM_Reg_obj.test(testloader=testloader)

[1,   200] loss: 1.877
[1,   400] loss: 1.561
[1,   600] loss: 1.435
[2,   200] loss: 1.291
[2,   400] loss: 1.204
[2,   600] loss: 1.147
[3,   200] loss: 1.063
[3,   400] loss: 1.027
[3,   600] loss: 0.991
[4,   200] loss: 0.937
[4,   400] loss: 0.921
[4,   600] loss: 0.919
[5,   200] loss: 0.876
[5,   400] loss: 0.851
[5,   600] loss: 0.850
[6,   200] loss: 0.820
[6,   400] loss: 0.810
[6,   600] loss: 0.813
[7,   200] loss: 0.792
[7,   400] loss: 0.797
[7,   600] loss: 0.771
[8,   200] loss: 0.773
[8,   400] loss: 0.736
[8,   600] loss: 0.761
[9,   200] loss: 0.725
[9,   400] loss: 0.734
[9,   600] loss: 0.732
[10,   200] loss: 0.711
[10,   400] loss: 0.710
[10,   600] loss: 0.729
Finished Training
Accuracy of the network on the test images: 76 %


#### Batch normalization with RMSProp

In [8]:
class CNN_RMS_BNORM(nn.Module):
    """Small CIFAR-10-style CNN with batch normalization, trained with RMSprop.

    Layout: three 3x3 convolutions with padding 1 (3 -> 32 -> 64 -> 128
    channels), each followed by BatchNorm2d, ReLU and 2x2 max pooling, then
    two fully connected layers (128*4*4 -> 512 -> 10). The 128*4*4 flatten
    size corresponds to 32x32 inputs, since each pooling step halves height
    and width.
    """

    def __init__(self):
        super(CNN_RMS_BNORM, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for each image in ``x``."""
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def train(self, trainloader=True, epochs=10):
        """Fit the model on ``trainloader``, or set the mode when given a bool.

        This method shadows ``nn.Module.train(mode)``, which PyTorch itself
        calls with a bool (``eval()`` is ``train(False)``). A bool argument is
        therefore forwarded to the base class so ``eval()`` keeps working;
        anything else is treated as an iterable of ``(inputs, labels)``
        batches and optimized with RMSprop (lr=0.001) on a cross-entropy loss.

        Args:
            trainloader: Iterable of ``(inputs, labels)`` batches, or a bool
                mode flag as accepted by ``nn.Module.train``.
            epochs: Number of passes over ``trainloader`` (default 10).

        Returns:
            ``self`` when called with a bool (the ``nn.Module`` convention),
            otherwise ``None``.
        """
        if isinstance(trainloader, bool):
            return super(CNN_RMS_BNORM, self).train(trainloader)

        # BatchNorm must be in training mode while fitting; a previous test()
        # or eval() call may have left the model in eval mode.
        super(CNN_RMS_BNORM, self).train(True)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.RMSprop(self.parameters(), lr=0.001)
        log_every = 200  # report the mean loss once per this many batches

        for epoch in range(epochs):
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(trainloader):
                optimizer.zero_grad()

                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if i % log_every == log_every - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / log_every))
                    running_loss = 0.0

        print('Finished Training')

    def test(self, testloader):
        """Print top-1 accuracy (truncated to a whole percent) on ``testloader``.

        Evaluation runs in eval mode with gradients disabled, so the BatchNorm
        layers normalize with their running statistics and are not updated by
        test data. The mode the model was in beforehand is restored afterwards.

        Args:
            testloader: Iterable of ``(images, labels)`` batches.

        Raises:
            ZeroDivisionError: If ``testloader`` yields no samples.
        """
        was_training = self.training
        self.eval()
        correct = 0
        total = 0
        try:
            with torch.no_grad():
                for images, labels in testloader:
                    outputs = self(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            self.train(was_training)

        print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))


In [9]:
# RMSprop + batch-norm run: build a fresh model, fit it, then print test accuracy.
CNN_RMS_BNORM_obj = CNN_RMS_BNORM()
CNN_RMS_BNORM_obj.train(trainloader=trainloader)
CNN_RMS_BNORM_obj.test(testloader=testloader)

[1,   200] loss: 2.196
[1,   400] loss: 1.549
[1,   600] loss: 1.392
[2,   200] loss: 1.189
[2,   400] loss: 1.139
[2,   600] loss: 1.071
[3,   200] loss: 0.999
[3,   400] loss: 0.971
[3,   600] loss: 0.949
[4,   200] loss: 0.896
[4,   400] loss: 0.891
[4,   600] loss: 0.858
[5,   200] loss: 0.818
[5,   400] loss: 0.815
[5,   600] loss: 0.806
[6,   200] loss: 0.770
[6,   400] loss: 0.773
[6,   600] loss: 0.771
[7,   200] loss: 0.734
[7,   400] loss: 0.729
[7,   600] loss: 0.732
[8,   200] loss: 0.683
[8,   400] loss: 0.688
[8,   600] loss: 0.703
[9,   200] loss: 0.650
[9,   400] loss: 0.675
[9,   600] loss: 0.663
[10,   200] loss: 0.636
[10,   400] loss: 0.638
[10,   600] loss: 0.638
Finished Training
Accuracy of the network on the test images: 75 %


#### Batch normalization with Adam

In [10]:
class CNN_ADAM_BNORM(nn.Module):
    """Small CIFAR-10-style CNN with batch normalization, trained with Adam.

    Layout: three 3x3 convolutions with padding 1 (3 -> 32 -> 64 -> 128
    channels), each followed by BatchNorm2d, ReLU and 2x2 max pooling, then
    two fully connected layers (128*4*4 -> 512 -> 10). The 128*4*4 flatten
    size corresponds to 32x32 inputs, since each pooling step halves height
    and width.
    """

    def __init__(self):
        super(CNN_ADAM_BNORM, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for each image in ``x``."""
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def train(self, trainloader=True, epochs=10):
        """Fit the model on ``trainloader``, or set the mode when given a bool.

        This method shadows ``nn.Module.train(mode)``, which PyTorch itself
        calls with a bool (``eval()`` is ``train(False)``). A bool argument is
        therefore forwarded to the base class so ``eval()`` keeps working;
        anything else is treated as an iterable of ``(inputs, labels)``
        batches and optimized with Adam (lr=0.001) on a cross-entropy loss.

        Args:
            trainloader: Iterable of ``(inputs, labels)`` batches, or a bool
                mode flag as accepted by ``nn.Module.train``.
            epochs: Number of passes over ``trainloader`` (default 10).

        Returns:
            ``self`` when called with a bool (the ``nn.Module`` convention),
            otherwise ``None``.
        """
        if isinstance(trainloader, bool):
            return super(CNN_ADAM_BNORM, self).train(trainloader)

        # BatchNorm must be in training mode while fitting; a previous test()
        # or eval() call may have left the model in eval mode.
        super(CNN_ADAM_BNORM, self).train(True)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.parameters(), lr=0.001)
        log_every = 200  # report the mean loss once per this many batches

        for epoch in range(epochs):
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(trainloader):
                optimizer.zero_grad()

                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if i % log_every == log_every - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / log_every))
                    running_loss = 0.0

        print('Finished Training')

    def test(self, testloader):
        """Print top-1 accuracy (truncated to a whole percent) on ``testloader``.

        Evaluation runs in eval mode with gradients disabled, so the BatchNorm
        layers normalize with their running statistics and are not updated by
        test data. The mode the model was in beforehand is restored afterwards.

        Args:
            testloader: Iterable of ``(images, labels)`` batches.

        Raises:
            ZeroDivisionError: If ``testloader`` yields no samples.
        """
        was_training = self.training
        self.eval()
        correct = 0
        total = 0
        try:
            with torch.no_grad():
                for images, labels in testloader:
                    outputs = self(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            self.train(was_training)

        print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))


In [11]:
# Adam + batch-norm run: build a fresh model, fit it, then print test accuracy.
CNN_ADAM_BNORM_obj = CNN_ADAM_BNORM()
CNN_ADAM_BNORM_obj.train(trainloader=trainloader)
CNN_ADAM_BNORM_obj.test(testloader=testloader)

[1,   200] loss: 1.753
[1,   400] loss: 1.421
[1,   600] loss: 1.289
[2,   200] loss: 1.096
[2,   400] loss: 1.034
[2,   600] loss: 1.011
[3,   200] loss: 0.918
[3,   400] loss: 0.897
[3,   600] loss: 0.890
[4,   200] loss: 0.826
[4,   400] loss: 0.820
[4,   600] loss: 0.811
[5,   200] loss: 0.755
[5,   400] loss: 0.761
[5,   600] loss: 0.738
[6,   200] loss: 0.689
[6,   400] loss: 0.711
[6,   600] loss: 0.698
[7,   200] loss: 0.670
[7,   400] loss: 0.662
[7,   600] loss: 0.667
[8,   200] loss: 0.652
[8,   400] loss: 0.648
[8,   600] loss: 0.624
[9,   200] loss: 0.604
[9,   400] loss: 0.607
[9,   600] loss: 0.630
[10,   200] loss: 0.598
[10,   400] loss: 0.592
[10,   600] loss: 0.580
Finished Training
Accuracy of the network on the test images: 78 %


#### Hyperparameter tuning for the Adam and RMSProp optimizers

#### RMSProp with batch normalization, increasing epochs and learning rate: epochs = 15, learning rate = 0.005

In [13]:
class CNN_RMS_BNORM2(nn.Module):
    """Batch-normalized CNN trained with RMSprop at lr=0.005 for 15 epochs.

    Layout: three 3x3 convolutions with padding 1 (3 -> 32 -> 64 -> 128
    channels), each followed by BatchNorm2d, ReLU and 2x2 max pooling, then
    two fully connected layers (128*4*4 -> 512 -> 10). The 128*4*4 flatten
    size corresponds to 32x32 inputs, since each pooling step halves height
    and width.
    """

    def __init__(self):
        super(CNN_RMS_BNORM2, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for each image in ``x``."""
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def train(self, trainloader=True, epochs=15):
        """Fit the model on ``trainloader``, or set the mode when given a bool.

        This method shadows ``nn.Module.train(mode)``, which PyTorch itself
        calls with a bool (``eval()`` is ``train(False)``). A bool argument is
        therefore forwarded to the base class so ``eval()`` keeps working;
        anything else is treated as an iterable of ``(inputs, labels)``
        batches and optimized with RMSprop (lr=0.005) on a cross-entropy loss.

        Args:
            trainloader: Iterable of ``(inputs, labels)`` batches, or a bool
                mode flag as accepted by ``nn.Module.train``.
            epochs: Number of passes over ``trainloader`` (default 15).

        Returns:
            ``self`` when called with a bool (the ``nn.Module`` convention),
            otherwise ``None``.
        """
        if isinstance(trainloader, bool):
            return super(CNN_RMS_BNORM2, self).train(trainloader)

        # BatchNorm must be in training mode while fitting; a previous test()
        # or eval() call may have left the model in eval mode.
        super(CNN_RMS_BNORM2, self).train(True)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.RMSprop(self.parameters(), lr=0.005)
        log_every = 200  # report the mean loss once per this many batches

        for epoch in range(epochs):
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(trainloader):
                optimizer.zero_grad()

                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if i % log_every == log_every - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / log_every))
                    running_loss = 0.0

        print('Finished Training')

    def test(self, testloader):
        """Print top-1 accuracy (truncated to a whole percent) on ``testloader``.

        Evaluation runs in eval mode with gradients disabled, so the BatchNorm
        layers normalize with their running statistics and are not updated by
        test data. The mode the model was in beforehand is restored afterwards.

        Args:
            testloader: Iterable of ``(images, labels)`` batches.

        Raises:
            ZeroDivisionError: If ``testloader`` yields no samples.
        """
        was_training = self.training
        self.eval()
        correct = 0
        total = 0
        try:
            with torch.no_grad():
                for images, labels in testloader:
                    outputs = self(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            self.train(was_training)

        print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))


In [14]:
# Tuned RMSprop + batch-norm run: build a fresh model, fit it, then print test accuracy.
CNN_RMS_BNORM_obj2 = CNN_RMS_BNORM2()
CNN_RMS_BNORM_obj2.train(trainloader=trainloader)
CNN_RMS_BNORM_obj2.test(testloader=testloader)

[1,   200] loss: 7.638
[1,   400] loss: 2.053
[1,   600] loss: 1.891
[2,   200] loss: 1.599
[2,   400] loss: 1.482
[2,   600] loss: 1.389
[3,   200] loss: 1.244
[3,   400] loss: 1.194
[3,   600] loss: 1.134
[4,   200] loss: 1.072
[4,   400] loss: 1.024
[4,   600] loss: 0.987
[5,   200] loss: 0.939
[5,   400] loss: 0.919
[5,   600] loss: 0.887
[6,   200] loss: 0.845
[6,   400] loss: 0.834
[6,   600] loss: 0.819
[7,   200] loss: 0.805
[7,   400] loss: 0.790
[7,   600] loss: 0.769
[8,   200] loss: 0.753
[8,   400] loss: 0.750
[8,   600] loss: 0.735
[9,   200] loss: 0.704
[9,   400] loss: 0.713
[9,   600] loss: 0.706
[10,   200] loss: 0.668
[10,   400] loss: 0.686
[10,   600] loss: 0.697
[11,   200] loss: 0.661
[11,   400] loss: 0.663
[11,   600] loss: 0.668
[12,   200] loss: 0.636
[12,   400] loss: 0.632
[12,   600] loss: 0.634
[13,   200] loss: 0.616
[13,   400] loss: 0.613
[13,   600] loss: 0.624
[14,   200] loss: 0.617
[14,   400] loss: 0.594
[14,   600] loss: 0.609
[15,   200] loss: 0

#### Adam, increasing epochs and learning rate: epochs = 15, learning rate = 0.005

In [15]:
class CNN_ADAM2(nn.Module):
    """Three-stage convolutional classifier trained with Adam.

    Each stage is conv(3x3, padding=1) -> ReLU -> 2x2 max-pool, followed by
    two fully connected layers producing 10 class scores. The first linear
    layer expects 128 * 4 * 4 features, i.e. 32x32 spatial inputs after three
    halvings.
    """

    def __init__(self):
        super(CNN_ADAM2, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return raw class scores (no softmax) for a batch of images.

        Args:
            x: tensor with 3 channels; 32x32 spatial size is required for the
                flatten step to line up with ``fc1``.

        Returns:
            Tensor with 10 scores per sample.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def train(self, trainloader=True, epochs=15, lr=0.005, log_every=200):
        """Run the training loop, or toggle train/eval mode.

        This method shares its name with ``nn.Module.train(mode)``, which
        ``eval()`` relies on to switch modes. A boolean first argument is
        therefore forwarded to the base implementation so that ``eval()`` and
        ``train(False)`` keep working; anything else is treated as a data
        loader.

        Args:
            trainloader: iterable of ``(inputs, labels)`` batches, or a bool
                mode flag.
            epochs: number of passes over ``trainloader``.
            lr: Adam learning rate.
            log_every: print the mean loss every this many batches.

        Returns:
            ``self`` when called with a bool (as ``nn.Module.train`` does),
            otherwise ``None``.
        """
        if isinstance(trainloader, bool):
            return super(CNN_ADAM2, self).train(trainloader)

        # Make sure mode-dependent layers are in training mode before fitting.
        super(CNN_ADAM2, self).train(True)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.parameters(), lr=lr)

        for epoch in range(epochs):
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(trainloader):
                optimizer.zero_grad()

                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if i % log_every == log_every - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / log_every))
                    running_loss = 0.0

        print('Finished Training')

    def test(self, testloader):
        """Print top-1 accuracy over ``testloader``.

        The model is evaluated in eval mode and its previous mode is restored
        afterwards.

        Args:
            testloader: iterable of ``(images, labels)`` batches.
        """
        was_training = self.training
        self.eval()

        correct = 0
        total = 0
        try:
            with torch.no_grad():
                for images, labels in testloader:
                    outputs = self(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            self.train(was_training)

        print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))


In [16]:
# Build a fresh CNN_ADAM2 (Adam, lr=0.005, 15 epochs), train it on the
# training loader, then print its accuracy on the test loader.
CNN_Adem2_obj = CNN_ADAM2()
CNN_Adem2_obj.train(trainloader)
CNN_Adem2_obj.test(testloader)

[1,   200] loss: 1.963
[1,   400] loss: 1.689
[1,   600] loss: 1.577
[2,   200] loss: 1.506
[2,   400] loss: 1.441
[2,   600] loss: 1.422
[3,   200] loss: 1.358
[3,   400] loss: 1.360
[3,   600] loss: 1.345
[4,   200] loss: 1.337
[4,   400] loss: 1.307
[4,   600] loss: 1.295
[5,   200] loss: 1.260
[5,   400] loss: 1.269
[5,   600] loss: 1.249
[6,   200] loss: 1.241
[6,   400] loss: 1.242
[6,   600] loss: 1.218
[7,   200] loss: 1.208
[7,   400] loss: 1.194
[7,   600] loss: 1.220
[8,   200] loss: 1.204
[8,   400] loss: 1.189
[8,   600] loss: 1.181
[9,   200] loss: 1.204
[9,   400] loss: 1.176
[9,   600] loss: 1.175
[10,   200] loss: 1.172
[10,   400] loss: 1.190
[10,   600] loss: 1.163
[11,   200] loss: 1.146
[11,   400] loss: 1.196
[11,   600] loss: 1.141
[12,   200] loss: 1.155
[12,   400] loss: 1.159
[12,   600] loss: 1.152
[13,   200] loss: 1.142
[13,   400] loss: 1.163
[13,   600] loss: 1.172
[14,   200] loss: 1.130
[14,   400] loss: 1.137
[14,   600] loss: 1.153
[15,   200] loss: 1

#### RMSProp with only increased epochs

In [17]:
class CNN_RMS_BNORM3(nn.Module):
    """Three-stage convolutional classifier with batch norm, trained with RMSprop.

    Each stage is conv(3x3, padding=1) -> BatchNorm -> ReLU -> 2x2 max-pool,
    followed by two fully connected layers producing 10 class scores. The
    first linear layer expects 128 * 4 * 4 features, i.e. 32x32 spatial
    inputs after three halvings.
    """

    def __init__(self):
        super(CNN_RMS_BNORM3, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return raw class scores (no softmax) for a batch of images.

        Args:
            x: tensor with 3 channels; 32x32 spatial size is required for the
                flatten step to line up with ``fc1``.

        Returns:
            Tensor with 10 scores per sample.
        """
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def train(self, trainloader=True, epochs=15, lr=0.001, log_every=200):
        """Run the training loop, or toggle train/eval mode.

        This method shares its name with ``nn.Module.train(mode)``, which
        ``eval()`` relies on to switch modes. A boolean first argument is
        therefore forwarded to the base implementation so that ``eval()`` and
        ``train(False)`` keep working; anything else is treated as a data
        loader.

        Args:
            trainloader: iterable of ``(inputs, labels)`` batches, or a bool
                mode flag.
            epochs: number of passes over ``trainloader``.
            lr: RMSprop learning rate.
            log_every: print the mean loss every this many batches.

        Returns:
            ``self`` when called with a bool (as ``nn.Module.train`` does),
            otherwise ``None``.
        """
        if isinstance(trainloader, bool):
            return super(CNN_RMS_BNORM3, self).train(trainloader)

        # BatchNorm must be in training mode to normalise with batch
        # statistics and update its running estimates.
        super(CNN_RMS_BNORM3, self).train(True)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.RMSprop(self.parameters(), lr=lr)

        for epoch in range(epochs):
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(trainloader):
                optimizer.zero_grad()

                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if i % log_every == log_every - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / log_every))
                    running_loss = 0.0

        print('Finished Training')

    def test(self, testloader):
        """Print top-1 accuracy over ``testloader``.

        The model is evaluated in eval mode so the BatchNorm layers use their
        running statistics and are not updated by test batches. The previous
        mode is restored afterwards.

        Args:
            testloader: iterable of ``(images, labels)`` batches.
        """
        was_training = self.training
        self.eval()

        correct = 0
        total = 0
        try:
            with torch.no_grad():
                for images, labels in testloader:
                    outputs = self(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            self.train(was_training)

        print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))


In [18]:
# Build a fresh CNN_RMS_BNORM3 (RMSprop, lr=0.001, 15 epochs), train it on the
# training loader, then print its accuracy on the test loader.
CNN_RMS_BNORM3_obj = CNN_RMS_BNORM3()
CNN_RMS_BNORM3_obj.train(trainloader)
CNN_RMS_BNORM3_obj.test(testloader)

[1,   200] loss: 2.333
[1,   400] loss: 1.530
[1,   600] loss: 1.402
[2,   200] loss: 1.211
[2,   400] loss: 1.133
[2,   600] loss: 1.085
[3,   200] loss: 0.999
[3,   400] loss: 0.965
[3,   600] loss: 0.952
[4,   200] loss: 0.896
[4,   400] loss: 0.884
[4,   600] loss: 0.855
[5,   200] loss: 0.826
[5,   400] loss: 0.811
[5,   600] loss: 0.797
[6,   200] loss: 0.774
[6,   400] loss: 0.762
[6,   600] loss: 0.754
[7,   200] loss: 0.719
[7,   400] loss: 0.720
[7,   600] loss: 0.711
[8,   200] loss: 0.680
[8,   400] loss: 0.687
[8,   600] loss: 0.690
[9,   200] loss: 0.670
[9,   400] loss: 0.653
[9,   600] loss: 0.659
[10,   200] loss: 0.627
[10,   400] loss: 0.627
[10,   600] loss: 0.643
[11,   200] loss: 0.607
[11,   400] loss: 0.608
[11,   600] loss: 0.618
[12,   200] loss: 0.588
[12,   400] loss: 0.577
[12,   600] loss: 0.597
[13,   200] loss: 0.567
[13,   400] loss: 0.581
[13,   600] loss: 0.570
[14,   200] loss: 0.548
[14,   400] loss: 0.550
[14,   600] loss: 0.564
[15,   200] loss: 0

#### Adam with only increased epochs

In [19]:
class CNN_ADAM3(nn.Module):
    """Three-stage convolutional classifier trained with Adam.

    Each stage is conv(3x3, padding=1) -> ReLU -> 2x2 max-pool, followed by
    two fully connected layers producing 10 class scores. The first linear
    layer expects 128 * 4 * 4 features, i.e. 32x32 spatial inputs after three
    halvings.
    """

    def __init__(self):
        super(CNN_ADAM3, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return raw class scores (no softmax) for a batch of images.

        Args:
            x: tensor with 3 channels; 32x32 spatial size is required for the
                flatten step to line up with ``fc1``.

        Returns:
            Tensor with 10 scores per sample.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def train(self, trainloader=True, epochs=15, lr=0.001, log_every=200):
        """Run the training loop, or toggle train/eval mode.

        This method shares its name with ``nn.Module.train(mode)``, which
        ``eval()`` relies on to switch modes. A boolean first argument is
        therefore forwarded to the base implementation so that ``eval()`` and
        ``train(False)`` keep working; anything else is treated as a data
        loader.

        Args:
            trainloader: iterable of ``(inputs, labels)`` batches, or a bool
                mode flag.
            epochs: number of passes over ``trainloader``.
            lr: Adam learning rate.
            log_every: print the mean loss every this many batches.

        Returns:
            ``self`` when called with a bool (as ``nn.Module.train`` does),
            otherwise ``None``.
        """
        if isinstance(trainloader, bool):
            return super(CNN_ADAM3, self).train(trainloader)

        # Make sure mode-dependent layers are in training mode before fitting.
        super(CNN_ADAM3, self).train(True)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.parameters(), lr=lr)

        for epoch in range(epochs):
            running_loss = 0.0
            for i, (inputs, labels) in enumerate(trainloader):
                optimizer.zero_grad()

                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if i % log_every == log_every - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / log_every))
                    running_loss = 0.0

        print('Finished Training')

    def test(self, testloader):
        """Print top-1 accuracy over ``testloader``.

        The model is evaluated in eval mode and its previous mode is restored
        afterwards.

        Args:
            testloader: iterable of ``(images, labels)`` batches.
        """
        was_training = self.training
        self.eval()

        correct = 0
        total = 0
        try:
            with torch.no_grad():
                for images, labels in testloader:
                    outputs = self(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
        finally:
            self.train(was_training)

        print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))


In [20]:
# Build a fresh CNN_ADAM3 (Adam, lr=0.001, 15 epochs), train it on the
# training loader, then print its accuracy on the test loader.
CNN_Adem3_obj = CNN_ADAM3()
CNN_Adem3_obj.train(trainloader)
CNN_Adem3_obj.test(testloader)

[1,   200] loss: 1.863
[1,   400] loss: 1.531
[1,   600] loss: 1.431
[2,   200] loss: 1.239
[2,   400] loss: 1.162
[2,   600] loss: 1.093
[3,   200] loss: 1.002
[3,   400] loss: 0.961
[3,   600] loss: 0.936
[4,   200] loss: 0.866
[4,   400] loss: 0.854
[4,   600] loss: 0.842
[5,   200] loss: 0.792
[5,   400] loss: 0.778
[5,   600] loss: 0.777
[6,   200] loss: 0.727
[6,   400] loss: 0.719
[6,   600] loss: 0.723
[7,   200] loss: 0.674
[7,   400] loss: 0.689
[7,   600] loss: 0.678
[8,   200] loss: 0.650
[8,   400] loss: 0.661
[8,   600] loss: 0.624
[9,   200] loss: 0.614
[9,   400] loss: 0.629
[9,   600] loss: 0.612
[10,   200] loss: 0.581
[10,   400] loss: 0.599
[10,   600] loss: 0.596
[11,   200] loss: 0.569
[11,   400] loss: 0.587
[11,   600] loss: 0.587
[12,   200] loss: 0.539
[12,   400] loss: 0.543
[12,   600] loss: 0.556
[13,   200] loss: 0.533
[13,   400] loss: 0.543
[13,   600] loss: 0.542
[14,   200] loss: 0.510
[14,   400] loss: 0.527
[14,   600] loss: 0.511
[15,   200] loss: 0