In [0]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from tqdm import tqdm_notebook

# Round-1

In [0]:
class Net(nn.Module):
    """Round-1 MNIST classifier: seven 3x3 convolutions and two 2x2 max-pools.

    Designed for input of shape (N, 1, 28, 28): with that size the last
    convolution produces a 10x1x1 map per image, which ``forward`` flattens
    into 10 class scores. The shape comments below are for a 1x28x28 input.

    NOTE: ReLU is applied to the output of the last layer on purpose -- this
    is the flaw the notebook's "Round - 2" section goes on to discuss.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)     # 1x28x28   -> 32x28x28
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)    # 32x28x28  -> 64x28x28
        self.pool1 = nn.MaxPool2d(2, 2)                 # 64x28x28  -> 64x14x14
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)   # 64x14x14  -> 128x14x14
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)  # 128x14x14 -> 256x14x14
        self.pool2 = nn.MaxPool2d(2, 2)                 # 256x14x14 -> 256x7x7
        self.conv5 = nn.Conv2d(256, 512, 3)             # 256x7x7   -> 512x5x5
        self.conv6 = nn.Conv2d(512, 1024, 3)            # 512x5x5   -> 1024x3x3
        self.conv7 = nn.Conv2d(1024, 10, 3)             # 1024x3x3  -> 10x1x1

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for a batch of images."""
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))
        x = F.relu(self.conv6(F.relu(self.conv5(x))))
        # Deliberately kept: ReLU on the final class scores (see class docstring).
        x = F.relu(self.conv7(x))
        x = x.view(-1, 10)
        # dim=1 normalises across the 10 classes; omitting `dim` relies on a
        # deprecated implicit choice and raises a warning on every call.
        return F.log_softmax(x, dim=1)

In [3]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))
print(device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
Total params: 6,379,786
Trainable params: 6,379,786
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.51
Params size (MB): 24.34
Estimated Total Size (MB): 25.85
-------------------------------------



In [4]:


# Seed PyTorch's RNG before the (shuffling) loaders are created.
torch.manual_seed(1)
batch_size = 128

# Extra DataLoader options are only passed when CUDA is in use.
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

# Training split: downloaded into ../data if missing, converted to tensors and
# normalised with mean 0.1307 / std 0.3081
# (presumably the MNIST pixel statistics -- TODO confirm).
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs)

# Test split: same preprocessing; no download flag is passed here.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw
Processing...
Done!


In [0]:

def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``, then print training metrics.

    For every batch the data is moved to ``device``, the negative
    log-likelihood loss is back-propagated and ``optimizer`` takes one step.
    The progress bar description shows the latest batch loss and batch index.
    After the pass, ``train_eval`` prints loss/accuracy over the same loader.

    Args:
        model: network to optimise; its output is passed to ``F.nll_loss``.
        device: device each batch is moved to.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer whose gradients are reset and stepped per batch.
        epoch: accepted from the caller but not used in the body.
    """
    model.train()
    progress = tqdm_notebook(train_loader)
    for batch_idx, batch in enumerate(progress):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        progress.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')

    # Report how well the updated weights fit the data they were trained on.
    train_eval(model, device, train_loader)
    
    
# train_eval reports loss and accuracy on the training set; comparing them with
# the test-set figures is what helps when managing things like OVERFITTING.

def train_eval(model, device, train_loader):
    """Print the model's average loss and accuracy over ``train_loader``.

    The model is switched to eval mode (and left there) and gradients are
    disabled for the whole pass. Nothing is returned; one summary line is
    written to stdout.

    Args:
        model: network whose output is passed to ``F.nll_loss``.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches with a
            ``dataset`` attribute whose length is the number of samples.
    """
    model.eval()
    loss_sum = 0
    hits = 0
    with torch.no_grad():
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            log_probs = model(inputs)
            # Summed (not averaged) so the division below yields a per-sample mean.
            loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
            # The predicted class is the index of the largest log-probability.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            hits += (predicted == labels.view_as(predicted)).sum().item()

    sample_count = len(train_loader.dataset)
    mean_loss = loss_sum / sample_count
    accuracy_pct = 100. * hits / sample_count

    print('\n\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        mean_loss, hits, sample_count, accuracy_pct))
    
    
    
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [6]:

# Build a fresh instance of the currently defined Net on `device` and train it
# with SGD (learning rate 0.01, momentum 0.9).
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# range(1, 2) -> a single epoch. train() also prints training-set metrics
# (via train_eval); test() then prints the test-set metrics.
for epoch in range(1, 2):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

HBox(children=(IntProgress(value=0, max=469), HTML(value='')))






Train set: Average loss: 1.8641, Accuracy: 17482/60000 (29.137%)


Test set: Average loss: 1.8604, Accuracy: 2917/10000 (29%)



In [0]:
### For such a simple model, we get a poor accuracy of just 29%, even though we are using a deep neural network.

# Round - 2 

### The mistake in the previous architecture was applying a ReLU activation to the last layer, which clamps every negative class score to zero before `log_softmax` sees it. Always keep in mind that the last layer should use a linear activation, which simply means NO activation. Let's try that.

In [0]:
class Net(nn.Module):
    """Round-2 MNIST classifier: Round-1 layers without ReLU on the last layer.

    Designed for input of shape (N, 1, 28, 28): with that size the last
    convolution produces a 10x1x1 map per image, which ``forward`` flattens
    into 10 class scores. The shape comments below are for a 1x28x28 input.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)     # 1x28x28   -> 32x28x28
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)    # 32x28x28  -> 64x28x28
        self.pool1 = nn.MaxPool2d(2, 2)                 # 64x28x28  -> 64x14x14
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)   # 64x14x14  -> 128x14x14
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)  # 128x14x14 -> 256x14x14
        self.pool2 = nn.MaxPool2d(2, 2)                 # 256x14x14 -> 256x7x7
        self.conv5 = nn.Conv2d(256, 512, 3)             # 256x7x7   -> 512x5x5
        self.conv6 = nn.Conv2d(512, 1024, 3)            # 512x5x5   -> 1024x3x3
        self.conv7 = nn.Conv2d(1024, 10, 3)             # 1024x3x3  -> 10x1x1

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for a batch of images."""
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))
        x = F.relu(self.conv6(F.relu(self.conv5(x))))
        x = self.conv7(x)  # SEE HERE I HAVE REMOVED THE RELU ACTIVATION ON THE LAST LAYER.
        x = x.view(-1, 10)
        # dim=1 normalises across the 10 classes; omitting `dim` relies on a
        # deprecated implicit choice and raises a warning on every call.
        return F.log_softmax(x, dim=1)

In [8]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))
print(device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
Total params: 6,379,786
Trainable params: 6,379,786
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.51
Params size (MB): 24.34
Estimated Total Size (MB): 25.85
-------------------------------------



In [0]:


# Seed PyTorch's RNG before the (shuffling) loaders are created.
torch.manual_seed(1)
batch_size = 128

# Extra DataLoader options are only passed when CUDA is in use.
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

# Training split: downloaded into ../data if missing, converted to tensors and
# normalised with mean 0.1307 / std 0.3081
# (presumably the MNIST pixel statistics -- TODO confirm).
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs)

# Test split: same preprocessing; no download flag is passed here.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs)


In [0]:

def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``, then print training metrics.

    For every batch the data is moved to ``device``, the negative
    log-likelihood loss is back-propagated and ``optimizer`` takes one step.
    The progress bar description shows the latest batch loss and batch index.
    After the pass, ``train_eval`` prints loss/accuracy over the same loader.

    Args:
        model: network to optimise; its output is passed to ``F.nll_loss``.
        device: device each batch is moved to.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer whose gradients are reset and stepped per batch.
        epoch: accepted from the caller but not used in the body.
    """
    model.train()
    progress = tqdm_notebook(train_loader)
    for batch_idx, batch in enumerate(progress):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        progress.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')

    # Report how well the updated weights fit the data they were trained on.
    train_eval(model, device, train_loader)
    
    
# train_eval reports loss and accuracy on the training set; comparing them with
# the test-set figures is what helps when managing things like OVERFITTING.

def train_eval(model, device, train_loader):
    """Print the model's average loss and accuracy over ``train_loader``.

    The model is switched to eval mode (and left there) and gradients are
    disabled for the whole pass. Nothing is returned; one summary line is
    written to stdout.

    Args:
        model: network whose output is passed to ``F.nll_loss``.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches with a
            ``dataset`` attribute whose length is the number of samples.
    """
    model.eval()
    loss_sum = 0
    hits = 0
    with torch.no_grad():
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            log_probs = model(inputs)
            # Summed (not averaged) so the division below yields a per-sample mean.
            loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
            # The predicted class is the index of the largest log-probability.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            hits += (predicted == labels.view_as(predicted)).sum().item()

    sample_count = len(train_loader.dataset)
    mean_loss = loss_sum / sample_count
    accuracy_pct = 100. * hits / sample_count

    print('\n\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        mean_loss, hits, sample_count, accuracy_pct))
    
    
    
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [12]:

# Build a fresh instance of the currently defined Net on `device` and train it
# with SGD (learning rate 0.01, momentum 0.9).
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# range(1, 6) -> five epochs. train() also prints training-set metrics
# (via train_eval); test() then prints the test-set metrics.
for epoch in range(1, 6):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

HBox(children=(IntProgress(value=0, max=469), HTML(value='')))






Train set: Average loss: 0.0772, Accuracy: 58523/60000 (97.538%)


Test set: Average loss: 0.0757, Accuracy: 9744/10000 (97%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0355, Accuracy: 59338/60000 (98.897%)


Test set: Average loss: 0.0363, Accuracy: 9870/10000 (99%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0242, Accuracy: 59547/60000 (99.245%)


Test set: Average loss: 0.0320, Accuracy: 9887/10000 (99%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0188, Accuracy: 59657/60000 (99.428%)


Test set: Average loss: 0.0318, Accuracy: 9894/10000 (99%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0189, Accuracy: 59643/60000 (99.405%)


Test set: Average loss: 0.0323, Accuracy: 9891/10000 (99%)



### We have improved the test accuracy from 29% to about 97% after the first epoch just by removing the non-linear activation on the last layer. Training for more epochs brings it to about 99%.

# Round - 3 

Now let's try to reduce the number of convolution layers.

In [0]:
class Net(nn.Module):
    """Round-3 MNIST classifier: a much smaller network with four convolutions.

    Two 3x3 conv + max-pool stages are followed by a 1x1 convolution that
    reduces to 10 channels and a 7x7 convolution that collapses the 7x7 map
    into one value per class. Designed for input of shape (N, 1, 28, 28);
    the shape comments below are for a 1x28x28 input.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)   # 1x28x28  -> 16x28x28
        self.pool1 = nn.MaxPool2d(2, 2)               # 16x28x28 -> 16x14x14
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)  # 16x14x14 -> 32x14x14
        self.pool2 = nn.MaxPool2d(2, 2)               # 32x14x14 -> 32x7x7
        self.conv7 = nn.Conv2d(32, 10, 1)             # 32x7x7   -> 10x7x7
        self.conv8 = nn.Conv2d(10, 10, 7)             # 10x7x7   -> 10x1x1

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for a batch of images."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # No activation after conv8: the last layer stays linear.
        x = self.conv8(F.relu(self.conv7(x)))
        x = x.view(-1, 10)
        # dim=1 normalises across the 10 classes; omitting `dim` relies on a
        # deprecated implicit choice and raises a warning on every call.
        return F.log_softmax(x, dim=1)

In [14]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))
print(device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 28, 28]             160
         MaxPool2d-2           [-1, 16, 14, 14]               0
            Conv2d-3           [-1, 32, 14, 14]           4,640
         MaxPool2d-4             [-1, 32, 7, 7]               0
            Conv2d-5             [-1, 10, 7, 7]             330
            Conv2d-6             [-1, 10, 1, 1]           4,910
Total params: 10,040
Trainable params: 10,040
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.18
Params size (MB): 0.04
Estimated Total Size (MB): 0.22
----------------------------------------------------------------
cuda




In [0]:


# Seed PyTorch's RNG before the (shuffling) loaders are created.
torch.manual_seed(1)
batch_size = 128

# Extra DataLoader options are only passed when CUDA is in use.
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

# Training split: downloaded into ../data if missing, converted to tensors and
# normalised with mean 0.1307 / std 0.3081
# (presumably the MNIST pixel statistics -- TODO confirm).
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs)

# Test split: same preprocessing; no download flag is passed here.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs)


In [0]:

def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``, then print training metrics.

    For every batch the data is moved to ``device``, the negative
    log-likelihood loss is back-propagated and ``optimizer`` takes one step.
    The progress bar description shows the latest batch loss and batch index.
    After the pass, ``train_eval`` prints loss/accuracy over the same loader.

    Args:
        model: network to optimise; its output is passed to ``F.nll_loss``.
        device: device each batch is moved to.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer whose gradients are reset and stepped per batch.
        epoch: accepted from the caller but not used in the body.
    """
    model.train()
    progress = tqdm_notebook(train_loader)
    for batch_idx, batch in enumerate(progress):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        progress.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')

    # Report how well the updated weights fit the data they were trained on.
    train_eval(model, device, train_loader)
    
    
# train_eval reports loss and accuracy on the training set; comparing them with
# the test-set figures is what helps when managing things like OVERFITTING.

def train_eval(model, device, train_loader):
    """Print the model's average loss and accuracy over ``train_loader``.

    The model is switched to eval mode (and left there) and gradients are
    disabled for the whole pass. Nothing is returned; one summary line is
    written to stdout.

    Args:
        model: network whose output is passed to ``F.nll_loss``.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches with a
            ``dataset`` attribute whose length is the number of samples.
    """
    model.eval()
    loss_sum = 0
    hits = 0
    with torch.no_grad():
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            log_probs = model(inputs)
            # Summed (not averaged) so the division below yields a per-sample mean.
            loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
            # The predicted class is the index of the largest log-probability.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            hits += (predicted == labels.view_as(predicted)).sum().item()

    sample_count = len(train_loader.dataset)
    mean_loss = loss_sum / sample_count
    accuracy_pct = 100. * hits / sample_count

    print('\n\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        mean_loss, hits, sample_count, accuracy_pct))
    
    
    
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [17]:
# Build a fresh instance of the currently defined Net on `device` and train it
# with SGD (learning rate 0.01, momentum 0.9).
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# range(1, 4) -> three epochs. train() also prints training-set metrics
# (via train_eval); test() then prints the test-set metrics.
for epoch in range(1, 4):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

HBox(children=(IntProgress(value=0, max=469), HTML(value='')))






Train set: Average loss: 0.1100, Accuracy: 57939/60000 (96.565%)


Test set: Average loss: 0.0969, Accuracy: 9680/10000 (97%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0706, Accuracy: 58730/60000 (97.883%)


Test set: Average loss: 0.0619, Accuracy: 9789/10000 (98%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0743, Accuracy: 58610/60000 (97.683%)


Test set: Average loss: 0.0697, Accuracy: 9769/10000 (98%)



# Round 4 

In [0]:
class Net(nn.Module):
    """Round-4 MNIST classifier: the small network plus batch normalisation.

    The first three convolutions are each followed by BatchNorm2d and ReLU;
    the first two stages end in a 2x2 max-pool. A 1x1 convolution then reduces
    to 10 channels and a 7x7 convolution collapses the 7x7 map into one value
    per class. Designed for input of shape (N, 1, 28, 28); the shape comments
    below are for a 1x28x28 input.
    """

    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)    # 1x28x28  -> 16x28x28
        self.batch_1 = nn.BatchNorm2d(16)
        self.pool1 = nn.MaxPool2d(2, 2)                # 16x28x28 -> 16x14x14

        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)   # 16x14x14 -> 32x14x14
        self.batch_2 = nn.BatchNorm2d(32)
        self.pool2 = nn.MaxPool2d(2, 2)                # 32x14x14 -> 32x7x7

        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)   # 32x7x7   -> 64x7x7
        self.batch_3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 128, 3, padding=1)  # 64x7x7   -> 128x7x7
        self.conv5 = nn.Conv2d(128, 10, 1)             # 128x7x7  -> 10x7x7
        self.conv7 = nn.Conv2d(10, 10, 7)              # 10x7x7   -> 10x1x1

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for a batch of images."""
        x = self.pool1(F.relu(self.batch_1(self.conv1(x))))

        x = self.pool2(F.relu(self.batch_2(self.conv2(x))))

        x = F.relu(self.conv4(F.relu(self.batch_3(self.conv3(x)))))

        # No activation after conv7: the last layer stays linear.
        x = self.conv7(F.relu(self.conv5(x)))

        x = x.view(-1, 10)
        # dim=1 normalises across the 10 classes; omitting `dim` relies on a
        # deprecated implicit choice and raises a warning on every call.
        return F.log_softmax(x, dim=1)

In [19]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 28, 28]             160
       BatchNorm2d-2           [-1, 16, 28, 28]              32
         MaxPool2d-3           [-1, 16, 14, 14]               0
            Conv2d-4           [-1, 32, 14, 14]           4,640
       BatchNorm2d-5           [-1, 32, 14, 14]              64
         MaxPool2d-6             [-1, 32, 7, 7]               0
            Conv2d-7             [-1, 64, 7, 7]          18,496
       BatchNorm2d-8             [-1, 64, 7, 7]             128
            Conv2d-9            [-1, 128, 7, 7]          73,856
           Conv2d-10             [-1, 10, 7, 7]           1,290
           Conv2d-11             [-1, 10, 1, 1]           4,910
Total params: 103,576
Trainable params: 103,576
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/



In [0]:


# Seed PyTorch's RNG before the (shuffling) loaders are created.
torch.manual_seed(1)
batch_size = 128

# Extra DataLoader options are only passed when CUDA is in use.
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

# Training split: downloaded into ../data if missing, converted to tensors and
# normalised with mean 0.1307 / std 0.3081
# (presumably the MNIST pixel statistics -- TODO confirm).
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs)

# Test split: same preprocessing; no download flag is passed here.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs)


In [0]:

def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``, then print training metrics.

    For every batch the data is moved to ``device``, the negative
    log-likelihood loss is back-propagated and ``optimizer`` takes one step.
    The progress bar description shows the latest batch loss and batch index.
    After the pass, ``train_eval`` prints loss/accuracy over the same loader.

    Args:
        model: network to optimise; its output is passed to ``F.nll_loss``.
        device: device each batch is moved to.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer whose gradients are reset and stepped per batch.
        epoch: accepted from the caller but not used in the body.
    """
    model.train()
    progress = tqdm_notebook(train_loader)
    for batch_idx, batch in enumerate(progress):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        progress.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')

    # Report how well the updated weights fit the data they were trained on.
    train_eval(model, device, train_loader)
    
    
# train_eval reports loss and accuracy on the training set; comparing them with
# the test-set figures is what helps when managing things like OVERFITTING.

def train_eval(model, device, train_loader):
    """Print the model's average loss and accuracy over ``train_loader``.

    The model is switched to eval mode (and left there) and gradients are
    disabled for the whole pass. Nothing is returned; one summary line is
    written to stdout.

    Args:
        model: network whose output is passed to ``F.nll_loss``.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches with a
            ``dataset`` attribute whose length is the number of samples.
    """
    model.eval()
    loss_sum = 0
    hits = 0
    with torch.no_grad():
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            log_probs = model(inputs)
            # Summed (not averaged) so the division below yields a per-sample mean.
            loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
            # The predicted class is the index of the largest log-probability.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            hits += (predicted == labels.view_as(predicted)).sum().item()

    sample_count = len(train_loader.dataset)
    mean_loss = loss_sum / sample_count
    accuracy_pct = 100. * hits / sample_count

    print('\n\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        mean_loss, hits, sample_count, accuracy_pct))
    
    
    
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [0]:

# Build a fresh instance of the currently defined Net on `device` and train it
# with SGD (learning rate 0.01, momentum 0.9).
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# range(1, 21) -> twenty epochs. train() also prints training-set metrics
# (via train_eval); test() then prints the test-set metrics.
for epoch in range(1, 21):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

HBox(children=(IntProgress(value=0, max=469), HTML(value='')))






Train set: Average loss: 0.0707, Accuracy: 58653/60000 (97.755%)


Test set: Average loss: 0.0651, Accuracy: 9794/10000 (97.940%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0553, Accuracy: 58937/60000 (98.228%)


Test set: Average loss: 0.0545, Accuracy: 9819/10000 (98.190%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0489, Accuracy: 59009/60000 (98.348%)


Test set: Average loss: 0.0527, Accuracy: 9828/10000 (98.280%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0222, Accuracy: 59599/60000 (99.332%)


Test set: Average loss: 0.0282, Accuracy: 9910/10000 (99.100%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0240, Accuracy: 59534/60000 (99.223%)


Test set: Average loss: 0.0383, Accuracy: 9884/10000 (98.840%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0240, Accuracy: 59529/60000 (99.215%)


Test set: Average loss: 0.0364, Accuracy: 9886/10000 (98.860%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0134, Accuracy: 59747/60000 (99.578%)


Test set: Average loss: 0.0270, Accuracy: 9916/10000 (99.160%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0133, Accuracy: 59750/60000 (99.583%)


Test set: Average loss: 0.0296, Accuracy: 9906/10000 (99.060%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0145, Accuracy: 59697/60000 (99.495%)


Test set: Average loss: 0.0298, Accuracy: 9910/10000 (99.100%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0272, Accuracy: 59420/60000 (99.033%)


Test set: Average loss: 0.0475, Accuracy: 9856/10000 (98.560%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0138, Accuracy: 59706/60000 (99.510%)


Test set: Average loss: 0.0340, Accuracy: 9899/10000 (98.990%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))




Train set: Average loss: 0.0089, Accuracy: 59814/60000 (99.690%)


Test set: Average loss: 0.0340, Accuracy: 9900/10000 (99.000%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))

# Round 5 

In [0]:
class Net(nn.Module):
    """Round-5 MNIST classifier: the Round-4 network plus dropout (p=0.25).

    The first three convolutions are each followed by BatchNorm2d, ReLU and
    Dropout; the first two stages end in a 2x2 max-pool. A 1x1 convolution
    then reduces to 10 channels and a 7x7 convolution collapses the 7x7 map
    into one value per class. Designed for input of shape (N, 1, 28, 28); the
    shape comments below are for a 1x28x28 input.
    """

    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)    # 1x28x28  -> 16x28x28
        self.batch_1 = nn.BatchNorm2d(16)
        self.drop_1 = nn.Dropout(0.25)
        self.pool1 = nn.MaxPool2d(2, 2)                # 16x28x28 -> 16x14x14

        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)   # 16x14x14 -> 32x14x14
        self.batch_2 = nn.BatchNorm2d(32)
        self.drop_2 = nn.Dropout(0.25)
        self.pool2 = nn.MaxPool2d(2, 2)                # 32x14x14 -> 32x7x7

        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)   # 32x7x7   -> 64x7x7
        self.batch_3 = nn.BatchNorm2d(64)
        self.drop_3 = nn.Dropout(0.25)

        self.conv4 = nn.Conv2d(64, 128, 3, padding=1)  # 64x7x7   -> 128x7x7
        self.conv5 = nn.Conv2d(128, 10, 1)             # 128x7x7  -> 10x7x7
        self.conv7 = nn.Conv2d(10, 10, 7)              # 10x7x7   -> 10x1x1

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for a batch of images."""
        x = self.pool1(self.drop_1(F.relu(self.batch_1(self.conv1(x)))))

        x = self.pool2(self.drop_2(F.relu(self.batch_2(self.conv2(x)))))

        x = F.relu(self.conv4(self.drop_3(F.relu(self.batch_3(self.conv3(x))))))

        # No activation after conv7: the last layer stays linear.
        x = self.conv7(F.relu(self.conv5(x)))

        x = x.view(-1, 10)
        # dim=1 normalises across the 10 classes; omitting `dim` relies on a
        # deprecated implicit choice and raises a warning on every call.
        return F.log_softmax(x, dim=1)

In [0]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))





----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 28, 28]             160
       BatchNorm2d-2           [-1, 16, 28, 28]              32
           Dropout-3           [-1, 16, 28, 28]               0
         MaxPool2d-4           [-1, 16, 14, 14]               0
            Conv2d-5           [-1, 32, 14, 14]           4,640
       BatchNorm2d-6           [-1, 32, 14, 14]              64
           Dropout-7           [-1, 32, 14, 14]               0
         MaxPool2d-8             [-1, 32, 7, 7]               0
            Conv2d-9             [-1, 64, 7, 7]          18,496
      BatchNorm2d-10             [-1, 64, 7, 7]             128
          Dropout-11             [-1, 64, 7, 7]               0
           Conv2d-12            [-1, 128, 7, 7]          73,856
           Conv2d-13             [-1, 10, 7, 7]           1,290
           Conv2d-14             [-1, 1

In [0]:


# Seed PyTorch's RNG before the (shuffling) loaders are created.
torch.manual_seed(1)
batch_size = 128

# Extra DataLoader options are only passed when CUDA is in use.
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

# Training split: downloaded into ../data if missing, converted to tensors and
# normalised with mean 0.1307 / std 0.3081
# (presumably the MNIST pixel statistics -- TODO confirm).
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs)

# Test split: same preprocessing; no download flag is passed here.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs)


In [0]:

def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``, then print training metrics.

    For every batch the data is moved to ``device``, the negative
    log-likelihood loss is back-propagated and ``optimizer`` takes one step.
    The progress bar description shows the latest batch loss and batch index.
    After the pass, ``train_eval`` prints loss/accuracy over the same loader.

    Args:
        model: network to optimise; its output is passed to ``F.nll_loss``.
        device: device each batch is moved to.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer whose gradients are reset and stepped per batch.
        epoch: accepted from the caller but not used in the body.
    """
    model.train()
    progress = tqdm_notebook(train_loader)
    for batch_idx, batch in enumerate(progress):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        progress.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')

    # Report how well the updated weights fit the data they were trained on.
    train_eval(model, device, train_loader)
    
    
# The function below is to calculate the training error which will be useful when we will try to manage things like OVERFITTING.    

def train_eval(model, device, train_loader):
    """Print the average NLL loss and accuracy of ``model`` over ``train_loader``.

    The model is switched to eval mode and no gradients are recorded. Averages
    are taken over the number of samples the loader actually yields, so the
    figures stay correct when the loader covers only part of its dataset
    (for example via a ``sampler`` or ``drop_last=True``).

    Args:
        model: module returning per-class log-probabilities for a batch.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
    """
    model.eval()
    train_loss = 0.0
    correct = 0
    seen = 0
    with torch.no_grad():
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            train_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            seen += target.size(0)

    # Guard the division so an empty loader reports 0/0 instead of raising.
    denom = max(seen, 1)

    print('\n\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        train_loss / denom, correct, seen,
        100. * correct / denom))
    
    
    
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [0]:

# Fresh network for this round, optimised with SGD plus momentum.
model = Net().to(device)
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.9)

# Epochs are numbered 1..20; each one is a training pass followed by a test pass.
for epoch in range(1, 20 + 1):
    train(model=model, device=device, train_loader=train_loader,
          optimizer=optimizer, epoch=epoch)
    test(model=model, device=device, test_loader=test_loader)

HBox(children=(IntProgress(value=0, max=469), HTML(value='')))





Train set: Average loss: 0.0832, Accuracy: 58568/60000 (97.613%)


Test set: Average loss: 0.0721, Accuracy: 9797/10000 (97.970%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0793, Accuracy: 58619/60000 (97.698%)


Test set: Average loss: 0.0719, Accuracy: 9781/10000 (97.810%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0476, Accuracy: 59191/60000 (98.652%)


Test set: Average loss: 0.0437, Accuracy: 9872/10000 (98.720%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0417, Accuracy: 59309/60000 (98.848%)


Test set: Average loss: 0.0365, Accuracy: 9894/10000 (98.940%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0336, Accuracy: 59416/60000 (99.027%)


Test set: Average loss: 0.0342, Accuracy: 9896/10000 (98.960%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0319, Accuracy: 59486/60000 (99.143%)


Test set: Average loss: 0.0317, Accuracy: 9907/10000 (99.070%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0242, Accuracy: 59568/60000 (99.280%)


Test set: Average loss: 0.0242, Accuracy: 9918/10000 (99.180%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0305, Accuracy: 59486/60000 (99.143%)


Test set: Average loss: 0.0315, Accuracy: 9904/10000 (99.040%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0262, Accuracy: 59518/60000 (99.197%)


Test set: Average loss: 0.0254, Accuracy: 9918/10000 (99.180%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0417, Accuracy: 59199/60000 (98.665%)


Test set: Average loss: 0.0400, Accuracy: 9878/10000 (98.780%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0282, Accuracy: 59500/60000 (99.167%)


Test set: Average loss: 0.0298, Accuracy: 9908/10000 (99.080%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0191, Accuracy: 59697/60000 (99.495%)


Test set: Average loss: 0.0226, Accuracy: 9932/10000 (99.320%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0163, Accuracy: 59722/60000 (99.537%)


Test set: Average loss: 0.0219, Accuracy: 9927/10000 (99.270%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0164, Accuracy: 59743/60000 (99.572%)


Test set: Average loss: 0.0225, Accuracy: 9930/10000 (99.300%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0164, Accuracy: 59732/60000 (99.553%)


Test set: Average loss: 0.0215, Accuracy: 9938/10000 (99.380%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0228, Accuracy: 59572/60000 (99.287%)


Test set: Average loss: 0.0258, Accuracy: 9918/10000 (99.180%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0204, Accuracy: 59611/60000 (99.352%)


Test set: Average loss: 0.0259, Accuracy: 9918/10000 (99.180%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0160, Accuracy: 59740/60000 (99.567%)


Test set: Average loss: 0.0228, Accuracy: 9927/10000 (99.270%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0126, Accuracy: 59803/60000 (99.672%)


Test set: Average loss: 0.0199, Accuracy: 9934/10000 (99.340%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0116, Accuracy: 59819/60000 (99.698%)


Test set: Average loss: 0.0208, Accuracy: 9933/10000 (99.330%)



# Round 6

In [0]:
class Net(nn.Module):
    """Round 6 classifier: two conv/BN/dropout/pool stages plus a convolutional head.

    Spatial size for a 1x28x28 input:
    28 -> conv1 (pad 1) 28 -> pool1 14 -> conv2 (pad 1) 14 -> pool2 7
    -> conv3 (no pad) 5 -> conv5 (1x1) 5 -> conv7 (5x5) 1.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Stage 1: 1 -> 16 channels, then halve the resolution.
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.batch_1 = nn.BatchNorm2d(16)
        self.drop_1 = nn.Dropout(0.25)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Stage 2: 16 -> 32 channels, then halve the resolution again.
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.batch_2 = nn.BatchNorm2d(32)
        self.drop_2 = nn.Dropout(0.25)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Unpadded 3x3 convolution, used without batch norm or dropout.
        self.conv3 = nn.Conv2d(32, 32, 3)

        # Head: 1x1 convolution down to 10 channels, then a 5x5 convolution
        # that collapses the remaining 5x5 map to a single position.
        self.conv5 = nn.Conv2d(32, 10, 1)
        self.conv7 = nn.Conv2d(10, 10, 5)

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for a batch of 1x28x28 inputs."""
        x = self.pool1(self.drop_1(F.relu(self.batch_1(self.conv1(x)))))
        x = self.pool2(self.drop_2(F.relu(self.batch_2(self.conv2(x)))))
        x = F.relu(self.conv3(x))
        x = self.conv7(F.relu(self.conv5(x)))

        # For 28x28 inputs conv7 yields (N, 10, 1, 1); flatten to (N, 10).
        x = x.view(-1, 10)
        # Name the class dimension explicitly; the implicit choice is deprecated
        # and emits a warning on every call.
        return F.log_softmax(x, dim=1)

In [0]:
!pip install torchsummary
from torchsummary import summary
# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the network on that device and print its layer summary for a 1x28x28 input.
model = Net().to(device)
summary(model, input_size=(1, 28, 28))





----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 28, 28]             160
       BatchNorm2d-2           [-1, 16, 28, 28]              32
           Dropout-3           [-1, 16, 28, 28]               0
         MaxPool2d-4           [-1, 16, 14, 14]               0
            Conv2d-5           [-1, 32, 14, 14]           4,640
       BatchNorm2d-6           [-1, 32, 14, 14]              64
           Dropout-7           [-1, 32, 14, 14]               0
         MaxPool2d-8             [-1, 32, 7, 7]               0
            Conv2d-9             [-1, 32, 5, 5]           9,248
      BatchNorm2d-10             [-1, 32, 5, 5]              64
           Conv2d-11             [-1, 10, 5, 5]             330
           Conv2d-12             [-1, 10, 1, 1]           2,510
Total params: 17,048
Trainable params: 17,048
Non-trainable params: 0
---------------------------------

In [0]:


# Seed torch's global RNG before the loaders are built.
torch.manual_seed(1)
batch_size = 128

# Worker / pinned-memory options are only passed when CUDA is available.
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

# Both splits get the same preprocessing: tensor conversion, then normalisation
# with mean 0.1307 and std 0.3081 (presumably the MNIST statistics - TODO confirm).
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs
)
test_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs
)


In [0]:

def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``, then report training metrics.

    Args:
        model: network to optimise; put into training mode first.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer that is zeroed and stepped once per batch.
        epoch: accepted from the caller but not used inside the function.
    """
    model.train()
    progress = tqdm_notebook(train_loader)
    for batch_idx, (inputs, labels) in enumerate(progress):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Show the latest batch loss on the progress bar.
        progress.set_description(desc=f'loss={loss.item()} batch_id={batch_idx}')

    # After the pass, measure loss/accuracy on the training data in eval mode.
    train_eval(model, device, train_loader)
    
    
# The function below computes the loss and accuracy on the training set, which is useful for diagnosing problems such as overfitting.

def train_eval(model, device, train_loader):
    """Print the average NLL loss and accuracy of ``model`` over ``train_loader``.

    The model is switched to eval mode and no gradients are recorded. Averages
    are taken over the number of samples the loader actually yields, so the
    figures stay correct when the loader covers only part of its dataset
    (for example via a ``sampler`` or ``drop_last=True``).

    Args:
        model: module returning per-class log-probabilities for a batch.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
    """
    model.eval()
    train_loss = 0.0
    correct = 0
    seen = 0
    with torch.no_grad():
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            train_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            seen += target.size(0)

    # Guard the division so an empty loader reports 0/0 instead of raising.
    denom = max(seen, 1)

    print('\n\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        train_loss / denom, correct, seen,
        100. * correct / denom))
    
    
    
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [0]:
import torch.optim.lr_scheduler

In [0]:
import torch.optim.lr_scheduler
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# Multiply the learning rate by 0.83 after every epoch.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.83)

for epoch in range(1, 21):
    # get_last_lr() reports the rate the optimizer is currently using. get_lr()
    # is meant to be called from inside scheduler.step(); calling it here warns
    # on current PyTorch and can print a value that is already decayed once more.
    print('Epoch:', epoch,'LR:', scheduler.get_last_lr())
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    # Decay the learning rate once the epoch (train + test) is finished.
    scheduler.step()








#for epoch in range(1, 21):
 #   train(model, device, train_loader, optimizer, epoch)
  #  test(model, device, test_loader)

Epoch: 1 LR: [0.01]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))





Train set: Average loss: 0.1185, Accuracy: 58656/60000 (97.760%)


Test set: Average loss: 0.1109, Accuracy: 9799/10000 (97.990%)

Epoch: 2 LR: [0.0083]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.1483, Accuracy: 57722/60000 (96.203%)


Test set: Average loss: 0.1393, Accuracy: 9654/10000 (96.540%)

Epoch: 3 LR: [0.006888999999999999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0636, Accuracy: 59113/60000 (98.522%)


Test set: Average loss: 0.0603, Accuracy: 9858/10000 (98.580%)

Epoch: 4 LR: [0.005717869999999999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0749, Accuracy: 58874/60000 (98.123%)


Test set: Average loss: 0.0728, Accuracy: 9820/10000 (98.200%)

Epoch: 5 LR: [0.004745832099999999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0831, Accuracy: 58648/60000 (97.747%)


Test set: Average loss: 0.0825, Accuracy: 9774/10000 (97.740%)

Epoch: 6 LR: [0.003939040642999999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0731, Accuracy: 58836/60000 (98.060%)


Test set: Average loss: 0.0738, Accuracy: 9802/10000 (98.020%)

Epoch: 7 LR: [0.003269403733689999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0710, Accuracy: 58916/60000 (98.193%)


Test set: Average loss: 0.0718, Accuracy: 9821/10000 (98.210%)

Epoch: 8 LR: [0.002713605098962699]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0812, Accuracy: 58680/60000 (97.800%)


Test set: Average loss: 0.0830, Accuracy: 9777/10000 (97.770%)

Epoch: 9 LR: [0.00225229223213904]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0670, Accuracy: 58885/60000 (98.142%)


Test set: Average loss: 0.0689, Accuracy: 9812/10000 (98.120%)

Epoch: 10 LR: [0.0018694025526754033]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0845, Accuracy: 58547/60000 (97.578%)


Test set: Average loss: 0.0869, Accuracy: 9756/10000 (97.560%)

Epoch: 11 LR: [0.0015516041187205846]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0908, Accuracy: 58419/60000 (97.365%)


Test set: Average loss: 0.0921, Accuracy: 9736/10000 (97.360%)

Epoch: 12 LR: [0.0012878314185380852]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0839, Accuracy: 58552/60000 (97.587%)


Test set: Average loss: 0.0861, Accuracy: 9745/10000 (97.450%)

Epoch: 13 LR: [0.0010689000773866106]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0890, Accuracy: 58456/60000 (97.427%)


Test set: Average loss: 0.0914, Accuracy: 9731/10000 (97.310%)

Epoch: 14 LR: [0.0008871870642308869]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0779, Accuracy: 58675/60000 (97.792%)


Test set: Average loss: 0.0810, Accuracy: 9769/10000 (97.690%)

Epoch: 15 LR: [0.0007363652633116361]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0818, Accuracy: 58594/60000 (97.657%)


Test set: Average loss: 0.0839, Accuracy: 9755/10000 (97.550%)

Epoch: 16 LR: [0.0006111831685486578]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0739, Accuracy: 58708/60000 (97.847%)


Test set: Average loss: 0.0764, Accuracy: 9782/10000 (97.820%)

Epoch: 17 LR: [0.0005072820298953859]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0769, Accuracy: 58670/60000 (97.783%)


Test set: Average loss: 0.0799, Accuracy: 9772/10000 (97.720%)

Epoch: 18 LR: [0.0004210440848131704]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0826, Accuracy: 58554/60000 (97.590%)


Test set: Average loss: 0.0853, Accuracy: 9760/10000 (97.600%)

Epoch: 19 LR: [0.00034946659039493134]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0817, Accuracy: 58569/60000 (97.615%)


Test set: Average loss: 0.0845, Accuracy: 9760/10000 (97.600%)

Epoch: 20 LR: [0.000290057270027793]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0792, Accuracy: 58621/60000 (97.702%)


Test set: Average loss: 0.0820, Accuracy: 9768/10000 (97.680%)



In [0]:
# Scratch note kept for reference only. It cannot run as a cell of its own:
# `self` and `x` exist only inside Net.forward, so executing it raises NameError.
# x = self.conv7(F.relu(self.conv5((F.relu(self.conv5(x))))))

# Round 7

In [0]:
class Net(nn.Module):
    """Round 7 classifier: four bias-free conv/BN/dropout stages plus a convolutional head.

    Spatial size for a 1x28x28 input:
    28 -> conv1 (pad 1) 28 -> pool1 14 -> conv2 (pad 1) 14 -> pool2 7
    -> conv3 (no pad) 5 -> conv4 (no pad) 3 -> conv5 (1x1) 3 -> conv7 (3x3) 1.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Stage 1: 1 -> 8 channels, then halve the resolution.
        self.conv1 = nn.Conv2d(1, 8, 3, padding=1,bias = False)
        self.batch_1 = nn.BatchNorm2d(8)
        self.drop_1 = nn.Dropout(0.25)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Stage 2: 8 -> 16 channels, then halve the resolution again.
        self.conv2 = nn.Conv2d(8, 16, 3, padding=1,bias = False)
        self.batch_2 = nn.BatchNorm2d(16)
        self.drop_2 = nn.Dropout(0.25)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Stage 3: 16 -> 32 channels, unpadded.
        self.conv3 = nn.Conv2d(16, 32, 3, padding=0,bias = False)
        self.batch_3 = nn.BatchNorm2d(32)
        self.drop_3 = nn.Dropout(0.25)

        # Stage 4: 32 -> 32 channels, unpadded.
        self.conv4 = nn.Conv2d(32,32, 3, padding=0,bias = False)
        self.batch_4 = nn.BatchNorm2d(32)
        self.drop_4 = nn.Dropout(0.25)

        # Head: 1x1 convolution down to 10 channels, then a 3x3 convolution
        # that collapses the remaining 3x3 map to a single position.
        self.conv5 = nn.Conv2d(32, 10, 1,bias = False)
        self.conv7 = nn.Conv2d(10,10,3,bias = False)

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for a batch of 1x28x28 inputs."""
        x = self.pool1(self.drop_1(F.relu(self.batch_1(self.conv1(x)))))
        x = self.pool2(self.drop_2(F.relu(self.batch_2(self.conv2(x)))))
        x = self.drop_3(F.relu(self.batch_3(self.conv3(x))))
        x = self.drop_4(F.relu(self.batch_4(self.conv4(x))))
        x = F.relu(self.conv5(x))
        x = self.conv7(x)

        # For 28x28 inputs conv7 yields (N, 10, 1, 1); flatten to (N, 10).
        x = x.view(-1, 10)
        # Name the class dimension explicitly; the implicit choice is deprecated
        # and emits a warning on every call.
        return F.log_softmax(x, dim=1)

In [0]:
!pip install torchsummary
from torchsummary import summary
# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the network on that device and print its layer summary for a 1x28x28 input.
model = Net().to(device)
summary(model, input_size=(1, 28, 28))





----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              72
       BatchNorm2d-2            [-1, 8, 28, 28]              16
           Dropout-3            [-1, 8, 28, 28]               0
         MaxPool2d-4            [-1, 8, 14, 14]               0
            Conv2d-5           [-1, 16, 14, 14]           1,152
       BatchNorm2d-6           [-1, 16, 14, 14]              32
           Dropout-7           [-1, 16, 14, 14]               0
         MaxPool2d-8             [-1, 16, 7, 7]               0
            Conv2d-9             [-1, 32, 5, 5]           4,608
      BatchNorm2d-10             [-1, 32, 5, 5]              64
          Dropout-11             [-1, 32, 5, 5]               0
           Conv2d-12             [-1, 32, 3, 3]           9,216
      BatchNorm2d-13             [-1, 32, 3, 3]              64
          Dropout-14             [-1, 3

In [0]:


# Seed torch's global RNG before the loaders are built.
torch.manual_seed(1)
batch_size = 128

# Worker / pinned-memory options are only passed when CUDA is available.
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

# Both splits get the same preprocessing: tensor conversion, then normalisation
# with mean 0.1307 and std 0.3081 (presumably the MNIST statistics - TODO confirm).
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs
)
test_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs
)


In [0]:

def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``, then report training metrics.

    Args:
        model: network to optimise; put into training mode first.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer that is zeroed and stepped once per batch.
        epoch: accepted from the caller but not used inside the function.
    """
    model.train()
    progress = tqdm_notebook(train_loader)
    for batch_idx, (inputs, labels) in enumerate(progress):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Show the latest batch loss on the progress bar.
        progress.set_description(desc=f'loss={loss.item()} batch_id={batch_idx}')

    # After the pass, measure loss/accuracy on the training data in eval mode.
    train_eval(model, device, train_loader)
    
    
# The function below computes the loss and accuracy on the training set, which is useful for diagnosing problems such as overfitting.

def train_eval(model, device, train_loader):
    """Print the average NLL loss and accuracy of ``model`` over ``train_loader``.

    The model is switched to eval mode and no gradients are recorded. Averages
    are taken over the number of samples the loader actually yields, so the
    figures stay correct when the loader covers only part of its dataset
    (for example via a ``sampler`` or ``drop_last=True``).

    Args:
        model: module returning per-class log-probabilities for a batch.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
    """
    model.eval()
    train_loss = 0.0
    correct = 0
    seen = 0
    with torch.no_grad():
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            train_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            seen += target.size(0)

    # Guard the division so an empty loader reports 0/0 instead of raising.
    denom = max(seen, 1)

    print('\n\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        train_loss / denom, correct, seen,
        100. * correct / denom))
    
    
    
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [0]:
import torch.optim.lr_scheduler
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# Multiply the learning rate by 0.83 after every epoch.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.83)

for epoch in range(1, 21):
    # get_last_lr() reports the rate the optimizer is currently using. get_lr()
    # is meant to be called from inside scheduler.step(); calling it here warns
    # on current PyTorch and can print a value that is already decayed once more.
    print('Epoch:', epoch,'LR:', scheduler.get_last_lr())
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    # Decay the learning rate once the epoch (train + test) is finished.
    scheduler.step()








#for epoch in range(1, 21):
 #   train(model, device, train_loader, optimizer, epoch)
  #  test(model, device, test_loader)

Epoch: 1 LR: [0.01]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))





Train set: Average loss: 0.1090, Accuracy: 58240/60000 (97.067%)


Test set: Average loss: 0.0956, Accuracy: 9735/10000 (97.350%)

Epoch: 2 LR: [0.0083]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0687, Accuracy: 58860/60000 (98.100%)


Test set: Average loss: 0.0620, Accuracy: 9823/10000 (98.230%)

Epoch: 3 LR: [0.006888999999999999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0574, Accuracy: 59009/60000 (98.348%)


Test set: Average loss: 0.0506, Accuracy: 9851/10000 (98.510%)

Epoch: 4 LR: [0.005717869999999999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0546, Accuracy: 59031/60000 (98.385%)


Test set: Average loss: 0.0455, Accuracy: 9863/10000 (98.630%)

Epoch: 5 LR: [0.004745832099999999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0460, Accuracy: 59213/60000 (98.688%)


Test set: Average loss: 0.0415, Accuracy: 9877/10000 (98.770%)

Epoch: 6 LR: [0.003939040642999999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0437, Accuracy: 59233/60000 (98.722%)


Test set: Average loss: 0.0396, Accuracy: 9869/10000 (98.690%)

Epoch: 7 LR: [0.003269403733689999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0419, Accuracy: 59274/60000 (98.790%)


Test set: Average loss: 0.0369, Accuracy: 9890/10000 (98.900%)

Epoch: 8 LR: [0.002713605098962699]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0396, Accuracy: 59296/60000 (98.827%)


Test set: Average loss: 0.0359, Accuracy: 9891/10000 (98.910%)

Epoch: 9 LR: [0.00225229223213904]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0411, Accuracy: 59249/60000 (98.748%)


Test set: Average loss: 0.0366, Accuracy: 9888/10000 (98.880%)

Epoch: 10 LR: [0.0018694025526754033]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0377, Accuracy: 59333/60000 (98.888%)


Test set: Average loss: 0.0333, Accuracy: 9898/10000 (98.980%)

Epoch: 11 LR: [0.0015516041187205846]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0379, Accuracy: 59315/60000 (98.858%)


Test set: Average loss: 0.0343, Accuracy: 9898/10000 (98.980%)

Epoch: 12 LR: [0.0012878314185380852]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0357, Accuracy: 59376/60000 (98.960%)


Test set: Average loss: 0.0329, Accuracy: 9904/10000 (99.040%)

Epoch: 13 LR: [0.0010689000773866106]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0344, Accuracy: 59387/60000 (98.978%)


Test set: Average loss: 0.0315, Accuracy: 9904/10000 (99.040%)

Epoch: 14 LR: [0.0008871870642308869]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0371, Accuracy: 59329/60000 (98.882%)


Test set: Average loss: 0.0330, Accuracy: 9902/10000 (99.020%)

Epoch: 15 LR: [0.0007363652633116361]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0354, Accuracy: 59369/60000 (98.948%)


Test set: Average loss: 0.0324, Accuracy: 9904/10000 (99.040%)

Epoch: 16 LR: [0.0006111831685486578]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0358, Accuracy: 59351/60000 (98.918%)


Test set: Average loss: 0.0324, Accuracy: 9900/10000 (99.000%)

Epoch: 17 LR: [0.0005072820298953859]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0351, Accuracy: 59376/60000 (98.960%)


Test set: Average loss: 0.0316, Accuracy: 9905/10000 (99.050%)

Epoch: 18 LR: [0.0004210440848131704]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0353, Accuracy: 59358/60000 (98.930%)


Test set: Average loss: 0.0322, Accuracy: 9904/10000 (99.040%)

Epoch: 19 LR: [0.00034946659039493134]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0365, Accuracy: 59335/60000 (98.892%)


Test set: Average loss: 0.0321, Accuracy: 9905/10000 (99.050%)

Epoch: 20 LR: [0.000290057270027793]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0361, Accuracy: 59344/60000 (98.907%)


Test set: Average loss: 0.0327, Accuracy: 9902/10000 (99.020%)



# Round 8

In [0]:
class Net(nn.Module):
    """Round 8 classifier: four bias-free conv/BN/dropout stages plus a convolutional head.

    Spatial size for a 1x28x28 input:
    28 -> conv1 (pad 1) 28 -> pool1 14 -> conv2 (pad 1) 14 -> pool2 7
    -> conv3 (pad 1) 7 -> conv4 (no pad) 5 -> conv5 (1x1) 5 -> conv7 (5x5) 1.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Stage 1: 1 -> 8 channels, then halve the resolution.
        self.conv1 = nn.Conv2d(1, 8, 3, padding=1,bias = False)
        self.batch_1 = nn.BatchNorm2d(8)
        self.drop_1 = nn.Dropout(0.25)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Stage 2: 8 -> 16 channels, then halve the resolution again.
        self.conv2 = nn.Conv2d(8, 16, 3, padding=1,bias = False)
        self.batch_2 = nn.BatchNorm2d(16)
        self.drop_2 = nn.Dropout(0.25)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Stage 3: 16 -> 32 channels, padded so the 7x7 map is preserved.
        self.conv3 = nn.Conv2d(16, 32, 3, padding=1,bias = False)
        self.batch_3 = nn.BatchNorm2d(32)
        self.drop_3 = nn.Dropout(0.25)

        # Stage 4: 32 -> 32 channels, unpadded.
        self.conv4 = nn.Conv2d(32,32, 3, padding=0,bias = False)
        self.batch_4 = nn.BatchNorm2d(32)
        self.drop_4 = nn.Dropout(0.25)

        # Head: 1x1 convolution down to 10 channels, then a 5x5 convolution
        # that collapses the remaining 5x5 map to a single position.
        self.conv5 = nn.Conv2d(32, 10, 1,bias = False)
        self.conv7 = nn.Conv2d(10,10,5,bias = False)

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for a batch of 1x28x28 inputs."""
        x = self.pool1(self.drop_1(F.relu(self.batch_1(self.conv1(x)))))
        x = self.pool2(self.drop_2(F.relu(self.batch_2(self.conv2(x)))))
        x = self.drop_3(F.relu(self.batch_3(self.conv3(x))))
        x = self.drop_4(F.relu(self.batch_4(self.conv4(x))))
        x = F.relu(self.conv5(x))
        x = self.conv7(x)

        # For 28x28 inputs conv7 yields (N, 10, 1, 1); flatten to (N, 10).
        x = x.view(-1, 10)
        # Name the class dimension explicitly; the implicit choice is deprecated
        # and emits a warning on every call.
        return F.log_softmax(x, dim=1)

In [0]:
!pip install torchsummary
from torchsummary import summary
# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the network on that device and print its layer summary for a 1x28x28 input.
model = Net().to(device)
summary(model, input_size=(1, 28, 28))





----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              72
       BatchNorm2d-2            [-1, 8, 28, 28]              16
           Dropout-3            [-1, 8, 28, 28]               0
         MaxPool2d-4            [-1, 8, 14, 14]               0
            Conv2d-5           [-1, 16, 14, 14]           1,152
       BatchNorm2d-6           [-1, 16, 14, 14]              32
           Dropout-7           [-1, 16, 14, 14]               0
         MaxPool2d-8             [-1, 16, 7, 7]               0
            Conv2d-9             [-1, 32, 7, 7]           4,608
      BatchNorm2d-10             [-1, 32, 7, 7]              64
          Dropout-11             [-1, 32, 7, 7]               0
           Conv2d-12             [-1, 32, 5, 5]           9,216
      BatchNorm2d-13             [-1, 32, 5, 5]              64
          Dropout-14             [-1, 3

In [0]:


# Seed torch's global RNG before the loaders are built.
torch.manual_seed(1)
batch_size = 128

# Worker / pinned-memory options are only passed when CUDA is available.
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

# Both splits get the same preprocessing: tensor conversion, then normalisation
# with mean 0.1307 and std 0.3081 (presumably the MNIST statistics - TODO confirm).
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs
)
test_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        ]),
    ),
    batch_size=batch_size,
    shuffle=True,
    **kwargs
)


In [0]:

def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``, then report training metrics.

    Args:
        model: network to optimise; put into training mode first.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer that is zeroed and stepped once per batch.
        epoch: accepted from the caller but not used inside the function.
    """
    model.train()
    progress = tqdm_notebook(train_loader)
    for batch_idx, (inputs, labels) in enumerate(progress):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Show the latest batch loss on the progress bar.
        progress.set_description(desc=f'loss={loss.item()} batch_id={batch_idx}')

    # After the pass, measure loss/accuracy on the training data in eval mode.
    train_eval(model, device, train_loader)
    
    
# The function below computes the loss and accuracy on the training set, which is useful for diagnosing problems such as overfitting.

def train_eval(model, device, train_loader):
    """Print the average NLL loss and the accuracy of ``model`` over ``train_loader``.

    The model is put in evaluation mode and gradients are disabled. Per-batch
    losses are summed and then divided by ``len(train_loader.dataset)``; a
    prediction is the index of the largest model output along dimension 1.
    Nothing is returned.
    """
    model.eval()
    loss_sum = 0
    hits = 0

    with torch.no_grad():
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            log_probs = model(inputs)
            # Sum (rather than average) within the batch so the final division
            # by the dataset size yields a per-sample mean.
            loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
            # Index of the highest log-probability is the predicted class.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            hits += predicted.eq(labels.view_as(predicted)).sum().item()

    n_samples = len(train_loader.dataset)
    mean_loss = loss_sum / n_samples
    accuracy = 100. * hits / n_samples

    print('\n\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        mean_loss, hits, n_samples, accuracy))
    
    
    
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [0]:
import torch.optim.lr_scheduler

model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# Multiply the learning rate by 0.83 after every epoch.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.83)

for epoch in range(1, 21):
    # Read the rate that is actually in effect straight from the optimizer.
    # scheduler.get_lr() is not meant to be called outside scheduler.step()
    # and can report an extra-decayed value on newer PyTorch releases.
    current_lr = [group['lr'] for group in optimizer.param_groups]
    print('Epoch:', epoch,'LR:', current_lr)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    # Step the scheduler once per epoch, after the optimizer updates.
    scheduler.step()

Epoch: 1 LR: [0.01]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))





Train set: Average loss: 0.0942, Accuracy: 58346/60000 (97.243%)


Test set: Average loss: 0.0822, Accuracy: 9762/10000 (97.620%)

Epoch: 2 LR: [0.0083]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0792, Accuracy: 58594/60000 (97.657%)


Test set: Average loss: 0.0690, Accuracy: 9803/10000 (98.030%)

Epoch: 3 LR: [0.006888999999999999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0506, Accuracy: 59116/60000 (98.527%)


Test set: Average loss: 0.0440, Accuracy: 9863/10000 (98.630%)

Epoch: 4 LR: [0.005717869999999999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0499, Accuracy: 59092/60000 (98.487%)


Test set: Average loss: 0.0445, Accuracy: 9855/10000 (98.550%)

Epoch: 5 LR: [0.004745832099999999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0568, Accuracy: 58979/60000 (98.298%)


Test set: Average loss: 0.0497, Accuracy: 9850/10000 (98.500%)

Epoch: 6 LR: [0.003939040642999999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0454, Accuracy: 59186/60000 (98.643%)


Test set: Average loss: 0.0405, Accuracy: 9874/10000 (98.740%)

Epoch: 7 LR: [0.003269403733689999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0420, Accuracy: 59257/60000 (98.762%)


Test set: Average loss: 0.0385, Accuracy: 9876/10000 (98.760%)

Epoch: 8 LR: [0.002713605098962699]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0381, Accuracy: 59322/60000 (98.870%)


Test set: Average loss: 0.0365, Accuracy: 9886/10000 (98.860%)

Epoch: 9 LR: [0.00225229223213904]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0376, Accuracy: 59312/60000 (98.853%)


Test set: Average loss: 0.0344, Accuracy: 9886/10000 (98.860%)

Epoch: 10 LR: [0.0018694025526754033]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0359, Accuracy: 59370/60000 (98.950%)


Test set: Average loss: 0.0352, Accuracy: 9895/10000 (98.950%)

Epoch: 11 LR: [0.0015516041187205846]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0338, Accuracy: 59381/60000 (98.968%)


Test set: Average loss: 0.0311, Accuracy: 9897/10000 (98.970%)

Epoch: 12 LR: [0.0012878314185380852]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0358, Accuracy: 59353/60000 (98.922%)


Test set: Average loss: 0.0336, Accuracy: 9888/10000 (98.880%)

Epoch: 13 LR: [0.0010689000773866106]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0364, Accuracy: 59331/60000 (98.885%)


Test set: Average loss: 0.0336, Accuracy: 9892/10000 (98.920%)

Epoch: 14 LR: [0.0008871870642308869]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0356, Accuracy: 59363/60000 (98.938%)


Test set: Average loss: 0.0334, Accuracy: 9891/10000 (98.910%)

Epoch: 15 LR: [0.0007363652633116361]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0351, Accuracy: 59364/60000 (98.940%)


Test set: Average loss: 0.0329, Accuracy: 9894/10000 (98.940%)

Epoch: 16 LR: [0.0006111831685486578]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0333, Accuracy: 59404/60000 (99.007%)


Test set: Average loss: 0.0321, Accuracy: 9899/10000 (98.990%)

Epoch: 17 LR: [0.0005072820298953859]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0351, Accuracy: 59356/60000 (98.927%)


Test set: Average loss: 0.0332, Accuracy: 9896/10000 (98.960%)

Epoch: 18 LR: [0.0004210440848131704]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0336, Accuracy: 59383/60000 (98.972%)


Test set: Average loss: 0.0320, Accuracy: 9900/10000 (99.000%)

Epoch: 19 LR: [0.00034946659039493134]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0343, Accuracy: 59369/60000 (98.948%)


Test set: Average loss: 0.0326, Accuracy: 9895/10000 (98.950%)

Epoch: 20 LR: [0.000290057270027793]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0349, Accuracy: 59368/60000 (98.947%)


Test set: Average loss: 0.0331, Accuracy: 9893/10000 (98.930%)



In [0]:
class Net(nn.Module):
    """Small convolutional classifier that emits 10 log-probabilities per sample.

    Layout (spatial sizes are for the 1x28x28 inputs this notebook feeds it):

    * conv1 (1->8, 3x3)   -> BatchNorm -> ReLU -> Dropout -> 2x2 max-pool  (28 -> 14)
    * conv2 (8->16, 3x3)  -> BatchNorm -> ReLU -> Dropout -> 2x2 max-pool  (14 -> 7)
    * conv3 (16->16, 3x3) -> BatchNorm -> ReLU -> Dropout
    * conv4 (16->32, 3x3) -> BatchNorm -> ReLU -> Dropout
    * conv5 (32->10, 1x1) -> ReLU
    * conv7 (10->10, 7x7, no padding), which collapses a 7x7 map to 1x1

    All convolutions are created without a bias term.
    """

    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(1, 8, 3, padding=1, bias=False)
        self.batch_1 = nn.BatchNorm2d(8)
        self.drop_1 = nn.Dropout(0.25)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(8, 16, 3, padding=1, bias=False)
        self.batch_2 = nn.BatchNorm2d(16)
        self.drop_2 = nn.Dropout(0.25)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(16, 16, 3, padding=1, bias=False)
        self.batch_3 = nn.BatchNorm2d(16)
        self.drop_3 = nn.Dropout(0.25)

        self.conv4 = nn.Conv2d(16, 32, 3, padding=1, bias=False)
        self.batch_4 = nn.BatchNorm2d(32)
        self.drop_4 = nn.Dropout(0.25)

        # 1x1 convolution reducing the 32 feature channels to the 10 outputs.
        self.conv5 = nn.Conv2d(32, 10, 1, bias=False)

        # Unpadded 7x7 convolution over the remaining spatial map.
        self.conv7 = nn.Conv2d(10, 10, 7, bias=False)

    def forward(self, x):
        """Return log-probabilities of shape ``(N, 10)`` for a batch ``x``.

        The output is flattened with ``view(-1, 10)``, so ``x`` must reduce to
        a 1x1 spatial map after ``conv7`` for each row to be one sample.
        """
        x = self.pool1(self.drop_1(F.relu(self.batch_1(self.conv1(x)))))
        x = self.pool2(self.drop_2(F.relu(self.batch_2(self.conv2(x)))))
        x = self.drop_3(F.relu(self.batch_3(self.conv3(x))))
        x = self.drop_4(F.relu(self.batch_4(self.conv4(x))))
        x = F.relu(self.conv5(x))
        # No activation after the last convolution: its raw outputs are the logits.
        x = self.conv7(x)
        x = x.view(-1, 10)
        # Normalise over the class dimension explicitly; relying on the
        # implicit dimension is deprecated and emits a warning.
        return F.log_softmax(x, dim=1)

In [0]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))





----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              72
       BatchNorm2d-2            [-1, 8, 28, 28]              16
           Dropout-3            [-1, 8, 28, 28]               0
         MaxPool2d-4            [-1, 8, 14, 14]               0
            Conv2d-5           [-1, 16, 14, 14]           1,152
       BatchNorm2d-6           [-1, 16, 14, 14]              32
           Dropout-7           [-1, 16, 14, 14]               0
         MaxPool2d-8             [-1, 16, 7, 7]               0
            Conv2d-9             [-1, 16, 7, 7]           2,304
      BatchNorm2d-10             [-1, 16, 7, 7]              32
          Dropout-11             [-1, 16, 7, 7]               0
           Conv2d-12             [-1, 32, 7, 7]           4,608
      BatchNorm2d-13             [-1, 32, 7, 7]              64
          Dropout-14             [-1, 3

In [0]:


torch.manual_seed(1)
batch_size = 128

# Extra worker / pinned-memory options are only requested when CUDA is in use.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# One shared preprocessing pipeline so the train and test inputs cannot drift
# apart: convert to a tensor, then normalize with mean 0.1307 and std 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Training batches are reshuffled on every pass over the loader.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation only sums per-sample losses and correct counts, so batch order
# does not matter; leaving the test set unshuffled avoids needless work and
# keeps evaluation from drawing on the random number generator.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=mnist_transform),
    batch_size=batch_size, shuffle=False, **kwargs)


In [0]:

def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``, then report training metrics.

    Every batch is moved to ``device``, the negative log-likelihood loss of the
    model output is back-propagated, and ``optimizer`` takes one step. The
    progress bar description shows the latest batch loss and batch index.
    ``epoch`` is accepted but not used inside this function. Once all batches
    are processed, ``train_eval`` prints loss and accuracy over the same loader.
    """
    model.train()  # switch the model to training mode
    progress = tqdm_notebook(train_loader)

    for batch_idx, (inputs, labels) in enumerate(progress):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous batch before the new pass.
        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        progress.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')

    train_eval(model,device,train_loader)
    
    
# train_eval (below) reports loss and accuracy on the training set; comparing these with the test-set metrics helps detect overfitting.

def train_eval(model, device, train_loader):
    """Print the average NLL loss and the accuracy of ``model`` over ``train_loader``.

    The model is put in evaluation mode and gradients are disabled. Per-batch
    losses are summed and then divided by ``len(train_loader.dataset)``; a
    prediction is the index of the largest model output along dimension 1.
    Nothing is returned.
    """
    model.eval()
    loss_sum = 0
    hits = 0

    with torch.no_grad():
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            log_probs = model(inputs)
            # Sum (rather than average) within the batch so the final division
            # by the dataset size yields a per-sample mean.
            loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
            # Index of the highest log-probability is the predicted class.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            hits += predicted.eq(labels.view_as(predicted)).sum().item()

    n_samples = len(train_loader.dataset)
    mean_loss = loss_sum / n_samples
    accuracy = 100. * hits / n_samples

    print('\n\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        mean_loss, hits, n_samples, accuracy))
    
    
    
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [0]:
import torch.optim.lr_scheduler

model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# StepLR's step_size is a whole number of scheduler steps. The fractional
# value 0.5 used previously gave a per-epoch factor of 0.83 ** 2 in the
# recorded run (0.01 -> 0.006889), but only because of how that PyTorch
# release evaluated a non-integer period. Spell the same schedule out
# explicitly so it does not depend on the library version.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.83 ** 2)

for epoch in range(1, 21):
    # Read the rate that is actually in effect straight from the optimizer.
    # scheduler.get_lr() is not meant to be called outside scheduler.step()
    # and can report an extra-decayed value on newer PyTorch releases.
    current_lr = [group['lr'] for group in optimizer.param_groups]
    print('Epoch:', epoch,'LR:', current_lr)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    # Step the scheduler once per epoch, after the optimizer updates.
    scheduler.step()

Epoch: 1 LR: [0.01]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))





Train set: Average loss: 0.1051, Accuracy: 58174/60000 (96.957%)


Test set: Average loss: 0.0922, Accuracy: 9745/10000 (97.450%)

Epoch: 2 LR: [0.006888999999999999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0648, Accuracy: 58891/60000 (98.152%)


Test set: Average loss: 0.0562, Accuracy: 9845/10000 (98.450%)

Epoch: 3 LR: [0.004745832099999999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0589, Accuracy: 58951/60000 (98.252%)


Test set: Average loss: 0.0514, Accuracy: 9840/10000 (98.400%)

Epoch: 4 LR: [0.003269403733689999]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0503, Accuracy: 59099/60000 (98.498%)


Test set: Average loss: 0.0446, Accuracy: 9858/10000 (98.580%)

Epoch: 5 LR: [0.00225229223213904]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0454, Accuracy: 59225/60000 (98.708%)


Test set: Average loss: 0.0410, Accuracy: 9872/10000 (98.720%)

Epoch: 6 LR: [0.0015516041187205846]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0430, Accuracy: 59246/60000 (98.743%)


Test set: Average loss: 0.0388, Accuracy: 9876/10000 (98.760%)

Epoch: 7 LR: [0.0010689000773866106]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0426, Accuracy: 59280/60000 (98.800%)


Test set: Average loss: 0.0385, Accuracy: 9879/10000 (98.790%)

Epoch: 8 LR: [0.0007363652633116361]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0416, Accuracy: 59285/60000 (98.808%)


Test set: Average loss: 0.0378, Accuracy: 9886/10000 (98.860%)

Epoch: 9 LR: [0.0005072820298953859]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0409, Accuracy: 59307/60000 (98.845%)


Test set: Average loss: 0.0372, Accuracy: 9890/10000 (98.900%)

Epoch: 10 LR: [0.00034946659039493134]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0402, Accuracy: 59315/60000 (98.858%)


Test set: Average loss: 0.0366, Accuracy: 9890/10000 (98.900%)

Epoch: 11 LR: [0.0002407475341230682]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0398, Accuracy: 59313/60000 (98.855%)


Test set: Average loss: 0.0360, Accuracy: 9891/10000 (98.910%)

Epoch: 12 LR: [0.00016585097625738168]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0401, Accuracy: 59316/60000 (98.860%)


Test set: Average loss: 0.0362, Accuracy: 9887/10000 (98.870%)

Epoch: 13 LR: [0.00011425473754371022]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0398, Accuracy: 59316/60000 (98.860%)


Test set: Average loss: 0.0360, Accuracy: 9889/10000 (98.890%)

Epoch: 14 LR: [7.871008869386196e-05]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0399, Accuracy: 59314/60000 (98.857%)


Test set: Average loss: 0.0361, Accuracy: 9887/10000 (98.870%)

Epoch: 15 LR: [5.42233801012015e-05]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0397, Accuracy: 59321/60000 (98.868%)


Test set: Average loss: 0.0359, Accuracy: 9887/10000 (98.870%)

Epoch: 16 LR: [3.7354486551717713e-05]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0401, Accuracy: 59310/60000 (98.850%)


Test set: Average loss: 0.0361, Accuracy: 9888/10000 (98.880%)

Epoch: 17 LR: [2.5733505785478328e-05]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0398, Accuracy: 59320/60000 (98.867%)


Test set: Average loss: 0.0360, Accuracy: 9888/10000 (98.880%)

Epoch: 18 LR: [1.772781213561602e-05]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0395, Accuracy: 59323/60000 (98.872%)


Test set: Average loss: 0.0357, Accuracy: 9889/10000 (98.890%)

Epoch: 19 LR: [1.2212689780225874e-05]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0398, Accuracy: 59325/60000 (98.875%)


Test set: Average loss: 0.0360, Accuracy: 9888/10000 (98.880%)

Epoch: 20 LR: [8.413321989597605e-06]


HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0398, Accuracy: 59325/60000 (98.875%)


Test set: Average loss: 0.0359, Accuracy: 9889/10000 (98.890%)



In [0]:
class Net(nn.Module):
    """Small convolutional classifier that emits 10 log-probabilities per sample.

    Layout (spatial sizes are for the 1x28x28 inputs this notebook feeds it):

    * conv1 (1->8, 3x3)   -> BatchNorm -> ReLU -> Dropout -> 2x2 max-pool  (28 -> 14)
    * conv2 (8->16, 3x3)  -> BatchNorm -> ReLU -> Dropout -> 2x2 max-pool  (14 -> 7)
    * conv3 (16->16, 3x3) -> BatchNorm -> ReLU -> Dropout
    * conv4 (16->32, 3x3) -> BatchNorm -> ReLU -> Dropout
    * conv5 (32->16, 3x3) -> BatchNorm -> ReLU -> Dropout
    * conv6 (16->10, 1x1) -> ReLU
    * conv7 (10->10, 7x7, no padding), which collapses a 7x7 map to 1x1

    All convolutions are created without a bias term.
    """

    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(1, 8, 3, padding=1, bias=False)
        self.batch_1 = nn.BatchNorm2d(8)
        self.drop_1 = nn.Dropout(0.25)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(8, 16, 3, padding=1, bias=False)
        self.batch_2 = nn.BatchNorm2d(16)
        self.drop_2 = nn.Dropout(0.25)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(16, 16, 3, padding=1, bias=False)
        self.batch_3 = nn.BatchNorm2d(16)
        self.drop_3 = nn.Dropout(0.25)

        self.conv4 = nn.Conv2d(16, 32, 3, padding=1, bias=False)
        self.batch_4 = nn.BatchNorm2d(32)
        self.drop_4 = nn.Dropout(0.25)

        self.conv5 = nn.Conv2d(32, 16, 3, padding=1, bias=False)
        self.batch_5 = nn.BatchNorm2d(16)
        self.drop_5 = nn.Dropout(0.25)

        # 1x1 convolution reducing the 16 feature channels to the 10 outputs.
        self.conv6 = nn.Conv2d(16, 10, 1, bias=False)

        # Unpadded 7x7 convolution over the remaining spatial map.
        self.conv7 = nn.Conv2d(10, 10, 7, bias=False)

    def forward(self, x):
        """Return log-probabilities of shape ``(N, 10)`` for a batch ``x``.

        The output is flattened with ``view(-1, 10)``, so ``x`` must reduce to
        a 1x1 spatial map after ``conv7`` for each row to be one sample.
        """
        x = self.pool1(self.drop_1(F.relu(self.batch_1(self.conv1(x)))))
        x = self.pool2(self.drop_2(F.relu(self.batch_2(self.conv2(x)))))
        x = self.drop_3(F.relu(self.batch_3(self.conv3(x))))
        x = self.drop_4(F.relu(self.batch_4(self.conv4(x))))
        x = self.drop_5(F.relu(self.batch_5(self.conv5(x))))
        x = F.relu(self.conv6(x))
        # No activation after the last convolution: its raw outputs are the logits.
        x = self.conv7(x)
        x = x.view(-1, 10)
        # Normalise over the class dimension explicitly; relying on the
        # implicit dimension is deprecated and emits a warning.
        return F.log_softmax(x, dim=1)

In [0]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))





----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              72
       BatchNorm2d-2            [-1, 8, 28, 28]              16
           Dropout-3            [-1, 8, 28, 28]               0
         MaxPool2d-4            [-1, 8, 14, 14]               0
            Conv2d-5           [-1, 16, 14, 14]           1,152
       BatchNorm2d-6           [-1, 16, 14, 14]              32
           Dropout-7           [-1, 16, 14, 14]               0
         MaxPool2d-8             [-1, 16, 7, 7]               0
            Conv2d-9             [-1, 16, 7, 7]           2,304
      BatchNorm2d-10             [-1, 16, 7, 7]              32
          Dropout-11             [-1, 16, 7, 7]               0
           Conv2d-12             [-1, 32, 7, 7]           4,608
      BatchNorm2d-13             [-1, 32, 7, 7]              64
          Dropout-14             [-1, 3

In [0]:


torch.manual_seed(1)
batch_size = 128

# Extra worker / pinned-memory options are only requested when CUDA is in use.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# One shared preprocessing pipeline so the train and test inputs cannot drift
# apart: convert to a tensor, then normalize with mean 0.1307 and std 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Training batches are reshuffled on every pass over the loader.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation only sums per-sample losses and correct counts, so batch order
# does not matter; leaving the test set unshuffled avoids needless work and
# keeps evaluation from drawing on the random number generator.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=mnist_transform),
    batch_size=batch_size, shuffle=False, **kwargs)


In [0]:

def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``, then report training metrics.

    Every batch is moved to ``device``, the negative log-likelihood loss of the
    model output is back-propagated, and ``optimizer`` takes one step. The
    progress bar description shows the latest batch loss and batch index.
    ``epoch`` is accepted but not used inside this function. Once all batches
    are processed, ``train_eval`` prints loss and accuracy over the same loader.
    """
    model.train()  # switch the model to training mode
    progress = tqdm_notebook(train_loader)

    for batch_idx, (inputs, labels) in enumerate(progress):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous batch before the new pass.
        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        progress.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')

    train_eval(model,device,train_loader)
    
    
# train_eval (below) reports loss and accuracy on the training set; comparing these with the test-set metrics helps detect overfitting.

def train_eval(model, device, train_loader):
    """Print the average NLL loss and the accuracy of ``model`` over ``train_loader``.

    The model is put in evaluation mode and gradients are disabled. Per-batch
    losses are summed and then divided by ``len(train_loader.dataset)``; a
    prediction is the index of the largest model output along dimension 1.
    Nothing is returned.
    """
    model.eval()
    loss_sum = 0
    hits = 0

    with torch.no_grad():
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            log_probs = model(inputs)
            # Sum (rather than average) within the batch so the final division
            # by the dataset size yields a per-sample mean.
            loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
            # Index of the highest log-probability is the predicted class.
            predicted = log_probs.argmax(dim=1, keepdim=True)
            hits += predicted.eq(labels.view_as(predicted)).sum().item()

    n_samples = len(train_loader.dataset)
    mean_loss = loss_sum / n_samples
    accuracy = 100. * hits / n_samples

    print('\n\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        mean_loss, hits, n_samples, accuracy))
    
    
    
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [0]:
import torch.optim.lr_scheduler

model = Net().to(device)
# This run trains at a constant learning rate of 0.1. A StepLR scheduler used
# to be constructed here but was never stepped, so it had no effect on
# training; it and the commented-out scheduled loop have been removed so the
# cell says what it actually does.
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

for epoch in range(1, 21):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

HBox(children=(IntProgress(value=0, max=469), HTML(value='')))





Train set: Average loss: 0.1163, Accuracy: 57741/60000 (96.235%)


Test set: Average loss: 0.0975, Accuracy: 9692/10000 (96.920%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0613, Accuracy: 58940/60000 (98.233%)


Test set: Average loss: 0.0546, Accuracy: 9848/10000 (98.480%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0664, Accuracy: 58720/60000 (97.867%)


Test set: Average loss: 0.0569, Accuracy: 9832/10000 (98.320%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0379, Accuracy: 59340/60000 (98.900%)


Test set: Average loss: 0.0368, Accuracy: 9885/10000 (98.850%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0426, Accuracy: 59206/60000 (98.677%)


Test set: Average loss: 0.0385, Accuracy: 9881/10000 (98.810%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0337, Accuracy: 59421/60000 (99.035%)


Test set: Average loss: 0.0337, Accuracy: 9901/10000 (99.010%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0355, Accuracy: 59371/60000 (98.952%)


Test set: Average loss: 0.0358, Accuracy: 9893/10000 (98.930%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0360, Accuracy: 59391/60000 (98.985%)


Test set: Average loss: 0.0371, Accuracy: 9887/10000 (98.870%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0327, Accuracy: 59439/60000 (99.065%)


Test set: Average loss: 0.0327, Accuracy: 9913/10000 (99.130%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0252, Accuracy: 59528/60000 (99.213%)


Test set: Average loss: 0.0268, Accuracy: 9923/10000 (99.230%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0275, Accuracy: 59486/60000 (99.143%)


Test set: Average loss: 0.0289, Accuracy: 9917/10000 (99.170%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0285, Accuracy: 59484/60000 (99.140%)


Test set: Average loss: 0.0323, Accuracy: 9896/10000 (98.960%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0344, Accuracy: 59436/60000 (99.060%)


Test set: Average loss: 0.0351, Accuracy: 9903/10000 (99.030%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0283, Accuracy: 59502/60000 (99.170%)


Test set: Average loss: 0.0309, Accuracy: 9910/10000 (99.100%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0295, Accuracy: 59487/60000 (99.145%)


Test set: Average loss: 0.0353, Accuracy: 9890/10000 (98.900%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0303, Accuracy: 59510/60000 (99.183%)


Test set: Average loss: 0.0336, Accuracy: 9908/10000 (99.080%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0271, Accuracy: 59505/60000 (99.175%)


Test set: Average loss: 0.0285, Accuracy: 9916/10000 (99.160%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0257, Accuracy: 59548/60000 (99.247%)


Test set: Average loss: 0.0275, Accuracy: 9918/10000 (99.180%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0224, Accuracy: 59625/60000 (99.375%)


Test set: Average loss: 0.0261, Accuracy: 9930/10000 (99.300%)



HBox(children=(IntProgress(value=0, max=469), HTML(value='')))



Train set: Average loss: 0.0221, Accuracy: 59660/60000 (99.433%)


Test set: Average loss: 0.0265, Accuracy: 9922/10000 (99.220%)

