In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import torch.optim as optim
from torchvision.transforms import transforms
from torchvision import datasets

In [2]:
#torch.cuda.is_available()

True

In [11]:
#test = torch.randn([1,3,106,106])

In [12]:
#conv_test = nn.Conv2d(3,6,7,stride=2)

In [14]:
#conv_test.forward(test).shape

torch.Size([1, 6, 50, 50])

### Model

In [2]:
# Dense Block without Bottleneck
# Single Conv Block is BN>>Relu>>Conv PreActivation type
class DenseBlock(nn.Module):
    def __init__(self,inChannels,growth_rate):
        super(DenseBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(num_features=inChannels)
        self.conv1 = nn.Conv2d(inChannels,growth_rate,kernel_size=3,padding=1,stride=1,bias=False)
        
    def forward(self,x):
        out = self.conv1(F.relu(self.bn1(x)))
        out = torch.cat([x,out], 1) #Channel wise concatenation [n_batch,channel,*size]         
        return out

In [3]:
class TransitionLayer(nn.Module):
    def __init__(self, inChannels, outChannels):
        super(TransitionLayer, self).__init__()
        self.bn1 = nn.BatchNorm2d(num_features=inChannels)
        self.conv1 = nn.Conv2d(inChannels,outChannels, kernel_size=1,bias=False)
    
    def forward(self,x):
        out = self.conv1(F.relu(self.bn1(x)))
        out = F.avg_pool2d(out,kernel_size=2,stride=2,padding=0)
        return out

In [4]:
class DenseNet(nn.Module):
    def __init__(self, growthRate,nClasses,block_config = (6,12,24,16)):
        super(DenseNet, self).__init__()
        nChannels = 2 * growthRate
        self.conv_i1 = nn.Conv2d(3,nChannels, kernel_size=7,stride=2)
        self.bn_i1 = nn.BatchNorm2d(num_features=3)
        self.relu_i1 = nn.ReLU(inplace=True)
        self.max_pool_i1 = nn.MaxPool2d(3,stride=2,padding=1)
        
        self.dense1 = self._make_dense_block(nChannels,block_config[0],growthRate)
        nChannels += block_config[0]*growthRate
        nOutChannels = int(math.floor(nChannels // 2))
        self.trans1 = TransitionLayer(nChannels, nOutChannels )
        
        nChannels = nOutChannels
        self.dense2 = self._make_dense_block(nChannels,block_config[1],growthRate)
        nChannels += block_config[1]*growthRate
        nOutChannels = int(math.floor(nChannels // 2))
        self.trans2 = TransitionLayer(nChannels, nOutChannels )
        
        nChannels = nOutChannels
        self.dense3 = self._make_dense_block(nChannels,block_config[2],growthRate)
        nChannels += block_config[2]*growthRate
        nOutChannels = int(math.floor(nChannels // 2))
        self.trans3 = TransitionLayer(nChannels, nOutChannels )
        
        nChannels = nOutChannels
        self.dense4 = self._make_dense_block(nChannels,block_config[3],growthRate)
        nOutChannels += block_config[3]*growthRate

        
        #self.bn1 = nn.BatchNorm2d(nOutChannels)
        self.fc = nn.Linear(nOutChannels, nClasses)
        
    
    def _make_dense_block(self,inChannels,numBlocks,growthRate):
        layers = []
        for i in range(0,numBlocks):
            layers.append(DenseBlock(inChannels, growthRate))
            inChannels += growthRate
        return nn.Sequential(*layers)
    
    
    def weight_init(self):
        for i,m in enumerate(self.modules()):
            if isinstance(m,nn.Conv2d):
                nn.init.xavier_normal(m.weight)
                #nn.init.constant(m.bias, 0)
    
    def forward(self,x):
        out = self.conv_i1(self.relu_i1(self.bn_i1(x)))
        
        #print(out.shape)
        out = self.trans1(self.dense1(out))
        #print(out.shape)
        out = self.trans2(self.dense2(out))
        #print(out.shape)
        out = self.trans3(self.dense3(out))
        #print(out.shape)
        out = self.dense4(out)
        #print(out.shape)
        out = F.adaptive_avg_pool2d(out,(1,1))
        #print(out.shape)
        out = torch.flatten(out,1)
        #print(out.shape)
        out = self.fc(out)
        #print(out.shape)
        return out
        

In [5]:
model = DenseNet(12,10).cuda()

In [6]:
model

DenseNet(
  (conv_i1): Conv2d(3, 24, kernel_size=(7, 7), stride=(2, 2))
  (bn_i1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_i1): ReLU(inplace=True)
  (max_pool_i1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (dense1): Sequential(
    (0): DenseBlock(
      (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(24, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (1): DenseBlock(
      (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(36, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (2): DenseBlock(
      (bn1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (3): DenseBlock(
      (bn1): BatchNorm2d(60, e

In [7]:
model.weight_init()



In [8]:
from torchsummary import summary
summary(model.cuda(), input_size=(3,32,32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
       BatchNorm2d-1            [-1, 3, 32, 32]               6
              ReLU-2            [-1, 3, 32, 32]               0
            Conv2d-3           [-1, 24, 13, 13]           3,552
       BatchNorm2d-4           [-1, 24, 13, 13]              48
            Conv2d-5           [-1, 12, 13, 13]           2,592
        DenseBlock-6           [-1, 36, 13, 13]               0
       BatchNorm2d-7           [-1, 36, 13, 13]              72
            Conv2d-8           [-1, 12, 13, 13]           3,888
        DenseBlock-9           [-1, 48, 13, 13]               0
      BatchNorm2d-10           [-1, 48, 13, 13]              96
           Conv2d-11           [-1, 12, 13, 13]           5,184
       DenseBlock-12           [-1, 60, 13, 13]               0
      BatchNorm2d-13           [-1, 60, 13, 13]             120
           Conv2d-14           [-1, 12,

### Dataset

In [9]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = datasets.CIFAR10(root='D:\Work\DL_learn\Datasets', train=True,
                                        download=True, transform=transform)

testset = datasets.CIFAR10(root='D:\Work\DL_learn\Datasets', train=False,
                                       download=True, transform=transform)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [10]:
batch_size = 10
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

## Hyper Params and Loss

In [11]:
momentum = 0.95
learning_rate = 0.001
nr_classes = 10
num_epochs = 150
loss_vctr = []

# Oprimization Criteria and Optimization method
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, nesterov = False)

In [12]:
from tensorboardX import SummaryWriter
writer = SummaryWriter('runs_densenetcifar10')

In [13]:
# check
img,lab = next(iter(trainloader))
print(model(img.cuda()).shape)

torch.Size([10, 10])


### Training

In [14]:
@torch.no_grad()
def val(testloader,model):
    correct = 0
    total = 0
    for data in testloader:
        images, labels = data
        outputs = model(images.cuda())
        _, predicted = torch.max(outputs.cpu().data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
        return (100 * correct / total)

In [None]:
niter = 0
for epoch in range(num_epochs):
    for i,(images,labels) in enumerate(trainloader):
        images = images.cuda()
        labels =labels.cuda()
        outputs = model(images)
        optimizer.zero_grad()
        loss = criterion(outputs,labels)
        loss.backward()
        optimizer.step()
        del images,labels,outputs
        if i%100 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss = %.4f' %(epoch+1, num_epochs, i+1, len(trainset)//batch_size, loss.data))
            writer.add_scalar('Train/Loss', loss.data, niter)
            niter += 100
    acc = val(testloader,model)
    print('Epoch [%d/%d], Acc = %.4f' %(epoch + 1, num_epochs, acc))
    writer.add_scalar('Val/Acc', acc, epoch + 1)
    torch.save(model.state_dict(),'densenet_checkpoint.pth')

Epoch [1/150], Step [1/5000], Loss = 2.2603
Epoch [1/150], Step [101/5000], Loss = 2.1505
Epoch [1/150], Step [201/5000], Loss = 1.8400
Epoch [1/150], Step [301/5000], Loss = 1.9526
Epoch [1/150], Step [401/5000], Loss = 1.5498
Epoch [1/150], Step [501/5000], Loss = 1.5303
Epoch [1/150], Step [601/5000], Loss = 2.2121
Epoch [1/150], Step [701/5000], Loss = 1.5430
Epoch [1/150], Step [801/5000], Loss = 2.0853
Epoch [1/150], Step [901/5000], Loss = 1.6663
Epoch [1/150], Step [1001/5000], Loss = 1.9463
Epoch [1/150], Step [1101/5000], Loss = 1.9798
Epoch [1/150], Step [1201/5000], Loss = 1.6013
Epoch [1/150], Step [1301/5000], Loss = 1.5902
Epoch [1/150], Step [1401/5000], Loss = 1.9786
Epoch [1/150], Step [1501/5000], Loss = 1.6033
Epoch [1/150], Step [1601/5000], Loss = 2.0231
Epoch [1/150], Step [1701/5000], Loss = 2.1045
Epoch [1/150], Step [1801/5000], Loss = 1.3183
Epoch [1/150], Step [1901/5000], Loss = 1.2828
Epoch [1/150], Step [2001/5000], Loss = 1.2455
Epoch [1/150], Step [2101

Epoch [4/150], Step [2401/5000], Loss = 0.8739
Epoch [4/150], Step [2501/5000], Loss = 0.9294
Epoch [4/150], Step [2601/5000], Loss = 0.7423
Epoch [4/150], Step [2701/5000], Loss = 1.2037
Epoch [4/150], Step [2801/5000], Loss = 0.7492
Epoch [4/150], Step [2901/5000], Loss = 1.1895
Epoch [4/150], Step [3001/5000], Loss = 0.6321
Epoch [4/150], Step [3101/5000], Loss = 0.5908
Epoch [4/150], Step [3201/5000], Loss = 0.6837
Epoch [4/150], Step [3301/5000], Loss = 1.2573
Epoch [4/150], Step [3401/5000], Loss = 1.2678
Epoch [4/150], Step [3501/5000], Loss = 0.7981
Epoch [4/150], Step [3601/5000], Loss = 0.7928
Epoch [4/150], Step [3701/5000], Loss = 0.4307
Epoch [4/150], Step [3801/5000], Loss = 0.6854
Epoch [4/150], Step [3901/5000], Loss = 1.5771
Epoch [4/150], Step [4001/5000], Loss = 0.4243
Epoch [4/150], Step [4101/5000], Loss = 0.3415
Epoch [4/150], Step [4201/5000], Loss = 1.1349
Epoch [4/150], Step [4301/5000], Loss = 1.2291
Epoch [4/150], Step [4401/5000], Loss = 0.6964
Epoch [4/150]

Epoch [7/150], Step [4801/5000], Loss = 0.3396
Epoch [7/150], Step [4901/5000], Loss = 0.4436
Epoch [7/150], Acc = 70.0000
Epoch [8/150], Step [1/5000], Loss = 0.3351
Epoch [8/150], Step [101/5000], Loss = 0.4596
Epoch [8/150], Step [201/5000], Loss = 0.5497
Epoch [8/150], Step [301/5000], Loss = 0.2926
Epoch [8/150], Step [401/5000], Loss = 0.3153
Epoch [8/150], Step [501/5000], Loss = 0.4674
Epoch [8/150], Step [601/5000], Loss = 0.3758
Epoch [8/150], Step [701/5000], Loss = 0.3520
Epoch [8/150], Step [801/5000], Loss = 0.2146
Epoch [8/150], Step [901/5000], Loss = 0.2455
Epoch [8/150], Step [1001/5000], Loss = 0.2790
Epoch [8/150], Step [1101/5000], Loss = 0.4921
Epoch [8/150], Step [1201/5000], Loss = 0.1399
Epoch [8/150], Step [1301/5000], Loss = 0.2894
Epoch [8/150], Step [1401/5000], Loss = 0.6084
Epoch [8/150], Step [1501/5000], Loss = 0.8095
Epoch [8/150], Step [1601/5000], Loss = 0.2862
Epoch [8/150], Step [1701/5000], Loss = 0.7234
Epoch [8/150], Step [1801/5000], Loss = 0.6

Epoch [11/150], Step [2001/5000], Loss = 0.2297
Epoch [11/150], Step [2101/5000], Loss = 0.2077
Epoch [11/150], Step [2201/5000], Loss = 0.2226
Epoch [11/150], Step [2301/5000], Loss = 0.0409
Epoch [11/150], Step [2401/5000], Loss = 0.3865
Epoch [11/150], Step [2501/5000], Loss = 0.4849
Epoch [11/150], Step [2601/5000], Loss = 0.1403
Epoch [11/150], Step [2701/5000], Loss = 0.1898
Epoch [11/150], Step [2801/5000], Loss = 0.1653
Epoch [11/150], Step [2901/5000], Loss = 0.1103
Epoch [11/150], Step [3001/5000], Loss = 0.6431
Epoch [11/150], Step [3101/5000], Loss = 0.3356
Epoch [11/150], Step [3201/5000], Loss = 0.7917
Epoch [11/150], Step [3301/5000], Loss = 0.0425
Epoch [11/150], Step [3401/5000], Loss = 0.3765
Epoch [11/150], Step [3501/5000], Loss = 0.0068
Epoch [11/150], Step [3601/5000], Loss = 0.3831
Epoch [11/150], Step [3701/5000], Loss = 0.1550
Epoch [11/150], Step [3801/5000], Loss = 0.6547
Epoch [11/150], Step [3901/5000], Loss = 0.3512
Epoch [11/150], Step [4001/5000], Loss =

Epoch [14/150], Step [4001/5000], Loss = 0.5348
Epoch [14/150], Step [4101/5000], Loss = 0.1145
Epoch [14/150], Step [4201/5000], Loss = 0.1189
Epoch [14/150], Step [4301/5000], Loss = 0.0440
Epoch [14/150], Step [4401/5000], Loss = 0.0502
Epoch [14/150], Step [4501/5000], Loss = 0.1077
Epoch [14/150], Step [4601/5000], Loss = 0.2574
Epoch [14/150], Step [4701/5000], Loss = 0.0408
Epoch [14/150], Step [4801/5000], Loss = 0.5160
Epoch [14/150], Step [4901/5000], Loss = 0.6352
Epoch [14/150], Acc = 50.0000
Epoch [15/150], Step [1/5000], Loss = 0.2026
Epoch [15/150], Step [101/5000], Loss = 0.2444
Epoch [15/150], Step [201/5000], Loss = 0.0930
Epoch [15/150], Step [301/5000], Loss = 0.0731
Epoch [15/150], Step [401/5000], Loss = 0.2111
Epoch [15/150], Step [501/5000], Loss = 0.0167
Epoch [15/150], Step [601/5000], Loss = 0.1211
Epoch [15/150], Step [701/5000], Loss = 0.1651
Epoch [15/150], Step [801/5000], Loss = 0.0135
Epoch [15/150], Step [901/5000], Loss = 0.0906
Epoch [15/150], Step [

Epoch [18/150], Step [1001/5000], Loss = 0.0093
Epoch [18/150], Step [1101/5000], Loss = 0.1543
Epoch [18/150], Step [1201/5000], Loss = 0.2673
Epoch [18/150], Step [1301/5000], Loss = 0.2011
Epoch [18/150], Step [1401/5000], Loss = 0.1680
Epoch [18/150], Step [1501/5000], Loss = 0.2071
Epoch [18/150], Step [1601/5000], Loss = 0.2157
Epoch [18/150], Step [1701/5000], Loss = 0.0053
Epoch [18/150], Step [1801/5000], Loss = 0.0838
Epoch [18/150], Step [1901/5000], Loss = 0.1903
Epoch [18/150], Step [2001/5000], Loss = 0.3356
Epoch [18/150], Step [2101/5000], Loss = 0.0265
Epoch [18/150], Step [2201/5000], Loss = 0.2226
Epoch [18/150], Step [2301/5000], Loss = 0.4436
Epoch [18/150], Step [2401/5000], Loss = 0.2290
Epoch [18/150], Step [2501/5000], Loss = 0.3183
Epoch [18/150], Step [2601/5000], Loss = 0.1212
Epoch [18/150], Step [2701/5000], Loss = 0.0807
Epoch [18/150], Step [2801/5000], Loss = 0.1022
Epoch [18/150], Step [2901/5000], Loss = 0.0066
Epoch [18/150], Step [3001/5000], Loss =

Epoch [21/150], Step [3001/5000], Loss = 0.0759
Epoch [21/150], Step [3101/5000], Loss = 0.0133
Epoch [21/150], Step [3201/5000], Loss = 0.0150
Epoch [21/150], Step [3301/5000], Loss = 0.0100
Epoch [21/150], Step [3401/5000], Loss = 0.3636
Epoch [21/150], Step [3501/5000], Loss = 0.0141
Epoch [21/150], Step [3601/5000], Loss = 0.0215
Epoch [21/150], Step [3701/5000], Loss = 0.0337
Epoch [21/150], Step [3801/5000], Loss = 0.1005
Epoch [21/150], Step [3901/5000], Loss = 0.0384
Epoch [21/150], Step [4001/5000], Loss = 0.0782
Epoch [21/150], Step [4101/5000], Loss = 0.0107
Epoch [21/150], Step [4201/5000], Loss = 0.0712
Epoch [21/150], Step [4301/5000], Loss = 0.0113
Epoch [21/150], Step [4401/5000], Loss = 0.5995
Epoch [21/150], Step [4501/5000], Loss = 0.0062
Epoch [21/150], Step [4601/5000], Loss = 0.0610
Epoch [21/150], Step [4701/5000], Loss = 0.0991
Epoch [21/150], Step [4801/5000], Loss = 0.0452
Epoch [21/150], Step [4901/5000], Loss = 0.0188
Epoch [21/150], Acc = 90.0000
Epoch [22/

Epoch [24/150], Acc = 50.0000
Epoch [25/150], Step [1/5000], Loss = 0.0067
Epoch [25/150], Step [101/5000], Loss = 0.0215
Epoch [25/150], Step [201/5000], Loss = 0.1485
Epoch [25/150], Step [301/5000], Loss = 0.5046
Epoch [25/150], Step [401/5000], Loss = 0.0401
Epoch [25/150], Step [501/5000], Loss = 0.2469
Epoch [25/150], Step [601/5000], Loss = 0.0162
Epoch [25/150], Step [701/5000], Loss = 0.4334
Epoch [25/150], Step [801/5000], Loss = 0.0098
Epoch [25/150], Step [901/5000], Loss = 0.0914
Epoch [25/150], Step [1001/5000], Loss = 0.0024
Epoch [25/150], Step [1101/5000], Loss = 0.0164
Epoch [25/150], Step [1201/5000], Loss = 0.3268
Epoch [25/150], Step [1301/5000], Loss = 0.3238
Epoch [25/150], Step [1401/5000], Loss = 0.1793
Epoch [25/150], Step [1501/5000], Loss = 0.0171
Epoch [25/150], Step [1601/5000], Loss = 0.0091
Epoch [25/150], Step [1701/5000], Loss = 0.0784
Epoch [25/150], Step [1801/5000], Loss = 0.0402
Epoch [25/150], Step [1901/5000], Loss = 0.0367
Epoch [25/150], Step [

Epoch [28/150], Step [2001/5000], Loss = 0.0021
Epoch [28/150], Step [2101/5000], Loss = 0.0148
Epoch [28/150], Step [2201/5000], Loss = 0.0060
Epoch [28/150], Step [2301/5000], Loss = 0.0055
Epoch [28/150], Step [2401/5000], Loss = 0.0025
Epoch [28/150], Step [2501/5000], Loss = 0.1697
Epoch [28/150], Step [2601/5000], Loss = 0.2942
Epoch [28/150], Step [2701/5000], Loss = 0.0001
Epoch [28/150], Step [2801/5000], Loss = 0.4689
Epoch [28/150], Step [2901/5000], Loss = 0.0080
Epoch [28/150], Step [3001/5000], Loss = 0.0006
Epoch [28/150], Step [3101/5000], Loss = 0.0076
Epoch [28/150], Step [3201/5000], Loss = 0.0430
Epoch [28/150], Step [3301/5000], Loss = 0.0049
Epoch [28/150], Step [3401/5000], Loss = 0.2287
Epoch [28/150], Step [3501/5000], Loss = 0.0280
Epoch [28/150], Step [3601/5000], Loss = 0.0126
Epoch [28/150], Step [3701/5000], Loss = 0.0056
Epoch [28/150], Step [3801/5000], Loss = 0.0054
Epoch [28/150], Step [3901/5000], Loss = 0.0026
Epoch [28/150], Step [4001/5000], Loss =

Epoch [31/150], Step [4001/5000], Loss = 0.0015
Epoch [31/150], Step [4101/5000], Loss = 0.0064
Epoch [31/150], Step [4201/5000], Loss = 0.1766
Epoch [31/150], Step [4301/5000], Loss = 0.1498
Epoch [31/150], Step [4401/5000], Loss = 0.0244
Epoch [31/150], Step [4501/5000], Loss = 0.0057
Epoch [31/150], Step [4601/5000], Loss = 0.0002
Epoch [31/150], Step [4701/5000], Loss = 0.0028
Epoch [31/150], Step [4801/5000], Loss = 0.2050
Epoch [31/150], Step [4901/5000], Loss = 0.0928
Epoch [31/150], Acc = 50.0000
Epoch [32/150], Step [1/5000], Loss = 0.0590
Epoch [32/150], Step [101/5000], Loss = 0.0070
Epoch [32/150], Step [201/5000], Loss = 0.0002
Epoch [32/150], Step [301/5000], Loss = 0.0074
Epoch [32/150], Step [401/5000], Loss = 0.0500
Epoch [32/150], Step [501/5000], Loss = 0.0116
Epoch [32/150], Step [601/5000], Loss = 0.0469
Epoch [32/150], Step [701/5000], Loss = 0.0830
Epoch [32/150], Step [801/5000], Loss = 0.0056
Epoch [32/150], Step [901/5000], Loss = 0.2796
Epoch [32/150], Step [

Epoch [35/150], Step [1001/5000], Loss = 0.2001
Epoch [35/150], Step [1101/5000], Loss = 0.0802
Epoch [35/150], Step [1201/5000], Loss = 0.0119
Epoch [35/150], Step [1301/5000], Loss = 0.0004
Epoch [35/150], Step [1401/5000], Loss = 0.0032
Epoch [35/150], Step [1501/5000], Loss = 0.1289
Epoch [35/150], Step [1601/5000], Loss = 0.0055
Epoch [35/150], Step [1701/5000], Loss = 0.0013
Epoch [35/150], Step [1801/5000], Loss = 0.0079
Epoch [35/150], Step [1901/5000], Loss = 0.0083
Epoch [35/150], Step [2001/5000], Loss = 0.0002
Epoch [35/150], Step [2101/5000], Loss = 0.0174
Epoch [35/150], Step [2201/5000], Loss = 0.0934
Epoch [35/150], Step [2301/5000], Loss = 0.0488
Epoch [35/150], Step [2401/5000], Loss = 0.1212
Epoch [35/150], Step [2501/5000], Loss = 0.0011
Epoch [35/150], Step [2601/5000], Loss = 0.0093
Epoch [35/150], Step [2701/5000], Loss = 0.0020
Epoch [35/150], Step [2801/5000], Loss = 0.0009
Epoch [35/150], Step [2901/5000], Loss = 0.1196
Epoch [35/150], Step [3001/5000], Loss =

Epoch [38/150], Step [3001/5000], Loss = 0.0022
Epoch [38/150], Step [3101/5000], Loss = 0.0018
Epoch [38/150], Step [3201/5000], Loss = 0.0126
Epoch [38/150], Step [3301/5000], Loss = 0.0023
Epoch [38/150], Step [3401/5000], Loss = 0.1032
Epoch [38/150], Step [3501/5000], Loss = 0.0089
Epoch [38/150], Step [3601/5000], Loss = 0.1881
Epoch [38/150], Step [3701/5000], Loss = 0.0020
Epoch [38/150], Step [3801/5000], Loss = 0.0005
Epoch [38/150], Step [3901/5000], Loss = 0.0354
Epoch [38/150], Step [4001/5000], Loss = 0.0007
Epoch [38/150], Step [4101/5000], Loss = 0.0004
Epoch [38/150], Step [4201/5000], Loss = 0.1726
Epoch [38/150], Step [4301/5000], Loss = 0.0240
Epoch [38/150], Step [4401/5000], Loss = 0.2254
Epoch [38/150], Step [4501/5000], Loss = 0.0460
Epoch [38/150], Step [4601/5000], Loss = 0.0028
Epoch [38/150], Step [4701/5000], Loss = 0.0000
Epoch [38/150], Step [4801/5000], Loss = 0.0008
Epoch [38/150], Step [4901/5000], Loss = 0.5162
Epoch [38/150], Acc = 60.0000
Epoch [39/

Epoch [41/150], Acc = 70.0000
Epoch [42/150], Step [1/5000], Loss = 0.0000
Epoch [42/150], Step [101/5000], Loss = 0.0179
Epoch [42/150], Step [201/5000], Loss = 0.0005
Epoch [42/150], Step [301/5000], Loss = 0.0020
Epoch [42/150], Step [401/5000], Loss = 0.0786
Epoch [42/150], Step [501/5000], Loss = 0.0055
Epoch [42/150], Step [601/5000], Loss = 0.1446
Epoch [42/150], Step [701/5000], Loss = 0.0001
Epoch [42/150], Step [801/5000], Loss = 0.0168
Epoch [42/150], Step [901/5000], Loss = 0.0001
Epoch [42/150], Step [1001/5000], Loss = 0.0024
Epoch [42/150], Step [1101/5000], Loss = 0.0041
Epoch [42/150], Step [1201/5000], Loss = 0.0248
Epoch [42/150], Step [1301/5000], Loss = 0.0011
Epoch [42/150], Step [1401/5000], Loss = 0.0708
Epoch [42/150], Step [1501/5000], Loss = 0.0181
Epoch [42/150], Step [1601/5000], Loss = 0.0075
Epoch [42/150], Step [1701/5000], Loss = 0.0020
Epoch [42/150], Step [1801/5000], Loss = 0.0030
Epoch [42/150], Step [1901/5000], Loss = 0.0021
Epoch [42/150], Step [

Epoch [45/150], Step [2001/5000], Loss = 0.0003
Epoch [45/150], Step [2101/5000], Loss = 0.0035
Epoch [45/150], Step [2201/5000], Loss = 0.0009
Epoch [45/150], Step [2301/5000], Loss = 0.0107
Epoch [45/150], Step [2401/5000], Loss = 0.0006
Epoch [45/150], Step [2501/5000], Loss = 0.0009
Epoch [45/150], Step [2601/5000], Loss = 0.0010
Epoch [45/150], Step [2701/5000], Loss = 0.0057
Epoch [45/150], Step [2801/5000], Loss = 0.0004
Epoch [45/150], Step [2901/5000], Loss = 0.0032
Epoch [45/150], Step [3001/5000], Loss = 0.0002
Epoch [45/150], Step [3101/5000], Loss = 0.0006
Epoch [45/150], Step [3201/5000], Loss = 0.0041
Epoch [45/150], Step [3301/5000], Loss = 0.0000
Epoch [45/150], Step [3401/5000], Loss = 0.0025
Epoch [45/150], Step [3501/5000], Loss = 0.0000
Epoch [45/150], Step [3601/5000], Loss = 0.0004
Epoch [45/150], Step [3701/5000], Loss = 0.0004
Epoch [45/150], Step [3801/5000], Loss = 0.0387
Epoch [45/150], Step [3901/5000], Loss = 0.0049
Epoch [45/150], Step [4001/5000], Loss =

Epoch [48/150], Step [4001/5000], Loss = 0.0019
Epoch [48/150], Step [4101/5000], Loss = 0.0010
Epoch [48/150], Step [4201/5000], Loss = 0.0014
Epoch [48/150], Step [4301/5000], Loss = 0.0036
Epoch [48/150], Step [4401/5000], Loss = 0.0054
Epoch [48/150], Step [4501/5000], Loss = 0.0012
Epoch [48/150], Step [4601/5000], Loss = 0.0013
Epoch [48/150], Step [4701/5000], Loss = 0.0146
Epoch [48/150], Step [4801/5000], Loss = 0.0007
Epoch [48/150], Step [4901/5000], Loss = 0.0041
Epoch [48/150], Acc = 60.0000
Epoch [49/150], Step [1/5000], Loss = 0.0062
Epoch [49/150], Step [101/5000], Loss = 0.0002
Epoch [49/150], Step [201/5000], Loss = 0.0031
Epoch [49/150], Step [301/5000], Loss = 0.0046
Epoch [49/150], Step [401/5000], Loss = 0.0018
Epoch [49/150], Step [501/5000], Loss = 0.0021
Epoch [49/150], Step [601/5000], Loss = 0.0063
Epoch [49/150], Step [701/5000], Loss = 0.0006
Epoch [49/150], Step [801/5000], Loss = 0.2104
Epoch [49/150], Step [901/5000], Loss = 0.0010
Epoch [49/150], Step [

Epoch [52/150], Step [1001/5000], Loss = 0.0004
Epoch [52/150], Step [1101/5000], Loss = 0.0012
Epoch [52/150], Step [1201/5000], Loss = 0.0574
Epoch [52/150], Step [1301/5000], Loss = 0.0005
Epoch [52/150], Step [1401/5000], Loss = 0.0003
Epoch [52/150], Step [1501/5000], Loss = 0.0005
Epoch [52/150], Step [1601/5000], Loss = 0.0000
Epoch [52/150], Step [1701/5000], Loss = 0.0012
Epoch [52/150], Step [1801/5000], Loss = 0.0473
Epoch [52/150], Step [1901/5000], Loss = 0.0019
Epoch [52/150], Step [2001/5000], Loss = 0.0016
Epoch [52/150], Step [2101/5000], Loss = 0.1006
Epoch [52/150], Step [2201/5000], Loss = 0.0005
Epoch [52/150], Step [2301/5000], Loss = 0.0007
Epoch [52/150], Step [2401/5000], Loss = 0.0010
Epoch [52/150], Step [2501/5000], Loss = 0.0092
Epoch [52/150], Step [2601/5000], Loss = 0.0003
Epoch [52/150], Step [2701/5000], Loss = 0.0030
Epoch [52/150], Step [2801/5000], Loss = 0.0778
Epoch [52/150], Step [2901/5000], Loss = 0.0012
Epoch [52/150], Step [3001/5000], Loss =

Epoch [55/150], Step [3001/5000], Loss = 0.0000
Epoch [55/150], Step [3101/5000], Loss = 0.0106
Epoch [55/150], Step [3201/5000], Loss = 0.0355
Epoch [55/150], Step [3301/5000], Loss = 0.0228
Epoch [55/150], Step [3401/5000], Loss = 0.0004
Epoch [55/150], Step [3501/5000], Loss = 0.0001
Epoch [55/150], Step [3601/5000], Loss = 0.0445
Epoch [55/150], Step [3701/5000], Loss = 0.0103
Epoch [55/150], Step [3801/5000], Loss = 0.0013
Epoch [55/150], Step [3901/5000], Loss = 0.0006
Epoch [55/150], Step [4001/5000], Loss = 0.0011
Epoch [55/150], Step [4101/5000], Loss = 0.0411
Epoch [55/150], Step [4201/5000], Loss = 0.0001
Epoch [55/150], Step [4301/5000], Loss = 0.0000
Epoch [55/150], Step [4401/5000], Loss = 0.0004
Epoch [55/150], Step [4501/5000], Loss = 0.0005
Epoch [55/150], Step [4601/5000], Loss = 0.1574
Epoch [55/150], Step [4701/5000], Loss = 0.0580
Epoch [55/150], Step [4801/5000], Loss = 0.0023
Epoch [55/150], Step [4901/5000], Loss = 0.0015
Epoch [55/150], Acc = 90.0000
Epoch [56/

Epoch [58/150], Acc = 80.0000
Epoch [59/150], Step [1/5000], Loss = 0.0064
Epoch [59/150], Step [101/5000], Loss = 0.0088
Epoch [59/150], Step [201/5000], Loss = 0.0201
Epoch [59/150], Step [301/5000], Loss = 0.0116
Epoch [59/150], Step [401/5000], Loss = 0.0001
Epoch [59/150], Step [501/5000], Loss = 0.0009
Epoch [59/150], Step [601/5000], Loss = 0.0202
Epoch [59/150], Step [701/5000], Loss = 0.0000
Epoch [59/150], Step [801/5000], Loss = 0.0017
Epoch [59/150], Step [901/5000], Loss = 0.0025
Epoch [59/150], Step [1001/5000], Loss = 0.2136
Epoch [59/150], Step [1101/5000], Loss = 0.0046
Epoch [59/150], Step [1201/5000], Loss = 0.0010
Epoch [59/150], Step [1301/5000], Loss = 0.0003
Epoch [59/150], Step [1401/5000], Loss = 0.0128
Epoch [59/150], Step [1501/5000], Loss = 0.0000
Epoch [59/150], Step [1601/5000], Loss = 0.0097
Epoch [59/150], Step [1701/5000], Loss = 0.0077
Epoch [59/150], Step [1801/5000], Loss = 0.0004
Epoch [59/150], Step [1901/5000], Loss = 0.0000
Epoch [59/150], Step [

Epoch [62/150], Step [2001/5000], Loss = 0.0000
Epoch [62/150], Step [2101/5000], Loss = 0.0010
Epoch [62/150], Step [2201/5000], Loss = 0.0000
Epoch [62/150], Step [2301/5000], Loss = 0.0005
Epoch [62/150], Step [2401/5000], Loss = 0.0008
Epoch [62/150], Step [2501/5000], Loss = 0.0090
Epoch [62/150], Step [2601/5000], Loss = 0.2726
Epoch [62/150], Step [2701/5000], Loss = 0.0010
Epoch [62/150], Step [2801/5000], Loss = 0.0010
Epoch [62/150], Step [2901/5000], Loss = 0.0052
Epoch [62/150], Step [3001/5000], Loss = 0.0071
Epoch [62/150], Step [3101/5000], Loss = 0.0001
Epoch [62/150], Step [3201/5000], Loss = 0.0019
Epoch [62/150], Step [3301/5000], Loss = 0.0005
Epoch [62/150], Step [3401/5000], Loss = 0.0000
Epoch [62/150], Step [3501/5000], Loss = 0.0022
Epoch [62/150], Step [3601/5000], Loss = 0.0014
Epoch [62/150], Step [3701/5000], Loss = 0.5525
Epoch [62/150], Step [3801/5000], Loss = 0.0673
Epoch [62/150], Step [3901/5000], Loss = 0.0007
Epoch [62/150], Step [4001/5000], Loss =

Accuracy of the network on the 10000 test images: 74 %
