<a href="https://colab.research.google.com/github/Jayesh-CSE/Data-independent-neural-pruning-via-coresets/blob/main/Data_independent_pruning_MLP%2B%20LaNET%20300-100_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Model define and Train and Prepare coreset and prune first hidden layer

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
# import torch.nn.utils.prune as prune
from typing import Callable, Tuple, Union
import sys
from matplotlib import pyplot as plt

In [2]:
#cd /content/drive/MyDrive/Data Independent Pruning Coreset

In [3]:
batch_size = 128

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.13066), (0.30810))])

trainset = torchvision.datasets.MNIST(root='/content/drive/MyDrive/Data Independent Pruning Coreset/data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=False, num_workers=2)

testset = torchvision.datasets.MNIST(root='/content/drive/MyDrive/Data Independent Pruning Coreset/data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = [str(i) for i in range(10)]

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [4]:
class Net(nn.Module):
    def __init__(self, in_size=28*28, num_n1=10000, num_n2 = 5000, cache_activation=False):
        super().__init__()
        self.fc1 = nn.Linear(in_size, num_n1)
        self.fc2 = nn.Linear(num_n1, num_n2)
        self.fc3 = nn.Linear(num_n2, 10)
        self.cache_activation = cache_activation

    def forward(self, x):
        x0 = x.view(x.size(0), -1)
        x1 = F.relu(self.fc1(x0))
        x2 = F.relu(self.fc2(x1))
        x3 = self.fc3(x2)
        
        if self.cache_activation:
            return x1, x2, x3
        else:
            return x3

In [5]:
def train(epochs, lr=0.01):
    
    #criterion = nn.CrossEntropyLoss().cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    
    for ep in range(epochs):
        for (x, y) in list(trainloader):
            
            optimizer.zero_grad()
           #output = model(x.cuda(0, non_blocking=True))
            output = model(x)
            if model.cache_activation:
                output = output[-1]
            #ls = criterion(output, y.cuda(0, non_blocking=True))
            ls = criterion(output, y)
            ls.backward()
            optimizer.step()
            
        print(ep+1, validate(testloader, model))

In [6]:
def validate(loader, model):
    
    model.eval()
    acc = 0
    n = 0
    
    with torch.no_grad():
        for i, (inp, target) in enumerate(loader):
            #output = model(inp.cuda(non_blocking=True))
            output = model(inp)
            if model.cache_activation:
                output = output[-1]
            _, pred = torch.max(output, 1)
            #acc += (pred == target.cuda(non_blocking=True)).sum().item()
            acc += (pred == target).sum().item()
            n += len(target)
            
    acc = acc/n
    return acc

In [7]:
num_n1 = int(1e4)
num_n2 = int(5e3)
model = Net(28*28, num_n1, num_n2, True)
#train(10, 0.01)

# torch.cuda.set_device()
# torch.save(model.state_dict(), '/content/drive/MyDrive/Data Independent Pruning Coreset/v_fc_{}_{}'.format(num_n1, num_n2))
model.load_state_dict(torch.load('/content/drive/MyDrive/Data Independent Pruning Coreset/v_fc_{}_{}'.format(num_n1, num_n2)))
#model = model.cuda()

<All keys matched successfully>

In [8]:
torch.save(model.state_dict(), '/content/drive/MyDrive/Data Independent Pruning Coreset/v_fc_{}_{}'.format(num_n1, num_n2))

In [9]:
print('Epoch', 0, 'Neurons', num_n1, num_n2)
print('Test acc', validate(testloader, model))

Epoch 0 Neurons 10000 5000
Test acc 0.982


In [10]:
print(model)

Net(
  (fc1): Linear(in_features=784, out_features=10000, bias=True)
  (fc2): Linear(in_features=10000, out_features=5000, bias=True)
  (fc3): Linear(in_features=5000, out_features=10, bias=True)
)


In [11]:
class Coreset:
    def __init__(self, points, weights, activation_function: Callable, upper_bound: int = 1):
        assert points.shape[0] == weights.shape[0]

        self.__points = points.cpu()
        self.__weights = weights.cpu()
        self.__activation = activation_function
        self.__beta = upper_bound
        self.__sensitivity = None
        self.indices = None

    @property
    def sensitivity(self):
        if self.__sensitivity is None:
            points_norm = self.__points.norm(dim=1)
            assert points_norm.shape[0] == self.__points.shape[0]
            weights = torch.abs(self.__weights).max(dim=1)[0]  # max returns (values, indices)
            assert weights.shape[0] == self.__points.shape[0]
            #print(len(self.__beta))
            #print(points_norm.shape)
            #print(weights.shape)
            self.__sensitivity = weights * torch.abs(self.__activation(self.__beta * points_norm))
            self.__sensitivity /= self.__sensitivity.sum()

        return self.__sensitivity

    def compute_coreset(self, coreset_size):
        assert coreset_size <= self.__points.shape[0]
        prob = self.sensitivity.cpu().detach().numpy()

        indices = set()
        idxs = []

        cnt = 0
        while len(indices) < coreset_size:
            i = np.random.choice(a=self.__points.shape[0], size=1, p=prob).tolist()[0]
            idxs.append(i)
            indices.add(i)
            cnt += 1

        hist = np.histogram(idxs, bins=range(self.__points.shape[0] + 1))[0].flatten()
        idxs = np.nonzero(hist)[0]
        self.indices = idxs
        coreset = self.__points[idxs, :]

        weights = (self.__weights[idxs].t() * torch.tensor(hist[idxs]).float()).t()
        weights = (weights.t() / (torch.tensor(prob[idxs]) * cnt)).t()

        return coreset, weights

In [12]:
def compress_fc_layer(layer1: Tuple[torch.Tensor, torch.Tensor],
                      layer2: Tuple[torch.Tensor, torch.Tensor],
                      compressed_size,
                      activation: Callable,
                      upper_bound,
                      device,
                      compression_type):
    num_neurons = layer1[1].shape[0]
    if compression_type == "Coreset":
        points = np.concatenate(
            (layer1[0].cpu().detach().numpy(), layer1[1].view(num_neurons, 1).cpu().detach().numpy()),
            axis=1)
        points = torch.tensor(points)
        weights = layer2[0].t()
        coreset = Coreset(points=points, weights=weights, activation_function=activation, upper_bound=upper_bound)
        points, weights = coreset.compute_coreset(compressed_size)
        indices = coreset.indices
        layer1 = (points[:, :-1].to(device), points[:, 1].to(device))
        weights = weights.t()
        layer2 = (weights.to(device), layer2[1].to(device))
    elif compression_type == "Uniform":
        indices = np.random.choice(num_neurons, size=compressed_size, replace=False)
        layer1 = (layer1[0][indices, :], layer1[1][indices])
        layer2 = (layer2[0][:, indices], layer2[1])
    elif compression_type == "Top-K":
        indices = torch.topk(torch.norm(layer1[0], dim=1), k=compressed_size)[1]
        layer1 = (layer1[0][indices, :], layer1[1][indices])
        layer2 = (layer2[0][:, indices], layer2[1])
    else:
        sys.exit("There is not a compression type: {}".format(compression_type))

    return layer1, layer2, indices

In [13]:
def relu(X):
   return np.maximum(0,X)

In [14]:
relu(torch.tensor([4,3,-2]))

tensor([4, 3, 0])

In [15]:
layer1 = tuple(model.fc1.parameters())
layer2 = tuple(model.fc2.parameters())

In [16]:
from torch import linalg as LA

data_norm = []

for (x, y) in list(trainset):
    data_norm.append(LA.vector_norm(x).int())

In [17]:
#len(data_norm)
#data_norm
#type(data_norm)
max(data_norm)

tensor(48, dtype=torch.int32)

In [18]:
num_core_n = 1000
beta = 1

l1, l2, ind = compress_fc_layer(layer1, layer2, num_core_n , relu, beta , "cpu", "Coreset")

In [19]:
[len(a) for a in l2]

[5000, 5000]

In [20]:
model.fc1 = nn.Linear(l1[0].shape[1], l1[0].shape[0])
model.fc2 = nn.Linear(l2[0].shape[1], l2[0].shape[0])

with torch.no_grad():
    model.fc1.weight.copy_(l1[0])
    model.fc1.bias.copy_(l1[1])
    model.fc2.weight.copy_(l2[0])
    model.fc2.bias.copy_(l2[1])

In [21]:
print(model)

Net(
  (fc1): Linear(in_features=784, out_features=1000, bias=True)
  (fc2): Linear(in_features=1000, out_features=5000, bias=True)
  (fc3): Linear(in_features=5000, out_features=10, bias=True)
)


In [None]:
train(5, 0.01)

In [None]:
print('Epoch', 1 , 'Neurons', num_core_n, num_n2)
print('Test acc', validate(testloader, model))

Epoch 1 Neurons 1000 5000
Test acc 0.9818


In [None]:
print(model)

Net(
  (fc1): Linear(in_features=784, out_features=1000, bias=True)
  (fc2): Linear(in_features=1000, out_features=5000, bias=True)
  (fc3): Linear(in_features=5000, out_features=10, bias=True)
)


#Layer 2 pruning as per layer 3

In [None]:
layer2 = tuple(model.fc2.parameters())
layer3 = tuple(model.fc3.parameters())

In [None]:
num_core_n = 500
beta = 1

l1, l2, ind = compress_fc_layer(layer2, layer3, num_core_n , relu, beta , "cpu", "Coreset")

In [None]:
[len(a) for a in l1]

[500, 500]

In [None]:
model.fc2 = nn.Linear(l1[0].shape[1], l1[0].shape[0])
model.fc3 = nn.Linear(l2[0].shape[1], l2[0].shape[0])

with torch.no_grad():
    model.fc2.weight.copy_(l1[0])
    model.fc2.bias.copy_(l1[1])
    model.fc3.weight.copy_(l2[0])
    model.fc3.bias.copy_(l2[1])

In [None]:
train(5, 0.01)

1 0.9662
2 0.9778
3 0.9792
4 0.9826
5 0.9839


In [None]:
print('Epoch', 1 , 'Neurons', 1000, 500)
print('Test acc', validate(testloader, model))

Epoch 1 Neurons 1000 500
Test acc 0.9839


In [None]:
print(model)

Net(
  (fc1): Linear(in_features=784, out_features=1000, bias=True)
  (fc2): Linear(in_features=1000, out_features=500, bias=True)
  (fc3): Linear(in_features=500, out_features=10, bias=True)
)


#LeNET_300_100_MNIST_Model

In [23]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
# import torch.nn.utils.prune as prune
from typing import Callable, Tuple, Union
import sys
from matplotlib import pyplot as plt

In [24]:
cd /content/drive/MyDrive/Data Independent Pruning Coreset

/content/drive/MyDrive/Data Independent Pruning Coreset


In [25]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(16 * 4 * 4, 300)
        self.fc2 = nn.Linear(300, 100)
        self.fc3 = nn.Linear(100, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
    
net = Net()
net.load_state_dict(torch.load("LeNET_300_100_MNIST_Model"))

<All keys matched successfully>

In [26]:
from torchsummary import summary
device=torch.device("cpu")
model=Net().to(device)
summary(model, input_size=(1, 28, 28), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 24, 24]             156
         MaxPool2d-2            [-1, 6, 12, 12]               0
            Conv2d-3             [-1, 16, 8, 8]           2,416
         MaxPool2d-4             [-1, 16, 4, 4]               0
            Linear-5                  [-1, 300]          77,100
            Linear-6                  [-1, 100]          30,100
            Linear-7                   [-1, 10]           1,010
Total params: 110,782
Trainable params: 110,782
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.05
Params size (MB): 0.42
Estimated Total Size (MB): 0.47
----------------------------------------------------------------


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [27]:
batch_size = 128

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.13066), (0.30810))])

trainset = torchvision.datasets.MNIST(root='/content/drive/MyDrive/Data Independent Pruning Coreset/data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=False, num_workers=2)

testset = torchvision.datasets.MNIST(root='/content/drive/MyDrive/Data Independent Pruning Coreset/data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = [str(i) for i in range(10)]

In [28]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

old_accuracy=correct/total

Accuracy of the network on the test images: 89.750000 %


In [29]:
(net.state_dict()["fc1.weight"]).shape

torch.Size([300, 256])

In [30]:
(net.state_dict()["fc2.weight"]).shape

torch.Size([100, 300])

In [31]:
class Coreset:
    def __init__(self, points, weights, activation_function: Callable, upper_bound: int = 1):
        assert points.shape[0] == weights.shape[0]

        self.__points = points.cpu()
        self.__weights = weights.cpu()
        self.__activation = activation_function
        self.__beta = upper_bound
        self.__sensitivity = None
        self.indices = None

    @property
    def sensitivity(self):
        if self.__sensitivity is None:
            points_norm = self.__points.norm(dim=1)
            assert points_norm.shape[0] == self.__points.shape[0]
            weights = torch.abs(self.__weights).max(dim=1)[0]  # max returns (values, indices)
            assert weights.shape[0] == self.__points.shape[0]
            #print(len(self.__beta))
            #print(points_norm.shape)
            #print(weights.shape)
            self.__sensitivity = weights * torch.abs(self.__activation(self.__beta * points_norm))
            self.__sensitivity /= self.__sensitivity.sum()

        return self.__sensitivity

    def compute_coreset(self, coreset_size):
        assert coreset_size <= self.__points.shape[0]
        prob = self.sensitivity.cpu().detach().numpy()

        indices = set()
        idxs = []

        cnt = 0
        while len(indices) < coreset_size:
            i = np.random.choice(a=self.__points.shape[0], size=1, p=prob).tolist()[0]
            idxs.append(i)
            indices.add(i)
            cnt += 1

        hist = np.histogram(idxs, bins=range(self.__points.shape[0] + 1))[0].flatten()
        idxs = np.nonzero(hist)[0]
        self.indices = idxs
        coreset = self.__points[idxs, :]

        weights = (self.__weights[idxs].t() * torch.tensor(hist[idxs]).float()).t()
        weights = (weights.t() / (torch.tensor(prob[idxs]) * cnt)).t()

        return coreset, weights

In [32]:
def compress_fc_layer(layer1: Tuple[torch.Tensor, torch.Tensor],
                      layer2: Tuple[torch.Tensor, torch.Tensor],
                      compressed_size,
                      activation: Callable,
                      upper_bound,
                      device,
                      compression_type):
    num_neurons = layer1[1].shape[0]
    if compression_type == "Coreset":
        points = np.concatenate(
            (layer1[0].cpu().detach().numpy(), layer1[1].view(num_neurons, 1).cpu().detach().numpy()),
            axis=1)
        points = torch.tensor(points)
        weights = layer2[0].t()
        coreset = Coreset(points=points, weights=weights, activation_function=activation, upper_bound=upper_bound)
        points, weights = coreset.compute_coreset(compressed_size)
        indices = coreset.indices
        layer1 = (points[:, :-1].to(device), points[:, 1].to(device))
        weights = weights.t()
        layer2 = (weights.to(device), layer2[1].to(device))
    elif compression_type == "Uniform":
        indices = np.random.choice(num_neurons, size=compressed_size, replace=False)
        layer1 = (layer1[0][indices, :], layer1[1][indices])
        layer2 = (layer2[0][:, indices], layer2[1])
    elif compression_type == "Top-K":
        indices = torch.topk(torch.norm(layer1[0], dim=1), k=compressed_size)[1]
        layer1 = (layer1[0][indices, :], layer1[1][indices])
        layer2 = (layer2[0][:, indices], layer2[1])
    else:
        sys.exit("There is not a compression type: {}".format(compression_type))

    return layer1, layer2, indices

In [33]:
def relu(X):
   return np.maximum(0,X)

In [34]:
layer1 = tuple(model.fc1.parameters())
layer2 = tuple(model.fc2.parameters())

In [35]:
model.fc1.weight.shape

torch.Size([300, 256])

In [36]:
num_core_n = 50
beta = 1

l1, l2, ind = compress_fc_layer(layer1, layer2, num_core_n , relu, beta , "cpu", "Coreset")

In [37]:
[len(a) for a in l1]

[50, 50]

In [38]:
model.fc1 = nn.Linear(l1[0].shape[1], l1[0].shape[0])
model.fc2 = nn.Linear(l2[0].shape[1], l2[0].shape[0])

with torch.no_grad():
    model.fc1.weight.copy_(l1[0])
    model.fc1.bias.copy_(l1[1])
    model.fc2.weight.copy_(l2[0])
    model.fc2.bias.copy_(l2[1])

In [39]:
print(model)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=10, bias=True)
)


In [40]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [41]:
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # print(inputs.shape)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 2000 mini-batches
            print("[{}, {}] loss: {}".format
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Reraining')

[1, 20] loss: 0.003772250100970268
[1, 40] loss: 0.0035867044031620025
[1, 60] loss: 0.0033470877483487128
[1, 80] loss: 0.0037434734031558036
[1, 100] loss: 0.0036631953865289687
[1, 120] loss: 0.003892331674695015
[1, 140] loss: 0.0034026815742254257
[1, 160] loss: 0.0027475959062576295
[1, 180] loss: 0.003043649524450302
[1, 200] loss: 0.003050462529063225
[1, 220] loss: 0.002916663631796837
[1, 240] loss: 0.00315389883518219
[1, 260] loss: 0.0031958159655332565
[1, 280] loss: 0.00246702741086483
[1, 300] loss: 0.0028255219385027886
[1, 320] loss: 0.0027658695355057715
[1, 340] loss: 0.00296642754599452
[1, 360] loss: 0.002764710146933794
[1, 380] loss: 0.0025635625422000883
[1, 400] loss: 0.0031047858335077764
[1, 420] loss: 0.0025944182798266413
[1, 440] loss: 0.002173855446279049
[1, 460] loss: 0.0017420647814869882
[2, 20] loss: 0.0022177101150155066
[2, 40] loss: 0.0020342259146273137
[2, 60] loss: 0.002068952813744545
[2, 80] loss: 0.0024745532050728797
[2, 100] loss: 0.002446

In [42]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

Accuracy of the network on the test images: 97.930000 %


In [43]:
print(model)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=10, bias=True)
)


#Layer 2 Pruning based on layer 3

In [51]:
layer2 = tuple(model.fc2.parameters())
layer3 = tuple(model.fc3.parameters())

In [52]:
model.fc3.weight.shape

torch.Size([10, 100])

In [53]:
num_core_n = 25
beta = 1

l2, l3, ind = compress_fc_layer(layer2, layer3, num_core_n , relu, beta , "cpu", "Coreset")

In [56]:
[len(a) for a in l3]

[10, 10]

In [57]:
model.fc2 = nn.Linear(l2[0].shape[1], l2[0].shape[0])
model.fc3 = nn.Linear(l3[0].shape[1], l3[0].shape[0])

with torch.no_grad():
    model.fc2.weight.copy_(l2[0])
    model.fc2.bias.copy_(l2[1])
    model.fc3.weight.copy_(l3[0])
    model.fc3.bias.copy_(l3[1])

In [58]:
print(model)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=25, bias=True)
  (fc3): Linear(in_features=25, out_features=10, bias=True)
)


In [59]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [60]:
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # print(inputs.shape)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 2000 mini-batches
            print("[{}, {}] loss: {}".format
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Reraining')

[1, 20] loss: 0.0005789671568199993
[1, 40] loss: 0.00051240194728598
[1, 60] loss: 0.0006788657288998365
[1, 80] loss: 0.0006780273383483291
[1, 100] loss: 0.0005728032554034143
[1, 120] loss: 0.0005050754318945109
[1, 140] loss: 0.0006201593736186624
[1, 160] loss: 0.0005338320704177022
[1, 180] loss: 0.0005170704592019319
[1, 200] loss: 0.0005440214904956519
[1, 220] loss: 0.0007376004755496979
[1, 240] loss: 0.0005114182522520423
[1, 260] loss: 0.0005392436496913433
[1, 280] loss: 0.0005973276984877885
[1, 300] loss: 0.0006202204870060086
[1, 320] loss: 0.0005212844056077301
[1, 340] loss: 0.0007022701539099217
[1, 360] loss: 0.0006026241723448039
[1, 380] loss: 0.0006337221176363528
[1, 400] loss: 0.0007763129917439073
[1, 420] loss: 0.0006610381701029838
[1, 440] loss: 0.0005607548784464598
[1, 460] loss: 0.00040976276132278144
[2, 20] loss: 0.0005663281790912151
[2, 40] loss: 0.0004799041268415749
[2, 60] loss: 0.0006377864284440875
[2, 80] loss: 0.0006410947227850556
[2, 100] l

In [61]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %f %%' % (
    100 * correct / total))

Accuracy of the network on the test images: 98.450000 %


In [62]:
print(model)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=25, bias=True)
  (fc3): Linear(in_features=25, out_features=10, bias=True)
)
