In [1]:
import torch
import torchvision
from torchvision import datasets, transforms
import torch.autograd.profiler as profiler

transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])

trainset = datasets.MNIST(root='./data', train=True,download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset,batch_size=64,shuffle=True, num_workers=2)

testset = datasets.MNIST(root='./data', train=False,download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,shuffle=False, num_workers=2)


In [17]:
# network structure
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.quant = torch.quantization.QuantStub()
        self.fc1 = nn.Linear(784, 100)
        self.fc2 = nn.Linear(100, 30)
        self.fc3 = nn.Linear(30, 10)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
#         x = F.log_softmax(x, dim=1)
        x = self.dequant(x)
        return x

    
net = Net()

In [18]:
import time
import os
import psutil

#train & validation
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


time_start = time.perf_counter()
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 800 == 799:    # print every 800 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 800))
            running_loss = 0.0

print("Finished Training")

time_elapsed = (time.perf_counter() - time_start)
process = psutil.Process(os.getpid())
mem = process.memory_info().rss/1024.0/1024.0
print("report: %5.1f secs %5.1f MB" % (time_elapsed, mem))



[1,   800] loss: 0.991
[2,   800] loss: 0.338
[3,   800] loss: 0.278
[4,   800] loss: 0.234
[5,   800] loss: 0.202
[6,   800] loss: 0.175
[7,   800] loss: 0.156
[8,   800] loss: 0.140
[9,   800] loss: 0.128
[10,   800] loss: 0.116
Finished Training
report:  57.8 secs 233.4 MB


In [19]:
# validation
dataiter = iter(testloader)
images, labels = dataiter.next()

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

with torch.no_grad():
    for data in testloader:
        images, labels = data[0], data[1]
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    #print('%5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))
    print('%2.2f%%' % (100.0 * class_correct[i] / class_total[i]))
    
print('Accuracy of the image classifier on the 10000 test images is: %2.2f %%' % (
    100.0 * sum(class_correct) / sum(class_total)))

torch.save(net.state_dict(),'./models/net_mnist.pt')


98.46%
97.26%
92.54%
95.24%
92.65%
87.50%
94.64%
96.61%
98.33%
94.81%
Accuracy of the image classifier on the 10000 test images is: 95.06 %


In [21]:
import copy

net_fp32 = copy.deepcopy(net)
net_fp32.eval()
net_fp32.qconfig = torch.quantization.get_default_qconfig('fbgemm')
net_fp32_pre = torch.quantization.prepare(net_fp32)
for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        net_fp32_pre(inputs)

dataiter = iter(testloader)
images, labels = dataiter.next()

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

with torch.no_grad():
    for data in testloader:
        images, labels = data[0], data[1]
        outputs = net_fp32(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    #print('%5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))
    print('%2.2f%%' % (100.0 * class_correct[i] / class_total[i]))
    
print('Accuracy of the image classifier on the 10000 test images is: %2.2f %%' % (
    100.0 * sum(class_correct) / sum(class_total)))
        

98.46%
97.26%
92.54%
95.24%
92.65%
87.50%
94.64%
96.61%
98.33%
94.81%
Accuracy of the image classifier on the 10000 test images is: 95.06 %


In [22]:
net_int8 = torch.quantization.convert(net_fp32_pre)

dataiter = iter(testloader)
images, labels = dataiter.next()

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

with torch.no_grad():
    for data in testloader:
        images, labels = data[0], data[1]
        outputs = net_int8(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    #print('%5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))
    print('%2.2f%%' % (100.0 * class_correct[i] / class_total[i]))
    
print('Accuracy of the image classifier on the 10000 test images is: %2.2f %%' % (
    100.0 * sum(class_correct) / sum(class_total)))

98.46%
97.26%
92.54%
96.83%
92.65%
87.50%
96.43%
96.61%
98.33%
94.81%
Accuracy of the image classifier on the 10000 test images is: 95.38 %


In [2]:
# LeNet-5
import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.quant = torch.quantization.QuantStub()
        self.conv1 = nn.Conv2d(1, 6, 5, padding = 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.conv3 = nn.Conv2d(16, 120, 5)
        self.fc1 = nn.Linear(120, 84)
        self.fc2 = nn.Linear(84, 10)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = F.max_pool2d(torch.tanh(self.conv1(x)), (2, 2)) #c1,s2
        x = F.max_pool2d(torch.tanh(self.conv2(x)), (2, 2)) #c3,s4
        x = torch.tanh(self.conv3(x)) #c5
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = torch.tanh(x)
        x = self.fc2(x)
#         x = F.log_softmax(x, dim=1)
        x = self.dequant(x)

        return x

    
model_fp32 = LeNet()

In [4]:
import time
import os
import psutil

#train & validation
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_fp32.parameters(), lr=0.001, momentum=0.9)


time_start = time.perf_counter()
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model_fp32(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 800 == 799:    # print every 800 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 800))
            running_loss = 0.0

print("Finished Training")

time_elapsed = (time.perf_counter() - time_start)
process = psutil.Process(os.getpid())
mem = process.memory_info().rss/1024.0/1024.0
print("report: %5.1f secs %5.1f MB" % (time_elapsed, mem))

# validation
dataiter = iter(testloader)
images, labels = dataiter.next()

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

with torch.no_grad():
    for data in testloader:
        images, labels = data[0], data[1]
        outputs = model_fp32(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    #print('%5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))
    print('%2.2f%%' % (100.0 * class_correct[i] / class_total[i]))
    
print('Accuracy of the image classifier on the 10000 test images is: %2.2f %%' % (
    100.0 * sum(class_correct) / sum(class_total)))


[1,   800] loss: 1.476
[2,   800] loss: 0.402
[3,   800] loss: 0.244
[4,   800] loss: 0.176
[5,   800] loss: 0.136
[6,   800] loss: 0.114
[7,   800] loss: 0.098
[8,   800] loss: 0.087
[9,   800] loss: 0.079
[10,   800] loss: 0.072
Finished Training
report: 100.1 secs 219.8 MB
100.00%
97.26%
97.01%
96.83%
98.53%
97.50%
96.43%
100.00%
100.00%
96.10%
Accuracy of the image classifier on the 10000 test images is: 97.93 %


NameError: name 'net' is not defined

In [5]:
torch.save(model_fp32.state_dict(),'./models/lenet_mnist.pt')

In [7]:
model_fp32.eval()
model_fp32.qconfig = torch.quantization.get_default_qconfig('fbgemm')
model_fp32_pre = torch.quantization.prepare(model_fp32)
for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        model_fp32_pre(inputs)

dataiter = iter(testloader)
images, labels = dataiter.next()

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

with torch.no_grad():
    for data in testloader:
        images, labels = data[0], data[1]
        outputs = model_fp32(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    #print('%5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))
    print('%2.2f%%' % (100.0 * class_correct[i] / class_total[i]))
    
print('Accuracy of the image classifier on the 10000 test images is: %2.2f %%' % (
    100.0 * sum(class_correct) / sum(class_total)))


100.00%
97.26%
97.01%
96.83%
98.53%
97.50%
96.43%
100.00%
100.00%
96.10%
Accuracy of the image classifier on the 10000 test images is: 97.93 %


In [8]:
model_int8 = torch.quantization.convert(model_fp32_pre)

dataiter = iter(testloader)
images, labels = dataiter.next()

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

with torch.no_grad():
    for data in testloader:
        images, labels = data[0], data[1]
        outputs = model_int8(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    #print('%5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))
    print('%2.2f%%' % (100.0 * class_correct[i] / class_total[i]))
    
print('Accuracy of the image classifier on the 10000 test images is: %2.2f %%' % (
    100.0 * sum(class_correct) / sum(class_total)))

100.00%
97.26%
97.01%
96.83%
97.06%
97.50%
96.43%
100.00%
98.33%
96.10%
Accuracy of the image classifier on the 10000 test images is: 97.61 %
