In [1]:
import numpy as np
import torch.nn as nn
import torch
import torch.nn.functional as F
from torchvision import datasets, transforms

In [68]:
mnist_train = datasets.MNIST(root="./datasets", train=True, transform=transforms.ToTensor(), download=True)
mnist_test = datasets.MNIST(root="./datasets", train=False, transform=transforms.ToTensor(), download=True)
train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(mnist_test, batch_size=100, shuffle=False)

In [20]:
acc=[]

In [73]:
#Convolutional neural network model implementation 
class MNIST_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1,16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16,32, kernel_size=3, padding=1)
        self.conv3=nn.Conv2d(32,64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(7*7*64, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        m = nn.Sigmoid()
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2)
        x=self.conv3(x)
        x=F.relu(x)
        x = x.view(-1, 7*7*64)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        return x

In [44]:
#Adam+CrossEntropyLoss
model=MNIST_CNN()
cross_entropy=nn.CrossEntropyLoss()#cross_entropy loss 
optimizer=torch.optim.Adam(model.parameters(),lr=0.001)#optimizer

In [35]:
%%time
#train runs 
for epoch in range(2): 
    for images, labels in train_loader:
        optimizer.zero_grad()
        x = images  
        y = model(x)
        loss =cross_entropy(y, labels)
        loss.backward()
        optimizer.step()

#test run 
correct = 0
total = len(mnist_test)
with torch.no_grad():
    for images, labels in test_loader:
        x = images  
        y = model(x)
        predictions = torch.argmax(y, dim=1)
        correct += torch.sum((predictions == labels).float())

acc.append(correct/total)

Wall time: 53.7 s


In [19]:
time=np.array([123,115,128])
for i in range(3):
    print("For",i+1,"convolutional layers accuracy=",acc[i],"& execution time=",time[i],"s")

For 1 convolutional layers accuracy= tensor(0.9787) & execution time= 123 s
For 2 convolutional layers accuracy= tensor(0.9861) & execution time= 115 s
For 3 convolutional layers accuracy= tensor(0.9859) & execution time= 128 s


In [40]:
time=np.array([180,116,85,53.7])
out=[16,14,8,4]
for i in range(4):
    print("For 1 convolutional layer with output channel size of",out[i],"accuracy=",acc[i],"& execution time=",time[i],"s")

For 1 convolutional layer with output channel size of 16 accuracy= tensor(0.9795) & execution time= 180.0 s
For 1 convolutional layer with output channel size of 14 accuracy= tensor(0.9812) & execution time= 116.0 s
For 1 convolutional layer with output channel size of 8 accuracy= tensor(0.9744) & execution time= 85.0 s
For 1 convolutional layer with output channel size of 4 accuracy= tensor(0.9608) & execution time= 53.7 s


In [53]:
#Adam+NLLoss
model=MNIST_CNN()
negative_loglikelihhodloos=torch.nn.NLLLoss()#negative log likelihood loss
optimizer=torch.optim.Adam(model.parameters(),lr=0.001)#optimizer

In [54]:
%%time
#train runs 
for epoch in range(2): 
    for images, labels in train_loader:
        optimizer.zero_grad()
        x = images  
        y = model(x)
        loss =negative_loglikelihhodloos(y, labels)
        loss.backward()
        optimizer.step()

#test run 
correct = 0
total = len(mnist_test)
with torch.no_grad():
    for images, labels in test_loader:
        x = images  
        y = model(x)
        predictions = torch.argmax(y, dim=1)
        correct += torch.sum((predictions == labels).float())

print("Accuracy with computing NLLLoss=",correct/total)

Accuracy with computing NLLLoss= tensor(0.1135)
Wall time: 2min 39s


In [74]:
acc_lr=[]

In [79]:
#SGD+CrossEntropyLoss
model=MNIST_CNN()
cross_entropy=nn.CrossEntropyLoss()#cross_entropy loss 
optimizer=torch.optim.SGD(model.parameters(),lr=0.1)#optimizer

In [80]:
%%time
#train runs 
for epoch in range(2): 
    for images, labels in train_loader:
        optimizer.zero_grad()
        x = images  
        y = model(x)
        loss =cross_entropy(y, labels)
        loss.backward()
        optimizer.step()

#test run 
correct = 0
total = len(mnist_test)
with torch.no_grad():
    for images, labels in test_loader:
        x = images  
        y = model(x)
        predictions = torch.argmax(y, dim=1)
        correct += torch.sum((predictions == labels).float())

acc_lr.append(correct/total)

Wall time: 1min 59s


In [82]:
lr=[0.001,0.01,0.1]
for i in range(3):
    print("Accuracy with SGD lr",lr[i],"=",acc_lr[i])

Accuracy with SGD lr 0.001 = tensor(0.1721)
Accuracy with SGD lr 0.01 = tensor(0.9008)
Accuracy with SGD lr 0.1 = tensor(0.9815)


In [108]:
acc_batch=[]

In [122]:
mnist_train = datasets.MNIST(root="./datasets", train=True, transform=transforms.ToTensor(), download=True)
mnist_test = datasets.MNIST(root="./datasets", train=False, transform=transforms.ToTensor(), download=True)
train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(mnist_test, batch_size=100, shuffle=False)

In [123]:
model=MNIST_CNN()
cross_entropy=nn.CrossEntropyLoss()#cross_entropy loss 
optimizer=torch.optim.SGD(model.parameters(),lr=0.001)#optimizer

In [124]:
%%time
#train runs 
for epoch in range(2): 
    for images, labels in train_loader:
        optimizer.zero_grad()
        x = images  
        y = model(x)
        loss =cross_entropy(y, labels)
        loss.backward()
        optimizer.step()

#test run 
correct = 0
total = len(mnist_test)
with torch.no_grad():
    for images, labels in test_loader:
        x = images  
        y = model(x)
        predictions = torch.argmax(y, dim=1)
        correct += torch.sum((predictions == labels).float())

acc_batch.append(correct/total)

Wall time: 2min 7s


In [125]:
time=[127,161,214]
batch_size=[100,50,10]
for i in range(3):
    print("Accuracy with SGD batch size",batch_size[i],"=",acc_batch[i],"execution time=",time[i])

Accuracy with SGD batch size 100 = tensor(0.1404) execution time= 127
Accuracy with SGD batch size 50 = tensor(0.2829) execution time= 161
Accuracy with SGD batch size 10 = tensor(0.8904) execution time= 214


In [14]:
%%time
#train runs 
for epoch in range(2): 
    for images, labels in train_loader:
        optimizer.zero_grad()
        x = images  
        y = model(x)
        loss =cross_entropy(y, labels)
        loss.backward()
        optimizer.step()

#test run 
correct = 0
total = len(mnist_test)
with torch.no_grad():
    for images, labels in test_loader:
        x = images  
        y = model(x)
        predictions = torch.argmax(y, dim=1)
        correct += torch.sum((predictions == labels).float())

print("Accuracy with avg pooling=",correct/total)

Accuracy with avg pooling= tensor(0.9856)
Wall time: 2min 29s


In [42]:
%%time
#train runs 
for epoch in range(2): 
    for images, labels in train_loader:
        optimizer.zero_grad()
        x = images  
        y = model(x)
        loss =cross_entropy(y, labels)
        loss.backward()
        optimizer.step()

#test run 
correct = 0
total = len(mnist_test)
with torch.no_grad():
    for images, labels in test_loader:
        x = images  
        y = model(x)
        predictions = torch.argmax(y, dim=1)
        correct += torch.sum((predictions == labels).float())

print("Accuracy with Tanh activation layer=",correct/total)

Accuracy with Tanh activation layer= tensor(0.9860)
Wall time: 1min 50s


In [45]:
%%time
#train runs 
for epoch in range(2): 
    for images, labels in train_loader:
        optimizer.zero_grad()
        x = images  
        y = model(x)
        loss =cross_entropy(y, labels)
        loss.backward()
        optimizer.step()

#test run 
correct = 0
total = len(mnist_test)
with torch.no_grad():
    for images, labels in test_loader:
        x = images  
        y = model(x)
        predictions = torch.argmax(y, dim=1)
        correct += torch.sum((predictions == labels).float())

print("Accuracy with Sigmoid activation layer=",correct/total)

Accuracy with Sigmoid activation layer= tensor(0.9229)
Wall time: 2min 32s
