In [1]:
import torch
import torchvision as tv
from torchvision import datasets, transforms
from torch.utils.data import DataLoader as DL

In [2]:
 #separate MNIST dataset from torchvision into train and test data

'''
transform given data into tensors using 'transforms' 
'''

train = datasets.MNIST("",
                      train = True,
                      download = True,
                      transform = transforms.Compose([transforms.ToTensor()]))

test = datasets.MNIST("",
                      train = False,
                      download = True,
                      transform = transforms.Compose([transforms.ToTensor()]))

In [3]:
#convert format for iteration through the data

'''
batch_size = how many inputs to pass to model at a time
shuffle = to shuffle inputs or not
'''

trainset = DL(train, batch_size=8, shuffle=True)

testset = DL(test, batch_size=8, shuffle=True)

In [4]:
#check above data once

for data in trainset:
    print(data)
    break
    
for data in testset:
    print(data)
    break

[tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        ...,


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0

# Neural Network Part

In [5]:
import torch.nn as nn
import torch.nn.functional as F

In [6]:
#create class for the neural network

'''
fully connected layer = fc
nn.Linear(input, ouput)
initial input =  image size = 28*28
middle layers = 3 layers of 64 neurons
final output = number of classes (0-9) = 10
'''

class Net(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28*28, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, 10)

#ReLU activation function on hidden layers
#Use log_softmax for output to get probability for classes
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        
        return F.log_softmax(x, dim=1)

In [7]:
#view created network

net = Net()
net

Net(
  (fc1): Linear(in_features=784, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)

In [8]:
#check with randomly generated input

X = torch.rand((28,28))
#flatten the image matrix to 1-D
#-1 = array of any size
X = X.view(-1, 28*28)
output = net(X)

In [9]:
output

tensor([[-2.3480, -2.3681, -2.1974, -2.3142, -2.1847, -2.3432, -2.4020, -2.2065,
         -2.2574, -2.4409]], grad_fn=<LogSoftmaxBackward>)

Loss and Optimizer

In [10]:
import torch.optim as optim

In [11]:
#lr = learning rate = 0.001
opt = optim.Adam(net.parameters(), lr = 0.001)

#EPOCHS = number of times to iterate over dataset
EPOCHS = 3

In [15]:
#train network

'''
loss = error
zero_grad() = makes gradient zero after batch
nll_loss = calculates loss to update weights
if data is 1 hot vector, use mean squared error
backward() = propogate the weights backward
opt.step() = adjusts the weights
'''

for epoch in range(EPOCHS):
    for data in trainset:
        X, y = data
        net.zero_grad()
        output = net(X.view(-1, 28*28))
        loss = F.nll_loss(output, y)
        loss.backward()
        opt.step() 
    print(loss)

tensor(0.0365, grad_fn=<NllLossBackward>)
tensor(0.0750, grad_fn=<NllLossBackward>)
tensor(0.1559, grad_fn=<NllLossBackward>)


In [18]:
#check the model

correct = 0
total = 0

'''
no_grad() = as test data will not be used for optimization,
we do not need to calculate gradient for it
'''
with torch.no_grad():
    for data in trainset:
        X, y = data
        output = net(X.view(-1, 28*28))
        for idx, i in enumerate(output):
            if torch.argmax(i) == y[idx]:
                correct += 1
            total += 1
            
print("Accuracy: ",round(correct/total, 3))

Accuracy:  0.98
