In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.autograd import Variable

In [2]:
input_size = 784       # Number (features) of input neurons (image pixels)
hidden_size = 400      # Number of hidden neurons !!! Better choose it like that = (input+ output)/2? so 780+ 10 ?2 = ~400
out_size = 10       # Number of classes (0-9)
epochs = 10            # How many times we pass our entire dataset into our network
batch_size = 100       # Input size of the data during one iteration
learning_rate = 0.001  # How fast we are Learning

In [3]:
train_dataset= datasets.MNIST(root='./data',
                             train=True,
                              transform=transforms.ToTensor(),
                              download=True)

test_dataset=datasets.MNIST(root='./data',
                           train=False,
                           transform=transforms.ToTensor())

In [4]:
# Make Data iterable by Loading it ot a Loader. Shuffle the training data to make it independant of the order
train_loader= torch.utils.data.DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=False)

test_loader=torch.utils.data.DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=False)

In [5]:
class Net(nn.Module):
    def __init__(self,input_size,hidden_size,out_size):
        super(Net,self).__init__()
        self.fc1=nn.Linear(input_size,hidden_size) # First Layer
        self.relu = nn.ReLU()                      # First Layer Activation
        self.fc2 = nn.Linear(hidden_size,hidden_size) # Second Layer
        self.fc3 = nn.Linear(hidden_size,out_size)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        return out        

In [6]:
# Create an object of the class,which represents our network
net= Net(input_size,hidden_size,out_size)
CUDA=torch.cuda.is_available()
if CUDA:
    net=net.cuda()
# The Loss Function. The Cross Entropy Loss comes along with Softmax. Therefore, noo need to specify Softmax as well
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(),lr=learning_rate)

In [7]:
list(net.parameters())

[Parameter containing:
 tensor([[-0.0247,  0.0329, -0.0077,  ...,  0.0052, -0.0143,  0.0058],
         [-0.0168,  0.0086,  0.0022,  ..., -0.0273, -0.0116, -0.0295],
         [ 0.0343, -0.0313, -0.0166,  ..., -0.0194,  0.0231, -0.0109],
         ...,
         [-0.0047, -0.0116,  0.0017,  ..., -0.0054, -0.0332, -0.0145],
         [ 0.0161, -0.0080, -0.0245,  ..., -0.0338, -0.0296,  0.0283],
         [-0.0324, -0.0212,  0.0244,  ..., -0.0122, -0.0277,  0.0174]],
        requires_grad=True),
 Parameter containing:
 tensor([-0.0335,  0.0106,  0.0203,  0.0123, -0.0057, -0.0174, -0.0147,  0.0237,
         -0.0273,  0.0352,  0.0286,  0.0093, -0.0326, -0.0056, -0.0109,  0.0233,
          0.0347, -0.0005,  0.0261,  0.0102, -0.0329,  0.0356, -0.0237,  0.0036,
         -0.0259, -0.0320,  0.0277,  0.0174,  0.0012, -0.0156,  0.0107,  0.0240,
          0.0350,  0.0117,  0.0287, -0.0318, -0.0337,  0.0253,  0.0061, -0.0042,
          0.0165,  0.0082, -0.0242, -0.0083,  0.0211, -0.0094,  0.0339, -0.0140

In [8]:
# train the network
correct_train = 0
total_train = 0
for epoch in range(epochs):
    for i, (images,labels) in enumerate(train_loader):
        # Flatten the image from size (batch,1,28,28) where 1 represents the number of channels (grayscale -->1) and wrap it in a variable
        images=Variable(images.view(-1,28*28))
        labels=Variable(labels)
        if CUDA:
            images=images.cuda()
            labels=labels.cuda()
        # Clear the param_grad (parameters) in param=param - lr.param_grad, so it wont be accumulated
        optimizer.zero_grad()
        outputs=net(images)                          # Forward pass
        _,predicted= torch.max(outputs.data,1)         # Return the second argument of torch.max which represents 
                                                     # the index location of the each maximum value found 
        total_train += labels.size(0)
        if CUDA:                                     # transer it back to CPU cz .sum is not implemented in GPU
            correct_train +=(predicted.cpu() == labels.cpu()).sum()
        else:
            correct_train += (predicted == labels).sum()
        
        loss = criterion(outputs,labels)             # Difference between the actual and predicted (loss function)
        loss.backward()                              # Backpropagation
        optimizer.step()                             # Update the weights
        
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}],Iteration [{}/{}],Training Loss: {}, Training Accuracy: {}%'.format
                 (epoch+1,epochs,i+1,len(train_dataset)//batch_size,loss,(100*correct_train/total_train)))
            
print(" DONE TRAINING!")

Epoch [1/10],Iteration [100/600],Training Loss: 0.18981724977493286, Training Accuracy: 82.30000305175781%
Epoch [1/10],Iteration [200/600],Training Loss: 0.275746613740921, Training Accuracy: 86.69000244140625%
Epoch [1/10],Iteration [300/600],Training Loss: 0.3598189651966095, Training Accuracy: 88.92666625976562%
Epoch [1/10],Iteration [400/600],Training Loss: 0.2954384684562683, Training Accuracy: 90.2074966430664%
Epoch [1/10],Iteration [500/600],Training Loss: 0.1884990930557251, Training Accuracy: 90.97799682617188%
Epoch [1/10],Iteration [600/600],Training Loss: 0.23214274644851685, Training Accuracy: 91.8316650390625%
Epoch [2/10],Iteration [100/600],Training Loss: 0.06403125077486038, Training Accuracy: 92.46571350097656%
Epoch [2/10],Iteration [200/600],Training Loss: 0.12085876613855362, Training Accuracy: 92.93499755859375%
Epoch [2/10],Iteration [300/600],Training Loss: 0.10519495606422424, Training Accuracy: 93.35111236572266%
Epoch [2/10],Iteration [400/600],Training Lo

In [9]:
# visialize the train Loader
for i, (images,labels) in enumerate(train_loader):
    print(images.size())
    images=images.view(-1,784)
    print(images.size())

torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([

torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([

torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([

torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([

In [10]:
# examples of using logical operators between tensors
a=torch.tensor([1,2,3,6])
b=torch.tensor([1,4,3,6])
print(a==b)
print((a==b).sum())

tensor([ True, False,  True,  True])
tensor(3)


In [12]:
# test the network (No loss and weight calculaiton , no weight update)
correct=0
total=0
for images,labels in test_loader:
    images=Variable(images.view(-1,28*28))
    if CUDA:
        images=images.cuda()
    # for each input (sample/row) in the batch, the output  will contain 10 elements
    outputs=net(images)
    # we could alse write : predicted=outputs.data.max(1)[1]
    _, predicted = torch.max(outputs.data,1)
    total += labels.size(0)    # increment the total count (100)
    # We can also use: correct += predicted.eq(labels).sum()
    if CUDA:
        correct += (predicted.cpu() == labels.cpu()).sum()
    else:
        correct += (predicted == labels).sum()
        
print('Final Test Accuracy: %d %%' % (100 * correct / total))

Final Test Accuracy: 98 %
