In [1]:
# We are going to look at how to build a model as a class (an nn.Module subclass)
# that computes the forward pass through layers of multiple neurons in an easy
# and efficient way, and then train it over multiple iterations.

import torch, torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
from torch import nn, optim

In [2]:
# Pre-processing applied to every sample: convert the image to a tensor.
transform = transforms.Compose([
    transforms.ToTensor(),
])

# MNIST training split, stored under ./data (download=True requests a download).
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

# Serve the training set in shuffled mini-batches of 64 samples.
trainloader = torch.utils.data.DataLoader(
    trainset,
    shuffle=True,
    batch_size=64,
)

In [3]:
#can use any other name
class Net(nn.Module):
    """Fully connected classifier with one sigmoid-activated hidden layer.

    The forward pass is ``Linear -> Sigmoid -> Linear -> LogSoftmax``, so the
    network returns log-probabilities over the classes rather than raw scores
    or probabilities.

    Args:
        in_features: Length of each flattened input sample. Defaults to
            ``28 * 28``.
        hidden_units: Number of neurons in the hidden layer. Defaults to 512.
        num_classes: Number of output neurons, one per class. Defaults to 10.
    """

    def __init__(self, in_features=28 * 28, hidden_units=512, num_classes=10):
        super().__init__()
        # Layers are registered in the same order as before so the module
        # summary and the order of random weight initialisation are unchanged.
        self.hidden = nn.Linear(in_features, hidden_units)
        self.output = nn.Linear(hidden_units, num_classes)

        # A sigmoid squashes the hidden activations; for the multi-class output
        # a (log-)softmax is used instead so the scores form a distribution
        # over the classes.
        self.sigmoid = nn.Sigmoid()
        # dim=1 normalises across the class scores of each sample, i.e. along
        # every row of a (batch, num_classes) output.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Batch of flattened samples whose last dimension equals the
                ``in_features`` of the hidden layer.

        Returns:
            Tensor of log-probabilities with one column per class.
        """
        hidden_activation = self.sigmoid(self.hidden(x))
        class_scores = self.output(hidden_activation)
        return self.softmax(class_scores)

In [4]:
# Seed PyTorch's global random number generator before building the model so
# that the random weight initialisation is repeatable after a kernel restart.
torch.manual_seed(0)
model = Net()

In [5]:
# Evaluating the module as the cell's last expression displays its layer summary.
model

Net(
  (hidden): Linear(in_features=784, out_features=512, bias=True)
  (output): Linear(in_features=512, out_features=10, bias=True)
  (sigmoid): Sigmoid()
  (softmax): LogSoftmax(dim=1)
)

In [6]:
# Loss function: NLLLoss expects log-probabilities, which is what the model's
# final LogSoftmax layer produces.
criterion = nn.NLLLoss()
# Optimizer: once the gradients are computed, SGD uses them to update the weights.
optimizer = optim.SGD(model.parameters(), lr=0.01)

for epoch in range(5):
    running_loss = 0
    for images, labels in trainloader:
        # Flatten every image in the batch into one vector per sample.
        images = images.view(images.shape[0], -1)
        # backward() adds to the gradients already stored on the parameters,
        # so clear what is left over from the previous batch first.
        optimizer.zero_grad()
        # Log-probabilities for this batch (the model ends in LogSoftmax).
        log_probs = model(images)
        # Loss for this specific batch of images.
        loss = criterion(log_probs, labels)
        loss.backward()  # back-propagation: compute the gradients
        optimizer.step()  # apply the weight update
        running_loss += loss.item()
    # Average of the per-batch losses over this epoch.
    print('The running loss is: {}'.format(running_loss/len(trainloader)))
        

The running loss is: 2.092078208160807
The running loss is: 1.4586845521987883
The running loss is: 0.9584713845110652
The running loss is: 0.7238238281659735
The running loss is: 0.6044687770450039


In [None]:
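#* We reset the gradients to zero before each batch because PyTorch accumulates
# (adds up) gradients on every backward() call; without the reset, each update
# would also include gradients left over from earlier batches.
# (This is unrelated to the vanishing and exploding gradient problems.)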