In [13]:
#We are going to look at how to build a class (a model) that
#helps us calculate all of that in an easy and efficient way (with multiple neurons)
import torch, torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
from torch import nn, optim

In [4]:
#Turn every PIL image into a float tensor (ToTensor scales pixel values to [0, 1])
transform = transforms.Compose([
    transforms.ToTensor(),
])

#MNIST training split, stored under ./data (fetched on first use because download=True)
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

#Serve the training set in reshuffled mini-batches of 64 samples
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=64,
    shuffle=True,
)

In [5]:
#can use any other name
class Net(nn.Module):
    """Fully connected classifier with a single sigmoid hidden layer.

    The layer sizes default to the MNIST setup used in this notebook
    (28*28 flattened pixels in, 512 hidden neurons, 10 classes out), so
    ``Net()`` builds exactly the same architecture as before. Pass other
    sizes to reuse the class on different data.

    Args:
        in_features: Length of each flattened input vector.
        hidden_units: Number of neurons in the hidden layer.
        num_classes: Number of output neurons, one per class.
    """

    #Initializer
    def __init__(self, in_features: int = 28*28, hidden_units: int = 512, num_classes: int = 10):
        super().__init__()
        #Linear layer (size of one flattened image, number of hidden neurons - 512 is just a common choice)
        self.hidden = nn.Linear(in_features, hidden_units)
        #One output neuron per class (MNIST contains 10 classes, hence 10 by default)
        self.output = nn.Linear(hidden_units, num_classes)

        #Sigmoid is used after the hidden layer; with several output neurons it
        #will not do as expected, so the output uses a softmax, which spreads
        #the probability across the k classes
        self.sigmoid = nn.Sigmoid()
        #dim=1 normalises over the class scores of each sample, i.e. along every
        #row of the (batch, num_classes) output. LogSoftmax returns
        #log-probabilities, which is the input nn.NLLLoss expects
        self.softmax = nn.LogSoftmax(dim=1)

    #Sequence for forward propagation
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the forward pass and return per-class log-probabilities.

        Args:
            x: Batch of flattened inputs, shape (batch, in_features).

        Returns:
            Tensor of shape (batch, num_classes) holding log-probabilities.
        """
        x = self.hidden(x)
        x = self.sigmoid(x)
        x = self.output(x)
        x = self.softmax(x)

        return x

In [6]:
#Build an instance of the network defined above
model = Net()

In [7]:
#A bare expression on the last line of a cell displays its repr - here, the layers registered on the model
model

Net(
  (hidden): Linear(in_features=784, out_features=512, bias=True)
  (output): Linear(in_features=512, out_features=10, bias=True)
  (sigmoid): Sigmoid()
  (softmax): LogSoftmax(dim=1)
)

In [8]:
#In PyTorch the loss function is conventionally stored in a variable called `criterion`.
#NLLLoss (negative log-likelihood) takes log-probabilities as input, which is
#what the LogSoftmax at the end of Net produces
criterion = nn.NLLLoss()

In [9]:
#Take one batch (images and their labels) from the trainloader
images, labels = next(iter(trainloader))

In [10]:
#Flatten each image of the batch into a single row vector, e.g.
#(batch, 1, 28, 28) -> (batch, 784), which is the input size of the first Linear layer
images = images.view(images.size(0), -1)

#Forward pass. Despite the variable name, the model output is log-probabilities
#(the network ends in LogSoftmax), not raw logits
logits = model(images)

#Negative log-likelihood of the true labels for this batch
loss = criterion(logits, labels)
loss

tensor(2.3735, grad_fn=<NllLossBackward>)

In [11]:
#Backward propagation: computes d(loss)/d(parameter) and stores it in .grad of every model parameter.
#PyTorch accumulates into .grad, so a training loop has to zero the gradients before each backward pass
loss.backward()

In [14]:
#After getting gradients, update the weights.
#Stochastic gradient descent over all parameters of the model, with learning rate 0.01
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [15]:
#Apply one SGD update: each parameter is moved by -lr * its gradient
optimizer.step()

In [None]:
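#So, we did a forward propagation,
#calculated the loss, and then did the backward propagation to get the gradients.
#Then, using the gradients, we updated the weights with the optimizer,
#all in PyTorch.
#All of this is a single training step on one batch, not a full epoch:
#an epoch repeats these steps for every batch in the trainloader (zeroing the gradients before each backward pass)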