In [19]:
import torch, torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
from torch import nn, optim

In [20]:
# Convert every image to a tensor; no other preprocessing is applied.
transform = transforms.Compose([
    transforms.ToTensor(),
])

# MNIST training split, downloaded into ./data when it is not already there.
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

# Serve the training set in mini-batches of 64, reshuffled on every pass.
trainloader = torch.utils.data.DataLoader(
    trainset,
    shuffle=True,
    batch_size=64,
)

In [21]:
#The class name is arbitrary - any valid name works
class Net(nn.Module):
    """Fully connected classifier with one hidden layer.

    Pipeline: flatten -> Linear -> ReLU -> Linear -> LogSoftmax.
    The output holds log-probabilities, so it pairs with ``nn.NLLLoss``.

    Args:
        in_features: Number of values per flattened sample
            (default 28*28, the size of the images used in this notebook).
        hidden_units: Number of neurons in the hidden layer (default 512).
        num_classes: Number of output classes (default 10).
    """

    def __init__(self, in_features=28*28, hidden_units=512, num_classes=10):
        super().__init__()
        # The output size of one layer must equal the input size of the next.
        self.hidden = nn.Linear(in_features, hidden_units)

        # A second hidden layer, nn.Linear(hidden_units, hidden_units), could be
        # inserted here (and in forward) to try for better accuracy.

        self.output = nn.Linear(hidden_units, num_classes)

        # NOTE: the attribute keeps its earlier name `sigmoid` so the module's
        # printed layer names do not change, but the activation is a ReLU.
        self.sigmoid = nn.ReLU()
        # dim=1 normalises over the class scores of each sample (each row).
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Batch of samples. The first dimension is the batch; all
                remaining dimensions are flattened and must total
                ``in_features`` values per sample. Already-flattened
                ``(batch, in_features)`` input is passed through unchanged.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with log-probabilities.
        """
        # No-op for 2-D input; lets callers pass image batches directly.
        x = torch.flatten(x, start_dim=1)
        x = self.hidden(x)
        x = self.sigmoid(x)
        x = self.output(x)
        return self.softmax(x)

In [22]:
# Build one instance of the Net classifier; it is still untrained at this point.
model = Net()

In [23]:
# Show the module's repr to check the layers and their sizes.
model

Net(
  (hidden): Linear(in_features=784, out_features=512, bias=True)
  (output): Linear(in_features=512, out_features=10, bias=True)
  (sigmoid): ReLU()
  (softmax): LogSoftmax(dim=1)
)

In [24]:
# By PyTorch convention the loss function is called the "criterion".
# NLLLoss expects log-probabilities, which the model's LogSoftmax provides.
criterion = nn.NLLLoss()
# The optimizer updates the weights once backward() has computed the gradients.
optimizer = optim.SGD(model.parameters(), lr=0.01)

for epoch in range(5):
    running_loss = 0
    for images, labels in trainloader:
        # Flatten every image into a single row: (batch, 784).
        images = images.view(images.shape[0], -1)
        # Gradients accumulate across backward() calls, so reset them per batch.
        optimizer.zero_grad()
        log_probs = model(images)  # forward pass for this batch
        loss = criterion(log_probs, labels)  # loss specific to this batch
        loss.backward()  # back-propagation
        optimizer.step()
        running_loss += loss.item()
    # Mean of the per-batch losses for this epoch. This used to sit in a
    # `for ... else` clause, which always ran because the loop has no `break`.
    print('The running loss is: {}'.format(running_loss/len(trainloader)))
        

The running loss is: 1.1613862334347482
The running loss is: 0.47632771385694617
The running loss is: 0.3847478471839352
The running loss is: 0.3457400458914512
The running loss is: 0.3215620513918049


In [25]:
#Switching the activation function to ReLU makes a huge difference: the training loss ends up much lower

In [26]:
# Pull a single mini-batch from the loader to try the trained model on.
# NOTE(review): trainloader serves the training split, so these are samples
# the model has already seen, not held-out data.
images, labels = next(iter(trainloader))

In [27]:
# Keep only the first image of the batch and flatten it into one row, so the
# model still receives a 2-D input: a "batch" holding a single image.
img = torch.flatten(images[:1], start_dim=1)

In [28]:
# Prediction only: no gradients are needed, so disable autograd tracking.
with torch.no_grad():
    logprobs = model(img)  # log-probabilities (the model ends in LogSoftmax)

In [29]:
logprobs
# One row (the single image) holding one log-probability per class

tensor([[-12.4698,  -0.0229,  -4.3339,  -5.9972, -10.1029,  -8.7949,  -7.8413,
          -6.9813,  -5.2560,  -8.2694]])

In [30]:
# The model outputs log-probabilities; exponentiate to recover probabilities.
probs = logprobs.exp()
probs

tensor([[3.8411e-06, 9.7741e-01, 1.3117e-02, 2.4856e-03, 4.0962e-05, 1.5150e-04,
         3.9318e-04, 9.2907e-04, 5.2163e-03, 2.5624e-04]])

In [31]:
# Sanity check: the class probabilities should add up to 1.
probs.sum()

tensor(1.0000)

In [33]:
# Predicted class = position of the largest probability.
# Each position in `probs` stands for one class, so the index of the
# maximum is the model's prediction.

probs.argmax()

tensor(1)