# PyTorch Tutorial 12 - Activation Functions

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Popular activation functions
# - Step function: not used in practice (its gradient is zero almost everywhere).
# - Sigmoid: common choice for binary classification, typically at the last layer. Output range (0, 1).
# - TanH: often used in hidden layers. Output range (-1, 1).
# - ReLU: the most popular choice; use it if you don't know what to use. f(x) = max(0, x).
# - Leaky ReLU: improved version of ReLU. Keeps a small slope for x < 0 so units do not get stuck with a zero gradient (the "dying ReLU" problem). Output range (-inf, inf).
# - Softmax: often used at the last layer of multi-class classification tasks.

# PyTorch Tutorial 13 - Feed-Forward Neural Network

In [2]:
# MNIST
# DataLoader, Transformation
# Multilayer Neural Net, activation function
# Loss and Optimizer
# Training Loop (batch training)
# Model evaluation 
# GPU support

In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

In [12]:
# Device configuration: use the GPU when CUDA is available, else the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Hyperparameters
input_size = 784       # each 28 x 28 image is flattened into one vector
hidden_size = 100      # units in the hidden layer
num_classes = 10       # one output score per digit class
num_epochs = 2
batch_size = 100
learning_rate = 0.001

# MNIST datasets; ToTensor() converts every image to a tensor.
# Only the training split requests a download into ./data.
train_dataset = torchvision.datasets.MNIST(
    root="./data",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root="./data",
    train=False,
    transform=transforms.ToTensor(),
)

# Batched loaders: training batches are shuffled, test batches keep dataset order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)

# Sanity check: pull one training batch and show its shapes.
examples = iter(train_loader)
samples, labels = next(examples)
print(samples.shape, labels.shape)

torch.Size([100, 1, 28, 28]) torch.Size([100])


In [3]:
# Optional: preview the first six training images (uncomment to run).
# for i in range(6):
#     plt.subplot(2, 3, i+1)
#     plt.imshow(samples[i][0]
#                #, cmap="gray"
#                )
# plt.show()


: 

In [14]:
class NeuralNet(nn.Module):
    """Fully connected classifier with a single hidden layer.

    Architecture: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> num_classes).
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the two linear layers and the ReLU between them.

        Args:
            input_size: number of features per (flattened) input sample.
            hidden_size: number of units in the hidden layer.
            num_classes: number of output scores, one per class.
        """
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch ``x``.

        No softmax is applied here: in this notebook the scores go to
        nn.CrossEntropyLoss, which applies it internally.
        """
        hidden = self.relu(self.l1(x))
        return self.l2(hidden)
    

# Build the network and move its parameters to the selected device, so they
# live on the same device as the batches that the loops below send there
# with .to(device). Without this, a CUDA machine fails in the forward pass
# because the inputs are on the GPU while the weights are still on the CPU.
model = NeuralNet(input_size, hidden_size, num_classes).to(device)

# loss & optimizer
criterion = nn.CrossEntropyLoss()  # takes raw logits; applies softmax internally
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


# Training loop (mini-batch training)
n_total_steps = len(train_loader)  # number of batches per epoch

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Each batch arrives as (100, 1, 28, 28); the first linear layer
        # expects (100, 784), so flatten every image into one row.
        images = images.reshape(-1, input_size)
        images = images.to(device)
        labels = labels.to(device)

        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 100 steps.
        if (i+1) % 100 == 0:
            print(f"epoch: {epoch+1} / {num_epochs}, step: {i+1} / {n_total_steps}, loss: {loss.item():.4f}")




# testing / evaluation
# Switch to inference mode. This model has no dropout/batch-norm layers, so it
# changes nothing today, but it keeps the evaluation correct if such layers are added.
model.eval()

n_correct = 0
n_samples = 0
with torch.no_grad():  # gradients are not needed for evaluation
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)  # ground-truth labels
        outputs = model(images)

        # torch.max over dim=1 returns (values, indices); the index of the
        # largest score in each row is the predicted class. Reading `.indices`
        # avoids assigning to `_`, which Jupyter/IPython uses for the last cell output.
        predictions = torch.max(outputs, dim=1).indices
        n_samples += labels.shape[0]  # number of samples in the current batch
        n_correct += (predictions == labels).sum().item()

acc = 100.0 * n_correct / n_samples

print(f"accuracy: {acc}")

epoch: 1 / 2, step: 100 / 600, loss: 0.4394
epoch: 1 / 2, step: 200 / 600, loss: 0.3250
epoch: 1 / 2, step: 300 / 600, loss: 0.2819
epoch: 1 / 2, step: 400 / 600, loss: 0.3069
epoch: 1 / 2, step: 500 / 600, loss: 0.2044
epoch: 1 / 2, step: 600 / 600, loss: 0.1854
epoch: 2 / 2, step: 100 / 600, loss: 0.1359
epoch: 2 / 2, step: 200 / 600, loss: 0.1319
epoch: 2 / 2, step: 300 / 600, loss: 0.2587
epoch: 2 / 2, step: 400 / 600, loss: 0.3138
epoch: 2 / 2, step: 500 / 600, loss: 0.1458
epoch: 2 / 2, step: 600 / 600, loss: 0.2091
accuracy: 95.11


In [15]:
# Display the model's repr: its layers and their sizes.
model

NeuralNet(
  (l1): Linear(in_features=784, out_features=100, bias=True)
  (relu): ReLU()
  (l2): Linear(in_features=100, out_features=10, bias=True)
)

In [16]:
# Display the loss function's repr.
criterion

CrossEntropyLoss()

In [17]:
# Display the optimizer's repr, including the settings of its parameter group.
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)