In [2]:
"""Class 14. Multilayer Perceptron and backpropagation

Objectives:
1. Understand deep feed forward network
2. How backpropagation works
3. Implement a deep feed forward network using PyTorch
"""

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [3]:
""" Deep Neural Network
1. Deep Feedforward network: data is passed only in the forward direction layerwise
2. Recurrent Neural Network: data can passed any layer to any layer.
"""

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


### Data Pipeline

In [4]:
"""
Image: A 2D matrix of pixel values.
Color Channels: 
  - Grayscale image has 1 channel per pixel. For a pixel color between (0-255)
  - RGB image has 3 channels per pixel. For a pixel (0-255, 0-255, 0-255) => (128, 247, 18)
"""
# Preprocessing pipeline
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.1307,),
        std=(0.3081,)
    )
])

In [5]:
# Download dataset

train_dataset = datasets.MNIST(
    root='C:\Users\Loccha kakko\PyCharmMiscProject\machine_learning',
    train=True,
    download=True,
    transform=transform
)

test_dataset = datasets.MNIST(
    root= 'C:\Users\Loccha kakko\PyCharmMiscProject\machine_learning',
    train=False,
    download=True,
    transform=transform
)

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape (3936338690.py, line 4)

In [7]:
print(type(train_data))

<class 'torchvision.datasets.mnist.MNIST'>


In [9]:
"""
MNIST: Hand written digit recognition dataset
Trainset contains 60000 images
Each image is 28x28 in shape
So there is 784 pixels for each image

We are given 785 columns for each image
X: column 1-784 (pixels)
y: cloumn 785 (digits)


Understand Batch Size:

We have 60000 images
We calculate logits for each images 
Calulcate gradients
Update the weights

Now if we have 10 epoch and in each epoch we calculate logits for 60000 images
Then 10 x 60000 = 600000 unit

A batch size is a random sample from the training set.
batch size 32, 64, 128

We have total 60000 training images
For example everytime we select only 100 images (Batch size).
So, total batches = 60000 / 100 = 600 batches
For this batch we will calculate logits, compute gradient and update weights (Step)

for epoch in epochs:
    batches = [batch1, batch2, ...batch600]
    for each step in steps:
         We calculate logits for each images 
         Calulcate gradients
         Update the weights

"""

# loads data one batch at a time into the memory
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)

test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)

In [10]:
class Perceptron(nn.Module):
    def __init__(self, input_size):
        super(Perceptron, self).__init__()
        self.w = nn.Parameter(torch.randn(input_size))
        self.b = nn.Parameter(torch.randn(1))
    
    def forward(self, x):
        x = x @ self.w + self.b
        return x

In [17]:
input_size = 28 * 28
sample_input = torch.randn(input_size)

perceptron = Perceptron(input_size)
output = perceptron(sample_input)
print(output)

tensor([-9.3480], grad_fn=<AddBackward0>)


In [18]:
""" 
Activation function: ReLU (Rectified Linear Unit)
function: relu(z) = max(0, z)
"""

class ReLU(nn.Module):
    def __init__(self):
        super(ReLU, self).__init__()
        
    def forward(self, x):
        return torch.maximum(torch.tensor(0.0), x)
    

In [19]:
relu = ReLU()
output = relu(output)
print(output)

tensor([0.], grad_fn=<MaximumBackward0>)


In [20]:
class Linear(nn.Module):
    def __init__(self, input_size, output_size):
        super(Linear, self).__init__()
        
        self.perceptrons = nn.ModuleList([
            Perceptron(input_size) for _ in range(output_size)
        ])
        
    def forward(self, x):
        outputs = [
            perceptron(x) for perceptron in self.perceptrons
        ]
        outputs = torch.stack(outputs, dim=1)
        return outputs

In [22]:
linear = Linear(input_size, 3)
output = linear(sample_input)
print(output.shape)

torch.Size([1, 3])


In [33]:
class DigitClassifier(nn.Module):
    def __init__(self, input_size=28 * 28, output_size=10):
        super(DigitClassifier, self).__init__()
        self.fc1 = Linear(input_size, 256)
        self.fc2 = Linear(256, 128)
        self.fc3 = Linear(128, output_size)
        self.relu = ReLU()
        
    def forward(self, x):
        # Let say x = [1, 2, 3, ....]   shape:  [784]
        x = x.view(-1, input_size)
        # Let say x = [1, 2, 3, ....]   shape:  [1, 784]
        x = self.fc1(x)
        # Let say x = [-1, 2.2, 3.3, ...]   shape:  [1, 128]
        x = self.relu(x)
        # Let say x = [0, 2.2, 3.3, ...]   shape:  [1, 128]
        x = self.fc2(x)
        # Let say x = [-.50, 4.2, -3.3, ...]   shape:  [1, 64]
        x = self.relu(x)
        # Let say x = [0, 4.2, 0, ...]   shape:  [1, 64]
        x = self.fc3(x)
        # Let say x = [5, -4.2, .3, ...]   shape:  [1, 10]
        return x

In [34]:
model = DigitClassifier(input_size=28 * 28, output_size=10).to(device)
sample_input = sample_input.to(device)

output = model(sample_input)
print(output.shape)
print(output)

torch.Size([1, 10])
tensor([[ -716.5026, -1683.2001,  1311.7974,  1034.5662,  1836.6747,   535.4442,
          -678.9324, -1437.9171,  -224.9353, -2418.5049]], device='cuda:0',
       grad_fn=<StackBackward0>)


In [35]:
print(output)

_, predicted = torch.max(output, 1)
print("Class label:", predicted)

tensor([[ -716.5026, -1683.2001,  1311.7974,  1034.5662,  1836.6747,   535.4442,
          -678.9324, -1437.9171,  -224.9353, -2418.5049]], device='cuda:0',
       grad_fn=<StackBackward0>)
Class label: tensor([4], device='cuda:0')


In [36]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [37]:
num_epochs = 2

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        outputs = model(data)
        loss = criterion(outputs, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], Loss: {loss.item():.4f}')
    
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

Epoch [1/2], Step [1/938], Loss: 4287.7012
Epoch [1/2], Step [101/938], Loss: 480.1638
Epoch [1/2], Step [201/938], Loss: 386.3238
Epoch [1/2], Step [301/938], Loss: 318.1131
Epoch [1/2], Step [401/938], Loss: 156.4583
Epoch [1/2], Step [501/938], Loss: 83.3444
Epoch [1/2], Step [601/938], Loss: 146.2842
Epoch [1/2], Step [701/938], Loss: 151.8065
Epoch [1/2], Step [801/938], Loss: 34.2748
Epoch [1/2], Step [901/938], Loss: 25.7679
Epoch 1, Loss: 277.03577635079813
Epoch [2/2], Step [1/938], Loss: 74.6780
Epoch [2/2], Step [101/938], Loss: 169.3121
Epoch [2/2], Step [201/938], Loss: 23.9952
Epoch [2/2], Step [301/938], Loss: 66.5253
Epoch [2/2], Step [401/938], Loss: 46.6273
Epoch [2/2], Step [501/938], Loss: 53.3300
Epoch [2/2], Step [601/938], Loss: 47.1615
Epoch [2/2], Step [701/938], Loss: 122.5683
Epoch [2/2], Step [801/938], Loss: 53.4044
Epoch [2/2], Step [901/938], Loss: 105.3979
Epoch 2, Loss: 67.89323507112735


In [38]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy on the test set: {accuracy:.2f}%')

Accuracy on the test set: 89.84%
