# Deep Learning | Multilayer Perceptron

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
# Ask PyTorch whether CUDA is usable in this environment; the cell displays the boolean result.
torch.cuda.is_available()

False

## Prepare Dataset

In [3]:
# Prefer CUDA when PyTorch reports it as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cpu')

## Task: Handwritten Digits classification <br>
Our dataset: MNIST <br>
Input type: Image <br>
Image shape: 28 x 28 <br>

In [5]:
# Normalisation constants, one entry per image channel.
# NOTE(review): presumably the MNIST training-set mean / std — confirm.
norm_mean = (0.1307,)
norm_std = (0.3081,)

# Convert each image to a tensor first, then standardise it.
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
# Both splits share the storage root, download flag and preprocessing;
# only the `train` flag differs between them.
mnist_options = dict(root='../Datasets', download=True, transform=transform)

train_dataset = datasets.MNIST(train=True, **mnist_options)
test_dataset = datasets.MNIST(train=False, **mnist_options)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ../Datasets/MNIST/test/data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:31<00:00, 313kB/s] 


Extracting ../Datasets/MNIST/test/data\MNIST\raw\train-images-idx3-ubyte.gz to ../Datasets/MNIST/test/data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ../Datasets/MNIST/test/data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 57.1kB/s]


Extracting ../Datasets/MNIST/test/data\MNIST\raw\train-labels-idx1-ubyte.gz to ../Datasets/MNIST/test/data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ../Datasets/MNIST/test/data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:02<00:00, 592kB/s] 


Extracting ../Datasets/MNIST/test/data\MNIST\raw\t10k-images-idx3-ubyte.gz to ../Datasets/MNIST/test/data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ../Datasets/MNIST/test/data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 1.82MB/s]


Extracting ../Datasets/MNIST/test/data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ../Datasets/MNIST/test/data\MNIST\raw



In [7]:
# Show the dataset class and the number of training examples, one per line.
print(type(train_dataset), len(train_dataset), sep="\n")

<class 'torchvision.datasets.mnist.MNIST'>
60000


In [8]:
# Show the dataset class and the number of test examples, one per line.
print(type(test_dataset), len(test_dataset), sep="\n")

<class 'torchvision.datasets.mnist.MNIST'>
10000


The training set contains 60,000 images.<br>
During training we compute a loss over the examples.<br>
Let N be the number of examples.<br>
Let O(L) be the cost of computing the loss for a single example.<br><br>

Then the time complexity of computing the loss over the whole dataset at each step is O(L x N).<br>
This is expensive in both time and computation.<br><br>

Instead of computing the loss on the entire dataset, we compute it on a mini-batch:<br>
a small sample of examples drawn from the dataset.<br>
Typical batch sizes range from 64 to 512.

In [9]:
# Number of examples per mini-batch, shared by both loaders.
BATCH_SIZE = 64

# Training batches are reshuffled; test batches keep the dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Create Model

In [10]:
class Perceptron(nn.Module):
    """A single neuron: a weighted sum of its inputs plus a bias.

    Args:
        input_size: Number of input features, i.e. the length of the weight vector.
    """

    def __init__(self, input_size):
        super().__init__()
        # Weights are drawn before the bias; keep this order so seeded runs
        # stay reproducible.
        initial_weights = torch.randn(input_size)
        initial_bias = torch.randn(1)
        self.w = nn.Parameter(initial_weights)
        self.b = nn.Parameter(initial_bias)

    def forward(self, x):
        """Return ``x`` multiplied with the weight vector, shifted by the bias."""
        weighted_sum = torch.matmul(x, self.w)
        return weighted_sum + self.b

In [11]:
# Number of values in one flattened 28 x 28 image.
input_size = 28 * 28
# Random stand-in for a single flattened image (no seed is set, so values differ per run).
sample_input = torch.randn(input_size)

# The perceptron is created after the sample, so its parameters are drawn second.
perceptron = Perceptron(input_size)
output = perceptron(sample_input)

print(sample_input.shape)
print(output)

torch.Size([784])
tensor([22.4845], grad_fn=<AddBackward0>)


## Create an activation function
Activation function: ReLU (Rectified Linear Unit) <br>
function: relu(z) = max(0, z)

In [12]:
class ReLU(nn.Module):
    """Rectified Linear Unit: element-wise ``max(0, x)``."""

    def __init__(self):
        super(ReLU, self).__init__()

    def forward(self, x):
        """Return ``x`` with every negative entry replaced by zero.

        Args:
            x: Input tensor of any shape.

        Returns:
            A tensor with the same shape, dtype and device as ``x``.
        """
        # Build the zero with x's own dtype and device. A bare torch.tensor(0.0)
        # is always a CPU float32 scalar, which would promote integer inputs to
        # float and depend on CPU-scalar mixing for accelerator inputs.
        zero = x.new_zeros(())
        return torch.maximum(zero, x)

In [13]:
relu = ReLU()
# Rebinds `output`: it now holds the activated value of the previous `output`.
output = relu(output)

print(output)

tensor([22.4845], grad_fn=<MaximumBackward0>)


## Create a linear layer
A linear layer takes an input of shape X<br>
and produces an output of shape Y

In [14]:
class Linear(nn.Module):
    """Fully connected layer assembled from independent ``Perceptron`` units.

    Args:
        input_size: Number of features each unit receives.
        output_size: Number of units, which is also the number of output features.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        # Units are created one after another, in index order.
        units = nn.ModuleList()
        for _ in range(output_size):
            units.append(Perceptron(input_size))
        self.perceptrons = units

    def forward(self, x):
        """Run ``x`` through every unit and stack the results along dimension 1."""
        unit_outputs = []
        for unit in self.perceptrons:
            unit_outputs.append(unit(x))
        return torch.stack(unit_outputs, dim=1)

Input shape: 784<br>
Output shape: 128

In [15]:
# Build a 128-unit layer over `input_size` features and apply it to the sample vector.
linear = Linear(input_size, 128)
output = linear(sample_input)

# For the 1-D sample the stacked result has a leading dimension of 1.
print(output.shape)

torch.Size([1, 128])


# Create Handwritten Digit Classifier Model

In [16]:
class DigitClassifier(nn.Module):
    """Three-layer perceptron that maps a flattened image to one score per class.

    Args:
        input_size: Number of values per example after flattening (default ``28 * 28``).
        output_size: Number of scores produced per example (default ``10``).
    """

    def __init__(self, input_size=28 * 28, output_size=10):
        super(DigitClassifier, self).__init__()
        # Keep the feature count on the instance so forward() uses the value
        # this model was built with rather than a notebook-level variable.
        self.input_size = input_size
        self.fc1 = Linear(input_size, 128)
        self.fc2 = Linear(128, 64)
        self.fc3 = Linear(64, output_size)
        self.relu = ReLU()

    def forward(self, x):
        """Compute class scores for ``x``.

        Args:
            x: Tensor whose element count is a multiple of ``input_size``;
                it is flattened to rows of ``input_size`` values.

        Returns:
            Tensor of shape ``[rows, output_size]`` holding the raw, unnormalised scores.
        """
        # Flatten to one row per example, e.g. [784] -> [1, 784].
        # reshape (unlike view) also accepts non-contiguous inputs.
        x = x.reshape(-1, self.input_size)
        # First hidden layer: [rows, input_size] -> [rows, 128]
        x = self.fc1(x)
        # Negative activations are clipped to zero; shape is unchanged.
        x = self.relu(x)
        # Second hidden layer: [rows, 128] -> [rows, 64]
        x = self.fc2(x)
        x = self.relu(x)
        # Output layer: [rows, 64] -> [rows, output_size]; no activation is applied here.
        x = self.fc3(x)
        return x

In [17]:
model = DigitClassifier(input_size=28 * 28, output_size=10).to(device)
# The model's parameters live on `device`, so the input has to be moved there
# as well; otherwise the forward pass fails on a CUDA machine.
output = model(sample_input.to(device))
print(output.shape)

torch.Size([1, 10])


In [18]:
print(output)

# The index of the largest score in each row is taken as the predicted class.
_, predicted = output.max(dim=1)
print("Class label:", predicted)

tensor([[  519.5203,  -810.1940,  -753.1400, -2025.4454,  -807.8885, -2164.3464,
         -1057.3344, -1511.3717,   -59.9989,  -794.5802]],
       grad_fn=<StackBackward0>)
Class label: tensor([0])


# Train model

In [19]:
# Step size handed to the optimiser.
LEARNING_RATE = 0.001

# Adam updates every model parameter; cross-entropy is the training objective.
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

In [20]:
num_epochs = 10

for epoch in range(num_epochs):
    # Put the model in training mode and restart the per-epoch loss total.
    model.train()
    running_loss = 0.0

    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(device)
        target = target.to(device)

        # Clear gradients left over from the previous step, then run
        # forward pass -> loss -> backward pass -> parameter update.
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Report progress on every 100th batch.
        if batch_idx % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

    # Mean batch loss over the epoch.
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

Epoch [1/10], Step [1/938], Loss: 1838.8225
Epoch [1/10], Step [101/938], Loss: 351.1593
Epoch [1/10], Step [201/938], Loss: 224.2021
Epoch [1/10], Step [301/938], Loss: 159.2395
Epoch [1/10], Step [401/938], Loss: 84.1171
Epoch [1/10], Step [501/938], Loss: 56.6741
Epoch [1/10], Step [601/938], Loss: 34.3045
Epoch [1/10], Step [701/938], Loss: 78.6175
Epoch [1/10], Step [801/938], Loss: 82.5765
Epoch [1/10], Step [901/938], Loss: 118.0238
Epoch 1, Loss: 171.02317666194077
Epoch [2/10], Step [1/938], Loss: 22.0084
Epoch [2/10], Step [101/938], Loss: 39.4197
Epoch [2/10], Step [201/938], Loss: 52.4026
Epoch [2/10], Step [301/938], Loss: 23.3177
Epoch [2/10], Step [401/938], Loss: 72.7036
Epoch [2/10], Step [501/938], Loss: 36.6984
Epoch [2/10], Step [601/938], Loss: 20.6708
Epoch [2/10], Step [701/938], Loss: 34.9740
Epoch [2/10], Step [801/938], Loss: 28.0219
Epoch [2/10], Step [901/938], Loss: 58.5584
Epoch 2, Loss: 39.09290978039252
Epoch [3/10], Step [1/938], Loss: 31.8018
Epoch [3/

# Evaluate Model

In [21]:
# Switch the model to evaluation mode before scoring the test set.
model.eval()
total = 0
correct = 0

# Gradients are not needed while measuring accuracy.
with torch.no_grad():
    for data, target in test_loader:
        data = data.to(device)
        target = target.to(device)

        outputs = model(data)
        # The index of the highest score per row is the predicted class.
        _, predicted = outputs.max(dim=1)

        correct += predicted.eq(target).sum().item()
        total += target.size(0)

accuracy = 100 * correct / total
print(f'Accuracy on the test set: {accuracy:.2f}%')

Accuracy on the test set: 92.27%
