In [174]:
import torch
from torch import Tensor
import numpy as np
import torchvision.datasets as datasets
import torchvision.transforms as transform
from torch.utils.data import DataLoader

from random import randint

In [167]:
batch_size = 100

#MNIST datasets
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform.ToTensor())
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform.ToTensor())

mnist_trainloader : DataLoader = DataLoader(mnist_trainset, batch_size=batch_size, shuffle=True)
mnist_testloader : DataLoader = DataLoader(mnist_testset, batch_size=batch_size, shuffle=True)

In [118]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [187]:
epochs = 30
W_hidden = torch.randn(784, 100)
b_hidden = torch.randn(100)
mu = 0.3

W_output = torch.randn(100, 10)
b_output = torch.randn(10)

def train():
    global W_hidden
    global b_hidden
    global W_output
    global b_output

    #only for acc & loss
    cross_entropy_loss = torch.nn.CrossEntropyLoss()
    ok = 0
    samples = 0
    loss = 0

    num_batches = len(mnist_testloader)//batch_size
    dropout_threshold = 5 * num_batches // 100

    for epoch in range(epochs):
        for inputs, labels in mnist_trainloader:
            
            if randint(0, num_batches) < dropout_threshold:
                continue

            #forwardprop
            x = inputs.view(batch_size, -1)
            
            Z_hidden = (x @ W_hidden) + b_hidden
            Y_hidden = sigmoid(Z_hidden)

            Z_output = (Y_hidden @ W_output) + b_output
            Y_output = sigmoid(Z_output)

            #calculate loss & acc during training
            loss += cross_entropy_loss(Z_output, labels).item()
            predictions = torch.argmax(Y_output, dim=1)

            ok += (predictions == labels).sum().item()
            samples += predictions.size(0)

            encoded_label = torch.zeros(batch_size, 10)
            for index, label in enumerate(labels):
                encoded_label[index][label] = 1

            #backprop batch

            #derivs for output layer
            output_error = Y_output - encoded_label
            deltaWL_output : Tensor = (Y_hidden.t() @ output_error) / batch_size
            deltabL_output : Tensor = torch.sum(output_error, dim=0, keepdim=True) / batch_size

            #derivs for hidden layer
            Error_hidden = (output_error @ W_output.t()) * (Y_hidden * (1 - Y_hidden)).float()
            deltaWL_hidden = (x.t() @ Error_hidden) / batch_size
            deltabL_hidden = torch.sum(Error_hidden, dim=0, keepdim=True) / batch_size

            #update
            W_hidden -= mu * deltaWL_hidden
            b_hidden -= mu * deltabL_hidden.squeeze(0)
            W_output -= mu * deltaWL_output
            b_output -= mu * deltabL_output.squeeze(0)

    loss /= len(mnist_testloader)
    accuracy = ok / samples
    print(f"Loss: {loss:.2f}, Accuracy: {accuracy * 100:.2f}%")

train()

Loss: 43.10, Accuracy: 93.53%


In [171]:
def predict(dataset_loader):
    global W_hidden
    global b_hidden
    global W_output
    global b_output

    ok = 0
    samples = 0
    loss = 0
    cross_entropy_loss = torch.nn.CrossEntropyLoss()
    
    with torch.no_grad():
        for inputs, labels in dataset_loader:
            x = inputs.view(batch_size, -1)

            Z_hidden = (x @ W_hidden) + b_hidden
            Y_hidden = sigmoid(Z_hidden)

            Z_output = (Y_hidden @ W_output) + b_output
            Y_output = sigmoid(Z_output)
            # print(Y_output)

            loss += cross_entropy_loss(Z_output, labels).item()
            predictions = torch.argmax(Y_output, dim=1)

            ok += (predictions == labels).sum().item()
            samples += predictions.size(0)

    loss /= len(dataset_loader)
    accuracy = ok / samples
    print(f"Loss: {loss:.2f}, Accuracy: {accuracy * 100:.2f}%")


In [189]:
predict(mnist_trainloader)

Loss: 0.13, Accuracy: 96.55%


In [188]:
predict(mnist_testloader)

Loss: 0.20, Accuracy: 95.28%
