In [61]:
from torchvision import datasets
from torch.utils.data import DataLoader
import numpy as np


In [None]:
import torch
from torchvision import datasets, transforms

# Preprocessing pipeline applied when samples are fetched through the dataset:
# convert each image to a tensor, then normalize with mean 0.5 and std 0.5.
# NOTE: the NumPy arrays built later in this notebook read `.data` directly,
# which (per torchvision) holds the raw pixel values, so this transform only
# affects batches drawn through a DataLoader.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Fetch MNIST into ./data (downloaded on first run) for both splits.
train_dataset = datasets.MNIST(
    root="./data",
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root="./data",
    train=False,
    download=True,
    transform=transform,
)

# Report how many samples each split contains.
print("Training set size:", len(train_dataset))
print("Test set size:", len(test_dataset))

Training set size: 60000
Test set size: 10000


In [31]:
# Wrap both splits in DataLoaders that yield mini-batches of 32 samples.
# Only the training loader is created with shuffle=True; the test loader
# keeps the dataset order.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
)

In [42]:
# Pull a single mini-batch from the training loader to confirm tensor shapes.
# The `None` default keeps this a no-op if the loader yields nothing.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    images, labels = first_batch
    print("Batch of images shape:", images.shape)  # (batch_size, 1, 28, 28)
    print("Batch of labels shape:", labels.shape)  # (batch_size,)

Batch of images shape: torch.Size([32, 1, 28, 28])
Batch of labels shape: torch.Size([32])


In [88]:
# Build plain NumPy arrays for the hand-written network.
# Images: read the dataset's underlying `.data` tensor, flatten every 28x28
# image into a 784-long row, and divide by 255.0 (presumably raw pixel values
# are 0..255, giving features in [0, 1] -- the torchvision transform is not
# involved on this path).
X_train = train_loader.dataset.data.numpy().reshape(-1, 28 * 28) / 255.0
X_test = test_loader.dataset.data.numpy().reshape(-1, 28 * 28) / 255.0

# Labels: integer class targets, one per image.
y_train = train_loader.dataset.targets.numpy()
y_test = test_loader.dataset.targets.numpy()

def one_hot_encode(y, num_classes=10):
    """Convert integer class labels into one-hot rows.

    Args:
        y: Integer label(s) used to index rows of an identity matrix
            (e.g. a 1-D array of class ids).
        num_classes: Width of each one-hot row.

    Returns:
        Float array where each label is replaced by the matching row of the
        ``num_classes`` x ``num_classes`` identity matrix.
    """
    identity = np.eye(num_classes)
    # Row k of the identity matrix is exactly the one-hot vector for class k.
    return identity[y]

# One-hot versions of the train/test labels (default of 10 classes).
y_train_oh, y_test_oh = map(one_hot_encode, (y_train, y_test))


In [80]:
def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``.

    Args:
        Z: Array of pre-activation values.

    Returns:
        Array shaped like ``Z`` with every negative entry replaced by 0.
    """
    floor = 0
    return np.maximum(floor, Z)

def relu_derivative(Z):
    """Derivative of ReLU with respect to its input, as a boolean mask.

    Args:
        Z: Array of pre-activation values.

    Returns:
        Result of ``Z > 0``: True where the input is strictly positive
        (slope 1), False elsewhere (slope 0). Multiplying a float array by
        this mask zeroes the entries where ReLU was inactive.
    """
    positive_mask = Z > 0
    return positive_mask

def softmax(Z):
    """Row-wise softmax.

    Args:
        Z: 2-D array of scores; the reduction runs over ``axis=1``, so each
            row is treated as one sample.

    Returns:
        Array shaped like ``Z`` whose rows are non-negative and sum to 1.
    """
    # Subtract each row's maximum before exponentiating so np.exp never
    # sees a large positive argument; the softmax value is unchanged.
    row_max = np.max(Z, axis=1, keepdims=True)
    exp_shifted = np.exp(Z - row_max)
    row_total = np.sum(exp_shifted, axis=1, keepdims=True)
    return exp_shifted / row_total

def cross_entropy_loss(y_true, y_pred):
    """Mean categorical cross-entropy over a batch.

    Args:
        y_true: 2-D array of target weights per class (one-hot rows in this
            notebook), one row per sample.
        y_pred: 2-D array of predicted probabilities, same shape as ``y_true``.

    Returns:
        Scalar: the negative mean over rows of ``sum(y_true * log(y_pred))``.
    """
    # The 1e-9 offset keeps log() finite when a predicted probability is 0.
    log_probs = np.log(y_pred + 1e-9)
    per_sample = np.sum(y_true * log_probs, axis=1)
    return -np.mean(per_sample)



In [81]:
def initialize_weights(input_dim, hidden_layer, output_dim, seed=None):
    """Create the parameters of a two-layer fully connected network.

    Weights are drawn from a standard normal distribution scaled by 0.01;
    biases start at zero.

    Args:
        input_dim: Number of input features.
        hidden_layer: Number of hidden units.
        output_dim: Number of output units.
        seed: Optional seed for reproducible weights. When ``None`` (the
            default) the draws come from NumPy's global random state, exactly
            as before. When given, a private ``np.random.RandomState(seed)``
            is used, so the global state is left untouched and the result
            matches calling ``np.random.seed(seed)`` right before an
            unseeded call.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(input_dim, hidden_layer)``,
        ``(1, hidden_layer)``, ``(hidden_layer, output_dim)`` and
        ``(1, output_dim)``.
    """
    # Pick the normal sampler: global state by default, private when seeded.
    randn = np.random.randn if seed is None else np.random.RandomState(seed).randn

    W1 = randn(input_dim, hidden_layer) * 0.01
    b1 = np.zeros((1, hidden_layer))
    W2 = randn(hidden_layer, output_dim) * 0.01
    b2 = np.zeros((1, output_dim))
    return W1, b1, W2, b2

In [91]:
def forward(X, W1, b1, W2, b2):
    """Run the forward pass of the two-layer network.

    Computes ``relu(X @ W1 + b1)`` for the hidden layer and
    ``softmax(hidden @ W2 + b2)`` for the output layer.

    Args:
        X: Input features, one sample per row.
        W1, b1: Hidden-layer weights and bias.
        W2, b2: Output-layer weights and bias.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden activation,
        output pre-activation, and softmax output, in that order.
    """
    hidden_pre = np.dot(X, W1) + b1
    hidden_act = relu(hidden_pre)
    output_pre = np.dot(hidden_act, W2) + b2
    output_probs = softmax(output_pre)
    return hidden_pre, hidden_act, output_pre, output_probs

In [83]:
def backward_propagation(X, y, Z1, A1, Z2, A2, W1, W2):
    """Compute batch-averaged parameter gradients for the two-layer network.

    Args:
        X: Input batch, one sample per row.
        y: Targets with the same shape as ``A2`` (one-hot rows in this
            notebook).
        Z1: Hidden-layer pre-activation from the forward pass.
        A1: Hidden-layer activation from the forward pass.
        Z2: Output pre-activation (accepted but not used here).
        A2: Network output from the forward pass.
        W1: Hidden-layer weights (accepted but not used here).
        W2: Output-layer weights.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)``, each divided by the number of rows
        in ``X``.
    """
    batch_count = X.shape[0]

    # Output layer: the error signal is simply (prediction - target), which
    # is the logit gradient when A2 is a softmax trained with cross-entropy.
    output_delta = A2 - y
    grad_W2 = np.dot(A1.T, output_delta) / batch_count
    grad_b2 = np.sum(output_delta, axis=0, keepdims=True) / batch_count

    # Hidden layer: push the error back through W2, then gate it by the
    # ReLU derivative so inactive units receive no gradient.
    hidden_upstream = np.dot(output_delta, W2.T)
    hidden_delta = hidden_upstream * relu_derivative(Z1)
    grad_W1 = np.dot(X.T, hidden_delta) / batch_count
    grad_b1 = np.sum(hidden_delta, axis=0, keepdims=True) / batch_count

    return grad_W1, grad_b1, grad_W2, grad_b2

In [110]:
# Hyperparameters
input_size = 784  # 28x28 pixels, flattened
hidden_size = 128
output_size = 10  # one output per class
learning_rate = 0.01
epochs = 15
batch_size = 32

# Initialize weights
W1, b1, W2, b2 = initialize_weights(input_size, hidden_size, output_size)

# Loss of every mini-batch, appended in training order across all epochs.
losses = []

num_samples = X_train.shape[0]

# Training loop: plain mini-batch gradient descent.
for epoch in range(epochs):
    # Shuffle data: draw a fresh sample order each epoch and apply the same
    # permutation to features and one-hot targets so rows stay aligned.
    permutation = np.random.permutation(num_samples)
    X_train_shuffled = X_train[permutation]
    y_train_shuffled = y_train_oh[permutation]

    total_loss = 0
    num_batches = 0
    for i in range(0, num_samples, batch_size):
        # The final slice is shorter when num_samples is not a multiple
        # of batch_size.
        X_batch = X_train_shuffled[i:i+batch_size]
        y_batch = y_train_shuffled[i:i+batch_size]

        # Forward pass
        Z1, A1, Z2, A2 = forward(X_batch, W1, b1, W2, b2)

        # Compute loss
        loss = cross_entropy_loss(y_batch, A2)
        losses.append(loss)
        total_loss += loss
        num_batches += 1

        # Backpropagation
        dW1, db1, dW2, db2 = backward_propagation(X_batch, y_batch, Z1, A1, Z2, A2, W1, W2)

        # Update weights (in place)
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2

    # Average over the batches that actually ran. Dividing by
    # num_samples / batch_size is only correct when the dataset size is an
    # exact multiple of batch_size.
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / num_batches:.4f}")

Epoch 1/15, Loss: 1.2724
Epoch 2/15, Loss: 0.4308
Epoch 3/15, Loss: 0.3498
Epoch 4/15, Loss: 0.3154
Epoch 5/15, Loss: 0.2916
Epoch 6/15, Loss: 0.2715
Epoch 7/15, Loss: 0.2537
Epoch 8/15, Loss: 0.2380
Epoch 9/15, Loss: 0.2237
Epoch 10/15, Loss: 0.2111
Epoch 11/15, Loss: 0.1997
Epoch 12/15, Loss: 0.1895
Epoch 13/15, Loss: 0.1799
Epoch 14/15, Loss: 0.1714
Epoch 15/15, Loss: 0.1636


In [111]:
def predict(X, W1, b1, W2, b2):
    """Return the predicted class index for each input row.

    Args:
        X: Input features, one sample per row.
        W1, b1, W2, b2: Network parameters, passed straight to ``forward``.

    Returns:
        1-D integer array holding the arg-max over ``axis=1`` of the
        network's output.
    """
    # Only the last element of forward()'s result (the output layer) matters.
    output_probs = forward(X, W1, b1, W2, b2)[-1]
    return np.argmax(output_probs, axis=1)

# Score the trained parameters on the held-out test split: the mean of the
# element-wise match between predicted and true labels, as a percentage.
y_pred = predict(X_test, W1, b1, W2, b2)
accuracy = 100 * np.mean(y_pred == y_test)
print(f"Test Accuracy: {accuracy:.2f}%")

Test Accuracy: 95.43%


In [98]:
import matplotlib.pyplot as plt

In [106]:
# Spot-check one training sample. X_train[1] is a single flattened row (1-D);
# inside forward(), adding the (1, hidden) bias broadcasts the activations to
# 2-D, which is why the result is a one-element array rather than a scalar.
predict(X_train[1],W1,b1,W2,b2)

array([0])