In [1]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [2]:
# Load MNIST dataset
mnist = fetch_openml('mnist_784', version=1)

In [3]:
X, y = mnist.data.astype('float32'), mnist.target.astype('int')

In [4]:
X /= 255.0  # Normalize pixel values to [0, 1]

In [5]:
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# One-hot encode labels
num_classes = 10
y_train_encoded = np.eye(num_classes)[y_train]
y_test_encoded = np.eye(num_classes)[y_test]

##  Define neural network architecture

In [19]:
class NeuralNetwork:
    def __init__(self, input_dim, hidden_dim, output_dim):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.weights1 = np.random.randn(self.input_dim, self.hidden_dim) * np.sqrt(2 / self.input_dim)
        self.biases1 = np.zeros((1, self.hidden_dim))
        self.weights2 = np.random.randn(self.hidden_dim, self.output_dim) * np.sqrt(2 / self.hidden_dim)
        self.biases2 = np.zeros((1, self.output_dim))

    def softmax(self, x):
        exp_scores = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def forward(self, X):
        self.hidden_output = 1 / (1 + np.exp(-np.dot(X, self.weights1) - self.biases1))
        self.output = self.softmax(np.dot(self.hidden_output, self.weights2) + self.biases2)
        return self.output

    def backward(self, X, y, learning_rate):
        output_error = self.output - y
        hidden_error = np.dot(output_error, self.weights2.T) * self.hidden_output * (1 - self.hidden_output)
        
        self.weights2 -= learning_rate * np.dot(self.hidden_output.T, output_error)
        self.biases2 -= learning_rate * np.sum(output_error, axis=0, keepdims=True)
        self.weights1 -= learning_rate * np.dot(X.T, hidden_error)
        self.biases1 -= learning_rate * np.sum(hidden_error, axis=0, keepdims=True)

    def train(self, X, y, learning_rate, epochs):
        for epoch in range(epochs):
            output = self.forward(X)
            loss = -np.sum(y * np.log(output + 1e-9)) / len(y)  # Add a small epsilon to prevent log(0)
            print(f"Epoch {epoch}: Loss = {loss}")
            self.backward(X, y, learning_rate)

    def predict(self, X):
        hidden_output = 1 / (1 + np.exp(-np.dot(X, self.weights1) - self.biases1))
        output = self.softmax(np.dot(hidden_output, self.weights2) + self.biases2)
        return np.argmax(output, axis=1)

In [21]:
# Train the neural network
input_dim = X_train.shape[1]
hidden_dim = 100
output_dim = num_classes
learning_rate = 0.1
epochs = 100

In [22]:
nn = NeuralNetwork(input_dim, hidden_dim, output_dim)

In [23]:
nn.train(X_train, y_train_encoded, learning_rate, epochs)

Epoch 0: Loss = 2.8910003653009837


  self.hidden_output = 1 / (1 + np.exp(-np.dot(X, self.weights1) - self.biases1))


Epoch 1: Loss = 18.680434879638465
Epoch 2: Loss = 15.777346481144846
Epoch 3: Loss = 15.869617988771834
Epoch 4: Loss = 17.005589295165503
Epoch 5: Loss = 14.982141618326969
Epoch 6: Loss = 15.542154428274532
Epoch 7: Loss = 15.612583024131398
Epoch 8: Loss = 16.43101308886039
Epoch 9: Loss = 15.74167301374361
Epoch 10: Loss = 18.01362594784198
Epoch 11: Loss = 13.716101737448842
Epoch 12: Loss = 13.558423882333136
Epoch 13: Loss = 11.278413032530462
Epoch 14: Loss = 9.89346726070972
Epoch 15: Loss = 9.542324214275636
Epoch 16: Loss = 8.881774719561054
Epoch 17: Loss = 8.251278544388205
Epoch 18: Loss = 8.353943923591942
Epoch 19: Loss = 8.656238557262812
Epoch 20: Loss = 7.1911989586874325
Epoch 21: Loss = 6.396445172036595
Epoch 22: Loss = 5.933124658207709
Epoch 23: Loss = 5.842021635187154
Epoch 24: Loss = 5.808119122993977
Epoch 25: Loss = 5.880893193556762
Epoch 26: Loss = 5.631371150136208
Epoch 27: Loss = 5.843811644596619
Epoch 28: Loss = 5.094941057444593
Epoch 29: Loss = 5.

In [24]:
# Make predictions on the test set
y_pred = nn.predict(X_test)

  hidden_output = 1 / (1 + np.exp(-np.dot(X, self.weights1) - self.biases1))


In [25]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


Accuracy: 0.8383571428571429
