# Manual Implementation

In [13]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

def initialize_weights(input_size, hidden_size, output_size):
    """Create the initial parameters of a network with one hidden layer.

    Weight matrices are sampled from a standard normal distribution (via
    NumPy's global random state) and scaled by ``sqrt(1 / fan_in)``, where
    ``fan_in`` is the number of inputs feeding the layer. Biases start at zero.

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        output_size: Number of output units.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes
        ``(input_size, hidden_size)``, ``(1, hidden_size)``,
        ``(hidden_size, output_size)`` and ``(1, output_size)``.
    """
    def scaled_normal(fan_in, fan_out):
        # Normal samples shrunk by sqrt(1 / fan_in).
        return np.random.randn(fan_in, fan_out) * np.sqrt(1. / fan_in)

    # Draw order matters for reproducibility under a fixed seed:
    # hidden-layer weights first, then output-layer weights.
    hidden_weights = scaled_normal(input_size, hidden_size)
    output_weights = scaled_normal(hidden_size, output_size)

    hidden_bias = np.zeros((1, hidden_size))
    output_bias = np.zeros((1, output_size))
    return hidden_weights, hidden_bias, output_weights, output_bias

def sigmoid(z):
    """Apply the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: Scalar or NumPy array of pre-activations.

    Returns:
        Value(s) with the same shape as ``z``.
    """
    decay = np.exp(-z)
    return 1 / (decay + 1)

def softmax(z):
    """Compute a row-wise softmax over a 2-D array of logits.

    Args:
        z: Array of shape ``(n_samples, n_classes)``.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Subtract each row's own maximum before exponentiating. Softmax is
    # invariant to a per-row shift, and using the row maximum guarantees that
    # every row contains exp(0) == 1, so the denominator can never underflow
    # to zero. Shifting by the global maximum of the whole batch would let a
    # row with much smaller logits collapse to all zeros and produce NaN.
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / exp_z.sum(axis=1, keepdims=True)

def feed_forward(X, W1, b1, W2, b2):
    """Run the forward pass of the two-layer network.

    The hidden layer applies ``sigmoid`` to ``X . W1 + b1``; the output layer
    applies ``softmax`` to ``A1 . W2 + b2``.

    Args:
        X: Input samples, one per row.
        W1, b1: Hidden-layer weights and bias.
        W2, b2: Output-layer weights and bias.

    Returns:
        Tuple ``(A1, A2)``: hidden activations and output probabilities.
    """
    hidden_activation = sigmoid(np.dot(X, W1) + b1)
    output_probs = softmax(np.dot(hidden_activation, W2) + b2)
    return hidden_activation, output_probs

# Cross-entropy loss
def compute_loss(Y, A2):
    """Return the mean cross-entropy between targets and predictions.

    Args:
        Y: Target array of shape ``(n_samples, n_classes)``; the true class
            of each row is taken to be the position of its maximum.
        A2: Predicted class probabilities with the same shape as ``Y``.

    Returns:
        Average of ``-log(p_true)`` over all samples.
    """
    n_samples = Y.shape[0]
    true_class = np.argmax(Y, axis=1)
    # Pick, for every row, the probability assigned to its true class.
    true_class_prob = A2[range(n_samples), true_class]
    return np.sum(-np.log(true_class_prob)) / n_samples

def back_propagation(X, Y, W1, b1, W2, b2, A1, A2):
    """Compute batch-averaged gradients for both layers.

    ``A2 - Y`` is used as the output-layer error term, and the hidden-layer
    error is obtained by pushing it back through ``W2`` and multiplying by
    ``A1 * (1 - A1)`` (the sigmoid derivative written in terms of its output).

    Args:
        X: Input samples, one per row.
        Y: Target array with the same shape as ``A2``.
        W1, b1: Hidden-layer parameters (not read by this function; kept in
            the signature for callers).
        W2: Output-layer weights.
        b2: Output-layer bias (not read by this function).
        A1: Hidden activations from the forward pass.
        A2: Output probabilities from the forward pass.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)`` with gradients shaped like the
        corresponding parameters.
    """
    n_samples = X.shape[0]

    # Output layer.
    output_error = A2 - Y
    grad_W2 = np.dot(A1.T, output_error) / n_samples
    grad_b2 = np.sum(output_error, axis=0, keepdims=True) / n_samples

    # Hidden layer: propagate the error back through W2, then through sigmoid.
    hidden_error = np.dot(output_error, W2.T) * A1 * (1 - A1)
    grad_W1 = np.dot(X.T, hidden_error) / n_samples
    grad_b1 = np.sum(hidden_error, axis=0, keepdims=True) / n_samples

    return grad_W1, grad_b1, grad_W2, grad_b2

# Load the 'mnist_784' dataset from OpenML and cast the target labels to int.
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist['data'], mnist['target'].astype(int)

# Rescale the features; presumably raw pixel intensities are 0-255, giving
# values in [0, 1] -- TODO confirm against the dataset description.
X = X / 255.0

# One-hot encode the labels so they can be compared against softmax outputs.
y = np.array(y)
encoder = OneHotEncoder(categories='auto')
y_one_hot = encoder.fit_transform(y.reshape(-1, 1)).toarray()

# Fixed-size split: 60000 training samples and 10000 test samples.
X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_size=10000, train_size=60000, random_state=42)

# Hyperparameters
input_size = 784
hidden_size = 100
output_size = 10
learning_rate = 1.6
epochs = 100

# NOTE(review): no random seed is set in this cell, so the initial weights
# (and therefore the printed losses and final accuracy) vary between runs.
W1, b1, W2, b2 = initialize_weights(input_size, hidden_size, output_size)

# Full-batch gradient descent: every epoch does one forward pass, one backward
# pass and one parameter update over the entire training set.
for epoch in range(epochs):
    A1, A2 = feed_forward(X_train, W1, b1, W2, b2)
    # The loss is measured before this epoch's parameter update is applied.
    loss = compute_loss(y_train, A2)
    dW1, db1, dW2, db2 = back_propagation(X_train, y_train, W1, b1, W2, b2, A1, A2)
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    if epoch % 7 == 0:  # Print loss every 7 epochs (epochs 1, 8, 15, ...)
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss}')

# Evaluate the model
# Predicted class = index of the largest output probability; true class =
# index of the 1 in the one-hot target row.
_, A2_test = feed_forward(X_test, W1, b1, W2, b2)
predictions = np.argmax(A2_test, axis=1)
labels = np.argmax(y_test, axis=1)
manual_accuracy = np.mean(predictions == labels)
print(f'Manual Implementation Accuracy: {manual_accuracy * 100:.2f}%')


  warn(


Epoch 1/100, Loss: 2.4504393709811434
Epoch 8/100, Loss: 1.7719853101746554
Epoch 15/100, Loss: 1.1776729908183021
Epoch 22/100, Loss: 0.925153318650677
Epoch 29/100, Loss: 0.7375225789630429
Epoch 36/100, Loss: 0.6481489155949324
Epoch 43/100, Loss: 0.5371172326222754
Epoch 50/100, Loss: 0.5206745466747266
Epoch 57/100, Loss: 0.4560617990472848
Epoch 64/100, Loss: 0.41128787881966994
Epoch 71/100, Loss: 0.3863139514802078
Epoch 78/100, Loss: 0.3698284780939989
Epoch 85/100, Loss: 0.357207156428861
Epoch 92/100, Loss: 0.34702322451289874
Epoch 99/100, Loss: 0.33847161524275926
Manual Implementation Accuracy: 90.22%


## Implementation with TensorFlow

In [14]:
import tensorflow as tf
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Fetch the dataset again and rebuild the split. This rebinds X, y, X_train,
# X_test, y_train and y_test, replacing any values left by earlier cells.
# Here the targets stay as integer class labels (they are not one-hot encoded).
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist['data'], mnist['target'].astype(int)
X = X / 255.0
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=10000, train_size=60000, random_state=42)

# Same layer layout as the manual network: 784 inputs -> 100 sigmoid hidden
# units -> 10 softmax outputs.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(100, activation='sigmoid', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

# The 'sparse' cross-entropy variant is used because the labels are integer
# class indices rather than one-hot rows.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# NOTE(review): training here uses the Adam optimizer with mini-batches of 32
# for 15 epochs, whereas the manual cell runs 100 full-batch gradient-descent
# steps, so the two reported accuracies are not a like-for-like comparison.
model.fit(X_train, y_train, epochs=15, batch_size=32)

# Report loss and accuracy on the held-out 10000 test samples.
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f'TensorFlow Implementation Accuracy: {test_acc * 100:.2f}%')


  warn(


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
313/313 - 1s - loss: 0.0840 - accuracy: 0.9728 - 883ms/epoch - 3ms/step
TensorFlow Implementation Accuracy: 97.28%
