In [None]:
#experiment-2 -> building a neural network using numpy
'''-> USING MNIST DATASET
-> make a repo in github and put all experiments inside that
-> make sure proper comments in the code
-> submit github link
-> connect directly colab with github
-> update README file experimentwise'''

In [1]:
#NOTE -> I am using tensorflow just to load MNIST dataset
import numpy as np
from tensorflow.keras.datasets import mnist

**MATH FUNCTION UTILITIES**

In [8]:
#sigmoid activation function
def sigmoid(z):
    """Logistic sigmoid activation, applied element-wise.

    Args:
        z: Scalar or NumPy array of pre-activation values.

    Returns:
        1 / (1 + exp(-z)); an array input yields an array of the same shape.
    """
    exp_neg = np.exp(-z)
    return 1 / (1 + exp_neg)

#derivative used for back propagation
def sigmoid_derivative(z):
    """Sigmoid gradient written in terms of the sigmoid's output.

    Note: despite the parameter name, the formula z * (1 - z) is the
    derivative only when ``z`` is already an activation a = sigmoid(x);
    the training code in this notebook passes layer outputs here.

    Args:
        z: Scalar or NumPy array of sigmoid activations.

    Returns:
        z * (1 - z), element-wise.
    """
    complement = 1 - z
    return z * complement

# def softmax(z):
#     exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))  # stability
#     return exp_z / np.sum(exp_z, axis=1, keepdims=True)


#Converts labels (e.g., 3) into vectors
def one_hot_encode(labels, num_classes=10):
    """Convert integer class labels into one-hot row vectors.

    Args:
        labels: 1-D sequence or array of integer class indices.
        num_classes: Width of each one-hot vector (default 10).

    Returns:
        Float array of shape (len(labels), num_classes) that is all zeros
        except for a 1 at column ``labels[i]`` of row ``i``.

    Raises:
        IndexError: If a label does not index a valid column.
    """
    output = np.zeros((len(labels), num_classes))
    label_idx = np.asarray(labels)

    # An empty input converts to a float array, which is not a valid index;
    # there is nothing to set in that case anyway.
    if label_idx.size:
        # One vectorized assignment instead of a Python loop over samples.
        output[np.arange(len(labels)), label_idx] = 1
    return output


#limiting our dataset to 1000 only
def load_data(limit=1000):
    """Load a leading slice of the MNIST training set, ready for training.

    Args:
        limit: Number of leading training samples to keep (default 1000),
            which keeps training fast.

    Returns:
        Tuple ``(X_flat, y_encoded, y)``: the images flattened to 784
        columns and divided by 255.0, the one-hot encoded labels, and the
        raw labels (returned for accuracy checking).
    """
    # Only the training split is used; the test split is discarded.
    (train_images, train_labels), _ = mnist.load_data()

    y = train_labels[:limit]
    images = train_images[:limit]

    # Flatten each 28x28 image into 784 values and scale 0-255 down to 0-1
    # for better convergence.
    X_flat = images.reshape(images.shape[0], 784) / 255.0

    return X_flat, one_hot_encode(y), y


# Load the 1000-sample subset a single time; X and y_encoded feed the
# training call further down.
X, y_encoded, y = load_data(limit=1000)
print(f"Data Loaded: X shape: {X.shape}, y shape: {y_encoded.shape}")

Data Loaded: X shape: (1000, 784), y shape: (1000, 10)


**NETWORK TRAINING WITH 1 HIDDEN LAYER ONLY**

In [9]:
# You could also use He or Xavier initialization, depending on the activation function you are using
def init_1_layer(input_size, hidden_size, output_size):
    """Create the weight matrices for a network with one hidden layer.

    NumPy's global RNG is re-seeded with 40 on every call, so repeated
    calls with the same sizes return identical weights. Each entry is
    drawn uniformly from [-1, 1).

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        output_size: Number of output units.

    Returns:
        Dict with 'w1' of shape (input_size, hidden_size) and 'w2' of
        shape (hidden_size, output_size). There are no bias terms.
    """
    np.random.seed(40)  # fixed seed -> reproducible starting weights

    # Draw w1 before w2 so the random stream is consumed in the same order.
    input_to_hidden = 2 * np.random.random((input_size, hidden_size)) - 1
    hidden_to_output = 2 * np.random.random((hidden_size, output_size)) - 1

    return {'w1': input_to_hidden, 'w2': hidden_to_output}
'''
np.random.random()       → [0, 1)
2 * random               → [0, 2)
2 * random - 1           → [-1, 1)

Centered Around Zero(Mean ≈ 0)
Positive and negative weights. This is important because:
Activations stay balanced
Gradients don’t explode immediately'''

def forward_1_layer(X, weights):
    """Run one forward pass through the single-hidden-layer network.

    Both layers apply the sigmoid activation and neither uses a bias term.

    Args:
        X: Input matrix whose columns match the rows of ``weights['w1']``.
        weights: Dict holding the 'w1' (input -> hidden) and
            'w2' (hidden -> output) matrices.

    Returns:
        Tuple ``(a1, output)``: hidden-layer activations and the network
        output.
    """
    # Hidden layer
    a1 = sigmoid(np.dot(X, weights['w1']))

    # Output layer: this also uses sigmoid (softmax would be the usual
    # alternative for multiclass classification, but is not applied here).
    output = sigmoid(np.dot(a1, weights['w2']))

    return a1, output


def train_1_layer(X, y, epochs=1000, lr=0.005, hidden_size=64):
    """Train the one-hidden-layer sigmoid network with full-batch updates.

    Every epoch runs a forward pass over all of ``X``, back-propagates the
    error ``y - output`` through both sigmoid layers, and applies one
    weight update scaled by ``lr``.

    Args:
        X: Input matrix of shape (n_samples, n_features).
        y: One-hot target matrix of shape (n_samples, n_classes).
        epochs: Number of full passes over the data.
        lr: Learning rate applied to both weight updates.
        hidden_size: Number of hidden units (default 64).

    Returns:
        Dict with the trained 'w1' and 'w2' weight matrices.
    """
    print("\n**** Training 1 Hidden Layer ANN ******")

    # Layer sizes follow the data instead of being hard-coded to 784 -> 10.
    weights = init_1_layer(X.shape[1], hidden_size, y.shape[1])
    last_epoch = epochs - 1

    for i in range(epochs):
        # 1. Forward
        a1, output = forward_1_layer(X, weights)

        # 2. Backward (calculate errors)
        # The error is (target - prediction), so the updates below are added.
        output_error = y - output
        output_delta = output_error * sigmoid_derivative(output)

        # Propagate the output delta back through w2 (before w2 is updated).
        a1_error = output_delta.dot(weights['w2'].T)
        a1_delta = a1_error * sigmoid_derivative(a1)

        # 3. Update weights
        weights['w2'] += lr * a1.T.dot(output_delta)
        weights['w1'] += lr * X.T.dot(a1_delta)

        # Monitor progress every 200 epochs and on the final epoch, whatever
        # `epochs` is. The accuracy comes from this epoch's forward pass,
        # i.e. before the update just applied.
        if i % 200 == 0 or i == last_epoch:
            acc = np.mean(np.argmax(output, axis=1) == np.argmax(y, axis=1))
            print(f"Epoch {i}: Accuracy = {acc * 100:.2f}%")

    return weights

# Train the one-hidden-layer network on the loaded subset for 1000 epochs
# and keep the returned weight dict.
final_weights_1 = train_1_layer(X, y_encoded, epochs=1000)


**** Training 1 Hidden Layer ANN ******
Epoch 0: Accuracy = 9.20%
Epoch 200: Accuracy = 62.40%
Epoch 400: Accuracy = 77.00%
Epoch 600: Accuracy = 94.80%
Epoch 800: Accuracy = 96.90%
Epoch 999: Accuracy = 98.00%
