In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import fashion_mnist

In [2]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
assert x_train.shape == (60000,28,28)
assert x_test.shape == (10000,28,28)
assert y_train.shape == (60000,)
assert y_test.shape == (10000,)

In [4]:
x_train = x_train.reshape(x_train.shape[0],-1)/255.0
x_test = x_test.reshape(x_test.shape[0], -1)/255.0

In [5]:
def one_hot_encode( y,num_classes):
    return np.eye(num_classes)[y]

In [6]:
y_train_encoded = one_hot_encode(y_train, 10)
y_test_encoded = one_hot_encode(y_test, 10)

In [10]:
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.05, l1_lambda=0.0, l2_lambda=0.0):
        self.W1 = np.random.randn(input_size, hidden_size) * 0.05
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.05
        self.b2 = np.zeros((1, output_size))
        self.learning_rate = learning_rate
        self.l1_lambda = l1_lambda
        self.l2_lambda = l2_lambda

    def softmax(self, x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def forward(self, X):
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = np.tanh(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.softmax(self.z2)
        return self.a2

    def backward(self, X, y, output):
        m = X.shape[0]
        output_error = output - y
        hidden_error = np.dot(output_error, self.W2.T) * (1 - np.tanh(self.z1) ** 2)

        dW2 = np.dot(self.a1.T, output_error) / m
        db2 = np.sum(output_error, axis=0, keepdims=True) / m
        dW1 = np.dot(X.T, hidden_error) / m
        db1 = np.sum(hidden_error, axis=0, keepdims=True) / m

        # L1 regularization
        dW1 += self.l1_lambda * np.sign(self.W1)
        dW2 += self.l1_lambda * np.sign(self.W2)

        # L2 regularization
        dW1 += self.l2_lambda * self.W1
        dW2 += self.l2_lambda * self.W2

        self.W1 -= self.learning_rate * dW1
        self.b1 -= self.learning_rate * db1
        self.W2 -= self.learning_rate * dW2
        self.b2 -= self.learning_rate * db2

    def train(self, X, y, epochs):
        for epoch in range(epochs):
            output = self.forward(X)
            self.backward(X, y, output)
            if epoch % 100 == 0:
                loss = -np.mean(np.sum(y * np.log(output + 1e-10), axis=1))
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        output = self.forward(X)
        return np.argmax(output, axis=1)

In [11]:
# Hyperparameters
input_size = 784
hidden_size = 180
output_size = 10
learning_rate = 0.05
epochs = 800

In [12]:
# Train with L1 regularization
nn_l1 = NeuralNetwork(input_size, hidden_size, output_size, learning_rate, l1_lambda=0.001)
nn_l1.train(x_train, y_train_encoded, epochs)
predictions_l1 = nn_l1.predict(x_test)
accuracy_l1 = np.mean(predictions_l1 == y_test)
print(f"Test Accuracy (L1 Regularization): {accuracy_l1:.4f}")

Epoch 0, Loss: 2.4259
Epoch 100, Loss: 0.8888
Epoch 200, Loss: 0.7339
Epoch 300, Loss: 0.6689
Epoch 400, Loss: 0.6328
Epoch 500, Loss: 0.6105
Epoch 600, Loss: 0.5962
Epoch 700, Loss: 0.5871
Test Accuracy (L1 Regularization): 0.8055


In [13]:

# Train with L2 regularization
nn_l2 = NeuralNetwork(input_size, hidden_size, output_size, learning_rate, l2_lambda=0.001)
nn_l2.train(x_train, y_train_encoded, epochs)
predictions_l2 = nn_l2.predict(x_test)
accuracy_l2 = np.mean(predictions_l2 == y_test)
print(f"Test Accuracy (L2 Regularization): {accuracy_l2:.4f}")

Epoch 0, Loss: 2.3296
Epoch 100, Loss: 0.8414
Epoch 200, Loss: 0.6850
Epoch 300, Loss: 0.6130
Epoch 400, Loss: 0.5693
Epoch 500, Loss: 0.5398
Epoch 600, Loss: 0.5185
Epoch 700, Loss: 0.5021
Test Accuracy (L2 Regularization): 0.8215
