In [2]:
import numpy as np
#[Problem 1] Implementing the fully connected layer as a class

class FC:
    """Fully connected (affine) layer computing ``A = X @ W + B``.

    Parameters
    ----------
    n_nodes1 : int
        Number of nodes feeding into the layer (rows of ``W``).
    n_nodes2 : int
        Number of nodes produced by the layer (columns of ``W``).
    initializer : object
        Must provide ``W(n_nodes1, n_nodes2)`` and ``B(n_nodes2)``; their
        return values become the initial weights and biases.
    optimizer : object
        Must provide ``update(layer)``; it is called once per backward pass
        and is expected to read ``layer.dW`` / ``layer.dB``.
    """

    def __init__(self, n_nodes1, n_nodes2, initializer, optimizer):
        self.n_nodes1 = n_nodes1
        self.n_nodes2 = n_nodes2
        self.initializer = initializer
        self.optimizer = optimizer

        self.W = self.initializer.W(n_nodes1, n_nodes2)
        self.B = self.initializer.B(n_nodes2)

    def forward(self, X):
        """Apply the affine map and cache ``X`` for the backward pass.

        Parameters
        ----------
        X : array-like
            Input whose last dimension matches the first dimension of ``W``.

        Returns
        -------
        The pre-activation output ``np.dot(X, W) + B``.
        """
        self.X = X
        return np.dot(X, self.W) + self.B

    def backward(self, dA):
        """Compute parameter gradients, update the parameters, and return
        the gradient with respect to the layer input.

        ``dA`` is accumulated over the batch with a plain sum. Any
        1/batch_size factor must already be contained in ``dA``: the
        ``SoftmaxCrossEntropy.backward`` in this file applies it, so dividing
        again here would scale every gradient by 1/batch_size twice.

        Parameters
        ----------
        dA : ndarray
            Gradient of the loss with respect to this layer's output.

        Returns
        -------
        ndarray
            Gradient of the loss with respect to the input passed to the
            most recent ``forward`` call.
        """
        self.dW = np.dot(self.X.T, dA)
        self.dB = np.sum(dA, axis=0)
        # Propagate through the weights *before* the optimizer changes them.
        dZ = np.dot(dA, self.W.T)
        self.optimizer.update(self)
        return dZ


In [3]:
#[Problem 2] Implementing the initialization method as a class
class SimpleInitializer:
    """Gaussian initializer with a caller-chosen standard deviation.

    Parameters
    ----------
    sigma : float
        Factor applied to standard-normal draws when creating weights.
    """

    def __init__(self, sigma):
        self.sigma = sigma

    def W(self, n_nodes1, n_nodes2):
        """Return an ``(n_nodes1, n_nodes2)`` array of standard-normal draws
        multiplied by ``sigma``."""
        return self.sigma * np.random.randn(n_nodes1, n_nodes2)

    def B(self, n_nodes2):
        """Return a zero-filled bias array of length ``n_nodes2``."""
        return np.zeros(n_nodes2)


In [4]:
#[Problem 3] Implementing the optimization method as a class
class SGD:
    """Plain gradient-descent update rule.

    Parameters
    ----------
    lr : float
        Learning rate multiplied onto each gradient before subtraction.
    """

    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, layer):
        """Subtract ``lr * gradient`` from the layer's ``W`` and ``B``.

        Reads ``layer.dW`` / ``layer.dB`` and writes ``layer.W`` / ``layer.B``
        using augmented assignment.

        Returns
        -------
        The same ``layer`` object that was passed in.
        """
        for name in ("W", "B"):
            value = getattr(layer, name)
            value -= self.lr * getattr(layer, "d" + name)
            setattr(layer, name, value)
        return layer


In [5]:
#[Problem 4] Implementing the activation functions as classes

class SoftmaxCrossEntropy:
    """Row-wise softmax output with the matching gradient ``(A - Y) / N``."""

    def forward(self, Z):
        """Return row-wise softmax of ``Z`` and cache it on ``self.A``.

        The per-row maximum is subtracted before exponentiating so that the
        largest exponent is zero.
        """
        row_peak = np.max(Z, axis=1, keepdims=True)
        unnormalised = np.exp(Z - row_peak)
        row_total = np.sum(unnormalised, axis=1, keepdims=True)
        self.A = unnormalised / row_total
        return self.A

    def backward(self, Z, Y):
        """Return ``(self.A - Y) / Y.shape[0]``.

        ``Z`` is accepted but not read; the probabilities cached by the last
        ``forward`` call are used instead, so ``forward`` must run first.
        """
        n_rows = Y.shape[0]
        residual = self.A - Y
        return residual / n_rows


In [6]:
#[Problem 5] ReLU class creation
class ReLU:
    """Rectified linear unit activation."""

    def forward(self, X):
        """Return ``max(0, X)`` element-wise and remember where ``X > 0``."""
        is_positive = X > 0
        self.mask = is_positive
        rectified = np.maximum(0, X)
        return rectified

    def backward(self, dZ):
        """Pass ``dZ`` through only where the last forward input was positive."""
        gated = dZ * self.mask
        return gated



In [7]:
#[Problem 6] Initial value of weight
class XavierInitializer:
    """Gaussian initializer whose scale is ``1 / sqrt(n_nodes1)``."""

    def W(self, n_nodes1, n_nodes2):
        """Return an ``(n_nodes1, n_nodes2)`` array of standard-normal draws
        scaled by ``1 / sqrt(n_nodes1)``."""
        scale = 1.0 / np.sqrt(n_nodes1)
        weights = scale * np.random.randn(n_nodes1, n_nodes2)
        return weights

    def B(self, n_nodes2):
        """Return a zero-filled bias array of length ``n_nodes2``."""
        bias = np.zeros(n_nodes2)
        return bias

class HeInitializer:
    """Gaussian initializer whose scale is ``sqrt(2 / n_nodes1)``."""

    def W(self, n_nodes1, n_nodes2):
        """Return an ``(n_nodes1, n_nodes2)`` array of standard-normal draws
        scaled by ``sqrt(2 / n_nodes1)``."""
        scale = np.sqrt(2.0 / n_nodes1)
        weights = scale * np.random.randn(n_nodes1, n_nodes2)
        return weights

    def B(self, n_nodes2):
        """Return a zero-filled bias array of length ``n_nodes2``."""
        bias = np.zeros(n_nodes2)
        return bias



In [18]:
#[Problem 7] Optimization method
class AdaGrad:
    """AdaGrad update rule with per-parameter accumulated squared gradients.

    The accumulators are stored on the layer itself (``layer.h_W`` and
    ``layer.h_B``), so a single optimizer instance can be shared by several
    layers without mixing their histories.

    Parameters
    ----------
    lr : float
        Base learning rate.
    epsilon : float
        Added to the square root of the accumulator in the denominator.
    """

    def __init__(self, lr=0.01, epsilon=1e-8):
        self.lr = lr
        self.epsilon = epsilon

    def update(self, layer):
        """Accumulate squared gradients and apply the scaled step in place.

        Reads ``layer.dW`` / ``layer.dB``; creates ``layer.h_W`` / ``layer.h_B``
        on first use; writes ``layer.W`` / ``layer.B``.

        Returns
        -------
        The same ``layer`` object that was passed in.
        """
        first_call = not hasattr(layer, "h_W")
        if first_call:
            layer.h_W = np.zeros_like(layer.W)
            layer.h_B = np.zeros_like(layer.B)

        # Weights: grow the history, then step with the per-element scale.
        layer.h_W += layer.dW ** 2
        weight_scale = np.sqrt(layer.h_W) + self.epsilon
        layer.W -= self.lr * layer.dW / weight_scale

        # Biases: same rule.
        layer.h_B += layer.dB ** 2
        bias_scale = np.sqrt(layer.h_B) + self.epsilon
        layer.B -= self.lr * layer.dB / bias_scale

        return layer

In [19]:
#[Problem 8] Class completion
class ScratchDeepNeuralNetworkClassifier:
    """Stack of ``FC`` layers, each followed by one activation object.

    Parameters
    ----------
    n_features : int
        Size of the input fed to the first layer.
    n_hidden_nodes_list : sequence of int
        Output size of each hidden layer, in order.
    n_output : int
        Output size of the final layer.
    initializer : object
        Passed to every ``FC`` layer.
    optimizer : object
        Passed to every ``FC`` layer.
    activations : sequence
        One activation per layer. Every entry needs ``forward(Z)``; all but
        the last need ``backward(dZ)``; the last needs ``backward(Z, Y)``.

    Raises
    ------
    ValueError
        If the number of activations differs from the number of layers
        (``len(n_hidden_nodes_list) + 1``).
    """

    def __init__(self, n_features, n_hidden_nodes_list, n_output,
                 initializer, optimizer, activations):
        # list(...) lets callers pass a tuple or other sequence as well.
        layer_sizes = [n_features] + list(n_hidden_nodes_list) + [n_output]
        n_layers = len(layer_sizes) - 1

        # forward() pairs layers and activations with zip(), which would
        # silently drop the surplus of the longer list; fail early instead.
        if len(activations) != n_layers:
            raise ValueError(
                f"expected {n_layers} activations (one per layer), "
                f"got {len(activations)}"
            )

        self.activations = activations
        self.layers = [
            FC(n_in, n_out, initializer, optimizer)
            for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])
        ]

    def forward(self, X):
        """Run ``X`` through every layer/activation pair and return the
        output of the last activation."""
        A = X
        for layer, activation in zip(self.layers, self.activations):
            Z = layer.forward(A)
            A = activation.forward(Z)
        return A

    def backward(self, Z_last, Y):
        """Back-propagate from the last activation down to the first layer.

        Each ``FC.backward`` call also triggers that layer's optimizer
        update, so this method changes the model parameters.

        Parameters
        ----------
        Z_last
            Forwarded as the first argument of the last activation's
            ``backward``; ``fit`` passes the result of ``forward`` here.
        Y
            Targets, forwarded as the second argument of the last
            activation's ``backward``.
        """
        dA = self.activations[-1].backward(Z_last, Y)
        for i in reversed(range(len(self.layers))):
            dZ = self.layers[i].backward(dA)
            # Layer i's input came from activation i-1; layer 0 has none.
            if i != 0:
                dA = self.activations[i - 1].backward(dZ)

    def fit(self, X, Y, epochs=100):
        """Train for ``epochs`` iterations, each one a forward and backward
        pass over all of ``X`` at once (no mini-batching)."""
        for _ in range(epochs):
            output = self.forward(X)
            self.backward(output, Y)

    def predict(self, X):
        """Return, for each row of ``X``, the index of the largest network
        output."""
        output = self.forward(X)
        return np.argmax(output, axis=1)



In [22]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Load MNIST and scale pixel values to [0, 1].
mnist = fetch_openml("mnist_784", version=1)
X = mnist.data.astype(np.float32) / 255.0
y = mnist.target.astype(np.int64)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One-hot encode inline: the `one_hot_encode` helper is only defined in a
# later cell, so calling it here raises NameError on a fresh kernel.
y_train_onehot = np.eye(10)[y_train]

# The initializers draw from NumPy's global RNG; seed it so a
# Restart & Run All reproduces the same weights.
np.random.seed(42)

optimizer = AdaGrad(lr=0.01)
initializer = XavierInitializer()
activations = [ReLU(), ReLU(), SoftmaxCrossEntropy()]

model = ScratchDeepNeuralNetworkClassifier(
    n_features=784,
    n_hidden_nodes_list=[128, 64],
    n_output=10,
    initializer=initializer,
    optimizer=optimizer,
    activations=activations
)

model.fit(X_train, y_train_onehot, epochs=10)

y_pred = model.predict(X_test)
accuracy = np.mean(y_pred == y_test)
print(f"Test Accuracy: {accuracy:.4f}")


Test Accuracy: 0.7391


In [23]:
#[Problem 9] Learning and estimation
def one_hot_encode(Y, n_classes):
    """Map integer labels to one-hot rows.

    Each label in ``Y`` selects the matching row of an
    ``n_classes x n_classes`` identity matrix.
    """
    identity = np.eye(n_classes)
    return identity[Y]


# Network shape: 784 inputs -> 128 -> 64 -> 10 outputs.
n_features = 784
n_hidden = [128, 64]
n_output = 10

# Xavier-initialised weights, AdaGrad updates, ReLU on the hidden layers and
# softmax on the output layer.
initializer = XavierInitializer()
optimizer = AdaGrad(lr=0.01)
activations = [ReLU(), ReLU(), SoftmaxCrossEntropy()]

model = ScratchDeepNeuralNetworkClassifier(
    n_features=n_features,
    n_hidden_nodes_list=n_hidden,
    n_output=n_output,
    initializer=initializer,
    optimizer=optimizer,
    activations=activations,
)

# Train on one-hot targets for 20 full passes over the training split.
Y_train_onehot = one_hot_encode(Y=y_train, n_classes=n_output)
model.fit(X=X_train, Y=Y_train_onehot, epochs=20)

# Fraction of held-out samples whose predicted class equals the label.
Y_pred = model.predict(X=X_test)
accuracy = np.mean(Y_pred == y_test)
print(f"Accuracy: {accuracy:.4f}")


Accuracy: 0.8499
