Question 2 
Implement a feedforward neural network which takes images from the Fashion-MNIST data as input and outputs a probability distribution over the 10 classes. Your code should be flexible such that it is easy to change the number of hidden layers and the number of neurons in each hidden layer.

In [2]:
# Load Dataset
def load_dataset(dataset="fashion_mnist"):
    """Load one of the Keras image datasets as flattened arrays.

    Parameters:
    - dataset: "fashion_mnist" (default) or "mnist".

    Returns:
    - x_train, y_train, x_test, y_test, where each image array has been
      reshaped to one row per sample and divided by 255.0, and each label
      array has been flattened to one dimension.

    Raises:
    - ValueError: if `dataset` is not one of the two supported names.
    """
    from keras.datasets import mnist, fashion_mnist

    if dataset == "fashion_mnist":
        source = fashion_mnist
    elif dataset == "mnist":
        source = mnist
    else:
        raise ValueError("Invalid dataset choice.")

    (train_images, train_labels), (test_images, test_labels) = source.load_data()

    # One row per image; pixel values are rescaled by 255.
    train_images = train_images.reshape(len(train_images), -1) / 255.0
    test_images = test_images.reshape(len(test_images), -1) / 255.0

    # Labels come back as 1-D arrays so each entry is a scalar class id.
    return train_images, train_labels.flatten(), test_images, test_labels.flatten()

# load_dataset() already reshapes every image to a single row, divides the
# pixel values by 255.0 and flattens the labels. Scaling a second time here
# (the previous "/ 256") squeezed the inputs into roughly [0, 1/256], so the
# data is used exactly as returned.
x_train, y_train, x_test, y_test = load_dataset()

def get_user_defined_layers(num_hidden_layers=1, neurons_per_layer=None,
                            input_size=784, output_size=10):
    """Build the full list of layer sizes for the network.

    Parameters:
    - num_hidden_layers: Number of hidden layers.
    - neurons_per_layer: Either a single int (used for every hidden layer) or
      a sequence (list, tuple, ...) with one width per hidden layer.
      Defaults to a single hidden layer of 4 neurons.
    - input_size: Width of the input layer. Defaults to 784.
    - output_size: Width of the output layer. Defaults to 10.

    Returns:
    - A new list: [input_size, <hidden widths...>, output_size].

    Raises:
    - ValueError: if the number of hidden widths differs from
      num_hidden_layers.
    """
    # None stands in for the default so no mutable list is shared across calls.
    if neurons_per_layer is None:
        neurons_per_layer = [4]

    if isinstance(neurons_per_layer, int):
        # A single width is repeated for every hidden layer.
        neurons_per_layer = [neurons_per_layer] * num_hidden_layers
    else:
        # Copy into a list so tuples and other sequences can be concatenated
        # below and the caller's object is never aliased.
        neurons_per_layer = list(neurons_per_layer)

    if len(neurons_per_layer) != num_hidden_layers:
        raise ValueError(f"Number of hidden layers ({num_hidden_layers}) does not match the number of neuron lists provided ({len(neurons_per_layer)}).")

    return [input_size] + neurons_per_layer + [output_size]

# # Function to take user-defined network architecture
# def get_user_defined_layers(num_hidden_layers=1, neurons_per_layer=[4]):
#     input_size = 784
#     output_size = 10

#     # Check if the number of hidden layers matches the number of neuron lists provided
#     if len(neurons_per_layer) != num_hidden_layers:
#         raise ValueError(f"Number of hidden layers ({num_hidden_layers}) does not match the number of neuron lists provided ({len(neurons_per_layer)}).")

#     return [input_size] + neurons_per_layer + [output_size]

# Network architecture: three hidden layers of widths 4, 5 and 6.
layer_sizes = get_user_defined_layers(
    num_hidden_layers=3,
    neurons_per_layer=[4, 5, 6],
)

# Weight Initialization Methods
def random_init(input_size, output_size):
    """Return an (input_size, output_size) matrix of standard-normal draws scaled by 0.01."""
    scale = 0.01
    gaussian = np.random.randn(input_size, output_size)
    return gaussian * scale

def xavier_init(input_size, output_size):
    """Return an (input_size, output_size) matrix of standard-normal draws
    scaled by sqrt(2 / (input_size + output_size))."""
    fan_sum = input_size + output_size
    std = np.sqrt(2 / fan_sum)
    return np.random.randn(input_size, output_size) * std

def initialize_weights_and_biases(layer_sizes, init_method="random"):
    """
    Create the weight matrices and bias vectors for every layer transition.

    Parameters:
    - layer_sizes: List of integers, one per layer (input first, output last).
    - init_method: "random" (default) or "xavier"; selects which initializer
      produces the weight matrices.

    Returns:
    - weights: List with one (fan_in, fan_out) matrix per consecutive pair of
      layers.
    - biases: List with one zero-filled (1, fan_out) row vector per matrix.

    Raises:
    - ValueError: if init_method is not "random" or "xavier".
    """
    if init_method not in ["random", "xavier"]:
        raise ValueError(f"Invalid initialization method: {init_method}")

    # The method is fixed for the whole call, so pick the initializer once.
    make_weights = random_init if init_method == "random" else xavier_init

    weights = []
    biases = []

    # Walk over consecutive (fan_in, fan_out) pairs of layer widths.
    for fan_in, fan_out in zip(layer_sizes, layer_sizes[1:]):
        weights.append(make_weights(fan_in, fan_out))
        biases.append(np.zeros((1, fan_out)))

    return weights, biases

# Build the initial parameters using the default "random" initializer.
weights, biases = initialize_weights_and_biases(
    layer_sizes,
    init_method="random",
)

# Activation Functions
def sigmoid(Z):
    """Logistic sigmoid 1 / (1 + exp(-Z)), applied element-wise.

    Computed as exp(-log(1 + exp(-Z))) via np.logaddexp, so large negative
    inputs do not overflow inside exp(-Z) (the naive form emits overflow
    warnings there).

    Parameters:
    - Z: Scalar or array of pre-activations.

    Returns:
    - Values in [0, 1] with the same shape as Z.
    """
    # logaddexp(0, -Z) == log(exp(0) + exp(-Z)) == log(1 + exp(-Z)).
    return np.exp(-np.logaddexp(0, np.negative(Z)))

def identity(Z):
    """Pass-through activation: hand back the input object unchanged."""
    unchanged = Z
    return unchanged

def tanh(Z):
    """Hyperbolic-tangent activation, applied element-wise via np.tanh."""
    squashed = np.tanh(Z)
    return squashed

def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and Z."""
    floor = 0
    return np.maximum(floor, Z)

# Derivatives of Activation Functions
def d_identity(x):
    """Derivative of the identity activation: the scalar 1, regardless of x."""
    slope = 1
    return slope

def d_relu(x):
    """Derivative of ReLU: 1 where x is strictly positive, 0 elsewhere (int array)."""
    is_positive = x > 0
    return is_positive.astype(int)

def d_sigmoid(z):
    """Derivative of the sigmoid at z: sigmoid(z) * (1 - sigmoid(z))."""
    s = sigmoid(z)
    return s * (1 - s)

def d_tanh(x):
    """Derivative of tanh at x: 1 - tanh(x)**2."""
    t = np.tanh(x)
    return 1 - t * t

# Softmax Function
def softmax(Z):
    """Row-wise softmax of a 2-D array of scores.

    Each row's maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps exp() from overflowing to inf
    (and the ratio from becoming NaN) when the scores are large.

    Parameters:
    - Z: Array of scores, normalised along axis 1.

    Returns:
    - Array of the same shape whose rows are non-negative and sum to 1.
    """
    Z = np.asarray(Z)
    shifted = Z - np.max(Z, axis=1, keepdims=True)  # largest entry per row becomes 0
    expZ = np.exp(shifted)
    return expZ / np.sum(expZ, axis=1, keepdims=True)
# Dictionary to map function names to implementations
activation_functions = dict(
    identity=identity,
    sigmoid=sigmoid,
    tanh=tanh,
    relu=relu,
)
# Module-level alias bound to the very same dictionary object.
activation_func = activation_functions
# Forward Propagation with selectable activation function
def forward_propagation(X, weights, biases, activation="sigmoid"):
    """Run a forward pass through the network.

    Parameters:
    - X: Input batch; multiplied with weights[0] via np.dot.
    - weights: List of weight matrices, one per layer transition.
    - biases: List of bias vectors, aligned with `weights`.
    - activation: Key into `activation_functions` naming the activation used
      on every hidden layer. Defaults to "sigmoid".

    Returns:
    - h: List [X, hidden outputs..., softmax output].
    - a: List of the weighted sums of every layer, output layer included.

    Raises:
    - ValueError: if `activation` is not a key of `activation_functions`.
    """
    if activation not in activation_functions:
        raise ValueError(f"Invalid activation function: {activation}")
    hidden_activation = activation_functions[activation]

    layer_outputs = [X]     # outputs of every layer, starting with the input
    pre_activations = []    # weighted sums feeding each activation

    # Hidden layers: every transition except the last one.
    for idx, W in enumerate(weights[:-1]):
        z = np.dot(layer_outputs[-1], W) + biases[idx]
        pre_activations.append(z)
        layer_outputs.append(hidden_activation(z))

    # The output layer always goes through softmax.
    logits = np.dot(layer_outputs[-1], weights[-1]) + biases[-1]
    pre_activations.append(logits)
    layer_outputs.append(softmax(logits))

    return layer_outputs, pre_activations
# Forward pass over the training set with the default sigmoid activation.
# Note: `yhat` receives the full list of per-layer outputs, not just the last one.
yhat, a_values = forward_propagation(
    x_train, weights, biases, activation="sigmoid"
)

# Prediction
def predict(X, weights, biases, activation="sigmoid"):
    """Return the predicted class index for every row of X.

    Parameters:
    - X: Input batch passed to forward_propagation.
    - weights: List of weight matrices.
    - biases: List of bias vectors.
    - activation: Hidden-layer activation name forwarded to
      forward_propagation. Defaults to "sigmoid", which is what this function
      always used before the parameter existed.

    Returns:
    - 1-D array holding the argmax over axis 1 of the final (softmax) output.
    """
    # A single forward pass is enough; the last entry of the returned
    # activations list is the softmax output.
    h_values, _ = forward_propagation(X, weights, biases, activation=activation)
    return np.argmax(h_values[-1], axis=1)

# Accuracy Calculation
def compute_accuracy(y_true, y_pred):
    """Percentage of predictions that equal the true labels.

    Both inputs are converted to flat NumPy arrays first, so plain Python
    lists compare element-wise (a bare list == list yields a single bool) and
    a column vector of shape (N, 1) cannot broadcast against shape (N,) into
    an N x N comparison.

    Parameters:
    - y_true: True labels (array-like).
    - y_pred: Predicted labels (array-like) with the same number of elements.

    Returns:
    - Accuracy as a percentage in [0, 100].

    Raises:
    - ValueError: if the two inputs hold a different number of labels.
    """
    true_labels = np.asarray(y_true).ravel()
    predicted = np.asarray(y_pred).ravel()
    if true_labels.size != predicted.size:
        raise ValueError(
            f"y_true and y_pred must have the same number of labels "
            f"({true_labels.size} != {predicted.size})."
        )
    return np.mean(true_labels == predicted) * 100

# Score the untrained (randomly initialised) network on the test split.
y_pred = predict(X=x_test, weights=weights, biases=biases)
accuracy = compute_accuracy(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy (Random Weights): {accuracy:.2f}%")

# Second forward pass over the training set, this time with ReLU hidden layers.
h_values, a_values = forward_propagation(
    x_train, weights, biases, activation="relu"
)

Test Accuracy (Random Weights): 10.00%
