In [1]:
import numpy as np

In [2]:
#Weights initialization
def initialize_parameters(layer_dims):
    """Create small random weights and biases for every layer of the network.

    Args:
        layer_dims: Sequence of layer sizes, input layer first
            (e.g. ``[784, 128, 10]``).

    Returns:
        dict with, for every layer ``i = 1 .. len(layer_dims) - 1``:
            ``'W<i>'``: array of shape ``(layer_dims[i], layer_dims[i-1])``
            ``'b<i>'``: array of shape ``(layer_dims[i], 1)``
        All values are standard-normal samples scaled by 0.01. The dict is
        empty when ``layer_dims`` has fewer than two entries.
    """
    parameters = {}
    # Index 0 is the input layer and owns no parameters, so start at 1.
    for i in range(1, len(layer_dims)):
        # np.random.randn takes the dimensions as separate ints, not a tuple.
        parameters['W' + str(i)] = np.random.randn(layer_dims[i], layer_dims[i-1]) * 0.01
        parameters['b' + str(i)] = np.random.randn(layer_dims[i], 1) * 0.01

    return parameters

In [3]:
#Activations and their derivatives
def linear(W,x,b):
    """Affine map: the product ``np.dot(W, x)`` shifted by ``b``."""
    projection = np.dot(W, x)
    return projection + b
def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def relu(z):
    """Rectified linear unit: keep entries that are > 0, replace the rest by 0."""
    is_positive = z > 0
    return np.where(is_positive, z, 0)

def sigmoid_der(z):
    """Derivative of the logistic function at ``z``: ``s * (1 - s)`` with ``s = sigmoid(z)``."""
    s = sigmoid(z)
    return s * (1 - s)

def relu_der(z):
    """Derivative of ReLU: 1 where ``z > 0``, 0 everywhere else (including at 0)."""
    is_positive = z > 0
    return np.where(is_positive, 1, 0)

def softmax(z, axis=0):
    """Numerically stable softmax along ``axis``.

    The rest of this notebook stores one sample per column (pre-activations
    come from ``np.dot(W, A_prev)`` and predictions are read with
    ``argmax(axis=0)``), so the class scores of a sample run along axis 0.
    Normalising along axis 1 would make the probabilities sum to one across
    samples instead of across classes, hence the default of 0.

    Args:
        z: Array of scores.
        axis: Axis holding the class scores of a single sample. Pass ``1``
            for a (samples, classes) layout.

    Returns:
        Array with the same shape as ``z`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    # Subtracting the per-sample maximum leaves the result unchanged but keeps
    # np.exp from overflowing on large scores.
    shifted = z - np.max(z, axis=axis, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=axis, keepdims=True)

In [4]:
#Cost function and derivative
def cross_entropy_loss(y_true,y_pred):
    """Mean categorical cross-entropy over a batch.

    Args:
        y_true: One-hot labels of shape (n_classes, n_samples).
        y_pred: Predicted probabilities with the same shape as ``y_true``.

    Returns:
        Scalar loss: ``-sum(y_true * log(y_pred))`` divided by the number of
        samples.
    """
    # Samples are stored one per column, so the batch size is shape[1];
    # shape[0] is the number of classes.
    n_samples = y_true.shape[1]
    # Clip away exact 0 and 1 so the logarithm stays finite.
    y_pred = np.clip(y_pred,1e-12,1-1e-12)
    return -np.sum(y_true*np.log(y_pred))/n_samples

def cross_entropy_loss_derivative(y_true,y_pred):
    """Return the element-wise difference ``y_pred - y_true``.

    NOTE(review): this matches the gradient of softmax + cross-entropy with
    respect to the pre-softmax scores; confirm callers use it that way.
    """
    residual = y_pred - y_true
    return residual

In [None]:
#Forward propagation single iteration:
def _apply_activation(Z, activation):
    """Apply the activation called ``activation`` to the pre-activation ``Z``."""
    if activation == 'linear':
        return Z
    elif activation == 'sigmoid':
        return sigmoid(Z)
    elif activation == 'relu':
        return relu(Z)
    elif activation == 'softmax':
        return softmax(Z)
    # Fail loudly instead of silently returning None for a misspelt name.
    raise ValueError(f"Unknown activation: {activation!r}")


def forward(A_prev,W,b,activation):
    """Run a single layer: ``activation(np.dot(W, A_prev) + b)``.

    Args:
        A_prev: Input to the layer, one sample per column.
        W: Weight matrix of the layer.
        b: Bias column vector of the layer.
        activation: One of ``'linear'``, ``'sigmoid'``, ``'relu'``, ``'softmax'``.

    Returns:
        The activated output of the layer.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    Z = np.dot(W,A_prev) + b
    return _apply_activation(Z, activation)


#Forward propagation along all layers once
def forward_propagation(A,parameters,activations):
    """Run the input through every layer and cache what backprop needs.

    Args:
        A: Network input, one sample per column.
        parameters: Dict holding ``'W1'..'WL'`` and ``'b1'..'bL'``.
        activations: One activation name per layer; ``activations[l-1]`` is
            applied in layer ``l``.

    Returns:
        ``(y_pred, parameters)`` where ``y_pred`` is the output of the last
        layer (``A`` itself when ``activations`` is empty) and ``parameters``
        is the same dict, additionally holding the cache:
            ``'A0'``       the network input,
            ``'Z<l>'``     pre-activation of layer ``l``,
            ``'A<l>'``     activated output of layer ``l``.

    Raises:
        ValueError: If an activation name is not supported.
    """
    parameters['A0'] = A
    A_prev = A
    for l, activation in enumerate(activations, start=1):
        # Each layer consumes the previous layer's output, not the raw input.
        Z = np.dot(parameters['W'+str(l)], A_prev) + parameters['b'+str(l)]
        A_prev = _apply_activation(Z, activation)
        parameters['Z'+str(l)] = Z
        parameters['A'+str(l)] = A_prev

    y_pred = A_prev
    return y_pred,parameters



In [None]:
# Backward propagation single iteration
def backward(dA, Z, A_prev, W, activation):
    """Back-propagate through a single layer.

    Args:
        dA: Gradient of the loss with respect to this layer's output. For a
            ``'softmax'`` layer this must already be the gradient with respect
            to the pre-activation (``y_pred - y_true`` for cross-entropy).
        Z: Pre-activation of this layer.
        A_prev: Input that was fed to this layer, one sample per column.
        W: Weight matrix of this layer.
        activation: ``'relu'``, ``'sigmoid'``, ``'linear'`` or ``'softmax'``.

    Returns:
        ``(dA_prev, dW, db)``: gradients with respect to the layer input, the
        weights and the biases. ``dW`` and ``db`` are averaged over the batch.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    m = A_prev.shape[1]

    if activation == 'relu':
        dZ = dA * relu_der(Z)
    elif activation == 'sigmoid':
        dZ = dA * sigmoid_der(Z)
    elif activation in ('linear', 'softmax'):
        # linear: identity derivative. softmax: the incoming gradient is the
        # combined softmax + cross-entropy gradient, i.e. already dZ.
        dZ = dA
    else:
        raise ValueError(f"Unknown activation: {activation!r}")

    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db

# Backward propagation along all layers
def backward_propagation(y_true, y_pred, parameters, activations):
    """Compute the gradients of every layer's weights and biases.

    Args:
        y_true: One-hot labels, one sample per column.
        y_pred: Network output for the same samples.
        parameters: Dict holding the weights ``'W1'..'WL'`` together with the
            forward-pass cache: ``'A0'`` (network input), ``'Z<l>'``
            (pre-activation of layer ``l``) and ``'A<l>'`` (output of layer
            ``l``).
        activations: One activation name per layer; ``activations[l-1]``
            belongs to layer ``l``.

    Returns:
        dict mapping ``'dW<l>'`` and ``'db<l>'`` to the gradients of layer
        ``l`` for ``l = 1 .. len(activations)``.
    """
    grads = {}
    L = len(activations)  # number of layers

    # Derivative of the loss with respect to the output (softmax layer)
    dA = cross_entropy_loss_derivative(y_true, y_pred)

    # Walk from the output layer L down to layer 1.
    for l in range(L, 0, -1):
        dA, dW, db = backward(dA, parameters['Z'+str(l)], parameters['A'+str(l-1)], parameters['W'+str(l)], activations[l-1])
        grads['dW'+str(l)] = dW
        grads['db'+str(l)] = db

    return grads

#Update parameters using gradient descent
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every weight matrix and bias.

    Args:
        parameters: Dict holding ``'W<l>'`` / ``'b<l>'`` arrays. Other entries
            (for example activations cached by the forward pass) are ignored.
        grads: Dict holding ``'dW<l>'`` / ``'db<l>'`` for every layer.
        learning_rate: Step size.

    Returns:
        The same ``parameters`` dict; the weight and bias arrays are updated
        in place.
    """
    # Count the weight matrices explicitly: len(parameters) // 2 is wrong as
    # soon as the dict also carries cached activations.
    L = sum(1 for key in parameters if key[:1] == 'W' and key[1:].isdigit())

    for l in range(1, L + 1):
        parameters['W'+str(l)] -= learning_rate * grads['dW'+str(l)]
        parameters['b'+str(l)] -= learning_rate * grads['db'+str(l)]

    return parameters



In [None]:
def train(X, y, layer_dims, activations, epochs, learning_rate):
    """Fit the network with full-batch gradient descent.

    Each epoch runs a forward pass, evaluates the cross-entropy loss, runs a
    backward pass and applies one parameter update. The loss is printed on
    every 100th epoch.

    Args:
        X: Training inputs, passed to ``forward_propagation``.
        y: Training targets, passed to the loss and to ``backward_propagation``.
        layer_dims: Layer sizes handed to ``initialize_parameters``.
        activations: Activation names handed to the forward and backward passes.
        epochs: Number of passes over the data.
        learning_rate: Step size handed to ``update_parameters``.

    Returns:
        The parameter dict after the final update.
    """
    parameters = initialize_parameters(layer_dims)

    for epoch in range(epochs):
        # Forward pass, then the loss on its output.
        predictions, parameters = forward_propagation(X, parameters, activations)
        loss = cross_entropy_loss(y, predictions)

        # Backward pass followed by one gradient-descent step.
        gradients = backward_propagation(y, predictions, parameters, activations)
        parameters = update_parameters(parameters, gradients, learning_rate)

        # Report progress once every 100 epochs.
        report_due = (epoch + 1) % 100 == 0
        if report_due:
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss}")

    return parameters

In [1]:
from tensorflow.keras.datasets import mnist
# Load and preprocess the MNIST dataset
def load_mnist_data():
    """Load MNIST via ``mnist.load_data()`` and put it in column-per-sample layout.

    Returns:
        ``(X_train, y_train_one_hot, X_test, y_test_one_hot)`` where the image
        arrays are reshaped to 784 rows, transposed and divided by 255.0, and
        the label arrays are one-hot encoded with 10 rows (one column per
        sample).
    """
    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    def _flatten_and_scale(images):
        # (n, 28, 28) -> (784, n), with pixel values divided by 255.0.
        return images.reshape(-1, 784).T / 255.0

    def _one_hot(labels):
        # Row `k` of the identity matrix is the one-hot vector for class `k`;
        # transpose so that each sample becomes a column.
        return np.eye(10)[labels].T

    X_train = _flatten_and_scale(train_images)
    X_test = _flatten_and_scale(test_images)
    y_train_one_hot = _one_hot(train_labels)
    y_test_one_hot = _one_hot(test_labels)

    return X_train, y_train_one_hot, X_test, y_test_one_hot

def predict(X, parameters, activations=None):
    """Return the predicted class index for every column of ``X``.

    Args:
        X: Inputs, one sample per column.
        parameters: Trained parameter dict.
        activations: Activation names, one per layer, as used during training.
            The forward pass cannot run without them, so this argument is
            effectively required; it defaults to ``None`` only to keep the
            two-argument signature importable.

    Returns:
        1-D array holding, for each sample, the row index of the largest
        network output.

    Raises:
        ValueError: If ``activations`` is not supplied.
    """
    if activations is None:
        raise ValueError(
            "predict() needs the list of layer activations used for training"
        )
    y_pred, _ = forward_propagation(X, parameters, activations)
    predictions = np.argmax(y_pred, axis=0)
    return predictions

ModuleNotFoundError: No module named 'tensorflow'