In [1]:
import copy

import matplotlib.pyplot as plt
import numpy as np

In [2]:
def initialize_parameters(layer_dims, init_method="xavier"):
    """
    Builds the weight matrices and bias vectors for every layer of the network.

    NumPy's global seed is reset to 1 on every call, so repeated calls with the
    same arguments return identical parameters.

    Arguments:
    layer_dims -- list with the size of each layer, input layer first
    init_method -- scaling applied to the standard-normal weight draws:
                   "random" (times 0.01), "xavier" (divided by sqrt(fan_in))
                   or "he" (times sqrt(2 / fan_in))

    Returns:
    parameters -- dictionary with "W_l" of shape (layer_dims[l], layer_dims[l-1])
                  and "b_l" (all zeros) of shape (layer_dims[l], 1)
                  for l = 1 .. len(layer_dims) - 1

    Raises:
    ValueError -- if init_method is not a supported name (checked while building
                  the first layer, so an empty network is returned unchecked)
    """
    # One scaling rule per scheme; each gets the raw N(0, 1) draw and the fan-in.
    scale_weights = {
        "random": lambda raw, fan_in: raw * 0.01,
        "xavier": lambda raw, fan_in: raw / np.sqrt(fan_in),
        "he": lambda raw, fan_in: raw * np.sqrt(2.0 / fan_in),
    }

    np.random.seed(1)
    parameters = {}
    for layer in range(1, len(layer_dims)):
        fan_out, fan_in = layer_dims[layer], layer_dims[layer - 1]
        # Validate before drawing so a bad name consumes no random numbers.
        if init_method not in scale_weights:
            raise ValueError(f"Invalid initialization method: {init_method}")
        raw = np.random.randn(fan_out, fan_in)
        parameters[f"W_{layer}"] = scale_weights[init_method](raw, fan_in)
        parameters[f"b_{layer}"] = np.zeros((fan_out, 1))
    return parameters

In [3]:
def sigmoid(Z):
    """
    Computes the sigmoid of Z element-wise in a numerically stable way.

    The textbook form 1 / (1 + exp(-Z)) overflows inside exp() for large
    negative inputs and emits a RuntimeWarning. Here exp() is only evaluated
    on non-positive values, so it cannot overflow.

    Arguments:
    Z -- input scalar or array

    Returns:
    A -- output of the sigmoid function, same shape as Z
    """
    Z = np.asarray(Z)
    # exp(-|Z|) lies in (0, 1] whatever the sign or magnitude of Z.
    decay = np.exp(-np.abs(Z))
    # Z >= 0: 1 / (1 + exp(-Z));  Z < 0: exp(Z) / (1 + exp(Z)) -- the same function.
    A = np.where(Z >= 0, 1.0, decay) / (1.0 + decay)
    # Indexing with () leaves arrays untouched but unwraps a 0-d result, so a
    # scalar input still yields a NumPy scalar.
    return A[()]


def sigmoid_derivative(Z):
    """
    Evaluates the slope of the sigmoid at Z, i.e. sigmoid(Z) * (1 - sigmoid(Z)).

    Arguments:
    Z -- input array

    Returns:
    dZ -- element-wise derivative of the sigmoid, same shape as Z
    """
    activation = sigmoid(Z)
    complement = 1 - activation
    return activation * complement


def sigmoid_backward(dA, Z):
    """
    Chains an upstream gradient through a sigmoid activation.

    Arguments:
    dA -- gradient of the cost with respect to the activated output A
    Z -- pre-activation input that was fed to the sigmoid

    Returns:
    dZ -- gradient of the cost with respect to Z (dA times the sigmoid slope at Z)
    """
    local_slope = sigmoid_derivative(Z)
    return dA * local_slope


def relu(Z):
    """
    Applies the Rectified Linear Unit element-wise: negative entries become 0,
    everything else passes through unchanged.

    Arguments:
    Z -- input array

    Returns:
    A -- output of the ReLU function, same shape as Z
    """
    floor = 0
    return np.maximum(floor, Z)


def relu_derivative(z):
    """
    Evaluates the slope of the ReLU function: 0 where z <= 0, 1 elsewhere.

    Arguments:
    z -- input array

    Returns:
    dZ -- integer array of zeros and ones with the same shape as z
    """
    inactive = z <= 0
    return np.where(inactive, 0, 1)


def relu_backward(dA, Z):
    """
    Chains an upstream gradient through a ReLU activation.

    Arguments:
    dA -- gradient of the cost with respect to the activated output A
    Z -- pre-activation input that was fed to the ReLU

    Returns:
    dZ -- gradient of the cost with respect to Z (dA where Z > 0, zero elsewhere)
    """
    gate = relu_derivative(Z)
    return dA * gate


def leaky_relu(Z, alpha=0.01):
    """
    Applies the Leaky ReLU element-wise: positive entries pass through,
    all other entries are scaled by alpha.

    Arguments:
    Z -- input array
    alpha -- slope used for the non-positive part (default is 0.01)

    Returns:
    A -- output of the Leaky ReLU function, same shape as Z
    """
    leaked = alpha * Z
    return np.where(Z > 0, Z, leaked)


def leaky_relu_derivative(Z, alpha=0.01):
    """
    Evaluates the slope of the Leaky ReLU: 1 where Z > 0, alpha elsewhere.

    Arguments:
    Z -- input array
    alpha -- slope used for the non-positive part (default is 0.01)

    Returns:
    dZ -- element-wise slope of the Leaky ReLU, same shape as Z
    """
    is_positive = Z > 0
    return np.where(is_positive, 1, alpha)


def leaky_relu_backward(dA, Z, alpha=0.01):
    """
    Chains an upstream gradient through a Leaky ReLU activation.

    Arguments:
    dA -- gradient of the cost with respect to the activated output A
    Z -- pre-activation input that was fed to the Leaky ReLU
    alpha -- slope used for the non-positive part (default is 0.01)

    Returns:
    dZ -- gradient of the cost with respect to Z (dA where Z > 0, alpha * dA elsewhere)
    """
    damped = alpha * dA
    return np.where(Z > 0, dA, damped)


def tanh(Z):
    """
    Applies the hyperbolic tangent element-wise (thin wrapper around np.tanh).

    Arguments:
    Z -- input array

    Returns:
    A -- output of the tanh function, same shape as Z
    """
    A = np.tanh(Z)
    return A


def tanh_derivative(Z):
    """
    Evaluates the slope of tanh at Z, i.e. 1 - tanh(Z)^2.

    Arguments:
    Z -- input array

    Returns:
    dZ -- element-wise derivative of tanh, same shape as Z
    """
    return 1 - np.square(tanh(Z))


def tanh_backward(dA, Z):
    """
    Chains an upstream gradient through a tanh activation.

    Arguments:
    dA -- gradient of the cost with respect to the activated output A
    Z -- pre-activation input that was fed to tanh

    Returns:
    dZ -- gradient of the cost with respect to Z (dA times 1 - tanh(Z)^2)
    """
    local_slope = tanh_derivative(Z)
    return dA * local_slope

In [4]:
def linear_forward(A_prev, W, b, activation):
    """
    Runs one layer forward: the affine step Z = W . A_prev + b followed by the
    requested activation function.

    Arguments:
    A_prev -- activations from previous layer (or input data), shape (size of previous layer, number of examples)
    W -- weights matrix, shape (size of current layer, size of previous layer)
    b -- bias vector, shape (size of current layer, 1)
    activation -- name of the activation applied to Z:
                    "sigmoid", "tanh", "relu", or "leaky_relu"

    Returns:
    A -- post-activation value of the layer
    cache -- tuple (A_prev, Z, W, b) kept for the backward pass

    Raises:
    ValueError -- if activation is not one of the supported names
    """
    Z = np.dot(W, A_prev) + b

    if activation not in ("sigmoid", "tanh", "relu", "leaky_relu"):
        raise ValueError(f"Invalid activation: {activation}")
    activate = {
        "sigmoid": sigmoid,
        "tanh": tanh,
        "relu": relu,
        "leaky_relu": leaky_relu,
    }[activation]

    return activate(Z), (A_prev, Z, W, b)

In [5]:
def forward_prop(
    X, parameters, last_activation="sigmoid", hidden_activation="relu", keep_probs=None
):
    """
    Runs the full forward pass: every hidden layer with hidden_activation
    (optionally followed by inverted dropout), then the output layer with
    last_activation.

    Arguments:
    X -- input data, shape (input size, number of examples)
    parameters -- dictionary with "W_l" / "b_l" for l = 1 .. L (output of initialization function)
    last_activation -- name of the activation used in the output layer (default is "sigmoid")
    hidden_activation -- name of the activation used in the hidden layers (default is "relu")
    keep_probs -- optional list of keep probabilities, keep_probs[l - 1] for hidden layer l;
                  None disables dropout

    Returns:
    A_L -- post-activation value of the output layer
    caches -- list with one linear_forward() cache per layer (index 0 .. L-1).
              The dropout masks themselves are not stored; with dropout on, the
              A_prev cached by the following layer is the masked, rescaled activation.
    """
    # NOTE(review): the seed is reset on every call, so with dropout enabled each
    # call draws the same masks, and the caller's global NumPy RNG state is
    # overwritten as a side effect -- confirm this is intended.
    np.random.seed(1)
    n_layers = len(parameters) // 2  # two entries (W, b) per layer
    caches = []
    activations = X

    # Hidden layers 1 .. L-1.
    for layer in range(1, n_layers):
        activations, layer_cache = linear_forward(
            activations,
            parameters[f"W_{layer}"],
            parameters[f"b_{layer}"],
            activation=hidden_activation,
        )
        if keep_probs is not None:
            keep_prob = keep_probs[layer - 1]
            # Inverted dropout: zero out units, then rescale the survivors.
            mask = np.random.rand(activations.shape[0], activations.shape[1]) < keep_prob
            activations = activations * mask
            activations /= keep_prob
        caches.append(layer_cache)

    # Output layer L (never dropped out).
    A_L, output_cache = linear_forward(
        activations,
        parameters[f"W_{n_layers}"],
        parameters[f"b_{n_layers}"],
        activation=last_activation,
    )
    caches.append(output_cache)
    return A_L, caches

In [6]:
def get_cost(A_L, Y, regularization=None, lambd1=0, lambd2=0, parameters=None):
    """
    Computes the cross-entropy cost for binary classification with optional regularization.

    Arguments:
    A_L -- probability vector corresponding to the label predictions, shape (1, number of examples)
    Y -- true "label" vector (1 for cat, 0 for non-cat), shape (1, number of examples)
    regularization -- type of regularization: None, "L1", "L2", "L1_L2"
    lambd1 -- L1 regularization hyperparameter
    lambd2 -- L2 regularization hyperparameter
    parameters -- dictionary holding the weights "W_1" .. "W_L" (and biases);
                  required whenever regularization is not None, ignored otherwise

    Returns:
    cost -- cross-entropy cost, plus (lambd1 / m) * sum|W| for L1 and/or
            (lambd2 / (2 * m)) * sum(W^2) for L2, summed over all weight matrices

    Raises:
    ValueError -- if regularization is not a supported value, or if it is
                  requested without passing parameters
    """
    if regularization not in (None, "L1", "L2", "L1_L2"):
        raise ValueError(f"Invalid regularization: {regularization}")

    epsilon = 1e-8  # Small value to prevent log(0)
    m = Y.shape[1]
    cross_entropy_cost = (
        -np.sum(Y * np.log(A_L + epsilon) + (1 - Y) * (np.log(1 - A_L + epsilon))) / m
    )
    if regularization is None:
        return cross_entropy_cost

    if parameters is None:
        raise ValueError("parameters must be provided when regularization is used")

    # Two entries (W, b) per layer; only the weights are penalized.
    n_layers = len(parameters) // 2
    weights = [parameters[f"W_{l}"] for l in range(1, n_layers + 1)]

    cost = cross_entropy_cost
    # "L1_L2" contains both substrings, so both penalties are added for it.
    # Each matrix is reduced on its own because layer shapes differ.
    if "L1" in regularization:
        # lambd1 / m (not 2m) so the derivative is (lambd1 / m) * sign(W).
        cost = cost + (lambd1 / m) * sum(np.sum(np.abs(W)) for W in weights)
    if "L2" in regularization:
        cost = cost + (lambd2 / (2 * m)) * sum(np.sum(np.square(W)) for W in weights)
    return cost

In [7]:
def linear_backward(dA, A_prev, Z, W, b, activation):
    """
    Runs one layer backward: undoes the activation to get dZ, then derives the
    gradients of the affine step Z = W . A_prev + b.

    Arguments:
    dA -- gradient of the cost with respect to the activation of the current layer
    A_prev -- activations from previous layer (or input data), shape (size of previous layer, number of examples)
    Z -- pre-activation value cached during the forward pass
    W -- weights matrix, shape (size of current layer, size of previous layer)
    b -- bias vector, shape (size of current layer, 1); not used in the computation
    activation -- name of the activation used in this layer:
                    "sigmoid", "tanh", "relu", or "leaky_relu"

    Returns:
    dA_prev -- gradient of the cost with respect to the activation of the previous layer
    dW -- gradient of the cost with respect to W, averaged over the examples
    db -- gradient of the cost with respect to b, averaged over the examples

    Raises:
    ValueError -- if activation is not one of the supported names
    """
    if activation not in ("sigmoid", "tanh", "relu", "leaky_relu"):
        raise ValueError(f"Activation {activation} not supported")
    activation_backward = {
        "sigmoid": sigmoid_backward,
        "tanh": tanh_backward,
        "relu": relu_backward,
        "leaky_relu": leaky_relu_backward,
    }[activation]
    dZ = activation_backward(dA, Z)

    n_examples = A_prev.shape[1]
    db = np.sum(dZ, axis=1, keepdims=True) / n_examples
    dW = np.dot(dZ, A_prev.T) / n_examples
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db

In [8]:
def backward_prop(
    A_L,
    Y,
    caches,
    last_activation="sigmoid",
    hidden_activation="relu",
    regularization=None,
    lambd1=0,
    lambd2=0,
    keep_probs=None,
):
    """
    Implements the backward propagation for the entire neural network.

    Arguments:
    A_L -- the output of the last activation (post-activation value)
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
                every cache of linear_forward() with "hidden_activation" (there are L-1 of them, indexed from 0 to L-2)
                the cache of linear_forward() with "last_activation" (at index L-1)
    last_activation -- string representing the activation function used in the last layer (default is "sigmoid")
    hidden_activation -- the activation for the hidden layers, a string: "sigmoid", "tanh", "relu", "leaky_relu"
    regularization -- type of regularization: None, "L1", "L2", "L1_L2"
    lambd1 -- L1 regularization hyperparameter
    lambd2 -- L2 regularization hyperparameter
    keep_probs -- list of dropout keep probabilities, keep_probs[l - 1] for hidden
                  layer l; must be the same list the forward pass that produced
                  caches was run with. None means no dropout was applied.

    Returns:
    grads -- dictionary containing the gradients with respect to each parameter
             grads["dW_L"] = ...
             grads["db_L"] = ...
             ...
             grads["dW_1"] = ...
             grads["db_1"] = ...

    Raises:
    ValueError -- if regularization is not one of the supported values
    """
    if regularization not in (None, "L1", "L2", "L1_L2"):
        raise ValueError(f"Invalid regularization: {regularization}")

    epsilon = 1e-8  # Small value to prevent division by zero
    grads = {}
    L = len(caches)  # the number of layers
    Y = Y.reshape(A_L.shape)  # after this line, Y is the same shape as A_L
    # Derivative of the binary cross-entropy with respect to A_L.
    dA = -np.divide(Y, A_L + epsilon) + np.divide(1 - Y, 1 - A_L + epsilon)

    # loop from l = L to 1
    for l in reversed(range(1, L + 1)):
        A_prev, Z, W, b = caches[l - 1]
        activation = last_activation if l == L else hidden_activation
        dA_prev, dW, db = linear_backward(dA, A_prev, Z, W, b, activation=activation)

        # Undo dropout on the previous hidden layer (l > 1 skips the input,
        # which is never dropped). The masks are not part of the caches, so
        # the mask is recovered from A_prev, where forward_prop left dropped
        # units at exactly 0. A kept unit whose activation is itself exactly 0
        # is treated as dropped too; for ReLU that changes nothing because its
        # own slope is 0 there.
        if keep_probs is not None and l > 1:
            dropout_mask = A_prev != 0
            dA_prev = np.multiply(dA_prev, dropout_mask) / keep_probs[l - 2]

        # Penalty gradients are built from the weights W themselves:
        # d/dW (lambd1/m)*sum|W| = (lambd1/m)*sign(W),
        # d/dW (lambd2/2m)*sum(W^2) = (lambd2/m)*W.
        # "L1_L2" contains both substrings, so both terms are added for it.
        if regularization is not None:
            m = A_prev.shape[1]
            if "L1" in regularization:
                dW = dW + (lambd1 / m) * np.sign(W)
            if "L2" in regularization:
                dW = dW + (lambd2 / m) * W

        grads[f"dW_{l}"] = dW
        grads[f"db_{l}"] = db
        dA = dA_prev
    return grads

In [9]:
def update_parameters(parameters, grads, learning_rate):
    """
    Takes one gradient-descent step and returns the result as a new dictionary;
    the dictionary passed in is left untouched.

    Arguments:
    parameters -- dictionary containing parameters:
                    parameters['W_l'] = weight matrix of shape (size of current layer, size of previous layer)
                    parameters['b_l'] = bias vector of shape (size of current layer, 1)
    grads -- dictionary containing gradients:
                    grads['dW_l'] = gradient of the cost with respect to weight matrix W for layer l
                    grads['db_l'] = gradient of the cost with respect to bias vector b for layer l
    learning_rate -- step size of the update

    Returns:
    parameters -- dictionary containing updated parameters
    """
    updated = copy.deepcopy(parameters)  # in-place steps below touch only the copy
    n_layers = len(updated) // 2  # two entries (W, b) per layer
    for layer in range(1, n_layers + 1):
        for kind in ("W", "b"):
            updated[f"{kind}_{layer}"] -= learning_rate * grads[f"d{kind}_{layer}"]
    return updated

In [10]:
def model(
    X,
    Y,
    layers_dims,
    optimizer="adam",
    learning_rate=0.0075,
    n_iters=2500,
    regularization=None,
    lambd1=0,
    lambd2=0,
    keep_probs=None,
    print_cost=False,
    print_cost_iters=100,
):
    """
    Implements a deep neural network model.

    Arguments:
    X -- input data of shape (input size, number of examples)
    Y -- true "label" vector (1 for blue dot / 0 for red dot), shape (1, number of examples)
    layers_dims -- list containing the input size and each layer size, length is the number of layers + 1
    optimizer -- accepted for interface compatibility but not used: the update
                 step is always plain gradient descent
    learning_rate -- learning rate of the optimization
    n_iters -- number of iterations
    regularization -- type of regularization forwarded to backward_prop: None, "L1", "L2", "L1_L2"
    lambd1 -- L1 regularization hyperparameter
    lambd2 -- L2 regularization hyperparameter
    keep_probs -- list of probabilities for dropout for each layer, if None, dropout is not used
    print_cost -- True to print the cost every print_cost_iters iterations
    print_cost_iters -- number of iterations between printing the cost

    Returns:
    parameters -- final parameters learned by the model
    costs -- list of costs recorded every 100 iterations and at the last iteration
    """
    costs = []  # keep track of cost
    parameters = initialize_parameters(layers_dims)
    for i in range(1, n_iters + 1):
        # keep_probs goes to the forward pass as well, so the backward pass
        # sees the activations dropout actually produced.
        A_L, caches = forward_prop(X, parameters, keep_probs=keep_probs)
        # The tracked cost is the plain cross-entropy; the regularization
        # terms only enter through the gradients below.
        cost = get_cost(A_L, Y)
        grads = backward_prop(
            A_L,
            Y,
            caches,
            regularization=regularization,
            lambd1=lambd1,
            lambd2=lambd2,
            keep_probs=keep_probs,
        )
        parameters = update_parameters(parameters, grads, learning_rate)

        is_last = i == n_iters
        if i % 100 == 0 or is_last:
            costs.append(cost)
        if print_cost and (i % print_cost_iters == 0 or is_last):
            print(f"Cost after iteration {i}: {cost}")
    return parameters, costs

In [11]:
def predict(X, parameters):
    """
    Turns the network's output into hard 0/1 labels.

    Runs forward_prop with its default activations and no dropout, then marks
    every output strictly greater than 0.5 as class 1.

    Arguments:
    X -- input data of shape (input size, number of examples)
    parameters -- dictionary containing the learned parameters

    Returns:
    predictions -- integer array of 0/1 labels with the shape of the network output
    """
    probabilities = forward_prop(X, parameters)[0]
    return (probabilities > 0.5).astype(int)


def accuracy(Y_true, Y_pred):
    """
    Measures how often the predictions agree with the ground truth.

    Arguments:
    Y_true -- true labels (ground truth)
    Y_pred -- predicted labels

    Returns:
    accuracy -- share of matching entries, expressed as a percentage (0 to 100)
    """
    matches = Y_pred == Y_true
    return np.mean(matches) * 100

def plot_costs(costs):
    """
    Draws the recorded training costs as a line chart and shows the figure.

    Arguments:
    costs -- list of costs over iterations

    Returns:
    None
    """
    fig, ax = plt.subplots(figsize=(8, 6))  # one 8x6 inch figure with a single axes
    ax.plot(np.squeeze(costs))
    ax.set_ylabel("Cost")
    ax.set_xlabel("Iterations (per hundreds)")
    ax.set_title("Cost vs. Iterations")
    plt.show()

In [12]:
def print_mislabeled_images(classes, X, y, p, image_shape=(64, 64, 3)):
    """
    Plots mislabeled images along with their predictions and true labels.

    Arguments:
    classes -- array of class labels stored as bytes (each entry is decoded as UTF-8)
    X -- input data of shape (features, number of examples)
    y -- true labels of shape (1, number of examples)
    p -- predicted labels of shape (1, number of examples)
    image_shape -- shape each column of X is reshaped to before display
                   (default is (64, 64, 3))

    Returns:
    None
    """
    # Column indices of the examples whose prediction differs from the label.
    _, mislabeled_columns = np.where(p != y)
    num_images = len(mislabeled_columns)

    # Display mislabeled images with their predictions and true labels
    for position, index in enumerate(mislabeled_columns):
        plt.subplot(2, num_images, position + 1)
        plt.imshow(X[:, index].reshape(image_shape), interpolation="nearest")
        plt.axis("off")

        # Labels may be stored as floats; cast both before indexing classes.
        predicted = classes[int(p[0, index])].decode("utf-8")
        actual = classes[int(y[0, index])].decode("utf-8")
        plt.title(f"Pred.: {predicted}\n Class: {actual}", fontsize=5)

In [13]:
def preprocess_image(image_path, image_size):
    """
    Preprocesses an image for classification.

    The image is converted to 3-channel RGB, resized, flattened into a single
    column and scaled by 1/255. Converting first means RGBA, greyscale and
    palette images all produce the same number of features.

    NOTE(review): relies on a module-level `Image` (presumably PIL.Image) that
    none of the visible cells import -- confirm it is imported before this runs.

    Parameters:
    image_path (str): The path to the image file.
    image_size (tuple): The target size of the image after resizing.

    Returns:
    numpy.ndarray: The preprocessed image as a column vector of shape
    (number of pixel values, 1).
    """
    # The with-block closes the file handle once the pixels have been copied out.
    with Image.open(image_path) as img:
        pixels = np.array(img.convert("RGB").resize(image_size))
    return pixels.reshape((1, -1)).T / 255.0

def show_image(image_path):
    """
    Displays an image in a small (2x2 inch) figure without axes.

    NOTE(review): relies on a module-level `Image` (presumably PIL.Image) that
    none of the visible cells import -- confirm it is imported before this runs.

    Parameters:
    image_path (str): The path to the image file.
    """
    # Keep the file open only while it is being drawn, then release the handle.
    with Image.open(image_path) as img:
        plt.figure(figsize=(2, 2))
        plt.imshow(img)
        plt.axis("off")
        plt.show()

def classify_image(image_path, model_parameters, image_size=(64, 64)):
    """
    Shows an image, runs it through the trained model and names the result.

    Parameters:
    image_path (str): The path to the image file.
    model_parameters (dict): Parameters of the trained model.
    image_size (tuple): The target size of the image after resizing.

    Returns:
    str: "Cat" when the model predicts class 1, otherwise "Not Cat".
    """
    show_image(image_path)
    features = preprocess_image(image_path, image_size)
    is_cat = predict(features, model_parameters) == 1
    return "Cat" if is_cat else "Not Cat"

## Authors

Alok Ranjan

[LinkedIn Profile](https://www.linkedin.com/in/alokranjan-in/)

[GitHub Profile](https://github.com/AlokRanjanIN)