## 📦 1. Import and setup

We import required packages and set the global parameters.


In [1]:
import numpy as np
import h5py
import matplotlib.pyplot as plt

from dnn_utils import sigmoid, sigmoid_backward, relu, relu_backward

import copy

# Notebook-wide plotting defaults, followed by a fixed seed for NumPy's
# global random number generator.
plt.rcParams.update({
    'figure.figsize': (5.0, 4.0),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})
np.random.seed(1)


## ⚙️ 2. Initialize parameters for a 2-layer network

We randomly initialize weights and set biases to zero for a simple 2-layer neural network.


In [2]:
def initialize_parameters(n_x, n_h, n_y):
    """Build the parameter dictionary of a 2-layer network.

    Weights are sampled from a standard normal distribution and scaled by
    0.01; biases start at zero. NumPy's global RNG is re-seeded with 1 on
    every call, so identical sizes always yield identical values.

    Args:
        n_x: Size of the input layer.
        n_h: Size of the hidden layer.
        n_y: Size of the output layer.

    Returns:
        Dict with keys "W1" (n_h, n_x), "b1" (n_h, 1), "W2" (n_y, n_h)
        and "b2" (n_y, 1), in that insertion order.
    """
    np.random.seed(1)
    weight_scale = 0.01

    # Sampling order matters for reproducibility: W1 is drawn before W2.
    hidden_weights = np.random.randn(n_h, n_x) * weight_scale
    output_weights = np.random.randn(n_y, n_h) * weight_scale

    return {
        "W1": hidden_weights,
        "b1": np.zeros((n_h, 1)),
        "W2": output_weights,
        "b2": np.zeros((n_y, 1)),
    }

# Sanity check: build a 4-3-2 network and print every parameter array.
parameters = initialize_parameters(n_x=4, n_h=3, n_y=2)
for k, v in parameters.items():
    print(f"{k} =\n{v}")


W1 =
[[ 0.01624345 -0.00611756 -0.00528172 -0.01072969]
 [ 0.00865408 -0.02301539  0.01744812 -0.00761207]
 [ 0.00319039 -0.0024937   0.01462108 -0.02060141]]
b1 =
[[0.]
 [0.]
 [0.]]
W2 =
[[-0.00322417 -0.00384054  0.01133769]
 [-0.01099891 -0.00172428 -0.00877858]]
b2 =
[[0.]
 [0.]]


## 🔁 3. Initialize deep network

We generalize parameter initialization to any number of layers using `layer_dims`.


In [3]:
def initialize_parameters_deep(layer_dims):
    """Build the parameter dictionary of a network with arbitrary depth.

    For each pair of consecutive entries in ``layer_dims`` a weight matrix
    (standard normal samples scaled by 0.01) and a zero bias column are
    created. NumPy's global RNG is re-seeded with 3 on every call.

    Args:
        layer_dims: Sequence of layer sizes, input layer first.

    Returns:
        Dict with keys "W1", "b1", ..., "W{L-1}", "b{L-1}" where
        L = len(layer_dims); "Wl" has shape (layer_dims[l], layer_dims[l-1])
        and "bl" has shape (layer_dims[l], 1).
    """
    np.random.seed(3)
    parameters = {}
    # Walk adjacent (previous size, current size) pairs, numbering from 1.
    adjacent_sizes = zip(layer_dims, layer_dims[1:])
    for layer, (prev_size, size) in enumerate(adjacent_sizes, start=1):
        parameters[f"W{layer}"] = np.random.randn(size, prev_size) * 0.01
        parameters[f"b{layer}"] = np.zeros((size, 1))
    return parameters

# Sanity check: build a 5-4-3 network and print every parameter array.
parameters = initialize_parameters_deep(layer_dims=[5, 4, 3])
for k, v in parameters.items():
    print(f"{k} =\n{v}")


W1 =
[[ 0.01788628  0.0043651   0.00096497 -0.01863493 -0.00277388]
 [-0.00354759 -0.00082741 -0.00627001 -0.00043818 -0.00477218]
 [-0.01313865  0.00884622  0.00881318  0.01709573  0.00050034]
 [-0.00404677 -0.0054536  -0.01546477  0.00982367 -0.01101068]]
b1 =
[[0.]
 [0.]
 [0.]
 [0.]]
W2 =
[[-0.01185047 -0.0020565   0.01486148  0.00236716]
 [-0.01023785 -0.00712993  0.00625245 -0.00160513]
 [-0.00768836 -0.00230031  0.00745056  0.01976111]]
b2 =
[[0.]
 [0.]
 [0.]]


## ➕ 4. Linear forward

Computes \( Z = W \cdot A + b \) for one layer.


In [4]:
def linear_forward(A, W, b):
    """Compute the linear part of one layer's forward pass.

    Args:
        A: Input to the layer (activations of the previous layer or data).
        W: Weight matrix of the layer.
        b: Bias of the layer, added to the product of W and A.

    Returns:
        A tuple ``(Z, cache)`` where ``Z = np.dot(W, A) + b`` and ``cache``
        is the tuple ``(A, W, b)`` of the inputs, kept for the backward pass.
    """
    linear_cache = (A, W, b)
    pre_activation = np.dot(W, A) + b
    return pre_activation, linear_cache


## ⚡ 5. Linear + Activation forward

Applies activation (ReLU or Sigmoid) after linear forward.


In [5]:
def linear_activation_forward(A_prev, W, b, activation):
    """Run the forward pass of one layer: linear step, then activation.

    Args:
        A_prev: Activations from the previous layer (or the input data).
        W: Weight matrix of the layer.
        b: Bias of the layer.
        activation: Name of the activation to apply, "sigmoid" or "relu".

    Returns:
        A tuple ``(A, cache)`` where ``A`` is the activated output and
        ``cache`` is ``((A_prev, W, b), Z)`` with ``Z`` the pre-activation
        value, kept for the backward pass.

    Raises:
        ValueError: If ``activation`` is neither "sigmoid" nor "relu".
    """
    # Reject unknown names up front; otherwise `A` would never be assigned
    # and the function would fail later with an unhelpful UnboundLocalError.
    if activation not in ("sigmoid", "relu"):
        raise ValueError(
            f"Unsupported activation {activation!r}; expected 'sigmoid' or 'relu'"
        )

    Z = np.dot(W, A_prev) + b
    linear_cache = (A_prev, W, b)

    if activation == "sigmoid":
        A = 1 / (1 + np.exp(-Z))
    else:  # "relu"
        A = np.maximum(0, Z)

    activation_cache = Z
    return A, (linear_cache, activation_cache)


## 🔄 6. Model forward

Computes forward propagation for the full L-layer model using ReLU for hidden layers and sigmoid for output.


In [6]:
def L_model_forward(X, parameters):
    """Run forward propagation through the whole network.

    Layers 1 .. L-1 use ReLU and layer L uses sigmoid, where L is half the
    number of entries in ``parameters`` (one "W" and one "b" per layer).

    Args:
        X: Input data fed to the first layer.
        parameters: Dict holding "W1", "b1", ..., "WL", "bL".

    Returns:
        A tuple ``(AL, caches)`` where ``AL`` is the output of the last layer
        and ``caches`` lists the cache returned by
        ``linear_activation_forward`` for every layer, first layer first.
    """
    depth = len(parameters) // 2
    caches = []
    activation = X

    # Hidden layers: linear step followed by ReLU.
    for idx in range(1, depth):
        activation, layer_cache = linear_activation_forward(
            activation, parameters[f"W{idx}"], parameters[f"b{idx}"], "relu"
        )
        caches.append(layer_cache)

    # Output layer: linear step followed by sigmoid.
    output_W = parameters[f"W{depth}"]
    output_b = parameters[f"b{depth}"]
    AL, layer_cache = linear_activation_forward(
        activation, output_W, output_b, "sigmoid"
    )
    caches.append(layer_cache)
    return AL, caches


## 📉 7. Compute cost

Computes the cross-entropy cost between the predictions `AL` and the true labels `Y`.


In [7]:
def compute_cost(AL, Y):
    """Compute the cross-entropy cost between predictions and labels.

    Args:
        AL: Predicted probabilities, same shape as ``Y``.
        Y: True labels as an array whose second dimension is the number of
            examples (``Y.shape[1]`` is used as the averaging factor).

    Returns:
        The cost with all length-1 dimensions squeezed away
        (``np.squeeze`` of the summed loss divided by ``Y.shape[1]``).
    """
    m = Y.shape[1]

    # Keep predictions strictly inside (0, 1): a prediction of exactly 0 or 1
    # would otherwise produce log(0) = -inf and 0 * -inf = NaN in the sum.
    eps = 1e-15
    probs = np.clip(np.asarray(AL, dtype=float), eps, 1 - eps)

    cost = -1/m * np.sum(Y*np.log(probs) + (1 - Y)*np.log(1 - probs))
    return np.squeeze(cost)


## 🔙 8. Linear backward

Computes gradients of cost w.r.t. \( W, b \) and the previous layer's activations.


In [8]:
def linear_backward(dZ, cache):
    """Compute the backward pass of the linear part of one layer.

    Args:
        dZ: Gradient of the cost with respect to the layer's linear output.
        cache: Tuple ``(A_prev, W, b)`` stored during the forward pass.

    Returns:
        A tuple ``(dA_prev, dW, db)``: gradients with respect to the previous
        layer's activations, the weights and the bias. ``dW`` and ``db`` are
        scaled by ``1 / m`` where ``m = A_prev.shape[1]``.
    """
    A_prev, W, _ = cache  # the bias value itself is not needed here
    inv_m = 1 / A_prev.shape[1]

    dA_prev = np.dot(W.T, dZ)
    dW = inv_m * np.dot(dZ, A_prev.T)
    db = inv_m * np.sum(dZ, axis=1, keepdims=True)
    return dA_prev, dW, db


## 🔁 9. Linear + Activation backward

Computes backward pass for a layer with ReLU or Sigmoid activation.


In [9]:
def linear_activation_backward(dA, cache, activation):
    """Run the backward pass of one layer: activation, then linear step.

    Args:
        dA: Gradient of the cost with respect to this layer's activation.
        cache: Tuple ``(linear_cache, activation_cache)`` stored during the
            forward pass; ``activation_cache`` is the pre-activation value Z.
        activation: Name of the activation used in the forward pass,
            "relu" or "sigmoid".

    Returns:
        The tuple ``(dA_prev, dW, db)`` returned by ``linear_backward`` for
        the computed ``dZ``.

    Raises:
        ValueError: If ``activation`` is neither "relu" nor "sigmoid".
    """
    # Reject unknown names up front; otherwise `dZ` would never be assigned
    # and the function would fail later with an unhelpful UnboundLocalError.
    if activation not in ("relu", "sigmoid"):
        raise ValueError(
            f"Unsupported activation {activation!r}; expected 'relu' or 'sigmoid'"
        )

    linear_cache, activation_cache = cache

    if activation == "relu":
        # Copy so the caller's dA is not modified, then zero the gradient
        # wherever the pre-activation was not positive.
        dZ = np.array(dA, copy=True)
        dZ[activation_cache <= 0] = 0
    else:  # "sigmoid"
        s = 1 / (1 + np.exp(-activation_cache))
        dZ = dA * s * (1 - s)

    return linear_backward(dZ, linear_cache)


## 🧠 10. Model backward

Applies backpropagation through the full network, layer by layer.


In [10]:
def L_model_backward(AL, Y, caches):
    """Run backpropagation through the whole network.

    The last cache is processed with the sigmoid backward step, all earlier
    caches with the ReLU backward step, walking from the output layer back
    to the first layer.

    Args:
        AL: Output of the forward pass. The initial gradient divides by
            ``AL`` and ``1 - AL``, so entries equal to 0 or 1 lead to
            division by zero.
        Y: True labels; reshaped to the shape of ``AL``.
        caches: List of per-layer caches from the forward pass, first layer
            first.

    Returns:
        Dict of gradients with keys "dA{l-1}", "dW{l}" and "db{l}" for every
        layer l = 1 .. len(caches).
    """
    num_layers = len(caches)
    labels = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = -(labels / AL - (1 - labels) / (1 - AL))

    grads = {}

    # Output layer (sigmoid).
    dA_prev, dW, db = linear_activation_backward(
        dAL, caches[num_layers - 1], "sigmoid"
    )
    grads[f"dA{num_layers - 1}"] = dA_prev
    grads[f"dW{num_layers}"] = dW
    grads[f"db{num_layers}"] = db

    # Hidden layers (ReLU), from layer L-1 down to layer 1.
    for layer in range(num_layers - 1, 0, -1):
        dA_prev, dW, db = linear_activation_backward(
            grads[f"dA{layer}"], caches[layer - 1], "relu"
        )
        grads[f"dA{layer - 1}"] = dA_prev
        grads[f"dW{layer}"] = dW
        grads[f"db{layer}"] = db

    return grads


## 🔧 11. Update parameters

Updates weights and biases using gradient descent.


In [11]:
def update_parameters(params, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    The input dictionary is deep-copied first, so ``params`` and its arrays
    are left untouched; the update is applied in place on the copy.

    Args:
        params: Dict holding "W1", "b1", ..., "WL", "bL".
        grads: Dict holding the matching gradients "dW1", "db1", ...
        learning_rate: Step size multiplied with each gradient.

    Returns:
        A new dict with the same keys as ``params`` and the updated values.
    """
    parameters = copy.deepcopy(params)
    num_layers = len(parameters) // 2
    for idx in range(1, num_layers + 1):
        for prefix in ("W", "b"):
            parameters[f"{prefix}{idx}"] -= learning_rate * grads[f"d{prefix}{idx}"]
    return parameters
