# Building and Training a Deep Neural Network

### Importing Libraries

In [1]:
import numpy as np

### Define functions

In [2]:
# Define the sigmoid activation function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Define the derivative of sigmoid function
def sigmoid_derivative(z):
    return z * (1 - z)

### Initialize parameters (weights and biases) for a single layer

- This function initializes the weights (W) and biases (b) for each layer of a neural network. 
- It takes a list layer_dims that specifies the number of units in each layer. 
- It uses random initialization for weights and sets biases to zero.

In [3]:
def initialize_parameters(layer_dims):
    np.random.seed(0)
    parameters = {}
    L = len(layer_dims) - 1  # Number of layers (excluding input layer)
    
    for l in range(1, L + 1):
        parameters[f'W{l}'] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
        parameters[f'b{l}'] = np.zeros((layer_dims[l], 1))
    
    return parameters

### Forward propagation for a single layer

- This function computes the forward propagation for a single layer of the neural network. It takes the previous layer's activation A_prev, the weights W, biases b, and an activation function (sigmoid or relu).
- It computes the linear transformation Z, applies the specified activation function, and returns the activation A along with a cache containing intermediate values for later use in backward propagation.

In [4]:
def forward_propagation(A_prev, W, b, activation):
    Z = np.dot(W, A_prev) + b
    if activation == "sigmoid":
        A = sigmoid(Z)
    elif activation == "relu":
        A = np.maximum(0, Z)
    
    cache = (A_prev, W, b, Z)
    
    return A, cache

### Forward propagation for the entire deep neural network

- This function performs forward propagation for the entire deep neural network. 
- It takes the input X and the neural network's parameters, including weights and biases.
- It iteratively computes forward propagation for each hidden layer using the forward_propagation function and stores intermediate values in the caches. The final output AL is the output of the last layer.

In [5]:
def deep_forward_propagation(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2  # Number of layers in the neural network
    
    for l in range(1, L):
        A_prev = A
        A, cache = forward_propagation(A_prev, parameters[f'W{l}'], parameters[f'b{l}'], activation="relu")
        caches.append(cache)
    
    AL, cache = forward_propagation(A, parameters[f'W{L}'], parameters[f'b{L}'], activation="sigmoid")
    caches.append(cache)
    
    return AL, caches

### Compute the cross-entropy loss

- This function computes the cross-entropy loss between the predicted AL and the true labels Y for binary classification.

In [6]:
def compute_loss(AL, Y):
    m = Y.shape[1]
    loss = -1/m * np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL))
    return loss

### Backward propagation for a single layer

- This function computes backward propagation for a single layer. 
- It takes the gradient dA from the next layer, the cache containing intermediate values, and the activation function used in that layer.
- It computes the gradients with respect to the parameters (dW and db) and the gradient with respect to the previous layer's activations dA_prev.

In [7]:
def backward_propagation(dA, cache, activation):
    A_prev, W, b, Z = cache
    m = A_prev.shape[1]
    
    if activation == "sigmoid":
        dZ = dA * sigmoid_derivative(Z)
    elif activation == "relu":
        dZ = dA * (Z > 0)
    
    dW = 1/m * np.dot(dZ, A_prev.T)
    db = 1/m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    
    return dA_prev, dW, db


### Backward propagation for the entire deep neural network

- This function performs backward propagation for the entire deep neural network. 
- It takes the final output AL, true labels Y, and the caches containing intermediate values from forward propagation.
- It computes gradients for each layer using the backward_propagation function and stores them in the grads dictionary.

In [8]:
def deep_backward_propagation(AL, Y, caches):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    
    current_cache = caches[L - 1]
    grads[f'dA{L-1}'], grads[f'dW{L}'], grads[f'db{L}'] = backward_propagation(dAL, current_cache, activation="sigmoid")
    
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        grads[f'dA{l}'], grads[f'dW{l+1}'], grads[f'db{l+1}'] = backward_propagation(grads[f'dA{l+1}'], current_cache, activation="relu")
    
    return grads

### Update parameters using gradient descent

- This function updates the neural network's parameters (weights and biases) using gradient descent. 
- It takes the current parameters, gradients grads, and a learning rate.

In [9]:
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2
    
    for l in range(1, L + 1):
        parameters[f'W{l}'] -= learning_rate * grads[f'dW{l}']
        parameters[f'b{l}'] -= learning_rate * grads[f'db{l}']
    
    return parameters

### Build and train a deep neural network

- This function builds and trains a deep neural network. 
- It takes input data X, true labels Y, the layer dimensions specified in layer_dims, learning rate, and the number of training iterations.
- It initializes parameters, performs forward and backward propagation, updates parameters iteratively, and prints the loss during training.

In [10]:
def deep_neural_network(X, Y, layer_dims, learning_rate, num_iterations):
    parameters = initialize_parameters(layer_dims)
    
    for i in range(num_iterations):
        AL, caches = deep_forward_propagation(X, parameters)
        loss = compute_loss(AL, Y)
        grads = deep_backward_propagation(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        
        if i % 100 == 0:
            print(f"Iteration {i}: Loss = {loss:.4f}")
    
    return parameters


### Make predictions using the trained model

- This function makes predictions using the trained neural network. 
- It takes the parameters and input data X.
- It applies the sigmoid activation function to the final output AL and converts the probabilities to binary predictions.

In [11]:
def predict(parameters, X):
    AL, _ = deep_forward_propagation(X, parameters)
    predictions = (AL > 0.5).astype(int)
    return predictions

### Example usage:

- This section provides an example of how to use the functions defined earlier to build, train, and make predictions using a deep neural network. 
- It generates sample data, specifies the network architecture, sets hyperparameters, trains the network, and makes predictions on new data.

In [12]:
# Generate sample data
np.random.seed(1)
X = np.random.randn(2, 400)
Y = ((X[0, :] ** 2 + X[1, :] ** 2) < 1).astype(int)

# Reshape Y to be a 1D array
Y = Y.reshape(1, -1)

# Define the architecture of the deep neural network
layer_dims = [2, 5, 5, 1]  # Input layer, hidden layers, output layer (2-5-5-1 architecture)

# Set hyperparameters
learning_rate = 0.1
num_iterations = 2000

# Train the deep neural network
trained_parameters = deep_neural_network(X, Y, layer_dims, learning_rate, num_iterations)

# Make predictions on new data
new_data = np.array([[-0.8, -0.8], [0.8, 0.8]])
predictions = predict(trained_parameters, new_data)

print("Predictions for new data:")
print(predictions)

Iteration 0: Loss = 0.6931
Iteration 100: Loss = 0.6931
Iteration 200: Loss = 0.6931
Iteration 300: Loss = 0.6931
Iteration 400: Loss = 0.6931
Iteration 500: Loss = 0.6931
Iteration 600: Loss = 0.6931
Iteration 700: Loss = 0.6931
Iteration 800: Loss = 0.6931
Iteration 900: Loss = 0.6931
Iteration 1000: Loss = 0.6931
Iteration 1100: Loss = 0.6931
Iteration 1200: Loss = 0.6931
Iteration 1300: Loss = 0.6931
Iteration 1400: Loss = 0.6931
Iteration 1500: Loss = 0.6931
Iteration 1600: Loss = 0.6931
Iteration 1700: Loss = 0.6931
Iteration 1800: Loss = 0.6931
Iteration 1900: Loss = 0.6931
Predictions for new data:
[[0 0]]


- In this case, new_data contains two data points:
- [-0.8, -0.8] is predicted as 0, indicating a negative class.
- [0.8, 0.8] is predicted as 1, indicating a positive class.
- These predictions are based on the learned model's decision boundary, where values closer to 0 are classified as the negative class, and values closer to 1 are classified as the positive class.