## Mastering Deep Learning
### Understanding a basic perceptron with both forward and backward propagation

## Forward propagation



In [None]:
import numpy as np

# One training example stored as a column vector of 2 input features.
X = np.array([
    [0.5],
    [0.8],
])

# Target label for that example.
y = 1

In [None]:
# Seed NumPy's global generator so the four draws below repeat on every run.
# The draw order (W1, b1, W2, b2) decides which values each array receives.
np.random.seed(1)

W1 = np.random.randn(2, 2)   # weights for input → hidden layer (2x2)
b1 = np.random.randn(2, 1)   # biases for hidden layer (2x1)

W2 = np.random.randn(1, 2)   # weights for hidden → output layer (1x2)
b2 = np.random.randn(1, 1)   # bias for output layer (1x1)

### Activation functions

In [None]:
def relu(z):
    """Rectified linear unit: the element-wise maximum of 0 and z."""
    rectified = np.maximum(0, z)
    return rectified

def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

### We use different activation functions for the hidden layer (ReLU) and the output layer (sigmoid)

In [None]:
# Hidden Layer
Z1 = np.dot(W1, X) + b1
A1 = relu(Z1)

# Output Layer
Z2 = np.dot(W2, A1) + b2
A2 = sigmoid(Z2)

In [None]:
# Show the network's prediction (output-layer activation) for the single example.
print("Output (A2):", A2)

Output (A2): [[0.92135202]]


### The output above comes from random weights, so we need to check how close it is to the actual value. We can do that with a loss function (log loss, i.e. binary cross-entropy, in this case).

In [None]:
def binary_cross_entropy(y_true, y_pred):
    """Per-element log loss between a binary target and a predicted probability.

    A small constant is added inside both logarithms so that a prediction of
    exactly 0 or 1 does not evaluate log(0).
    """
    eps = 1e-8
    positive_term = y_true * np.log(y_pred + eps)
    negative_term = (1 - y_true) * np.log(1 - y_pred + eps)
    return -(positive_term + negative_term)

In [None]:
# Log loss of the current prediction against the target label.
loss = binary_cross_entropy(y_true=y, y_pred=A2)
print("Loss:", loss)

Loss: [[0.08748879]]


## Backpropagation and parameter tuning using gradient descent

### We have to backpropagate through each layer's own activation function and then update the weights

In [None]:
# Sigmoid gradient, expressed through the sigmoid's own output
def sigmoid_derivative(a):
    """Return a * (1 - a), the sigmoid slope when a is the sigmoid output (not z)."""
    complement = 1 - a
    return a * complement

# ReLU gradient
def relu_derivative(z):
    """Return 1.0 where z is strictly positive and 0.0 elsewhere (including at 0)."""
    positive_mask = z > 0
    return positive_mask.astype(float)

In [None]:
# Step 1: Output layer
dZ2 = A2 - y
dW2 = np.dot(dZ2, A1.T)
db2 = dZ2

# Step 2: Hidden layer
dA1 = np.dot(W2.T, dZ2)
dZ1 = dA1 * relu_derivative(Z1)
dW1 = np.dot(dZ1, X.T)
db1 = dZ1

In [None]:
learning_rate = 0.1

# One gradient-descent step: move every parameter against its gradient.
# Output layer first, then the hidden layer.
W2, b2 = W2 - learning_rate * dW2, b2 - learning_rate * db2
W1, b1 = W1 - learning_rate * dW1, b1 - learning_rate * db1

# Building a 2-layer, multi-epoch neural network on a real-world dataset (breast cancer prediction)

In [None]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the breast-cancer dataset bundled with scikit-learn.
data = load_breast_cancer()
X = data.data     # features
y = data.target   # labels (0 = malignant, 1 = benign)

# Reshape y to (n_samples, 1)
y = y.reshape(-1, 1)

# Train-test split on the raw features. Splitting happens before scaling so
# that no statistics from the test rows reach the scaler (avoids data leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize with the mean/std learned from the training rows only, then
# apply that same transform to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole dataset on the training-derived scale, kept under its original name.
X_scaled = scaler.transform(X)

### We always start from random weights (the biases start at zero)

In [None]:
def initialize_parameters(input_size, hidden_size, output_size, seed=42):
    """Create the starting weights and biases for the two-layer network.

    Weights are drawn from a standard normal distribution and scaled by 0.01;
    biases start at zero.

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        output_size: Number of output units.
        seed: Seed for the private random generator. The default of 42 yields
            the same weights as calling np.random.seed(42) followed by
            np.random.randn. Pass None for a non-deterministic draw.

    Returns:
        Tuple (W1, b1, W2, b2) with shapes (hidden_size, input_size),
        (hidden_size, 1), (output_size, hidden_size) and (output_size, 1).
    """
    # A private generator keeps initialisation reproducible without resetting
    # NumPy's global random state as a hidden side effect of every call.
    rng = np.random.RandomState(seed)
    W1 = rng.randn(hidden_size, input_size) * 0.01
    b1 = np.zeros((hidden_size, 1))
    W2 = rng.randn(output_size, hidden_size) * 0.01
    b2 = np.zeros((output_size, 1))
    return W1, b1, W2, b2

In [None]:
def relu(Z):
    """Element-wise ReLU: negative entries become 0, the rest pass through."""
    activated = np.maximum(0, Z)
    return activated

def relu_derivative(Z):
    """Slope of ReLU at Z: 1.0 for strictly positive entries, 0.0 otherwise."""
    is_active = Z > 0
    return is_active.astype(float)

def sigmoid(Z):
    """Element-wise logistic function 1 / (1 + e^(-Z))."""
    exp_neg = np.exp(-Z)
    return 1 / (1 + exp_neg)

def sigmoid_derivative(A):
    """Return A * (1 - A), the sigmoid slope when A is the sigmoid output."""
    one_minus_A = 1 - A
    return A * one_minus_A

In [None]:
def forward_propagation(X, W1, b1, W2, b2):
    """Run one forward pass through the hidden (ReLU) and output (sigmoid) layers.

    X is transposed before the first matrix product, so samples are expected
    along its first axis and the activations come back with one column per sample.

    Returns:
        (A2, cache): A2 is the output activation; cache is the tuple
        (Z1, A1, Z2, A2) of intermediate values.
    """
    hidden_pre = np.dot(W1, X.T) + b1
    hidden_act = relu(hidden_pre)

    output_pre = np.dot(W2, hidden_act) + b2
    output_act = sigmoid(output_pre)

    return output_act, (hidden_pre, hidden_act, output_pre, output_act)

In [None]:
def compute_loss(Y, A2):
    """Mean binary cross-entropy (log loss) between labels and predictions.

    Args:
        Y: True labels. Y.T is broadcast against A2, so for the usual
            (n_samples, 1) labels A2 should be (1, n_samples).
        A2: Predicted probabilities from the output layer.

    Returns:
        The loss averaged over all elements, as a scalar.
    """
    epsilon = 1e-8  # keeps both logarithms finite when A2 is exactly 0 or 1
    targets = Y.T
    per_element = targets * np.log(A2 + epsilon) + (1 - targets) * np.log(1 - A2 + epsilon)
    return -np.mean(per_element)

In [None]:
def backward_propagation(X, Y, cache, W2):
    """Compute batch-averaged gradients of the log loss for both layers.

    Args:
        X: Input matrix; its first dimension is used as the sample count.
        Y: True labels; transposed before being subtracted from A2.
        cache: Tuple (Z1, A1, Z2, A2) as returned by forward_propagation.
        W2: Output-layer weights, used to carry the error back to the hidden layer.

    Returns:
        Tuple (dW1, db1, dW2, db2).
    """
    Z1, A1, _, A2 = cache
    m = X.shape[0]

    # Output layer error, then the averaging factor shared by every gradient.
    output_error = A2 - Y.T
    scale = 1 / m
    dW2 = scale * np.dot(output_error, A1.T)
    db2 = scale * np.sum(output_error, axis=1, keepdims=True)

    # Hidden layer: propagate the error through W2 and gate it with the ReLU slope.
    hidden_error = np.dot(W2.T, output_error) * relu_derivative(Z1)
    dW1 = scale * np.dot(hidden_error, X)
    db1 = scale * np.sum(hidden_error, axis=1, keepdims=True)

    return dW1, db1, dW2, db2

In [None]:
def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, lr):
    """Apply one gradient-descent step and return the updated parameters.

    The inputs are left untouched: each result is a new array, so re-running
    a cell that calls this function does not silently change arrays the caller
    still holds, and integer-typed parameters are handled as well.

    Args:
        W1, b1, W2, b2: Current weights and biases.
        dW1, db1, dW2, db2: Gradients matching each parameter.
        lr: Learning rate (step size).

    Returns:
        Tuple (W1, b1, W2, b2) after the step.
    """
    return (
        W1 - lr * dW1,
        b1 - lr * db1,
        W2 - lr * dW2,
        b2 - lr * db2,
    )

## Actual training for 1000 epochs; each epoch runs one forward and one backward propagation pass

In [None]:
def train(X, Y, input_size=None, hidden_size=10, output_size=1, epochs=1000, lr=0.1):
    """Train the two-layer network with full-batch gradient descent.

    Args:
        X: Feature matrix with samples along the first axis.
        Y: Labels for the samples in X.
        input_size: Number of input features. When None (the default) it is
            read from X.shape[1], so the function is not tied to one dataset.
        hidden_size: Number of hidden units.
        output_size: Number of output units.
        epochs: Number of forward/backward passes over X.
        lr: Learning rate for the gradient-descent update.

    Returns:
        Tuple (W1, b1, W2, b2) of trained parameters.
    """
    if input_size is None:
        input_size = X.shape[1]
    W1, b1, W2, b2 = initialize_parameters(input_size, hidden_size, output_size)

    for i in range(epochs):
        A2, cache = forward_propagation(X, W1, b1, W2, b2)
        dW1, db1, dW2, db2 = backward_propagation(X, Y, cache, W2)

        # The loss only feeds the progress line, so it is computed on the
        # epochs that print it (every 100th), using the pre-update prediction.
        if i % 100 == 0:
            loss = compute_loss(Y, A2)
            print(f"Epoch {i} - Loss: {loss:.4f}")

        W1, b1, W2, b2 = update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, lr)

    return W1, b1, W2, b2

In [None]:
# Fit the network on the training split; the returned parameters are reused
# by the evaluation cell below.
W1, b1, W2, b2 = train(X_train, y_train, epochs=1000, lr=0.1)

Epoch 0 - Loss: 0.6932
Epoch 100 - Loss: 0.1465
Epoch 200 - Loss: 0.0753
Epoch 300 - Loss: 0.0622
Epoch 400 - Loss: 0.0560
Epoch 500 - Loss: 0.0520
Epoch 600 - Loss: 0.0492
Epoch 700 - Loss: 0.0468
Epoch 800 - Loss: 0.0448
Epoch 900 - Loss: 0.0430


## Evaluating the model

In [None]:
def predict(X, W1, b1, W2, b2):
    """Return hard 0/1 predictions for X, one row per sample.

    Runs a forward pass, marks every output probability strictly above 0.5 as
    class 1, and transposes the result.
    """
    probabilities, _cache = forward_propagation(X, W1, b1, W2, b2)
    is_positive = probabilities > 0.5
    return is_positive.astype(int).T

# Classify the held-out samples and report the share of matching labels.
y_pred = predict(X_test, W1, b1, W2, b2)
matches = y_pred == y_test
accuracy = 100 * np.mean(matches)
print(f"Test Accuracy: {accuracy:.2f}%")

Test Accuracy: 99.12%
