In [3]:
# ------------------------------------------------------
# 1. Import Libraries
# ------------------------------------------------------

import numpy as np
import matplotlib.pyplot as plt

# ------------------------------------------------------
# 2. Generate Dummy Dataset (10 samples × 3 features)
# ------------------------------------------------------

# Fix the global NumPy RNG so every run draws the same numbers. The weights
# sampled in later cells come from this same global stream, so they are only
# reproducible when the cells are executed in order from a fresh kernel.
np.random.seed(42)

# 10 samples (rows) x 3 features (columns), drawn uniformly from [0, 1).
X = np.random.random_sample(size=(10, 3))
print("Input Data (X):\n", X)

Input Data (X):
 [[0.37454012 0.95071431 0.73199394]
 [0.59865848 0.15601864 0.15599452]
 [0.05808361 0.86617615 0.60111501]
 [0.70807258 0.02058449 0.96990985]
 [0.83244264 0.21233911 0.18182497]
 [0.18340451 0.30424224 0.52475643]
 [0.43194502 0.29122914 0.61185289]
 [0.13949386 0.29214465 0.36636184]
 [0.45606998 0.78517596 0.19967378]
 [0.51423444 0.59241457 0.04645041]]


In [4]:
# ------------------------------------------------------
# 3. Initialize Random Weights and Biases
# ------------------------------------------------------

# Hidden layer parameters: 3 input features feed 4 neurons,
# so W1 is (3, 4) and the bias row b1 is (1, 4).
W1 = np.random.standard_normal((3, 4))
b1 = np.random.standard_normal((1, 4))

# Output layer parameters: 4 hidden activations feed 1 neuron,
# so W2 is (4, 1) and b2 is (1, 1).
W2 = np.random.standard_normal((4, 1))
b2 = np.random.standard_normal((1, 1))

# Show the sampled parameters. The draw order above (W1, b1, W2, b2) must not
# change, otherwise the seeded values differ from the recorded output.
print("\nWeights and Biases Initialized:")
print("W1:\n", W1)
print("b1:\n", b1)
print("W2:\n", W2)
print("b2:\n", b2)


Weights and Biases Initialized:
W1:
 [[-1.15099358  0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722 -1.05771093]
 [ 0.82254491 -1.22084365  0.2088636  -1.95967012]]
b1:
 [[-1.32818605  0.19686124  0.73846658  0.17136828]]
W2:
 [[-0.11564828]
 [-0.3011037 ]
 [-1.47852199]
 [-0.71984421]]
b2:
 [[-0.46063877]]


In [5]:
# ------------------------------------------------------
# 4. Define the Sigmoid Activation Function
# ------------------------------------------------------

def sigmoid(z):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-z)), computed without overflow.

    The textbook form evaluates ``exp(-z)``, which overflows (and emits a
    RuntimeWarning) once ``z`` is a large negative number. This version only
    ever exponentiates ``-|z|``, which lies in (0, 1], and picks the
    algebraically equivalent expression for each sign of ``z``.

    Parameters
    ----------
    z : array_like or scalar
        Pre-activation values.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Values in [0, 1] with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) can underflow to 0 but can never overflow.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # [()] turns a 0-d result back into a scalar and leaves n-d arrays as-is.
    return out[()]


In [6]:
# ------------------------------------------------------
# 5. Forward Propagation Steps
# ------------------------------------------------------

# Hidden layer: affine map of the inputs, (10, 3) . (3, 4) + (1, 4) -> (10, 4),
# followed by an element-wise sigmoid. b1 broadcasts across the 10 rows.
Z1 = X.dot(W1) + b1
A1 = sigmoid(Z1)

# Output layer: affine map of the hidden activations, (10, 4) . (4, 1) + (1, 1)
# -> (10, 1), followed by a sigmoid so each prediction lies between 0 and 1.
Z2 = A1.dot(W2) + b2
A2 = sigmoid(Z2)

In [7]:
# ------------------------------------------------------
# 6. Print Forward Pass Outputs
# ------------------------------------------------------

# Round to 3 decimals for display only; A1 and A2 themselves are not modified.
print("\nHidden Layer Output (A1):\n", A1.round(3))
print("\nFinal Output (A2):\n", A2.round(3))



Hidden Layer Output (A1):
 [[0.151 0.769 0.658 0.085]
 [0.121 0.627 0.601 0.384]
 [0.194 0.748 0.694 0.126]
 [0.205 0.336 0.626 0.124]
 [0.094 0.664 0.568 0.342]
 [0.216 0.547 0.676 0.226]
 [0.183 0.538 0.646 0.188]
 [0.204 0.585 0.674 0.29 ]
 [0.103 0.829 0.621 0.234]
 [0.096 0.807 0.606 0.333]]

Final Output (A2):
 [[0.149]
 [0.138]
 [0.139]
 [0.168]
 [0.147]
 [0.14 ]
 [0.15 ]
 [0.134]
 [0.141]
 [0.136]]


Forward propagation is the process of passing input data through the network's
layers, using each layer's weights and biases, to produce an output prediction.
Each neuron applies a linear transformation followed by an activation function
(sigmoid here) to introduce non-linearity, allowing the network to learn complex
relationships. The hidden layer captures feature interactions, and the output
layer produces the final prediction (between 0 and 1 in this case). The same
forward pass is used both during training and when making predictions; because
the weights above are random and untrained, the outputs shown here are not yet
meaningful.

In [8]:
def softmax(z):
    """Row-wise softmax of a 2-D array of scores.

    Each row is shifted by its own maximum before exponentiating. The shift
    cancels in the ratio, so the result is unchanged, but it keeps ``exp``
    from overflowing on large scores.

    Parameters
    ----------
    z : array_like with at least 2 dimensions
        Scores; normalisation runs along axis 1.

    Returns
    -------
    numpy.ndarray
        Same shape as ``z``; entries along axis 1 are non-negative and sum to 1.
    """
    row_max = np.max(z, axis=1, keepdims=True)
    weights = np.exp(z - row_max)
    totals = np.sum(weights, axis=1, keepdims=True)
    return weights / totals

In [11]:
# ------------------------------------------------------
# 7. Softmax Output Layer and Cross-Entropy Loss (bonus)
# ------------------------------------------------------

# Replace the single sigmoid output neuron with a 3-neuron softmax layer
# (3 classes). Sizes are read from the hidden activations A1 instead of being
# hard-coded, so this cell stays correct if the dataset or hidden width changes.
n_samples, n_hidden = A1.shape
n_classes = 3
W2_softmax = np.random.randn(n_hidden, n_classes)
b2_softmax = np.random.randn(1, n_classes)

# Forward pass with softmax output. softmax() is defined once, in the cell
# directly above; it is deliberately not redefined here so that there is a
# single definition to maintain.
Z2_softmax = np.dot(A1, W2_softmax) + b2_softmax
A2_softmax = softmax(Z2_softmax)

print("\nSoftmax Output (A2_softmax - class probabilities):\n", np.round(A2_softmax, 3))

# Random one-hot labels, one row per sample. They are drawn at random purely
# to have something to score against, so the loss below is illustrative only.
Y_softmax = np.eye(n_classes)[np.random.choice(n_classes, n_samples)]

# Multi-class cross-entropy: mean over samples of -sum_k y_k * log(p_k).
epsilon = 1e-9  # small value to avoid log(0)
loss_softmax = -np.mean(np.sum(Y_softmax * np.log(A2_softmax + epsilon), axis=1))

print("\nOne-hot Encoded Labels (Y_softmax):\n", Y_softmax)
print("Cross-Entropy Loss (using softmax output):", round(loss_softmax, 4))



Softmax Output (A2_softmax - class probabilities):
 [[0.771 0.129 0.1  ]
 [0.693 0.15  0.156]
 [0.772 0.132 0.096]
 [0.715 0.158 0.127]
 [0.7   0.145 0.155]
 [0.734 0.149 0.117]
 [0.73  0.148 0.122]
 [0.726 0.15  0.124]
 [0.746 0.132 0.122]
 [0.725 0.137 0.138]]

One-hot Encoded Labels (Y_softmax):
 [[0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]]
Cross-Entropy Loss (using softmax output): 1.5449
