<a href="https://colab.research.google.com/github/EricSiq/DeepLearning/blob/main/DLLIrisFeedforwardNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder


In [15]:

class FeedForwardNN:
    """Feed-forward network with one sigmoid hidden layer and a softmax output.

    Training is plain full-batch gradient descent: call ``forward`` on a batch,
    then ``backward`` on the same batch to update the parameters in place.
    The output-layer gradient used in ``backward`` is the one for softmax
    combined with cross-entropy loss.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Create small random weights and zero biases for both layers.

        Args:
            input_size: Number of input features (columns of the input batch).
            hidden_size: Number of units in the hidden layer.
            output_size: Number of output classes.
            seed: Optional seed for reproducible weight initialisation. With
                the default ``None`` the weights are drawn from NumPy's global
                random state, which is what happened before this parameter
                existed; otherwise a private ``default_rng(seed)`` is used.
        """
        if seed is None:
            # Legacy path: global random state, same draw order as before
            raw_weights1 = np.random.randn(input_size, hidden_size)
            raw_weights2 = np.random.randn(hidden_size, output_size)
        else:
            rng = np.random.default_rng(seed)
            raw_weights1 = rng.standard_normal((input_size, hidden_size))
            raw_weights2 = rng.standard_normal((hidden_size, output_size))

        # Scale by 0.01 so the initial weights are small
        self.weights1 = raw_weights1 * 0.01
        self.bias1 = np.zeros((1, hidden_size))
        self.weights2 = raw_weights2 * 0.01
        self.bias2 = np.zeros((1, output_size))

    def sigmoid(self, x):
        """Return the logistic function 1 / (1 + exp(-x)) element-wise.

        The exponential is only ever taken of non-positive values, so very
        large negative inputs no longer trigger an overflow warning.
        """
        x = np.asarray(x)
        z = np.exp(-np.abs(x))  # always in (0, 1], cannot overflow
        # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function
        return np.where(x >= 0, 1 / (1 + z), z / (1 + z))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given ``x`` that is ALREADY sigmoid output."""
        return x * (1 - x)

    def softmax(self, x):
        """Return row-wise softmax of a 2-D array of scores.

        The row maximum is subtracted before exponentiating for numerical
        stability; this does not change the result.
        """
        exp_scores = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def forward(self, x):
        """Run a forward pass and cache the intermediate values on ``self``.

        Args:
            x: 2-D batch with one sample per row and ``input_size`` columns.

        Returns:
            The softmax output, one row of class probabilities per sample.
            The same array is stored as ``self.predictions``; ``backward``
            reads it together with ``self.hidden_layer_output``.
        """
        # Hidden layer
        self.hidden_layer_activation = np.dot(x, self.weights1) + self.bias1
        self.hidden_layer_output = self.sigmoid(self.hidden_layer_activation)

        # Output layer (softmax for multi-class probabilities)
        self.output_layer_activation = np.dot(self.hidden_layer_output, self.weights2) + self.bias2
        self.predictions = self.softmax(self.output_layer_activation)

        return self.predictions

    def backward(self, X, Y, learning_rate):
        """Apply one gradient-descent update using the last ``forward`` pass.

        ``forward`` must have been called on this same ``X`` immediately
        before, because the cached activations and predictions are reused.

        Args:
            X: The input batch that was passed to ``forward``.
            Y: Target rows with the same shape as the predictions (the
                notebook passes one-hot encoded labels).
            learning_rate: Step size applied to every gradient.
        """
        num_samples = X.shape[0]

        # Output layer: for softmax + cross-entropy the error is (predictions - Y)
        error_output = self.predictions - Y
        d_weights2 = np.dot(self.hidden_layer_output.T, error_output) / num_samples
        d_bias2 = np.sum(error_output, axis=0, keepdims=True) / num_samples

        # Hidden layer: back-propagate through weights2 (before it is updated)
        # and through the sigmoid non-linearity
        error_hidden = np.dot(error_output, self.weights2.T) * self.sigmoid_derivative(self.hidden_layer_output)
        d_weights1 = np.dot(X.T, error_hidden) / num_samples
        d_bias1 = np.sum(error_hidden, axis=0, keepdims=True) / num_samples

        # Update weights and biases in place
        self.weights2 -= learning_rate * d_weights2
        self.bias2 -= learning_rate * d_bias2
        self.weights1 -= learning_rate * d_weights1
        self.bias1 -= learning_rate * d_bias1

In [16]:


# Loss functions
def mse_loss(y_true, y_pred):
    """Return the mean squared error averaged over every element."""
    residual = y_true - y_pred
    return np.mean(np.square(residual))

def cross_entropy_loss(y_true, y_pred):
    """Return the mean categorical cross-entropy over the rows of a batch.

    Predictions are clipped to [1e-12, 1 - 1e-12] first so log(0) never occurs.
    """
    eps = 1e-12
    safe_pred = np.clip(y_pred, eps, 1 - eps)
    per_sample = np.sum(y_true * np.log(safe_pred), axis=1)
    return -np.mean(per_sample)

# Accuracy metric
def accuracy(y_true, y_pred):
    """Return the fraction of rows whose arg-max class matches in both arrays.

    Both arguments are per-class score rows (e.g. one-hot targets and
    predicted probabilities); each row is reduced to a label with argmax.
    """
    matches = np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1)
    return np.mean(matches)

In [17]:


# Load the Iris dataset: feature matrix X and integer class labels y
iris = load_iris()
X, y = iris.data, iris.target

# One-hot encode the labels as a dense array (one column per class)
encoder = OneHotEncoder(sparse_output=False)
Y_one_hot = encoder.fit_transform(y.reshape(-1, 1))

# Hold out 20% of the samples for testing; the fixed random_state keeps the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y_one_hot, test_size=0.2, random_state=42
)

# Standardise features (important for NN performance) using statistics
# from the training split only, then apply the same transform to both splits
X_mean = X_train.mean(axis=0)
X_std = X_train.std(axis=0)
X_train, X_test = (
    (split - X_mean) / (X_std + 1e-8)  # small epsilon avoids division by zero
    for split in (X_train, X_test)
)

# Network dimensions
input_size, output_size = X_train.shape[1], Y_train.shape[1]  # 4 features, 3 classes
hidden_size = 10  # Can be tuned

In [18]:


# Seed NumPy's global random state so the random weight initialisation is the
# same on every Restart & Run All (FeedForwardNN draws its weights from it).
np.random.seed(42)
model = FeedForwardNN(input_size, hidden_size, output_size)

# Baseline: evaluate the untrained network on the held-out test split
print("--- Before Training ---")
predictions_before = model.forward(X_test)
loss_before = cross_entropy_loss(Y_test, predictions_before)
accuracy_before = accuracy(Y_test, predictions_before)
print(f"Loss: {loss_before:.4f}")
print(f"Accuracy: {accuracy_before:.4f}")

# Hyperparameters for the training loop in the next cell
learning_rate = 0.01
epochs = 5000  # Increased epochs for better training


--- Before Training ---
Loss: 1.0982
Accuracy: 0.3667


In [19]:
print("\n--- Training ---")
for epoch in range(epochs):
    # Forward pass over the whole training set; this also caches the
    # activations that backward() reads
    predictions_train = model.forward(X_train)

    # One full-batch gradient-descent step
    model.backward(X_train, Y_train, learning_rate)

    # Only report every 500th epoch
    if (epoch + 1) % 500 != 0:
        continue

    # Metrics use the predictions made before this epoch's weight update
    loss_train = cross_entropy_loss(Y_train, predictions_train)
    accuracy_train = accuracy(Y_train, predictions_train)
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss_train:.4f}, Accuracy: {accuracy_train:.4f}")




--- Training ---
Epoch 500/5000, Loss: 1.0967, Accuracy: 0.3417
Epoch 1000/5000, Loss: 1.0815, Accuracy: 0.6417
Epoch 1500/5000, Loss: 0.9706, Accuracy: 0.8917
Epoch 2000/5000, Loss: 0.7176, Accuracy: 0.8500
Epoch 2500/5000, Loss: 0.5536, Accuracy: 0.9000
Epoch 3000/5000, Loss: 0.4697, Accuracy: 0.9167
Epoch 3500/5000, Loss: 0.4164, Accuracy: 0.9250
Epoch 4000/5000, Loss: 0.3767, Accuracy: 0.9250
Epoch 4500/5000, Loss: 0.3446, Accuracy: 0.9250
Epoch 5000/5000, Loss: 0.3177, Accuracy: 0.9417

--- After Training ---
Loss on Test Set: 0.2991
Accuracy on Test Set: 0.9333


In [20]:
print("\n--- After Training ---")
# Evaluate the trained network on the held-out test split
predictions_after = model.forward(X_test)
loss_after, accuracy_after = (
    cross_entropy_loss(Y_test, predictions_after),
    accuracy(Y_test, predictions_after),
)
print(f"Loss on Test Set: {loss_after:.4f}")
print(f"Accuracy on Test Set: {accuracy_after:.4f}")


--- After Training ---
Loss on Test Set: 0.2991
Accuracy on Test Set: 0.9333
