# Multi-Layer Perceptron ANN for Iris Dataset

This notebook implements a simple multi-layer perceptron (MLP) neural network to classify all 3 iris species.

## MLP Overview:
- **Multi-layer neural network** with one hidden layer
- Uses **sigmoid activation** for smooth gradients
- Can solve **non-linearly separable** problems
- **Backpropagation** algorithm for training
- Handles **multi-class classification** (3 iris species)

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns

In [None]:
# Load and prepare the iris dataset
def load_iris_data():
    """Build a DataFrame of the iris measurements plus label columns.

    Returns:
        A DataFrame with one column per entry in the dataset's
        ``feature_names``, followed by ``species`` (the target name looked
        up for each row) and ``target`` (the numeric class code 0, 1 or 2).
    """
    dataset = load_iris()

    # ``assign`` appends the columns in keyword order: species, then target.
    return pd.DataFrame(dataset.data, columns=dataset.feature_names).assign(
        species=dataset.target_names[dataset.target],
        target=dataset.target,
    )

# Materialise the dataset once; later cells read `iris_df`.
iris_df = load_iris_data()

# Quick look at the size and the first rows of the frame.
print("Iris Dataset Shape:", iris_df.shape)
print("\nFirst 5 rows:")
print(iris_df.head())

# Class balance, first by species name and then by numeric code.
print("\nTarget distribution:")
print(iris_df['species'].value_counts())
print("\nNumeric targets:")
for caption, class_code in (("Setosa (0):", 0), ("Versicolor (1):", 1), ("Virginica (2):", 2)):
    print(caption, (iris_df['target'] == class_code).sum())

In [None]:
# Simple Multi-Layer Perceptron Implementation
class MultiLayerPerceptron:
    """Single-hidden-layer perceptron for multi-class classification.

    The network is ``input -> hidden (sigmoid) -> output (sigmoid)`` and is
    trained with full-batch gradient descent on a squared-error objective.

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden neurons.
        output_size: Number of output neurons (one per class).
        learning_rate: Step size applied to every weight/bias update.
        random_state: Optional seed for the weight initialisation. ``None``
            (the default) draws from NumPy's global random stream, exactly as
            before; an integer gives the same weights as calling
            ``np.random.seed(random_state)`` immediately before construction,
            without touching the global stream.

    Attributes:
        weights_input_hidden: ``(input_size, hidden_size)`` weight matrix.
        weights_hidden_output: ``(hidden_size, output_size)`` weight matrix.
        bias_hidden: ``(1, hidden_size)`` bias row.
        bias_output: ``(1, output_size)`` bias row.
        loss_history: Loss recorded at every epoch of every ``fit`` call.
        accuracy_history: Training accuracy recorded at every epoch.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1,
                 random_state=None):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.random_state = random_state

        # ``np.random`` and ``RandomState`` both expose ``uniform``; falling
        # back to the module keeps the unseeded behaviour unchanged.
        rng = np.random if random_state is None else np.random.RandomState(random_state)

        # Small symmetric random weights; biases start at zero.
        self.weights_input_hidden = rng.uniform(-0.5, 0.5, (input_size, hidden_size))
        self.weights_hidden_output = rng.uniform(-0.5, 0.5, (hidden_size, output_size))
        self.bias_hidden = np.zeros((1, hidden_size))
        self.bias_output = np.zeros((1, output_size))

        # Per-epoch training metrics (appended to by ``fit``).
        self.loss_history = []
        self.accuracy_history = []

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
        # Clip so that exp() never overflows for extreme pre-activations.
        x = np.clip(x, -500, 500)
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid *output*.

        ``x`` must already be an activation ``s = sigmoid(z)``; the result is
        ``s * (1 - s)``, which equals ``d sigmoid / dz`` at that ``z``.
        """
        return x * (1 - x)

    def one_hot_encode(self, y, num_classes):
        """Convert integer class labels to a one-hot matrix.

        Args:
            y: 1-D sequence/array of class labels in ``[0, num_classes)``.
                Integral floats (e.g. ``1.0``) are accepted.
            num_classes: Number of columns in the encoding.

        Returns:
            Float array of shape ``(len(y), num_classes)`` with a single 1 per row.

        Raises:
            IndexError: If a label is not an integer or falls outside
                ``[0, num_classes)``. Negative labels are rejected instead of
                wrapping around to the last classes.
        """
        labels = np.asarray(y).ravel()
        encoded = np.zeros((labels.size, num_classes))
        if labels.size == 0:
            return encoded

        if labels.dtype.kind not in "iub":
            as_int = labels.astype(int)
            if not np.array_equal(as_int, labels):
                raise IndexError("class labels must be integers")
            labels = as_int
        labels = labels.astype(int)

        if labels.min() < 0 or labels.max() >= num_classes:
            raise IndexError(
                f"class labels must lie in [0, {num_classes}); "
                f"got values between {labels.min()} and {labels.max()}"
            )

        encoded[np.arange(labels.size), labels] = 1
        return encoded

    def forward_pass(self, X):
        """Propagate ``X`` through the network and cache the activations.

        Stores ``hidden_input``, ``hidden_output``, ``output_input`` and
        ``output`` on the instance (``backward_pass`` reads ``hidden_output``)
        and returns the output-layer activations.
        """
        # Coerce so DataFrames and lists behave like plain arrays.
        X = np.asarray(X, dtype=float)

        # Input to hidden layer
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = self.sigmoid(self.hidden_input)

        # Hidden to output layer
        self.output_input = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        self.output = self.sigmoid(self.output_input)

        return self.output

    def backward_pass(self, X, y, output):
        """Apply one gradient-descent update from a completed forward pass.

        Args:
            X: Inputs that produced ``output`` (rows are samples).
            y: One-hot targets with the same shape as ``output``.
            output: Output activations returned by ``forward_pass(X)``.

        The cached ``self.hidden_output`` from that forward pass is used, so
        ``forward_pass`` must have been called on the same ``X`` beforehand.
        """
        X = np.asarray(X, dtype=float)
        m = X.shape[0]  # Number of samples
        if m == 0:
            # Nothing to learn from; also avoids dividing by zero below.
            return

        # Error is (target - output), so the updates below are added.
        output_error = y - output
        output_delta = output_error * self.sigmoid_derivative(output)

        # Both deltas are computed before any weight changes so the hidden
        # error is back-propagated through the pre-update output weights.
        hidden_error = output_delta.dot(self.weights_hidden_output.T)
        hidden_delta = hidden_error * self.sigmoid_derivative(self.hidden_output)

        # Batch-averaged updates
        self.weights_hidden_output += self.hidden_output.T.dot(output_delta) * self.learning_rate / m
        self.bias_output += np.sum(output_delta, axis=0, keepdims=True) * self.learning_rate / m

        self.weights_input_hidden += X.T.dot(hidden_delta) * self.learning_rate / m
        self.bias_hidden += np.sum(hidden_delta, axis=0, keepdims=True) * self.learning_rate / m

    def calculate_loss(self, y_true, y_pred):
        """Return the mean squared error over all samples and outputs."""
        return np.mean((y_true - y_pred) ** 2)

    def fit(self, X, y, epochs=1000, verbose=True):
        """Train the network with full-batch gradient descent.

        Args:
            X: Array-like of shape ``(n_samples, input_size)``.
            y: Array-like of ``n_samples`` integer class labels.
            epochs: Number of passes over the training data.
            verbose: When true, print loss/accuracy every 100 epochs.

        Returns:
            ``self``, to allow call chaining.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` disagree on the
                number of samples.

        Loss and accuracy are measured on the forward pass *before* each
        update and appended to ``loss_history`` / ``accuracy_history``;
        repeated ``fit`` calls keep extending those lists.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a column-vector ``y`` cannot broadcast in the accuracy check.
        y = np.asarray(y).ravel()

        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty dataset")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} samples but y has {y.shape[0]} labels"
            )

        # Convert labels to one-hot encoding
        y_one_hot = self.one_hot_encode(y, self.output_size)

        for epoch in range(epochs):
            # Forward pass
            output = self.forward_pass(X)

            # Calculate loss
            loss = self.calculate_loss(y_one_hot, output)
            self.loss_history.append(loss)

            # Calculate accuracy
            predictions = np.argmax(output, axis=1)
            accuracy = np.mean(predictions == y)
            self.accuracy_history.append(accuracy)

            # Backward pass
            self.backward_pass(X, y_one_hot, output)

            # Print progress
            if verbose and (epoch + 1) % 100 == 0:
                print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

        return self

    def predict(self, X):
        """Return the index of the strongest output neuron for each row of ``X``."""
        output = self.forward_pass(X)
        return np.argmax(output, axis=1)

    def predict_proba(self, X):
        """Return the output-layer activations for each row of ``X``.

        Each value comes from an independent sigmoid, so it lies in [0, 1]
        but a row is not normalised to sum to 1.
        """
        return self.forward_pass(X)

# Confirmation message emitted once the class definition in this cell has executed.
print("Multi-Layer Perceptron class implemented successfully!")

In [None]:
# Feature matrix (the four measurement columns) and numeric class target.
X = iris_df[['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']]
y = iris_df['target']

# 70/30 split, stratified on the target so each class keeps its share.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training set shape: {X_train_scaled.shape}")
print(f"Test set shape: {X_test_scaled.shape}")

# Per-class sample counts for each split.
for header, split_targets in (
    (f"\nTraining set target distribution:", y_train),
    (f"\nTest set target distribution:", y_test),
):
    print(header)
    for class_code, species_label in enumerate(['Setosa', 'Versicolor', 'Virginica']):
        n_samples = (split_targets == class_code).sum()
        print(f"{species_label}: {n_samples}")

print(f"\nFeatures: {list(X.columns)}")
print(f"Network architecture will be: 4 -> ? -> 3")

In [None]:
# Train the Multi-Layer Perceptron
print("Training Multi-Layer Perceptron...")
print("="*50)

# The network draws its initial weights from NumPy's global random stream,
# so seed it here to make Restart & Run All reproduce the same results.
np.random.seed(42)

# Create MLP with simple architecture: inputs -> 6 hidden -> one output per class.
# Layer sizes are read from the data so they cannot drift out of sync with it.
mlp = MultiLayerPerceptron(
    input_size=X_train_scaled.shape[1],   # 4 iris features
    hidden_size=6,                        # 6 hidden neurons (simple)
    output_size=int(y_train.nunique()),   # 3 iris species
    learning_rate=0.1
)

# Count parameters from the actual arrays instead of a hand-written formula.
total_parameters = (
    mlp.weights_input_hidden.size + mlp.bias_hidden.size
    + mlp.weights_hidden_output.size + mlp.bias_output.size
)

print(f"Network Architecture: {mlp.input_size} -> {mlp.hidden_size} -> {mlp.output_size}")
print(f"Total parameters: {total_parameters} (weights + biases)")
print("Starting training...")
print()

# Train the model
mlp.fit(X_train_scaled, y_train.values, epochs=500, verbose=True)

print("\nTraining completed!")
print("="*50)

In [None]:
# Make predictions and evaluate
print("Making predictions...")

# Predict on training set
y_train_pred = mlp.predict(X_train_scaled)
train_accuracy = accuracy_score(y_train, y_train_pred)

# Predict on test set
y_test_pred = mlp.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Output-layer activations for the test set (one column per class)
y_test_proba = mlp.predict_proba(X_test_scaled)

print(f"\nPerformance Results:")
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Final Training Loss: {mlp.loss_history[-1]:.4f}")

print("\n" + "="*50)
print("CLASSIFICATION REPORT:")
print("="*50)
target_names = ['Setosa', 'Versicolor', 'Virginica']
print(classification_report(y_test, y_test_pred, target_names=target_names))

# Show some example predictions with probabilities
print("\n" + "="*50)
print("SAMPLE PREDICTIONS WITH PROBABILITIES:")
print("="*50)
for i in range(min(8, len(X_test))):
    actual = y_test.iloc[i]
    predicted = y_test_pred[i]
    proba = y_test_proba[i]
    # Unscaled measurements of this test row, indexed by column name.
    features = X_test.iloc[i]

    print(f"Sample {i+1}:")
    # The row is labelled by feature name, so integer keys are not labels:
    # use .iloc for positional access (plain features[0] is deprecated).
    print(
        f"  Features: SL={features.iloc[0]:.2f}, SW={features.iloc[1]:.2f}, "
        f"PL={features.iloc[2]:.2f}, PW={features.iloc[3]:.2f}"
    )
    print(f"  Actual: {target_names[actual]}")
    print(f"  Predicted: {target_names[predicted]}")
    print(f"  Probabilities: Setosa={proba[0]:.3f}, Versicolor={proba[1]:.3f}, Virginica={proba[2]:.3f}")
    print(f"  {'✓ Correct' if actual == predicted else '✗ Wrong'}")
    print()

In [None]:
# Visualize results and training progress
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# 1. Training History (Loss and Accuracy) on a shared x-axis with twin y-axes
epochs = range(1, len(mlp.loss_history) + 1)

ax1 = axes[0,0]
color = 'tab:red'
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss', color=color)
ax1.plot(epochs, mlp.loss_history, color=color, linewidth=2)
ax1.tick_params(axis='y', labelcolor=color)
ax1.grid(True, alpha=0.3)

ax2 = ax1.twinx()
color = 'tab:blue'
ax2.set_ylabel('Accuracy', color=color)
ax2.plot(epochs, mlp.accuracy_history, color=color, linewidth=2)
ax2.tick_params(axis='y', labelcolor=color)

axes[0,0].set_title('Training Progress: Loss and Accuracy')

# 2. Confusion Matrix
cm = confusion_matrix(y_test, y_test_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=target_names, yticklabels=target_names,
            ax=axes[0,1])
axes[0,1].set_title('Confusion Matrix')
axes[0,1].set_xlabel('Predicted')
axes[0,1].set_ylabel('Actual')

# 3. Prediction Confidence Distribution
confidence_scores = np.max(y_test_proba, axis=1)  # Max output activation for each prediction
correct_predictions = (y_test.values == y_test_pred)

# Skip a group with no samples (e.g. no misclassifications): a density
# histogram of an empty array has nothing to normalise by.
for group_mask, group_label, group_color in (
    (correct_predictions, 'Correct', 'green'),
    (~correct_predictions, 'Incorrect', 'red'),
):
    group_scores = confidence_scores[group_mask]
    if group_scores.size:
        axes[1,0].hist(group_scores, bins=15, alpha=0.7,
                       label=group_label, color=group_color, density=True)
axes[1,0].set_xlabel('Prediction Confidence (Max Probability)')
axes[1,0].set_ylabel('Density')
axes[1,0].set_title('Prediction Confidence Distribution')
axes[1,0].legend()
axes[1,0].grid(True, alpha=0.3)

# 4. Network Architecture and Summary
# Layer sizes and parameter count come from the trained model so the panel
# stays correct if the architecture is changed.
total_parameters = (
    mlp.weights_input_hidden.size + mlp.bias_hidden.size
    + mlp.weights_hidden_output.size + mlp.bias_output.size
)
network_info = f"""Multi-Layer Perceptron Summary:

Architecture:
• Input Layer:    {mlp.input_size} neurons (iris features)
• Hidden Layer:   {mlp.hidden_size} neurons (sigmoid)
• Output Layer:   {mlp.output_size} neurons (sigmoid)
• Total Parameters: {total_parameters}

Training Configuration:
• Learning Rate:  {mlp.learning_rate}
• Epochs:         {len(mlp.loss_history)}
• Activation:     Sigmoid
• Loss Function:  Mean Squared Error

Performance:
• Training Accuracy: {train_accuracy:.4f}
• Test Accuracy:     {test_accuracy:.4f}
• Final Loss:        {mlp.loss_history[-1]:.4f}

Key Features:
• Handles multi-class classification
• Backpropagation learning
• Non-linear decision boundaries
• Probability outputs"""

axes[1,1].text(0.05, 0.95, network_info, transform=axes[1,1].transAxes, 
               verticalalignment='top', fontsize=10, fontfamily='monospace',
               bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgreen", alpha=0.7))
axes[1,1].set_title('Network Summary')
axes[1,1].axis('off')

plt.tight_layout()
plt.show()

# Display weight matrices (simplified view)
print("\n" + "="*60)
print("NETWORK WEIGHTS (SIMPLIFIED VIEW)")
print("="*60)
print("Input to Hidden Layer weights shape:", mlp.weights_input_hidden.shape)
print("Hidden to Output Layer weights shape:", mlp.weights_hidden_output.shape)
print("\nHidden Layer Biases:", mlp.bias_hidden.round(3))
print("Output Layer Biases:", mlp.bias_output.round(3))

# Final recap; the architecture line is read from the model so it cannot go
# stale if the layer sizes are changed in the training cell.
print(f"\nFinal Results Summary:")
print(f"{'='*50}")
print(f"Model: Multi-Layer Perceptron ANN")
print(f"Problem: Multi-class Classification (3 iris species)")
print(f"Architecture: {mlp.input_size} -> {mlp.hidden_size} -> {mlp.output_size}")
print(f"Features: 4 iris measurements (standardized)")
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Training Epochs: {len(mlp.loss_history)}")
print(f"Final Loss: {mlp.loss_history[-1]:.4f}")
print("✓ Multi-Layer Perceptron implementation completed successfully!")