In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification, make_moons, make_circles
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import warnings
warnings.filterwarnings('ignore')

# Reproducibility: seed NumPy's legacy global RNG, which the from-scratch
# models in this notebook draw their initial weights from.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Plot appearance: matplotlib's stock style with seaborn's "husl" palette.
plt.style.use('default')
sns.set_palette("husl")

print("Libraries imported successfully!")


In [None]:
class Perceptron:
    """Single-layer perceptron with a step activation.

    Trained with the classic perceptron learning rule: every misclassified
    sample nudges the weights and bias toward the correct side.

    Parameters
    ----------
    learning_rate : float
        Step size applied to each corrective update.
    max_epochs : int
        Upper bound on the number of full passes over the training data.

    Attributes
    ----------
    weights : ndarray of shape (n_features,) or None
        Learned weights; None until fit() has been called.
    bias : float or None
        Learned bias; None until fit() has been called.
    training_errors : list of int
        Number of misclassified samples per epoch of the most recent fit().
    """

    def __init__(self, learning_rate=0.01, max_epochs=1000):
        self.learning_rate = learning_rate
        self.max_epochs = max_epochs
        self.weights = None
        self.bias = None
        self.training_errors = []

    def activation_function(self, x):
        """Step activation: 1 where x >= 0, otherwise 0."""
        return np.where(x >= 0, 1, 0)

    def fit(self, X, y):
        """Train the perceptron on binary (0/1) labels.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If X is not 2-D or X and y disagree on the number of samples.
        """
        # Accept lists / pandas objects and index positionally from here on.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent lengths: {X.shape[0]} != {y.shape[0]}"
            )

        n_samples, n_features = X.shape

        # Small random weights (mean 0, std 0.1) and a zero bias.
        self.weights = np.random.normal(0, 0.1, n_features)
        self.bias = 0.0
        # Start a fresh history so a refit does not append to the previous run.
        self.training_errors = []

        for epoch in range(self.max_epochs):
            errors = 0

            for sample, target in zip(X, y):
                prediction = self.activation_function(
                    np.dot(sample, self.weights) + self.bias
                )
                error = target - prediction

                # Perceptron rule: only misclassified samples move the boundary.
                if error != 0:
                    step = self.learning_rate * error
                    self.weights += step * sample
                    self.bias += step
                    errors += 1

            self.training_errors.append(errors)

            # An epoch without mistakes means every sample is classified correctly.
            if errors == 0:
                print(f"Converged after {epoch + 1} epochs")
                break

        return self

    def _require_fitted(self):
        """Raise a clear error when the model is used before fit()."""
        if self.weights is None or self.bias is None:
            raise RuntimeError("Perceptron is not fitted yet; call fit() first.")

    def predict(self, X):
        """Return the predicted class (0 or 1) for each row of X."""
        return self.activation_function(self.decision_function(X))

    def decision_function(self, X):
        """Return the raw linear score X @ weights + bias for each row of X."""
        self._require_fitted()
        return np.dot(X, self.weights) + self.bias

# Demo: train the perceptron on a tiny linearly separable (AND-like) dataset
print("=== Testing Perceptron on Linearly Separable Data ===")

# Re-seed so the random weight initialisation inside fit() is repeatable
np.random.seed(42)

# Four corners of the unit square plus four points nudged toward the interior;
# only the points near (1, 1) belong to class 1.
X_simple = np.array([
    [0, 0], [0, 1], [1, 0], [1, 1],
    [0.1, 0.1], [0.1, 0.9], [0.9, 0.1], [0.9, 0.9],
])
y_simple = np.array([0, 0, 0, 1, 0, 0, 0, 1])

# fit() returns the estimator itself, so construction and training chain
perceptron = Perceptron(learning_rate=0.1, max_epochs=100).fit(X_simple, y_simple)

# Score the model on the same points it was trained on
predictions = perceptron.predict(X_simple)
accuracy = accuracy_score(y_simple, predictions)

# Report the learned parameters next to predicted vs. true labels
for report_line in (
    f"Final weights: {perceptron.weights}",
    f"Final bias: {perceptron.bias}",
    f"Accuracy: {accuracy:.2f}",
    f"Predictions: {predictions}",
    f"True labels: {y_simple}",
):
    print(report_line)


In [None]:
# Visualize the trained perceptron: error curve, decision boundary, parameters
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Plot 1: number of misclassifications recorded for each training epoch
axes[0].plot(perceptron.training_errors, linewidth=2)
axes[0].set_title('Training Errors Over Epochs')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Number of Errors')
axes[0].grid(True, alpha=0.3)

# Plot 2: data points and decision boundary
x_min, x_max = X_simple[:, 0].min() - 0.1, X_simple[:, 0].max() + 0.1
y_min, y_max = X_simple[:, 1].min() - 0.1, X_simple[:, 1].max() + 0.1

# Evaluate the raw decision function on a dense grid covering the data
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                     np.linspace(y_min, y_max, 100))
mesh_points = np.c_[xx.ravel(), yy.ravel()]
Z = perceptron.decision_function(mesh_points)
Z = Z.reshape(xx.shape)

# Shade the decision values first, then draw the zero level set (the decision
# boundary) on top so the dashed line is not covered by the filled contours
axes[1].contourf(xx, yy, Z, levels=50, alpha=0.3, cmap='RdYlBu')
axes[1].contour(xx, yy, Z, levels=[0], colors='red', linestyles='--', linewidths=2)

# Overlay the training points, colored by their true label
scatter = axes[1].scatter(X_simple[:, 0], X_simple[:, 1], c=y_simple,
                          cmap='RdYlBu', s=100, edgecolors='black', linewidth=2)
axes[1].set_title('Perceptron Decision Boundary')
axes[1].set_xlabel('Feature 1')
axes[1].set_ylabel('Feature 2')
# Colorbars are created through the Figure (Axes objects have no colorbar
# method); ax= attaches this one to the decision-boundary panel.
fig.colorbar(scatter, ax=axes[1])

# Plot 3: final learned weights and bias
axes[2].bar(['Weight 1', 'Weight 2', 'Bias'],
            [perceptron.weights[0], perceptron.weights[1], perceptron.bias])
axes[2].set_title('Final Learned Parameters')
axes[2].set_ylabel('Parameter Value')
axes[2].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Same perceptron on XOR, whose classes cannot be split by a single line
print("\n=== Testing Perceptron on XOR Problem (Non-linearly Separable) ===")
X_xor = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_xor = np.array([0, 1, 1, 0])  # 1 exactly when the two inputs differ

# fit() returns the estimator, so build and train in one expression
perceptron_xor = Perceptron(learning_rate=0.1, max_epochs=100).fit(X_xor, y_xor)

predictions_xor = perceptron_xor.predict(X_xor)
accuracy_xor = accuracy_score(y_xor, predictions_xor)

# Show how far the linear model gets on the four XOR points
for xor_line in (
    f"XOR Accuracy: {accuracy_xor:.2f}",
    f"XOR Predictions: {predictions_xor}",
    f"XOR True labels: {y_xor}",
    "Note: Perceptron cannot solve XOR problem (non-linearly separable)!",
):
    print(xor_line)


In [None]:
# Define common activation functions
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated element-wise.

    The input is clipped to [-500, 500] before exponentiation so that
    np.exp cannot overflow.
    """
    bounded = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-bounded))

def sigmoid_derivative(x):
    """Slope of the sigmoid at x, computed as sigmoid(x) * (1 - sigmoid(x))."""
    value = sigmoid(x)
    complement = 1 - value
    return value * complement

def tanh(x):
    """Element-wise hyperbolic tangent (thin wrapper around np.tanh)."""
    squashed = np.tanh(x)
    return squashed

def tanh_derivative(x):
    """Slope of tanh at x, computed as 1 - tanh(x)**2."""
    t = np.tanh(x)
    return 1 - t**2

def relu(x):
    """Rectified linear unit: element-wise max(0, x)."""
    return np.maximum(0, x)

def relu_derivative(x):
    """Derivative of ReLU: 1.0 where x > 0, otherwise 0.0 (including at x == 0).

    Accepts Python scalars and sequences as well as NumPy arrays, matching
    what relu() accepts; the result is always a float ndarray.
    """
    # np.asarray gives scalars and lists the array comparison / astype API.
    return (np.asarray(x) > 0).astype(float)

def leaky_relu(x, alpha=0.01):
    """Leaky ReLU: x where x > 0, otherwise alpha * x."""
    is_positive = x > 0
    scaled = alpha * x
    return np.where(is_positive, x, scaled)

def leaky_relu_derivative(x, alpha=0.01):
    """Slope of Leaky ReLU: 1 where x > 0, otherwise alpha."""
    is_positive = x > 0
    return np.where(is_positive, 1, alpha)

# Plot each activation (top row) above its derivative (bottom row)
x = np.linspace(-5, 5, 1000)

fig, axes = plt.subplots(2, 4, figsize=(20, 10))

# Top row: the activation functions themselves
activation_functions = [
    (sigmoid, "Sigmoid"),
    (tanh, "Tanh"),
    (relu, "ReLU"),
    (lambda x: leaky_relu(x, 0.1), "Leaky ReLU")
]

# Bottom row: the matching derivatives, in the same column order
derivative_functions = [
    (sigmoid_derivative, "Sigmoid Derivative"),
    (tanh_derivative, "Tanh Derivative"),
    (relu_derivative, "ReLU Derivative"),
    (lambda x: leaky_relu_derivative(x, 0.1), "Leaky ReLU Derivative")
]

def _draw_curve_panel(ax, xs, ys, label, title, ylabel, **line_kwargs):
    """Draw one labelled curve on ax with the shared grid/legend styling."""
    ax.plot(xs, ys, linewidth=2, label=label, **line_kwargs)
    ax.set_title(title)
    ax.set_xlabel('Input')
    ax.set_ylabel(ylabel)
    ax.grid(True, alpha=0.3)
    ax.legend()

for i, (func, name) in enumerate(activation_functions):
    y = func(x)
    _draw_curve_panel(axes[0, i], x, y, name, f'{name} Function', 'Output')

for i, (func, name) in enumerate(derivative_functions):
    y = func(x)
    _draw_curve_panel(axes[1, i], x, y, name, f'{name}', 'Derivative', color='orange')

plt.tight_layout()
plt.show()

# Summary table and takeaways for the activation functions plotted above
activation_report_lines = [
    "Activation Function Properties:",
    "=" * 60,
    "Function    | Range        | Derivative Range | Zero-Centered | Non-linear",
    "-" * 60,
    "Sigmoid     | (0, 1)       | (0, 0.25]       | No            | Yes",
    "Tanh        | (-1, 1)      | (0, 1]          | Yes           | Yes",
    "ReLU        | [0, ∞)       | {0, 1}          | No            | Yes",
    "Leaky ReLU  | (-∞, ∞)      | {α, 1}          | No            | Yes",
    "\nKey Insights:",
    "- Sigmoid: Saturates (vanishing gradients), output not zero-centered",
    "- Tanh: Zero-centered, still saturates",
    "- ReLU: Solves vanishing gradient, but can 'die' (always output 0)",
    "- Leaky ReLU: Prevents dying ReLU problem",
]
# Joining with newlines emits exactly what one print() per line would
print("\n".join(activation_report_lines))


In [None]:
class MLP:
    """Fully connected feed-forward network trained with full-batch gradient descent.

    Parameters
    ----------
    layer_sizes : sequence of int
        Units per layer, input layer first and output layer last,
        e.g. ``[2, 4, 1]``. At least two entries are required.
    activation : {'sigmoid', 'tanh', 'relu'}
        Non-linearity used by every hidden layer, and by the output layer
        when it has more than one unit.
    learning_rate : float
        Gradient-descent step size.
    max_epochs : int
        Number of full-batch updates performed by fit().

    Notes
    -----
    A single output unit is treated as binary classification: the output is
    always a sigmoid and the loss is binary cross-entropy. Any other output
    width is trained with mean squared error on the activated outputs.

    Attributes
    ----------
    weights, biases : list of ndarray
        One (fan_in, fan_out) weight matrix and one (1, fan_out) bias row per
        layer transition.
    training_losses : list of float
        Loss recorded at every epoch of every fit() call on this instance
        (weights persist across calls, so repeated fits continue training).
    """

    def __init__(self, layer_sizes, activation='sigmoid', learning_rate=0.01, max_epochs=1000):
        if len(layer_sizes) < 2:
            raise ValueError("layer_sizes needs at least an input and an output layer")

        self.layer_sizes = layer_sizes
        self.activation = activation
        self.learning_rate = learning_rate
        self.max_epochs = max_epochs

        # Initialize weights and biases
        self.weights = []
        self.biases = []

        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            # He (Kaiming) initialization: standard deviation sqrt(2 / fan_in)
            w = np.random.randn(fan_in, fan_out) * np.sqrt(2.0 / fan_in)
            b = np.zeros((1, fan_out))
            self.weights.append(w)
            self.biases.append(b)

        self.training_losses = []

    def _activation_function(self, x):
        """Apply the configured activation element-wise.

        Raises ValueError if self.activation is not a supported name.
        """
        if self.activation == 'sigmoid':
            return sigmoid(x)
        elif self.activation == 'tanh':
            return tanh(x)
        elif self.activation == 'relu':
            return relu(x)
        else:
            raise ValueError(f"Unknown activation: {self.activation}")

    def _activation_derivative(self, x):
        """Derivative of the configured activation, evaluated at pre-activation x.

        Raises ValueError if self.activation is not a supported name.
        """
        if self.activation == 'sigmoid':
            return sigmoid_derivative(x)
        elif self.activation == 'tanh':
            return tanh_derivative(x)
        elif self.activation == 'relu':
            return relu_derivative(x)
        else:
            raise ValueError(f"Unknown activation: {self.activation}")

    def forward_pass(self, X):
        """Propagate X through the network and return the output activations.

        Side effect: caches every layer's pre-activation in ``self.z_values``
        and activation in ``self.activations`` (input first) for use by
        backward_pass().
        """
        current_input = np.asarray(X)
        self.activations = [current_input]
        self.z_values = []

        last_layer = len(self.weights) - 1
        binary_output = self.layer_sizes[-1] == 1

        for i, (W, b) in enumerate(zip(self.weights, self.biases)):
            # Linear transformation
            z = np.dot(current_input, W) + b
            self.z_values.append(z)

            # A single output unit always uses sigmoid (binary classification);
            # every other layer, including a multi-unit output, uses the
            # configured activation.
            if i == last_layer and binary_output:
                a = sigmoid(z)
            else:
                a = self._activation_function(z)

            self.activations.append(a)
            current_input = a

        return self.activations[-1]

    def backward_pass(self, X, y, output):
        """Compute loss gradients for every weight matrix and bias row.

        Must be called right after forward_pass() on the same X, because it
        reads the cached ``self.activations`` and ``self.z_values``.

        Returns
        -------
        (dW, db) : tuple of lists
            Gradients in the same order and shapes as ``self.weights`` and
            ``self.biases``.
        """
        m = np.asarray(X).shape[0]
        y = np.asarray(y)
        n_layers = len(self.weights)

        # Gradient of the loss w.r.t. the output pre-activation, with the
        # 1/m batch average deferred to the dW/db lines below.
        if self.layer_sizes[-1] == 1:
            # Sigmoid + binary cross-entropy collapses to (prediction - target)
            dz = output - y.reshape(-1, 1)
        else:
            # MSE averaged over all m * k outputs: the chain rule needs the
            # output activation's derivative and the 2/k constant.
            n_outputs = self.layer_sizes[-1]
            dz = ((2.0 / n_outputs) * (output - y)
                  * self._activation_derivative(self.z_values[-1]))

        dW = [None] * n_layers
        db = [None] * n_layers

        # Propagate error backwards
        for i in reversed(range(n_layers)):
            # Gradient w.r.t weights and biases
            dW[i] = (1 / m) * np.dot(self.activations[i].T, dz)
            db[i] = (1 / m) * np.sum(dz, axis=0, keepdims=True)

            # Propagate error to previous layer (if not input layer)
            if i > 0:
                dz = np.dot(dz, self.weights[i].T) * self._activation_derivative(self.z_values[i - 1])

        return dW, db

    def fit(self, X, y):
        """Train with full-batch gradient descent for ``max_epochs`` epochs.

        Appends the loss of every epoch to ``self.training_losses`` and prints
        it every 100 epochs. Returns self.
        """
        X = np.asarray(X)
        targets = np.asarray(y)
        binary_output = self.layer_sizes[-1] == 1
        if binary_output:
            # Column vector so targets line up with the (n_samples, 1) output;
            # a 1-D target would broadcast the loss to (n_samples, n_samples).
            targets = targets.reshape(-1, 1)

        for epoch in range(self.max_epochs):
            # Forward pass
            output = self.forward_pass(X)

            if binary_output:
                # Binary cross-entropy; 1e-15 keeps log() away from zero
                loss = -np.mean(targets * np.log(output + 1e-15)
                                + (1 - targets) * np.log(1 - output + 1e-15))
            else:
                # Mean squared error over all outputs
                loss = np.mean((output - targets) ** 2)

            self.training_losses.append(loss)

            # Backward pass
            dW, db = self.backward_pass(X, targets, output)

            # Update weights and biases
            for i in range(len(self.weights)):
                self.weights[i] -= self.learning_rate * dW[i]
                self.biases[i] -= self.learning_rate * db[i]

            # Print progress
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.6f}")

        return self

    def predict(self, X):
        """Return 0/1 labels (threshold 0.5) for a single-output network,
        otherwise the raw network outputs."""
        output = self.forward_pass(X)
        if self.layer_sizes[-1] == 1:  # Binary classification
            return (output > 0.5).astype(int).flatten()
        else:
            return output

    def predict_proba(self, X):
        """Return the raw network outputs (sigmoid probabilities of shape
        (n_samples, 1) for a single-output network)."""
        return self.forward_pass(X)

# Demo: train the MLP on XOR, the problem the single-layer perceptron failed on
print("=== Testing MLP on XOR Problem ===")

# The four XOR input/label pairs
X_xor = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_xor = np.array([0, 1, 1, 0])

# Architecture: 2 inputs -> 4 hidden units -> 1 output
mlp = MLP(layer_sizes=[2, 4, 1], activation='sigmoid', learning_rate=1.0, max_epochs=1000)
mlp.fit(X_xor, y_xor)

# Hard labels plus the underlying sigmoid outputs for the same four points
predictions = mlp.predict(X_xor)
probabilities = mlp.predict_proba(X_xor).flatten()
xor_accuracy = accuracy_score(y_xor, predictions)

for xor_result_line in (
    "\nXOR Results:",
    f"True labels:    {y_xor}",
    f"Predictions:    {predictions}",
    f"Probabilities:  {probabilities}",
    f"Accuracy:       {xor_accuracy:.2f}",
):
    print(xor_result_line)

# Demo: a deeper ReLU network on the two-moons dataset with a held-out test set
print("\n=== Testing MLP on Moons Dataset ===")
X_moons, y_moons = make_moons(n_samples=1000, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X_moons, y_moons, test_size=0.2, random_state=42
)

# Scaling statistics come from the training split only and are then applied
# unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Architecture: 2 inputs -> 10 -> 10 hidden units -> 1 output
mlp_moons = MLP(layer_sizes=[2, 10, 10, 1], activation='relu', learning_rate=0.01, max_epochs=1000)
mlp_moons.fit(X_train_scaled, y_train)

# Compare accuracy on the data seen during training with the held-out split
train_pred = mlp_moons.predict(X_train_scaled)
test_pred = mlp_moons.predict(X_test_scaled)
train_accuracy = accuracy_score(y_train, train_pred)
test_accuracy = accuracy_score(y_test, test_pred)

print(f"Training Accuracy: {train_accuracy:.3f}")
print(f"Test Accuracy:     {test_accuracy:.3f}")
