In [1]:
import numpy as np
from typing import List, Tuple

class MLP:
    def __init__(self, n_inputs: int, n_hidden: int, n_outputs: int):
        self.ni = n_inputs
        self.nh = n_hidden
        self.no = n_outputs
        
        # Initialize weights with small random values
        self.w1 = np.random.uniform(-0.5, 0.5, (self.nh, self.ni))
        self.w2 = np.random.uniform(-0.5, 0.5, (self.no, self.nh))
        
        # Weight updates initialization
        self.dw1 = np.zeros((self.nh, self.ni))
        self.dw2 = np.zeros((self.no, self.nh))
        
        # Activation storage
        self.z1 = np.zeros(self.nh)
        self.z2 = np.zeros(self.no)
        self.h = np.zeros(self.nh)
        self.o = np.zeros(self.no)

    def sigmoid(self, x: np.ndarray) -> np.ndarray:
        return 1 / (1 + np.exp(-x))
    
    def sigmoid_derivative(self, x: np.ndarray) -> np.ndarray:
        return x * (1 - x)

    def forward(self, inputs: np.ndarray) -> np.ndarray:
        # Forward pass through first layer
        self.z1 = np.dot(self.w1, inputs)
        self.h = self.sigmoid(self.z1)
        
        # Forward pass through second layer
        self.z2 = np.dot(self.w2, self.h)
        self.o = self.sigmoid(self.z2)
        
        return self.o

    def backward(self, inputs: np.ndarray, targets: np.ndarray) -> float:
        # Output layer deltas
        output_deltas = (targets - self.o) * self.sigmoid_derivative(self.o)
        
        # Hidden layer deltas
        hidden_deltas = np.dot(self.w2.T, output_deltas) * self.sigmoid_derivative(self.h)
        
        # Update weights
        self.dw2 += np.outer(output_deltas, self.h)
        self.dw1 += np.outer(hidden_deltas, inputs)
        
        # Calculate error
        error = 0.5 * np.sum((targets - self.o) ** 2)
        return error

    def update_weights(self, learning_rate: float):
        self.w1 += learning_rate * self.dw1
        self.w2 += learning_rate * self.dw2
        
        # Reset weight updates
        self.dw1.fill(0)
        self.dw2.fill(0)

def train_network(mlp: MLP, 
                 training_data: List[Tuple[np.ndarray, np.ndarray]], 
                 epochs: int, 
                 learning_rate: float, 
                 batch_size: int = 1) -> List[float]:
    errors = []
    
    for epoch in range(epochs):
        total_error = 0
        np.random.shuffle(training_data)
        
        for i in range(0, len(training_data), batch_size):
            batch = training_data[i:i + batch_size]
            batch_error = 0
            
            for inputs, targets in batch:
                # Forward pass
                mlp.forward(inputs)
                # Backward pass
                batch_error += mlp.backward(inputs, targets)
            
            # Update weights after batch
            mlp.update_weights(learning_rate)
            total_error += batch_error
            
        errors.append(total_error)
        if epoch % 100 == 0:
            print(f"Epoch {epoch}: Error = {total_error:.6f}")
    
    return errors

In [3]:
import numpy as np

# XOR training data
xor_data = [
    (np.array([0, 0]), np.array([0])),
    (np.array([0, 1]), np.array([1])),
    (np.array([1, 0]), np.array([1])),
    (np.array([1, 1]), np.array([0]))
]

# Create and train network
mlp = MLP(2, 4, 1)  # 2 inputs, 4 hidden units, 1 output
errors = train_network(mlp, xor_data, epochs=5000, learning_rate=0.1)

# Test network
print("\nTesting XOR predictions:")
for inputs, targets in xor_data:
    output = mlp.forward(inputs)
    print(f"Input: {inputs}, Target: {targets[0]}, Output: {output[0]:.4f}")

Epoch 0: Error = 0.522158
Epoch 100: Error = 0.503114
Epoch 200: Error = 0.502905
Epoch 300: Error = 0.502869
Epoch 400: Error = 0.502867
Epoch 500: Error = 0.502848
Epoch 600: Error = 0.502828
Epoch 700: Error = 0.502808
Epoch 800: Error = 0.502787
Epoch 900: Error = 0.502749
Epoch 1000: Error = 0.502743
Epoch 1100: Error = 0.502717
Epoch 1200: Error = 0.502691
Epoch 1300: Error = 0.502663
Epoch 1400: Error = 0.502616
Epoch 1500: Error = 0.502597
Epoch 1600: Error = 0.502545
Epoch 1700: Error = 0.502504
Epoch 1800: Error = 0.502457
Epoch 1900: Error = 0.502419
Epoch 2000: Error = 0.502359
Epoch 2100: Error = 0.502290
Epoch 2200: Error = 0.502194
Epoch 2300: Error = 0.502118
Epoch 2400: Error = 0.501993
Epoch 2500: Error = 0.501877
Epoch 2600: Error = 0.501720
Epoch 2700: Error = 0.501528
Epoch 2800: Error = 0.501291
Epoch 2900: Error = 0.500979
Epoch 3000: Error = 0.500608
Epoch 3100: Error = 0.500139
Epoch 3200: Error = 0.499509
Epoch 3300: Error = 0.498679
Epoch 3400: Error = 0.4975

In [5]:
import numpy as np

# Generate dataset
num_samples = 500
X = np.random.uniform(-1, 1, (num_samples, 4))
y = np.sin(X[:, 0] - X[:, 1] + X[:, 2] - X[:, 3]).reshape(-1, 1)

# Split into training and test sets
train_size = 400
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Create training data tuples
train_data = [(X_train[i], y_train[i]) for i in range(train_size)]
test_data = [(X_test[i], y_test[i]) for i in range(len(X_test))]

# Create and train network
mlp = MLP(4, 5, 1)  # 4 inputs, 5 hidden units, 1 output
train_errors = train_network(mlp, train_data, epochs=1000, learning_rate=0.05)

# Calculate test error
test_error = 0
for inputs, targets in test_data:
    output = mlp.forward(inputs)
    test_error += 0.5 * np.sum((targets - output) ** 2)

print(f"\nFinal training error: {train_errors[-1]:.6f}")
print(f"Test error: {test_error:.6f}")

Epoch 0: Error = 108.816682
Epoch 100: Error = 49.561317
Epoch 200: Error = 49.502611
Epoch 300: Error = 49.481952
Epoch 400: Error = 49.463058
Epoch 500: Error = 49.444629
Epoch 600: Error = 49.427016
Epoch 700: Error = 49.410674
Epoch 800: Error = 49.395892
Epoch 900: Error = 49.382854

Final training error: 49.371692
Test error: 13.023525


In [9]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelBinarizer

# Load and prepare data
def prepare_letter_data(file_path):
    # Load data
    data = pd.read_csv(file_path, header=None)
    
    # Separate features and labels
    X = data.iloc[:, 1:].values  # All columns except first
    y = data.iloc[:, 0].values   # First column contains letters
    
    # Normalize features to [-1, 1]
    X = (X - X.min()) / (X.max() - X.min()) * 2 - 1
    
    # Convert letters to one-hot encoding
    lb = LabelBinarizer()
    y = lb.fit_transform(y)
    
    return X, y

# Split data into train and test sets
def train_test_split(X, y, train_ratio=0.8):
    n_samples = len(X)
    n_train = int(n_samples * train_ratio)
    
    # Shuffle indices
    indices = np.random.permutation(n_samples)
    train_idx, test_idx = indices[:n_train], indices[n_train:]
    
    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]

# Prepare data
X, y = prepare_letter_data('letter-recognition.data')
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Create training data tuples
train_data = [(X_train[i], y_train[i]) for i in range(len(X_train))]
test_data = [(X_test[i], y_test[i]) for i in range(len(X_test))]

# Create and train network
mlp = MLP(17, 10, 26)  # 17 inputs, 10 hidden units, 26 outputs
train_errors = train_network(mlp, train_data, epochs=1000, learning_rate=0.01, batch_size=32)

# Calculate accuracy on test set
correct = 0
total = len(test_data)
for inputs, targets in test_data:
    output = mlp.forward(inputs)
    predicted_class = np.argmax(output)
    true_class = np.argmax(targets)
    if predicted_class == true_class:
        correct += 1

accuracy = correct / total
print(f"\nTest accuracy: {accuracy:.4f}")

ValueError: shapes (10,17) and (16,) not aligned: 17 (dim 1) != 16 (dim 0)