In [2]:
import numpy as np
import pandas as pd

# Activation Functions

# Sigmoid function and its derivative for binary tasks
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, applied element-wise.

    The pre-activation is clipped to ``[-500, 500]`` before exponentiating so
    that ``np.exp`` cannot overflow for float64 inputs (the unclipped form
    overflows, with a RuntimeWarning, once ``z`` drops below roughly -709).
    Inside the clipping range the result is unchanged; outside it the true
    value is already within ~1e-217 of 0 or exactly 1.0 in float64.

    Args:
        z: Scalar or array of pre-activations.

    Returns:
        Values in (0, 1] with the same shape as ``z``.
    """
    z_safe = np.clip(z, -500, 500)
    return 1 / (1 + np.exp(-z_safe))

def sigmoid_derivative(a):
    """Sigmoid slope expressed through the sigmoid *output* ``a``.

    Args:
        a: Activation value(s), i.e. the result of ``sigmoid(z)`` rather
            than ``z`` itself.

    Returns:
        ``a * (1 - a)``, element-wise.
    """
    complement = 1 - a
    return a * complement

# ReLU function and its derivative for hidden layers
def relu(z):
    """Rectified linear unit: element-wise ``max(0, z)``.

    Args:
        z: Scalar or array of pre-activations.

    Returns:
        ``z`` with every negative entry replaced by zero.
    """
    rectified = np.maximum(0, z)
    return rectified

def relu_derivative(a):
    """ReLU slope as a float mask: 1.0 where ``a > 0``, otherwise 0.0.

    Args:
        a: Array of values to test against zero. The network passes the
            ReLU output here, which is positive exactly where the
            pre-activation was positive.

    Returns:
        Float array with the same shape as ``a``.
    """
    positive = a > 0
    return positive.astype(float)

# Linear activation for regression outputs
def linear(z):
    """Identity activation: hands back ``z`` itself, without copying.

    Args:
        z: Pre-activation value(s).

    Returns:
        The same object that was passed in.
    """
    return z

def linear_derivative(a):
    """Slope of the identity activation: ones shaped (and typed) like ``a``.

    Args:
        a: Activation value(s); only the shape and dtype are used.

    Returns:
        Array of ones matching ``a``.
    """
    unit_slope = np.ones_like(a)
    return unit_slope

# Softmax activation for multi-class classification
def softmax(z):
    """Row-wise softmax over a 2-D array of scores.

    Args:
        z: Scores laid out as (rows, classes); normalisation runs along
            axis 1.

    Returns:
        Array of the same shape whose rows each sum to 1.
    """
    # Subtracting each row's maximum leaves the result unchanged
    # mathematically but keeps the exponentials from overflowing.
    row_max = np.max(z, axis=1, keepdims=True)
    unnormalised = np.exp(z - row_max)
    row_total = np.sum(unnormalised, axis=1, keepdims=True)
    return unnormalised / row_total

# Loss Functions

# Binary cross-entropy loss for binary classification
def binary_cross_entropy(y_true, y_pred):
    """Mean binary cross-entropy between targets and predicted probabilities.

    Args:
        y_true: Target values (0 or 1 for hard labels).
        y_pred: Predicted probabilities; clipped to ``[1e-7, 1 - 1e-7]``
            before taking logs so that ``log(0)`` never occurs.

    Returns:
        Scalar loss averaged over every element.
    """
    eps = 1e-7
    probs = np.clip(y_pred, eps, 1 - eps)
    positive_term = y_true * np.log(probs)
    negative_term = (1 - y_true) * np.log(1 - probs)
    return -np.mean(positive_term + negative_term)

def binary_cross_entropy_derivative(y_true, y_pred):
    """Element-wise derivative of binary cross-entropy w.r.t. ``y_pred``.

    Computes ``-(y / p) + (1 - y) / (1 - p)`` after clipping ``p`` to
    ``[1e-7, 1 - 1e-7]``, the same bounds ``binary_cross_entropy`` uses.
    The upper bound must be the constant ``1 - 1e-7``: bounding by
    ``1 - y_pred`` would replace every prediction above 0.5 with its
    complement and would divide by zero when ``y_pred == 1``.

    Args:
        y_true: Target values (0 or 1 for hard labels).
        y_pred: Predicted probabilities.

    Returns:
        Array of per-element gradients (no averaging is applied).
    """
    eps = 1e-7
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return -(y_true / y_pred) + (1 - y_true) / (1 - y_pred)

# Mean squared error for regression tasks
def mean_squared_error(y_true, y_pred):
    """Average of the squared differences between targets and predictions.

    Args:
        y_true: Target values.
        y_pred: Predicted values, broadcastable against ``y_true``.

    Returns:
        Scalar mean over every element.
    """
    residual = y_true - y_pred
    return np.mean(residual ** 2)

def mean_squared_error_derivative(y_true, y_pred):
    """Element-wise gradient of the squared error w.r.t. the prediction.

    Args:
        y_true: Target values.
        y_pred: Predicted values.

    Returns:
        ``2 * (y_pred - y_true)``; no division by the element count is
        applied.
    """
    error = y_pred - y_true
    return 2 * error

# Cross-entropy loss for multi-class classification
def categorical_cross_entropy(y_true, y_pred):
    """Mean categorical cross-entropy over the rows of a 2-D batch.

    Args:
        y_true: Target distribution per row (e.g. one-hot labels).
        y_pred: Predicted class probabilities per row; clipped to
            ``[1e-9, 1 - 1e-9]`` before the log.

    Returns:
        Scalar: the per-row losses (summed along axis 1) averaged over rows.
    """
    tiny = 1e-9
    probs = np.clip(y_pred, tiny, 1 - tiny)
    row_log_likelihood = np.sum(y_true * np.log(probs), axis=1)
    return -np.mean(row_log_likelihood)

def categorical_cross_entropy_derivative(y_true, y_pred):
    """Gradient of softmax + cross-entropy w.r.t. the pre-softmax scores.

    For a softmax output fed into cross-entropy, the combined derivative
    collapses to the difference between prediction and target, so no
    separate softmax derivative is needed.

    Args:
        y_true: Target distribution per row (e.g. one-hot labels).
        y_pred: Softmax output per row.

    Returns:
        ``y_pred - y_true``, element-wise.
    """
    gap = y_pred - y_true
    return gap

# Neural Network Class

class NeuralNetwork:
    """Fully connected feed-forward network trained with SGD or Adam.

    The output activation and loss are selected from ``task`` and
    ``output_size``:

    * classification with ``output_size == 1``: sigmoid + binary cross-entropy
    * classification with ``output_size > 1``: softmax + categorical
      cross-entropy
    * regression: linear output + mean squared error

    Hidden layers use ReLU when ``activation`` is ``'relu'`` and sigmoid for
    any other value.
    """

    def __init__(self, input_size, hidden_layers, output_size,
                 task='classification', optimizer='sgd', activation='relu'):
        """Configure the network and randomly initialise its parameters.

        Args:
            input_size: Number of input features.
            hidden_layers: Sequence with the width of each hidden layer.
            output_size: Number of output units.
            task: ``'classification'`` or ``'regression'`` (case-insensitive).
            optimizer: ``'sgd'`` or ``'adam'`` (case-insensitive). The value
                is only checked when ``backward`` runs.
            activation: ``'relu'`` selects ReLU hidden units; anything else
                selects sigmoid.

        Raises:
            ValueError: If ``task`` / ``output_size`` match none of the
                supported configurations.
        """
        self.task = task.lower()
        self.optimizer = optimizer.lower()
        self.activation_name = activation.lower()
        self.hidden_layers = hidden_layers

        # Adam optimizer parameters
        self.t = 0            # number of Adam steps taken (bias correction)
        self.beta1 = 0.9      # decay rate of the first-moment estimate
        self.beta2 = 0.999    # decay rate of the second-moment estimate
        self.epsilon = 1e-8   # keeps the Adam denominator away from zero

        # Choose hidden-layer activation functions
        self.act = relu if self.activation_name == 'relu' else sigmoid
        self.act_deriv = relu_derivative if self.activation_name == 'relu' else sigmoid_derivative

        # Choose loss and output activation based on task type
        if self.task == 'classification' and output_size == 1:
            self.out_act = sigmoid
            self.out_act_deriv = sigmoid_derivative
            self.loss = binary_cross_entropy
            self.loss_deriv = binary_cross_entropy_derivative
        elif self.task == 'classification' and output_size > 1:
            self.out_act = softmax
            # None marks that loss_deriv already returns dLoss/dZ for the
            # softmax + cross-entropy pair.
            self.out_act_deriv = None
            self.loss = categorical_cross_entropy
            self.loss_deriv = categorical_cross_entropy_derivative
        elif self.task == 'regression':
            self.out_act = linear
            self.out_act_deriv = linear_derivative
            self.loss = mean_squared_error
            self.loss_deriv = mean_squared_error_derivative
        else:
            raise ValueError("Invalid task or output configuration.")

        # Layer widths, input first; list() lets hidden_layers be a tuple.
        self.layers = [input_size] + list(hidden_layers) + [output_size]
        self.L = len(self.layers) - 1  # number of weight layers

        # Parameters and Adam moment estimates, keyed by layer index 1..L
        self.W, self.b = {}, {}
        self.mW, self.vW, self.mb, self.vb = {}, {}, {}, {}

        # Small Gaussian weights, zero biases, zero moment estimates
        for l in range(1, len(self.layers)):
            self.W[l] = np.random.randn(self.layers[l-1], self.layers[l]) * 0.01
            self.b[l] = np.zeros((1, self.layers[l]))
            self.mW[l] = np.zeros_like(self.W[l])
            self.vW[l] = np.zeros_like(self.W[l])
            self.mb[l] = np.zeros_like(self.b[l])
            self.vb[l] = np.zeros_like(self.b[l])

    def forward(self, X):
        """Run a forward pass and cache every layer's values on ``self``.

        Args:
            X: Input array of shape (rows, input_size).

        Returns:
            The output-layer activation ``self.A[self.L]``. Pre-activations
            are stored in ``self.Z`` and activations in ``self.A`` (with
            ``self.A[0]`` being ``X``) for use by ``backward``.
        """
        self.Z, self.A = {}, {}
        self.A[0] = X
        for l in range(1, self.L + 1):
            self.Z[l] = np.dot(self.A[l - 1], self.W[l]) + self.b[l]
            self.A[l] = self.out_act(self.Z[l]) if l == self.L else self.act(self.Z[l])
        return self.A[self.L]

    def backward(self, y, learning_rate):
        """Backpropagate from the cached forward pass and update parameters.

        Must be called after ``forward``; it reads ``self.A`` as left by the
        most recent forward pass.

        Args:
            y: Targets with the same shape as the network output.
            learning_rate: Step size handed to the optimizer.

        Raises:
            ValueError: If ``self.optimizer`` is neither ``'sgd'`` nor
                ``'adam'``.
        """
        dZ, dW, db = {}, {}, {}

        # Output layer: dLoss/dZ = dLoss/dA * dA/dZ. When out_act_deriv is
        # None the loss derivative is already taken w.r.t. Z (softmax +
        # cross-entropy), so no extra factor is applied.
        output = self.A[self.L]
        delta = self.loss_deriv(y, output)
        if self.out_act_deriv is not None:
            delta = delta * self.out_act_deriv(output)
        # Average over the rows of the batch so the step size does not grow
        # with batch size; for a single row this divides by 1.
        dZ[self.L] = delta / output.shape[0]
        dW[self.L] = np.dot(self.A[self.L - 1].T, dZ[self.L])
        # Reduce over rows so the bias gradient keeps the bias shape (1, n).
        db[self.L] = np.sum(dZ[self.L], axis=0, keepdims=True)

        # Backpropagation through hidden layers
        for l in reversed(range(1, self.L)):
            dA = np.dot(dZ[l + 1], self.W[l + 1].T)
            dZ[l] = dA * self.act_deriv(self.A[l])
            dW[l] = np.dot(self.A[l - 1].T, dZ[l])
            db[l] = np.sum(dZ[l], axis=0, keepdims=True)

        # Update parameters using chosen optimizer
        if self.optimizer == 'sgd':
            self._apply_sgd(dW, db, learning_rate)
        elif self.optimizer == 'adam':
            self._apply_adam(dW, db, learning_rate)
        else:
            raise ValueError("Unsupported optimizer")

    def _apply_sgd(self, dW, db, lr):
        """Plain gradient-descent step, applied in place to every layer."""
        for l in range(1, self.L + 1):
            self.W[l] -= lr * dW[l]
            self.b[l] -= lr * db[l]

    def _apply_adam(self, dW, db, lr):
        """Adam step with bias-corrected moment estimates, applied in place."""
        self.t += 1
        for l in range(1, self.L + 1):
            # Exponential moving averages of the gradient and its square
            self.mW[l] = self.beta1 * self.mW[l] + (1 - self.beta1) * dW[l]
            self.vW[l] = self.beta2 * self.vW[l] + (1 - self.beta2) * (dW[l] ** 2)
            self.mb[l] = self.beta1 * self.mb[l] + (1 - self.beta1) * db[l]
            self.vb[l] = self.beta2 * self.vb[l] + (1 - self.beta2) * (db[l] ** 2)

            # Bias correction: the averages start at zero
            mW_hat = self.mW[l] / (1 - self.beta1 ** self.t)
            vW_hat = self.vW[l] / (1 - self.beta2 ** self.t)
            mb_hat = self.mb[l] / (1 - self.beta1 ** self.t)
            vb_hat = self.vb[l] / (1 - self.beta2 ** self.t)

            self.W[l] -= lr * mW_hat / (np.sqrt(vW_hat) + self.epsilon)
            self.b[l] -= lr * mb_hat / (np.sqrt(vb_hat) + self.epsilon)

    def train(self, X, y, epochs=1000, learning_rate=0.01, shuffle=True):
        """Train with one parameter update per sample.

        Prints the average per-sample loss every 100 epochs and after the
        final epoch.

        Args:
            X: Inputs of shape (rows, input_size).
            y: Targets with one row per row of ``X``.
            epochs: Number of passes over the data.
            learning_rate: Step size for each update.
            shuffle: When true, draw a new random row order every epoch.
        """
        m = X.shape[0]
        for epoch in range(epochs):
            if shuffle:
                idx = np.random.permutation(m)
                # Fancy indexing builds new arrays; the caller's X and y
                # are left untouched.
                X, y = X[idx], y[idx]
            total_loss = 0
            for i in range(m):
                # Length-1 slices keep the row dimension (shape (1, n))
                x_sample = X[i:i + 1]
                y_sample = y[i:i + 1]
                self.forward(x_sample)
                total_loss += self.loss(y_sample, self.A[self.L])
                self.backward(y_sample, learning_rate)
            if epoch % 100 == 0 or epoch == epochs - 1:
                print(f"Epoch {epoch}: Avg Loss = {total_loss / m:.4f}")

    def predict(self, X):
        """Predict for ``X``.

        Returns:
            Class indices (argmax over columns) for multiclass
            classification, 0/1 integers thresholded at 0.5 for binary
            classification, or the raw network output for regression.
        """
        output = self.forward(X)
        if self.task == 'classification':
            if output.shape[1] > 1:
                return np.argmax(output, axis=1)  # multiclass
            return (output > 0.5).astype(int)    # binary
        return output  # regression


In [3]:
# REGRESSION TEST CASE
np.random.seed(0)

# Synthetic data: targets follow y = 3x + 7 plus normal noise scaled by 0.1
X_reg = np.random.rand(100, 1)
y_reg = (3 * X_reg + 7) + 0.1 * np.random.randn(100, 1)

# Regression network: two hidden ReLU layers of 10 units, trained with Adam
reg_model = NeuralNetwork(1, [10, 10], 1, task='regression',
                          optimizer='adam', activation='relu')
reg_model.train(X_reg, y_reg, epochs=500, learning_rate=0.01)

# Score on the same data the model was trained on and report the MSE
y_pred_reg = reg_model.predict(X_reg)
mse = mean_squared_error(y_reg, y_pred_reg)
print("\nFinal Regression MSE:", mse)


Epoch 0: Avg Loss = 31.9167
Epoch 100: Avg Loss = 0.0193
Epoch 200: Avg Loss = 0.0184
Epoch 300: Avg Loss = 0.0193
Epoch 400: Avg Loss = 0.0262
Epoch 499: Avg Loss = 0.0193

Final Regression MSE: 0.012373456191643716


In [5]:
# MULTICLASS CLASSIFICATION TEST
from sklearn.datasets import make_classification
from sklearn.preprocessing import OneHotEncoder

# Synthetic dataset: 300 samples, 4 features, 3 classes
X_clf, y_clf = make_classification(
    n_samples=300, n_features=4, n_informative=3, n_redundant=0,
    n_classes=3, random_state=42,
)
# One-hot encode the integer labels to match the 3-unit softmax output
y_clf_oh = OneHotEncoder().fit_transform(y_clf.reshape(-1, 1)).toarray()

# Multiclass network: hidden ReLU layers of 12 and 8 units, trained with Adam
clf_model = NeuralNetwork(4, [12, 8], 3, task='classification',
                          optimizer='adam', activation='relu')
clf_model.train(X_clf, y_clf_oh, epochs=500, learning_rate=0.01)

# Accuracy on the training data: predicted class index vs. integer label
y_pred_clf = clf_model.predict(X_clf)
accuracy = np.mean(y_pred_clf == y_clf)
print("\nMulticlass Classification Accuracy:", accuracy)

Epoch 0: Avg Loss = 0.9547
Epoch 100: Avg Loss = 0.2351
Epoch 200: Avg Loss = 0.3130
Epoch 300: Avg Loss = 0.2468
Epoch 400: Avg Loss = 0.1896
Epoch 499: Avg Loss = 0.2903

Multiclass Classification Accuracy: 0.9566666666666667
