<a href="https://colab.research.google.com/github/Rekg/neuralnetwork/blob/main/neuralnetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

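The dataset used in this notebook is generated synthetically inside the code (uniformly random features paired with uniformly random class labels) purely to exercise the neural network implementation. Because the labels carry no real signal, the printed loss and predictions only show that the training loop runs; they are not a measure of model quality.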

In [None]:
import numpy as np

def relu(x):
    """Rectified linear unit: element-wise ``max(0, x)``."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Gradient of ReLU: 1.0 where ``x`` is strictly positive, else 0.0."""
    positive_mask = x > 0
    return positive_mask.astype(float)

def softmax(x):
    """Return the softmax of ``x`` taken over its last axis.

    For the usual ``(batch, classes)`` input this normalises each row, exactly
    as before. Normalising over the last axis (rather than a hard-coded
    ``axis=1``) additionally lets a single 1-D score vector, or a batch with
    extra leading dimensions, be passed without raising an axis error.

    Args:
        x: Array-like of raw scores (logits) with at least one dimension.

    Returns:
        An array of the same shape whose entries along the last axis are
        non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Subtracting the per-row maximum leaves the result unchanged but keeps
    # np.exp from overflowing on large scores.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exp_vals = np.exp(shifted)
    return exp_vals / np.sum(exp_vals, axis=-1, keepdims=True)

def cross_entropy_loss(predictions, labels):
    """Mean categorical cross-entropy of a batch.

    ``predictions`` are passed through ``softmax`` inside this function, so
    they are expected to be raw scores (logits); handing in probabilities
    means softmax is applied to them a second time.

    Args:
        predictions: ``(batch, classes)`` array of scores.
        labels: ``(batch, classes)`` one-hot array; the true class of each row
            is recovered with ``argmax``.

    Returns:
        The negative log-probability of the true class, averaged over rows.
    """
    batch_len = labels.shape[0]
    probs = softmax(predictions)
    true_class = np.argmax(labels, axis=1)
    picked = probs[range(batch_len), true_class]
    # The small constant guards against log(0).
    per_sample = -np.log(picked + 1e-9)
    return np.sum(per_sample) / batch_len

def one_hot(y, num_classes):
    """Encode integer class ids as one-hot rows.

    Args:
        y: Sequence of integer class indices in ``[0, num_classes)``.
        num_classes: Width of each encoded row.

    Returns:
        A ``(len(y), num_classes)`` float array with a single 1 per row.
    """
    n_rows = len(y)
    encoded = np.zeros((n_rows, num_classes))
    row_ids = np.arange(n_rows)
    encoded[row_ids, y] = 1
    return encoded

def adam_optimizer(w, b, dw, db, lr, mw, vw, mb, vb, t, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """Apply one Adam step to a layer's weights and biases.

    ``w`` and ``b`` are updated in place (and also returned); the moment
    estimates are returned as new arrays.

    Args:
        w, b: Parameters of the layer.
        dw, db: Gradients with respect to ``w`` and ``b``.
        lr: Learning rate.
        mw, vw: Running first/second moment estimates for ``w``.
        mb, vb: Running first/second moment estimates for ``b``.
        t: Step counter used for bias correction.
        beta1, beta2: Decay rates of the first and second moments.
        epsilon: Added to the denominator for numerical stability.

    Returns:
        Tuple ``(w, b, mw, vw, mb, vb)`` after the update.
    """
    # Bias-correction denominators are the same for weights and biases.
    first_scale = 1 - beta1 ** t
    second_scale = 1 - beta2 ** t

    # Weights: refresh moments, then take the bias-corrected step.
    mw = beta1 * mw + (1 - beta1) * dw
    vw = beta2 * vw + (1 - beta2) * (dw ** 2)
    w_first = mw / first_scale
    w_second = vw / second_scale
    w -= lr * w_first / (np.sqrt(w_second) + epsilon)

    # Biases: same procedure.
    mb = beta1 * mb + (1 - beta1) * db
    vb = beta2 * vb + (1 - beta2) * (db ** 2)
    b_first = mb / first_scale
    b_second = vb / second_scale
    b -= lr * b_first / (np.sqrt(b_second) + epsilon)

    return w, b, mw, vw, mb, vb

class NeuralNetwork:
    """Fully connected classifier trained with mini-batch Adam.

    Every hidden layer applies ReLU; the final layer is linear and its scores
    are turned into class probabilities with softmax.

    Attributes:
        weights, biases: Per-layer parameters, input side first.
        m_w, v_w, m_b, v_b: Per-layer Adam moment estimates.
        t: Adam step counter, starting at 1 and incremented once per batch.
        layer_outputs: Set by ``forward``; the input, each hidden activation
            and finally the raw output scores (logits) of the last pass.
    """

    def __init__(self, input_size, layers, num_classes, activation='relu', optimizer='adam', learning_rate=0.01):
        """Create the parameters for the requested architecture.

        Args:
            input_size: Number of input features.
            layers: Sequence with the width of each hidden layer.
            num_classes: Number of output classes.
            activation: Accepted for interface compatibility; ReLU is always
                used regardless of the value.
            optimizer: Stored on the instance; training always uses Adam.
            learning_rate: Step size handed to the optimizer.
        """
        self.layers = layers
        self.learning_rate = learning_rate
        self.activation = relu
        self.activation_derivative = relu_derivative
        self.optimizer = optimizer
        self.weights = []
        self.biases = []
        self.m_w, self.v_w = [], []
        self.m_b, self.v_b = [], []
        self.t = 1

        # list(...) lets callers pass a tuple (or any sequence) of widths.
        layer_sizes = [input_size] + list(layers) + [num_classes]
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            # Normal draws scaled by sqrt(2 / fan_in), suited to ReLU layers.
            w = np.random.randn(fan_in, fan_out) * np.sqrt(2. / fan_in)
            b = np.zeros((1, fan_out))
            self.weights.append(w)
            self.biases.append(b)
            self.m_w.append(np.zeros_like(w))
            self.v_w.append(np.zeros_like(w))
            self.m_b.append(np.zeros_like(b))
            self.v_b.append(np.zeros_like(b))

    def forward(self, X):
        """Run a forward pass and cache every layer's output.

        Args:
            X: ``(batch, input_size)`` array.

        Returns:
            ``(batch, num_classes)`` array of softmax probabilities. The raw
            scores they were computed from are kept in
            ``self.layer_outputs[-1]``.
        """
        self.layer_outputs = [X]
        for w, b in zip(self.weights[:-1], self.biases[:-1]):
            X = self.activation(np.dot(X, w) + b)
            self.layer_outputs.append(X)
        output = np.dot(X, self.weights[-1]) + self.biases[-1]
        self.layer_outputs.append(output)
        return softmax(output)

    def backward(self, X, y, y_pred):
        """Back-propagate the softmax cross-entropy gradient.

        Must be called after ``forward`` on the same batch, since it reads the
        cached ``self.layer_outputs``.

        Args:
            X: The batch that was fed to ``forward``.
            y: One-hot labels with the same shape as ``y_pred``.
            y_pred: Softmax probabilities returned by ``forward``.

        Returns:
            ``(gradients_w, gradients_b)``: lists ordered like ``self.weights``
            and ``self.biases``, each gradient averaged over the batch.
        """
        m = X.shape[0]
        gradients_w = []
        gradients_b = []

        # Output layer: for softmax + cross-entropy the gradient with respect
        # to the logits is simply (probabilities - one-hot labels).
        dZ = y_pred - y
        gradients_w.append(np.dot(self.layer_outputs[-2].T, dZ) / m)
        gradients_b.append(np.sum(dZ, axis=0, keepdims=True) / m)

        # Hidden layers, last to first. Gradients are collected back-to-front
        # and reversed once at the end.
        for i in reversed(range(len(self.weights) - 1)):
            dA = np.dot(dZ, self.weights[i + 1].T)
            # The derivative is evaluated on the cached activation; for ReLU
            # that is positive exactly where the pre-activation was positive.
            dZ = dA * self.activation_derivative(self.layer_outputs[i + 1])
            gradients_w.append(np.dot(self.layer_outputs[i].T, dZ) / m)
            gradients_b.append(np.sum(dZ, axis=0, keepdims=True) / m)

        gradients_w.reverse()
        gradients_b.reverse()
        return gradients_w, gradients_b

    def train(self, X, y, epochs, batch_size=32):
        """Fit the network with shuffled mini-batches and Adam updates.

        Progress is printed for the first epoch, every 100th epoch after it,
        and the last epoch. The printed loss is the mean cross-entropy per
        sample over the epoch.

        Args:
            X: ``(n_samples, input_size)`` training inputs.
            y: ``(n_samples, num_classes)`` one-hot training labels.
            epochs: Number of passes over the data.
            batch_size: Samples per mini-batch; the last batch may be smaller.

        Raises:
            ValueError: If ``batch_size`` is below 1 or ``X`` has no rows.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("cannot train on an empty dataset")

        for epoch in range(epochs):
            # Shuffle the dataset at the start of each epoch
            indices = np.random.permutation(n_samples)
            X_shuffled = X[indices]
            y_shuffled = y[indices]

            # Sum of per-sample losses, so the epoch figure is a true mean
            # even when the final batch is smaller than the others.
            loss_epoch = 0.0
            for start in range(0, n_samples, batch_size):
                X_batch = X_shuffled[start:start + batch_size]
                y_batch = y_shuffled[start:start + batch_size]

                # Forward pass
                y_pred = self.forward(X_batch)

                # cross_entropy_loss applies softmax itself, so it must be
                # given the logits; passing y_pred would apply softmax twice.
                logits = self.layer_outputs[-1]
                batch_loss = cross_entropy_loss(logits, y_batch)
                loss_epoch += batch_loss * X_batch.shape[0]

                # Backward pass and update weights
                gradients_w, gradients_b = self.backward(X_batch, y_batch, y_pred)
                for j, (dw, db) in enumerate(zip(gradients_w, gradients_b)):
                    (self.weights[j], self.biases[j],
                     self.m_w[j], self.v_w[j],
                     self.m_b[j], self.v_b[j]) = adam_optimizer(
                        self.weights[j], self.biases[j], dw, db, self.learning_rate,
                        self.m_w[j], self.v_w[j], self.m_b[j], self.v_b[j], self.t
                    )

                self.t += 1

            if epoch % 100 == 0 or epoch == epochs - 1:
                print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss_epoch / n_samples:.4f}")

    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``."""
        output = self.forward(X)
        return np.argmax(output, axis=1)

# --- Interactive configuration (prompts are asked in this fixed order) ---
input_size = int(input("Enter input shape (number of features): "))
num_layers = int(input("Enter number of layers: "))
layers = [
    int(input(f"Enter number of neurons for layer {layer_no}: "))
    for layer_no in range(1, num_layers + 1)
]
epochs = int(input("Enter number of training epochs: "))
batch_size = int(input("Enter batch size: "))
num_classes = int(input("Enter number of output classes: "))

# --- Synthetic data sized to the requested input/output shapes ---
# Features are uniform in [0, 1); labels are drawn at random, independent of
# the features.
n_samples = 100
X_train = np.random.rand(n_samples, input_size)
y_train_raw = np.random.randint(0, num_classes, size=n_samples)
y_train = one_hot(y_train_raw, num_classes)

# --- Build the model and fit it ---
nn = NeuralNetwork(
    input_size=input_size,
    layers=layers,
    num_classes=num_classes,
    activation='relu',
    optimizer='adam',
    learning_rate=0.01,
)
nn.train(X_train, y_train, epochs=epochs, batch_size=batch_size)

# --- Compare predictions with labels on the first ten training rows ---
print("Testing predictions:")
preds = nn.predict(X_train[:10])
print("Predicted:", preds)
print("True Labels:", y_train_raw[:10])


Enter input shape (number of features): 5
Enter number of layers: 5
Enter number of neurons for layer 1: 8
Enter number of neurons for layer 2: 8
Enter number of neurons for layer 3: 8
Enter number of neurons for layer 4: 8
Enter number of neurons for layer 5: 8
Enter number of training epochs: 50
Enter batch size: 32
Enter number of output classes: 5
Epoch 1/50, Loss: 0.0641
Epoch 50/50, Loss: 0.0624
Testing predictions:
Predicted: [2 0 0 0 0 0 2 0 0 1]
True Labels: [2 0 4 1 0 1 2 2 2 4]
