Neural Network from scratch using no ML libraries

In [None]:
import numpy as np

class NeuralNetwork:
    def __init__(self, layers, activations):
        self.layers = layers
        self.activations = activations
        self.weights = []
        self.biases = []
        self.learning_rate = None

        #weights and biases initialization
        for i in range(len(layers) - 1):
            in_size = layers[i]
            out_size = layers[i + 1]
            limit = np.sqrt(6 / (in_size + out_size))
            w = np.random.uniform(-limit, limit, (out_size, in_size))
            b = np.zeros(out_size)
            self.weights.append(w)
            self.biases.append(b)

    def _activation(self, x, func):
        if func == 'sigmoid':
            return 1 / (1 + np.exp(-x))
        elif func == 'tanh':
            return np.tanh(x)
        elif func == 'relu':
            return np.maximum(0, x)
        elif func == 'leaky_relu':
            return np.where(x > 0, x, x * 0.01)
        elif func == 'linear':
            return x
        elif func == 'softmax':
            exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
            return exp_x / np.sum(exp_x, axis=1, keepdims=True)
        else:
            raise ValueError(f"Unknown activation function: {func}")

    def _activation_derivative(self, x, func):
        if func == 'sigmoid':
            s = 1 / (1 + np.exp(-x))
            return s * (1 - s)
        elif func == 'tanh':
            return 1 - np.tanh(x) ** 2
        elif func == 'relu':
            return (x > 0).astype(float)
        elif func == 'leaky_relu':
            return np.where(x > 0, 1.0, 0.01)
        elif func == 'linear':
            return np.ones_like(x)
        else:
            raise ValueError(f"Unknown activation function for derivative: {func}")

    def _cross_entropy_loss(self, y_true, y_pred):
        eps = 1e-15
        y_pred = np.clip(y_pred, eps, 1 - eps)
        return -np.sum(y_true * np.log(y_pred)) / y_true.shape[0]

    def _cross_entropy_derivative(self, y_true, y_pred):
        return (y_pred - y_true) / y_true.shape[0]

    def feedforward(self, X):
        a = X
        activations = [a]
        zs = []

        for w, b, func in zip(self.weights, self.biases, self.activations):
            z = a @ w.T + b
            a = self._activation(z, func)
            zs.append(z)
            activations.append(a)

        return activations, zs

    def backpropagate(self, X, y, activations, zs, loss_function='cross_entropy_loss'):
        delta = self._cross_entropy_derivative(y, activations[-1])

        nablaw = [np.zeros_like(w) for w in self.weights]
        nablab = [np.zeros_like(b) for b in self.biases]

        for l in reversed(range(len(self.weights))):
            nablaw[l] = delta.T @ activations[l]
            nablab[l] = np.sum(delta, axis=0)

            if l != 0:
                delta = (delta @ self.weights[l]) * self._activation_derivative(zs[l-1], self.activations[l-1])

        return nablaw, nablab

    def updateWeights(self, nabla_w, nabla_b, batch_size):
        for i in range(len(self.weights)):
            self.weights[i] -= self.learning_rate * nabla_w[i] / batch_size
            self.biases[i] -= self.learning_rate * nabla_b[i] / batch_size

    def learn(self, X_train, y_train, epochs, learning_rate, batch_size, verbose=True):
        self.learning_rate = learning_rate

        for epoch in range(1, epochs + 1):
            indices = np.arange(X_train.shape[0])
            np.random.shuffle(indices)
            X_train = X_train[indices]
            y_train = y_train[indices]

            for start in range(0, X_train.shape[0], batch_size):
                end = start + batch_size
                batch_X = X_train[start:end]
                batch_y = y_train[start:end]

                activations, zs = self.feedforward(batch_X)
                nabla_w, nabla_b = self.backpropagate(batch_X, batch_y, activations, zs)
                self.updateWeights(nabla_w, nabla_b, batch_X.shape[0])

            if verbose and epoch % 5 == 0:
                preds, _ = self.feedforward(X_train)
                loss = self._cross_entropy_loss(y_train, preds[-1])
                print(f"Epoch {epoch} - Loss: {loss:.4f}")

    def predict(self, X):
        activations, _ = self.feedforward(X)
        return activations[-1]

def to_one_hot(y, num_classes):
    one_hot = np.zeros((y.size, num_classes))
    one_hot[np.arange(y.size), y] = 1
    return one_hot

def normalize(X):
    X = X.astype(np.float32)
    X -= X.mean(axis=0)
    X /= (X.std(axis=0) + 1e-8)
    return X

def accuracy(y_true, y_pred):
    true_labels = np.argmax(y_true, axis=1)
    pred_labels = np.argmax(y_pred, axis=1)
    return np.mean(true_labels == pred_labels)


In [1]:
from neural_network import NeuralNetwork, normalize, to_one_hot, accuracy
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
data = pd.read_csv('breastfinal.csv')

X = data.iloc[:, :-1].values 
y = data.iloc[:, -1].values 

print(f"Data shape: {X.shape}, Labels shape: {y.shape}")
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.3, random_state=42)
print(np.min(Xtrain), np.max(Xtrain), np.mean(Xtrain), np.std(Xtrain))
#normalize features if nonbinary dataset

#Xtrain = normalize(Xtrain)  # or use normalize(X_train) if you want zero mean
#Xtest = normalize(Xtest)

numclasses = len(np.unique(y))
unique_labels = np.unique(y)
label_to_int = {label: idx for idx, label in enumerate(unique_labels)}
ytrain = np.array([label_to_int[label] for label in ytrain])
ytest = np.array([label_to_int[label] for label in ytest])

ytrainoh = to_one_hot(ytrain, numclasses)
ytestoh = to_one_hot(ytest, numclasses)

#building the neural network
#Xtrain.shape[1]  # Number of features
#consider expanding hidden layers to 32, 32 or 64, 64 for more complex datasets
hiddenlayer1 = 16  #number of neurons first hidden layer
hiddenlayer2 = 16  #number of neurons in second hidden layer
nn = NeuralNetwork([Xtrain.shape[1], hiddenlayer1, hiddenlayer2, numclasses], ['relu', 'relu', 'softmax'])
nn.learn(Xtrain, ytrainoh, epochs=100, learning_rate=0.001, batch_size=32)
predictions = nn.predict(Xtest)
print(f"Test Accuracy: {accuracy(ytestoh, predictions) * 100:.2f}%")

'''
Current dataset is rather small, with the current batch size of 32, batches may have high variance which leads to overfitting.
'''

Data shape: (286, 41), Labels shape: (286,)
False True 0.21865853658536585 0.4133364016920263
Epoch 5 - Loss: 0.6820
Epoch 10 - Loss: 0.6819
Epoch 15 - Loss: 0.6817
Epoch 20 - Loss: 0.6815
Epoch 25 - Loss: 0.6813
Epoch 30 - Loss: 0.6812
Epoch 35 - Loss: 0.6810
Epoch 40 - Loss: 0.6808
Epoch 45 - Loss: 0.6806
Epoch 50 - Loss: 0.6804
Epoch 55 - Loss: 0.6802
Epoch 60 - Loss: 0.6800
Epoch 65 - Loss: 0.6798
Epoch 70 - Loss: 0.6796
Epoch 75 - Loss: 0.6794
Epoch 80 - Loss: 0.6792
Epoch 85 - Loss: 0.6790
Epoch 90 - Loss: 0.6789
Epoch 95 - Loss: 0.6788
Epoch 100 - Loss: 0.6787
Test Accuracy: 56.98%


'\nCurrent dataset is rather small, with the current batch size of 32, batches may have high variance which leads to overfitting.\n'