In [60]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
X, y =  load_breast_cancer(return_X_y=True)
y = y.reshape(-1, 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


class FullyConnected:
    def __init__(self, in_size, out_size):
        self.W = np.random.randn(out_size, in_size)
        self.b = np.zeros(out_size)

    def __call__(self, X):
        return (X @ self.W.T) + self.b

    def backward(self, grad_output, X):
        dW = grad_output.T @ X
        db = np.sum(grad_output, axis=0)
        dX = grad_output @ self.W
        return dW, db, dX


class BCE:
    def __call__(self, y_pred, y_true):
        epsilon = 1e-12  # to avoid log(0)
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)).mean()
    
    def backward(self, y_pred, y_true):
        y_pred = np.clip(y_pred, 1e-12, 1 - 1e-12)
        return (y_pred - y_true) / (y_pred * (1 - y_pred) * len(y_true))
    

class SGD:
    def __init__(self, params, lr=0.01):
        self.params = params
        self.lr = lr

    def step(self, grads):
        for i, param in enumerate(self.params):
            param -= self.lr * grads[i]


class Sigmoid:
    def __call__(self, x):
        clip = np.clip(x, -100, 100)
        return 1 / (1 + np.exp(-clip))
    
    def backward(self, output):
        return output * (1 - output)
        

class Classifier:
    def __init__(self, input_size, hidden_size, output_size):
        self.fc1 = FullyConnected(input_size, hidden_size)
        self.fc2 = FullyConnected(hidden_size, output_size)
        self.sigmoid = Sigmoid()

    def __call__(self, X):
        self.fc1_output = self.sigmoid(self.fc1(X))
        fc2_output = self.fc2(self.fc1_output)
        y_pred = self.sigmoid(fc2_output)
        return y_pred

    def backward(self, X, y_true, y_pred):
        loss_grad = BCE().backward(y_pred, y_true)
        grad_W2, grad_b2, grad_output_fc1 = self.fc2.backward(loss_grad, self.fc1_output)
        activation_grad = self.sigmoid.backward(self.fc1_output)
        grad_W1, grad_b1, _ = self.fc1.backward((grad_output_fc1 * activation_grad), X)
        return grad_W1, grad_b1, grad_W2, grad_b2
    
    def parameters(self):
        return [self.fc1.W, self.fc1.b, self.fc2.W, self.fc2.b]
    
    def fit(self, X, y , optimizer, loss_fn, epochs=10):
        for epoch in range(epochs):
            y_pred = self(X)
            loss = loss_fn(y_pred, y)
            grads = self.backward(X, y, y_pred)
            optimizer.step(grads)
            if epoch % 10 == 0:
                print(f"Epoch: {epoch}, Loss: {loss}")
    
    def predict(self, X):
        return np.round(self(X))

model = Classifier(input_size=X_train.shape[1], hidden_size=32, output_size=1)
loss_fn = BCE()
optimizer = SGD(model.parameters(), lr=0.01)
model.fit(X_train, y_train, optimizer, loss_fn, epochs=100) 

from sklearn.metrics import accuracy_score
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Epoch: 0, Loss: 2.301833152278058
Epoch: 10, Loss: 0.5894056268007787
Epoch: 20, Loss: 0.5776453973521732
Epoch: 30, Loss: 0.5757571462593305
Epoch: 40, Loss: 0.5719611139399483
Epoch: 50, Loss: 0.5639012391863538
Epoch: 60, Loss: 0.5922013083736206
Epoch: 70, Loss: 0.566956326500179
Epoch: 80, Loss: 0.54957764519868
Epoch: 90, Loss: 0.5457000481678524
Accuracy: 0.868421052631579
