In [4]:
import numpy as np
from scipy import signal

Collecting scikit-learn
  Using cached scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting scipy>=1.5.0 (from scikit-learn)
  Using cached scipy-1.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting joblib>=1.1.1 (from scikit-learn)
  Using cached joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=2.0.0 (from scikit-learn)
  Using cached threadpoolctl-3.2.0-py3-none-any.whl.metadata (10.0 kB)
Using cached scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.9 MB)
Using cached joblib-1.3.2-py3-none-any.whl (302 kB)
Using cached scipy-1.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.4 MB)
Using cached threadpoolctl-3.2.0-py3-none-any.whl (15 kB)
Installing collected packages: threadpoolctl, scipy, joblib, scikit-learn
Successfully installed joblib-1.3.2 scikit-learn-1.3.2 scipy-1.11.4 threadpoolctl-3.2.0
Note: you may need to restart the kernel to use updated packages.

In [11]:
class Layer(object):
    """Common interface for the network layers defined in this notebook.

    Subclasses override both methods. The base implementations raise
    ``NotImplementedError`` so a missing override fails with a clear message.
    """

    def forward(self, input):
        """Compute the layer output for ``input``.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement forward(input)"
        )

    def backward(self, output_gradient, lr):
        """Propagate ``output_gradient`` back through the layer.

        Args:
            output_gradient: gradient of the loss with respect to this
                layer's output.
            lr: learning rate available to layers that update parameters.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement backward(output_gradient, lr)"
        )

class Dense(Layer):
    """Fully connected layer computing ``weights . input + biases``.

    ``weights`` has shape ``(output_size, input_size)`` and ``biases`` has
    shape ``(output_size, 1)``; both are drawn from ``np.random.randn``.
    """

    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(output_size, input_size)
        self.biases = np.random.randn(output_size, 1)

    def forward(self, input):
        """Store ``input`` for the backward pass and return the affine output.

        Assumes ``input`` is 2-D with shape ``(input_size, n_columns)``
        (one sample per column) -- TODO confirm against the training loop.
        """
        self.input = input
        return np.dot(self.weights, self.input) + self.biases

    def backward(self, output_gradient, lr):
        """Apply one gradient-descent step and return the input gradient.

        Args:
            output_gradient: gradient of the loss with respect to the output
                of ``forward``; 2-D, shape ``(output_size, n_columns)``.
            lr: learning rate that scales the parameter update.

        Returns:
            Gradient of the loss with respect to the stored input.
        """
        weights_gradient = np.dot(output_gradient, np.transpose(self.input))
        # Computed before the weights change so it uses the same weights
        # that produced the forward output.
        input_gradient = np.dot(np.transpose(self.weights), output_gradient)
        # The bias is broadcast across columns in forward, so its gradient is
        # the sum over columns. With one column this equals output_gradient;
        # with several it keeps the (output_size, 1) shape that the in-place
        # update below requires.
        biases_gradient = np.sum(output_gradient, axis=1, keepdims=True)

        self.weights -= lr * weights_gradient
        self.biases -= lr * biases_gradient
        return input_gradient

class Conv2D(Layer):
    """Convolutional layer built on ``scipy.signal`` 2-D correlation.

    Each of the ``num_kernels`` kernels holds one ``kernel_size x kernel_size``
    filter per input channel. Kernels and biases are initialised with
    ``np.random.random_sample``.
    """

    def __init__(self, input_shape, kernel_size, num_kernels):
        """Set up shapes and parameters.

        Args:
            input_shape: ``(depth, d1, d2)`` -- channel count followed by the
                two spatial extents in array-axis order, so ``input[j]`` has
                shape ``(d1, d2)``.
            kernel_size: side length of the square kernels.
            num_kernels: number of output channels.
        """
        # forward/backward read both of these, so they must live on the instance.
        self.input_shape = input_shape
        self.num_kernels = num_kernels
        # 'valid' correlation shrinks each spatial axis by kernel_size - 1 and
        # keeps the axis order, so the output extents follow the input order.
        self.output_shape = (
            num_kernels,
            input_shape[1] - kernel_size + 1,
            input_shape[2] - kernel_size + 1,
        )
        self.kernel_shape = (num_kernels, input_shape[0], kernel_size, kernel_size)
        self.kernels = np.random.random_sample(self.kernel_shape)
        self.biases = np.random.random_sample(self.output_shape)

    def forward(self, inp):
        """Store ``inp`` and return, per kernel, the bias plus the sum over
        input channels of the 'valid' cross-correlation with that kernel."""
        self.input = inp
        self.output = np.copy(self.biases)
        for i in range(self.num_kernels):
            for j in range(self.input_shape[0]):
                self.output[i] += signal.correlate2d(self.input[j], self.kernels[i][j], 'valid')
        return self.output

    def backward(self, output_gradient, lr):
        """Apply one gradient-descent step and return the input gradient.

        Args:
            output_gradient: gradient of the loss with respect to the output
                of ``forward``; indexed as ``output_gradient[i]`` per kernel.
            lr: learning rate that scales the parameter update.

        Returns:
            Array of shape ``input_shape`` with the loss gradient with respect
            to the stored input.
        """
        dCdK = np.zeros(self.kernel_shape)
        dCdX = np.zeros(self.input_shape)
        dCdB = np.zeros(output_gradient.shape)

        for i in range(self.num_kernels):
            dCdB[i] = output_gradient[i]
            for j in range(self.input_shape[0]):
                dCdK[i][j] = signal.correlate2d(self.input[j], output_gradient[i], 'valid')
                # 'full' convolution restores the input's spatial size.
                dCdX[j] += signal.convolve2d(output_gradient[i], self.kernels[i][j], 'full')

        # Parameters change only after every gradient has been computed with
        # the kernels that produced the forward output.
        self.kernels -= lr * dCdK
        self.biases -= lr * dCdB

        return dCdX

class Reshape(Layer):
    """Layer that only changes array shape; it holds no trainable state."""

    def __init__(self, input_shape, output_shape):
        # Keep both shapes so each pass can map onto the other.
        self.output_shape = output_shape
        self.input_shape = input_shape

    def forward(self, inp):
        """Return ``inp`` reshaped to ``output_shape``."""
        target_shape = self.output_shape
        return np.reshape(inp, target_shape)

    def backward(self, output_gradient, lr):
        """Return ``output_gradient`` reshaped to ``input_shape``; ``lr`` is unused."""
        restored = np.reshape(output_gradient, self.input_shape)
        return restored
    
class Activation(Layer):
    """Layer that applies a function to its input, given that function and
    its derivative as callables."""

    def __init__(self, activation, activation_gradient):
        # The derivative is stored as ``activation_prime``.
        self.activation_prime = activation_gradient
        self.activation = activation

    def forward(self, input):
        """Store ``input`` for the backward pass and return ``activation(input)``."""
        self.input = input
        activated = self.activation(self.input)
        return activated

    def backward(self, output_gradient, learning_rate):
        """Return ``output_gradient`` multiplied element-wise by the derivative
        evaluated at the stored input; ``learning_rate`` is unused."""
        local_slope = self.activation_prime(self.input)
        return np.multiply(output_gradient, local_slope)
    
class Sigmoid(Activation):
    """Activation layer using the logistic function ``1 / (1 + exp(-x))``."""

    def __init__(self):
        # Hand the bound methods to Activation as the function/derivative pair.
        super().__init__(self.sigmoid, self.gradient)

    def sigmoid(self, x):
        """Return the logistic function evaluated at ``x``."""
        denominator = 1 + np.exp(-x)
        return 1 / denominator

    def gradient(self, x):
        """Return the derivative ``sigmoid(x) * (1 - sigmoid(x))``."""
        activated = self.sigmoid(x)
        return activated * (1 - activated)

In [12]:
def binary_cross_entropy(y, y_pred, eps=1e-15):
    """Mean binary cross-entropy between targets ``y`` and predictions ``y_pred``.

    Args:
        y: target values (array-like of 0/1 labels or probabilities).
        y_pred: predicted probabilities, same shape as ``y``.
        eps: predictions are clipped to ``[eps, 1 - eps]`` before the
            logarithm, so a prediction of exactly 0 or 1 gives a finite loss
            instead of ``nan``/``inf``.

    Returns:
        The loss averaged over all elements.
    """
    # Without clipping, y_pred == 0 or 1 produces 0 * log(0) = nan.
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))

def binary_cross_entropy_gradient(y, y_pred, eps=1e-15):
    """Gradient of the mean binary cross-entropy with respect to ``y_pred``.

    Args:
        y: target values (array-like of 0/1 labels or probabilities).
        y_pred: predicted probabilities, same shape as ``y``.
        eps: predictions are clipped to ``[eps, 1 - eps]`` before dividing,
            so a prediction of exactly 0 or 1 cannot produce ``0/0`` or a
            division by zero.

    Returns:
        Array shaped like ``y_pred`` holding the per-element derivative,
        already divided by the number of elements in ``y``.
    """
    # Without clipping, a saturated correct prediction evaluates 0 / 0 = nan.
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return ((1 - y) / (1 - y_pred) - y / y_pred) / np.size(y)