## Imports

In [1]:
import numpy as np

## Classes and functions

In [101]:

###
### Initializers
###

class BaseInitializer:
    """Interface shared by all parameter initializers.

    Concrete initializers override :meth:`initialize` and return an array
    of the requested shape.
    """

    def initialize(self, shape):
        """Produce initial values for a parameter array of ``shape``.

        Raises:
            NotImplementedError: always; subclasses must provide the logic.
        """
        raise NotImplementedError()

class RandomInitializer(BaseInitializer):
    """Initializer whose values are samples from ``np.random.randn``."""

    def initialize(self, shape):
        """Return an array with dimensions ``shape`` filled with random samples.

        ``shape`` must be an iterable of dimension sizes; it is unpacked into
        the positional arguments of ``np.random.randn``.
        """
        dims = tuple(shape)
        return np.random.randn(*dims)

class ZeroInitializer(BaseInitializer):
    """Initializer that sets every value to zero."""

    def initialize(self, shape):
        """Return an all-zero array of the requested ``shape``."""
        zeros = np.zeros(shape)
        return zeros

###
### Loss Functions
###

class BaseLoss:
    """Interface shared by all loss functions.

    A loss exposes the scalar objective (:meth:`loss`) and its derivative
    with respect to the predictions (:meth:`gradient`).
    """

    def loss(self, y_pred, y_true):
        """Return the loss value for predictions ``y_pred`` and targets ``y_true``.

        Raises:
            NotImplementedError: always; subclasses must provide the logic.
        """
        raise NotImplementedError()

    def gradient(self, y_pred, y_true):
        """Return the gradient of the loss with respect to ``y_pred``.

        Raises:
            NotImplementedError: always; subclasses must provide the logic.
        """
        raise NotImplementedError()

class MSELoss(BaseLoss):
    """Mean squared error: the mean of ``(y_pred - y_true) ** 2``."""

    def loss(self, y_pred, y_true):
        """Return the mean squared difference between ``y_pred`` and ``y_true``.

        Both arguments may be arrays or array-likes (lists, scalars); they are
        combined with the usual NumPy broadcasting rules.
        """
        diff = np.asarray(y_pred) - np.asarray(y_true)
        return np.mean(diff ** 2)

    def gradient(self, y_pred, y_true):
        """Return the derivative of :meth:`loss` with respect to the predictions.

        The result has the shape of ``y_pred - y_true``.
        """
        diff = np.asarray(y_pred) - np.asarray(y_true)
        # Divide by the element count of the difference itself: that is the
        # count np.mean() uses in loss(), so the two methods stay consistent
        # even when y_true is a scalar or is broadcast against y_pred.
        return 2 * diff / diff.size

###
### Optimizers
###

class BaseOptimizer:
    """Interface shared by all optimizers.

    An optimizer receives a layer together with the gradients of its
    parameters and is responsible for updating that layer.
    """

    def update(self, layer, grad_weights, grad_biases):
        """Apply one optimization step to ``layer``.

        Raises:
            NotImplementedError: always; subclasses must provide the logic.
        """
        raise NotImplementedError()

class SGD(BaseOptimizer):
    """Plain stochastic gradient descent: ``param -= learning_rate * grad``."""

    def __init__(self, learning_rate=0.01):
        """Store the step size applied to every gradient."""
        self.learning_rate = learning_rate

    def update(self, layer, grad_weights, grad_biases):
        """Update ``layer.weights`` and ``layer.biases`` in place.

        Args:
            layer: object exposing ``weights``, ``biases`` and ``use_bias``.
            grad_weights: gradient for ``layer.weights``; ``None`` skips the
                weight update.
            grad_biases: gradient for ``layer.biases``; ignored when it is
                ``None`` or when the layer does not use a bias.
        """
        # The per-step debug print of gradients and parameters was removed:
        # it flooded the output with whole weight matrices on every batch.
        if grad_weights is not None:
            layer.weights -= self.learning_rate * grad_weights
        if layer.use_bias and grad_biases is not None:
            layer.biases -= self.learning_rate * grad_biases




###
### Layers
###

class BaseLayer:
    """Common base class for layers.

    Attributes are plain instance fields:
        input_shape / output_shape: shapes recorded by :meth:`build`.
        value: slot holding the layer's most recent forward output; it is
            written by whatever drives the forward pass.
    """

    def __init__(self):
        self.input_shape = None
        self.output_shape = None
        # Defined here (not only in build) so the attribute exists on every
        # layer, including subclasses that override build() without calling
        # this class's implementation.
        self.value = None

    def build(self, input_shape):
        """Record ``input_shape``; by default a layer preserves the shape."""
        self.input_shape = input_shape
        self.output_shape = input_shape
        self.value = None

    def forward(self, X):
        """Compute the layer output for ``X``.

        Raises:
            NotImplementedError: always; subclasses must provide the logic.
        """
        raise NotImplementedError

    def get_params_count(self):
        """Return the number of trainable parameters (none for the base class)."""
        return 0

class InputLayer(BaseLayer):
    """Entry point of a network: declares the input shape and passes data through."""

    def __init__(self, input_shape):
        """Store the shape of one input sample.

        Args:
            input_shape: a shape, or a single ``int`` which is converted to
                the one-element tuple ``(input_shape,)``.
        """
        super().__init__()
        if isinstance(input_shape, int):
            input_shape = (input_shape,)
        # The layer does not transform its input, so both shapes coincide.
        self.input_shape = input_shape
        self.output_shape = input_shape

    def forward(self, X):
        """Return ``X`` unchanged."""
        return X

    def backward(self, grad_output):
        """Return ``grad_output`` unchanged.

        forward() is the identity, so the gradient flows through as is.
        Defining this method lets a backward loop visit every layer without
        failing on the input layer.
        """
        return grad_output

    def get_config(self):
        """Return the layer configuration as a dict."""
        return {'input_shape': self.output_shape}

class DenseLayer(BaseLayer):
    """Fully connected layer: contracts the last axis of the input with ``weights``.

    The output is ``tensordot(X, weights) (+ biases)``, optionally passed
    through ``activation.forward``.

    NOTE: backward() does not apply the derivative of ``activation``; it
    treats the incoming gradient as the gradient with respect to the affine
    output. Use a separate activation layer when the activation must take
    part in backpropagation.
    """

    def __init__(self, units, activation=None, use_bias=True, initializer=RandomInitializer()):
        """Configure the layer; parameters are allocated later by build().

        Args:
            units: number of output features (must be an ``int``).
            activation: optional object whose ``forward`` is applied to the
                output.
            use_bias: whether a bias vector is added to the output.
            initializer: object whose ``initialize(shape)`` creates the
                weights and biases.

        Raises:
            ValueError: if ``units`` is not an integer.
        """
        super().__init__()
        if not isinstance(units, int):
            raise ValueError("units must be an integer")
        self.units = units
        self.activation = activation
        self.use_bias = use_bias
        self.initializer = initializer
        self.weights = None
        self.biases = None
        # Input of the most recent forward pass; backward() needs it to
        # compute the weight gradient.
        self._last_input = None

    def build(self, input_shape):
        """Allocate ``weights`` of shape ``(input_shape[-1], units)`` and the biases."""
        if isinstance(input_shape, int):
            input_shape = (input_shape,)
        self.input_shape = input_shape
        self.weights = self.initializer.initialize((input_shape[-1], self.units))
        self.biases = self.initializer.initialize((self.units,)) if self.use_bias else None
        self.output_shape = (self.units,)

    def forward(self, X):
        """Return the layer output for ``X`` and remember ``X`` for backward().

        Raises:
            ValueError: if the layer has not been built yet.
        """
        if self.weights is None:
            raise ValueError("La couche doit être construite avant d'effectuer un forward pass.")

        self._last_input = np.asarray(X)
        output = np.tensordot(self._last_input, self.weights, axes=[-1, 0])
        if self.use_bias:
            output += self.biases

        if self.activation:
            output = self.activation.forward(output)

        return output

    def backward(self, grad_output):
        """Backpropagate ``grad_output`` through the affine transform.

        Args:
            grad_output: gradient of the loss with respect to the affine
                output; its last axis has length ``units``.

        Returns:
            ``(grad_input, grad_weights, grad_biases)``. ``grad_weights`` has
            the shape of ``weights``; ``grad_biases`` has shape ``(units,)``
            or is ``None`` when the layer has no bias.

        Raises:
            ValueError: if forward() has not been called yet.
        """
        if self._last_input is None:
            raise ValueError("forward() must be called before backward().")

        grad_output = np.asarray(grad_output)
        # Collapse every leading (batch-like) axis so the same code handles a
        # single 1-D sample, a 2-D batch, and higher-rank inputs.
        flat_input = self._last_input.reshape(-1, self.weights.shape[0])
        flat_grad = grad_output.reshape(-1, self.units)

        # The weight gradient pairs the layer INPUT with the output gradient.
        grad_weights = np.tensordot(flat_input.T, flat_grad, axes=1)
        grad_biases = flat_grad.sum(axis=0) if self.use_bias else None

        grad_input = np.tensordot(grad_output, self.weights.T, axes=1)

        return grad_input, grad_weights, grad_biases

    def get_config(self):
        """Return the layer configuration, including parameter shapes once built."""
        return {
            'units': self.units,
            'input_shape': self.input_shape,
            'weights_shape': self.weights.shape if self.weights is not None else None,
            'biases_shape': self.biases.shape if self.biases is not None else None,
            'use_bias': self.use_bias
        }

    def get_params_count(self):
        """Return the number of weight and bias values (0 before build())."""
        param_count = 0 if self.weights is None else self.weights.size
        if self.use_bias and self.biases is not None:
            param_count += self.biases.size
        return param_count

###
### Activation Layers
###

class ActivationLayer(BaseLayer):
    """Layer that delegates to an activation object.

    The activation object must expose ``forward(X)`` and ``backward(value)``.
    Subclasses in this file pass themselves as the activation and override
    ``forward``/``backward``, which replaces the methods defined here.
    """

    def __init__(self, activation):
        """Keep a reference to the activation object."""
        super().__init__()
        self.activation = activation

    def build(self, input_shape):
        """Record the shape; the output shape equals the input shape."""
        self.input_shape = self.output_shape = input_shape

    def forward(self, X):
        """Return ``activation.forward(X)``."""
        activated = self.activation.forward(X)
        return activated

    def backward(self, grad_output):
        """Scale ``grad_output`` by ``activation.backward`` of the stored ``value``."""
        local_derivative = self.activation.backward(self.value)
        return local_derivative * grad_output

class ReLU(ActivationLayer):
    """Rectified linear unit; the instance acts as its own activation object."""

    def __init__(self):
        super().__init__(activation=self)

    def forward(self, X):
        """Return the element-wise maximum of 0 and ``X``."""
        rectified = np.maximum(0, X)
        return rectified

    def backward(self, X):
        """Return the ReLU derivative at ``X``: 1.0 where ``X > 0``, else 0.0.

        Note that this is the bare derivative; it is not multiplied by any
        upstream gradient.
        """
        positive = X > 0
        return positive.astype(float)

class Sigmoid(ActivationLayer):
    """Logistic sigmoid; the instance acts as its own activation object."""

    def __init__(self):
        super().__init__(activation=self)

    def forward(self, X):
        """Return ``1 / (1 + exp(-X))`` element-wise."""
        exp_neg = np.exp(-X)
        return 1 / (1 + exp_neg)

    def backward(self, X):
        """Return ``X * (1 - X)``.

        This equals the sigmoid derivative when ``X`` is the sigmoid *output*
        (presumably what callers pass; verify at the call site). It is the
        bare derivative and is not multiplied by any upstream gradient.
        """
        complement = 1 - X
        return X * complement


###
### Model
###

class Model:
    """Sequential stack of layers trained with mini-batch gradient descent."""

    def __init__(self):
        self.layers = []
        self.input_shape = None
        # Both are set by compile(); train() refuses to run while they are None.
        self.optimizer = None
        self.loss_function = None

    def add(self, layer):
        """Append ``layer`` to the stack.

        The first layer defines the model's ``input_shape`` through its
        ``output_shape`` and is not built; every later layer is built with
        the output shape of the layer before it.
        """
        if not self.layers:
            self.input_shape = layer.output_shape
        else:
            layer.build(self.layers[-1].output_shape)

        self.layers.append(layer)

    def compile(self, optimizer, loss_function):
        """Attach the optimizer and the loss function used by train()."""
        self.optimizer = optimizer
        self.loss_function = loss_function

    def forward(self, X):
        """Run ``X`` through every layer and return the final output.

        Each layer's output is also stored on the layer as ``layer.value``.
        """
        for layer in self.layers:
            X = layer.forward(X)
            layer.value = X
        return X

    def backward(self, loss_grad):
        """Propagate ``loss_grad`` from the last layer to the first.

        Dense layers are handed to the optimizer right after their gradients
        are computed. Layers without a ``backward`` method (such as an input
        layer) leave the gradient untouched.
        """
        for layer in reversed(self.layers):
            if isinstance(layer, DenseLayer):
                loss_grad, grad_weights, grad_biases = layer.backward(loss_grad)
                self.optimizer.update(layer, grad_weights, grad_biases)
            elif isinstance(layer, ActivationLayer):
                # ReLU and Sigmoid override backward() with the bare
                # derivative of the activation. Calling the base-class method
                # explicitly applies the chain rule:
                # activation.backward(layer.value) * loss_grad.
                loss_grad = ActivationLayer.backward(layer, loss_grad)
            elif hasattr(layer, "backward"):
                loss_grad = layer.backward(loss_grad)

    def train(self, X, y, epochs, batch_size):
        """Train with shuffled mini-batches and print the loss once per epoch.

        Args:
            X: inputs; indexed along the first axis, one sample per row.
            y: targets aligned with ``X`` along the first axis.
            epochs: number of passes over the data.
            batch_size: number of samples per mini-batch (the last batch of
                an epoch may be smaller).

        The printed loss is the loss of the last mini-batch of the epoch.

        Raises:
            RuntimeError: if compile() has not been called.
            ValueError: if ``batch_size`` is smaller than 1 or ``X`` is empty.
        """
        if self.optimizer is None or self.loss_function is None:
            raise RuntimeError("compile() must be called before train().")
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer.")

        num_samples = X.shape[0]
        if num_samples == 0:
            # Without any batch there would be no loss to report.
            raise ValueError("X must contain at least one sample.")

        for epoch in range(epochs):
            indices = np.random.permutation(num_samples)
            X_shuffled = X[indices]
            y_shuffled = y[indices]

            for start in range(0, num_samples, batch_size):
                end = start + batch_size
                X_batch = X_shuffled[start:end]
                y_batch = y_shuffled[start:end]

                output = self.forward(X_batch)
                loss = self.loss_function.loss(output, y_batch)
                loss_grad = self.loss_function.gradient(output, y_batch)
                self.backward(loss_grad)

            print(f"Epoch {epoch+1}/{epochs}, Loss: {loss}")

    def summary(self):
        """Print one line per layer plus the total parameter count."""
        print("\nSummary:")
        print("=" * 60)
        total_params = 0
        for i, layer in enumerate(self.layers):
            params = layer.get_params_count() if hasattr(layer, 'get_params_count') else 0
            total_params += params
            print(f"Layer {i}: {layer.__class__.__name__}, Output Shape: {layer.output_shape}, Params: {params}")
        print("=" * 60)
        print(f"Total Parameters: {total_params}\n")


## Model

### Building the model

In [None]:

# Assemble a small network: 10 inputs -> ReLU -> Dense(3) -> Sigmoid.
model = Model()
model.add(InputLayer(10))  # a bare int is turned into the shape (10,)
# model.add(DenseLayer(5))  # optional hidden layer, currently disabled
model.add(ReLU())
model.add(DenseLayer(3))
model.add(Sigmoid())

# Print the per-layer output shapes and parameter counts.
model.summary()

### Testing the model

In [None]:
# Smoke test: push one random sample through the untrained network.
input_data = np.random.normal(loc=0.0, scale=1.0, size=model.input_shape)
output = model.forward(input_data)
print(output)

In [None]:
# Toy regression problem: 100 random samples with 10 features each, and
# targets that are all ones for the 3 outputs.
X_train = np.random.normal(size=(100, 10))
y_train = np.ones((100, 3))

model.compile(optimizer=SGD(learning_rate=0.01), loss_function=MSELoss())
model.train(X_train, y_train, epochs=100, batch_size=32)
# A stray debug print that referenced grad_weights, layer and self was
# removed: none of those names exist at module scope, so it raised NameError.
