In [None]:
# Importar las librerías necesarias
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits  # Usaremos un dataset de ejemplo

# Definimos las funciones de activación y pérdida
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The textbook formula overflows inside ``exp`` for large negative inputs
    (NumPy emits a RuntimeWarning). Here the exponential is only ever
    evaluated on ``-|x|``, which cannot overflow, and the algebraically
    equivalent form ``exp(x) / (1 + exp(x))`` is used for negative inputs.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        The sigmoid of every element, with the same shape as ``x`` (a NumPy
        scalar when ``x`` is a scalar).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always within [0, 1], so no overflow
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d array into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return result[()]

def sigmoid_prime(x):
    """Return the derivative of the logistic sigmoid evaluated at ``x``.

    Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``.

    Args:
        x: Scalar or array accepted by ``sigmoid``.

    Returns:
        The element-wise derivative, with the same shape as ``sigmoid(x)``.
    """
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    activated = sigmoid(x)
    return activated * (1 - activated)

def mse(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Args:
        y_true: Target values.
        y_pred: Predicted values, combined element-wise with ``y_true``.

    Returns:
        The mean of the squared differences taken over every element.
    """
    residual = y_true - y_pred
    squared_residual = np.power(residual, 2)
    return np.mean(squared_residual)

def mse_prime(y_true, y_pred):
    """Return the gradient of the mean squared error w.r.t. ``y_pred``.

    Args:
        y_true: Target values; must expose a ``size`` attribute.
        y_pred: Predicted values, combined element-wise with ``y_true``.

    Returns:
        ``2 * (y_pred - y_true)`` divided by the number of elements in
        ``y_true``.
    """
    element_count = y_true.size
    doubled_error = 2 * (y_pred - y_true)
    return doubled_error / element_count

# Clase para las capas densas
class FCLayer:
    """Fully connected (dense) layer computing ``input @ weights + bias``.

    Parameters are updated in place by ``backward`` using plain gradient
    descent with an optional L2 penalty on the weights.
    """

    def __init__(self, input_size, output_size, lambda_reg=0.0):
        """Initialise parameters uniformly in ``[-0.5, 0.5)``.

        Args:
            input_size: Number of input features.
            output_size: Number of output units.
            lambda_reg: L2 regularisation strength applied to the weights
                (the bias is not regularised).
        """
        # weights: (input_size, output_size); bias: (1, output_size).
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5
        self.lambda_reg = lambda_reg

    def forward(self, input):
        """Compute the layer output and cache the input for ``backward``.

        Args:
            input: Batch of shape ``(batch, input_size)`` or a single 1-D
                sample of length ``input_size``.

        Returns:
            Array of shape ``(batch, output_size)``; a single sample yields
            one row.
        """
        # Promote a lone 1-D sample to a one-row batch so that backward()
        # can transpose it; 2-D inputs are passed through without copying.
        self.input = np.atleast_2d(input)
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward(self, output_gradient, learning_rate):
        """Update the parameters and return the gradient w.r.t. the input.

        Args:
            output_gradient: Gradient of the loss w.r.t. this layer's
                output, shape ``(batch, output_size)``.
            learning_rate: Step size for the gradient-descent update.

        Returns:
            Gradient of the loss w.r.t. the layer input, shape
            ``(batch, input_size)``.
        """
        output_gradient = np.atleast_2d(output_gradient)

        # Computed before the update so it uses the weights that produced
        # the forward output.
        input_gradient = np.dot(output_gradient, self.weights.T)

        # Data term (summed over the batch by the matrix product) plus the
        # L2 regularisation term.
        weights_gradient = (
            np.dot(self.input.T, output_gradient)
            + self.lambda_reg * self.weights
        )

        # The bias gradient must be accumulated over the batch the same way
        # as the weight gradient: a sum, not a mean. Averaging here would
        # shrink the bias step by the batch size relative to the weights.
        bias_gradient = np.sum(output_gradient, axis=0, keepdims=True)

        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * bias_gradient
        return input_gradient

# Clase para las capas de activación
class ActivationLayer:
    """Layer that applies an activation function to its input.

    The layer has no trainable parameters; it only stores the most recent
    input and output.
    """

    def __init__(self, activation, activation_prime):
        """Store the activation callable and its derivative.

        Args:
            activation: Callable applied to the input in ``forward``.
            activation_prime: Callable applied to the cached input in
                ``backward``.
        """
        self.activation, self.activation_prime = activation, activation_prime

    def forward(self, input):
        """Apply the activation, caching both the input and the result.

        Args:
            input: Value passed to the activation callable.

        Returns:
            Whatever ``activation(input)`` returns.
        """
        self.input = input
        activated = self.activation(input)
        self.output = activated
        return activated

    def backward(self, output_gradient, learning_rate):
        """Scale the incoming gradient by the derivative at the cached input.

        Args:
            output_gradient: Gradient of the loss w.r.t. this layer's output.
            learning_rate: Unused; accepted so every layer shares the same
                ``backward`` signature.

        Returns:
            ``output_gradient`` multiplied by ``activation_prime`` evaluated
            at the input cached by ``forward``.
        """
        local_slope = self.activation_prime(self.input)
        return output_gradient * local_slope

# Clase para la red neuronal
class Network:
    """Sequential stack of layers trained with mini-batch gradient descent.

    A layer is any object exposing ``forward(input)`` and
    ``backward(output_gradient, learning_rate)``.
    """

    def __init__(self):
        """Create an empty network with no loss function configured."""
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the stack."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Set the loss function and its derivative.

        Args:
            loss: Callable ``loss(y_true, y_pred)`` returning the loss value.
            loss_prime: Callable ``loss_prime(y_true, y_pred)`` returning the
                gradient of the loss w.r.t. ``y_pred``.
        """
        self.loss = loss
        self.loss_prime = loss_prime

    def _forward(self, data):
        """Run ``data`` through every layer in order and return the result."""
        output = data
        for layer in self.layers:
            output = layer.forward(output)
        return output

    def predict(self, input_data):
        """Run each sample through the network independently.

        Args:
            input_data: Sequence of samples.

        Returns:
            A list holding the network output for each sample, in order.
        """
        return [self._forward(sample) for sample in input_data]

    def fit(self, X_train, y_train, epochs, learning_rate, batch_size):
        """Train the network with mini-batch gradient descent.

        The data is reshuffled at the start of every epoch. After each epoch
        the mean loss over all samples seen in that epoch is printed (each
        batch loss is weighted by its batch size, so a shorter final batch
        does not skew the figure).

        Args:
            X_train: Training inputs; the first axis indexes samples.
            y_train: Training targets with the same number of samples.
            epochs: Number of passes over the training data.
            learning_rate: Step size handed to every layer's ``backward``.
            batch_size: Number of samples per mini-batch; must be positive.

        Raises:
            RuntimeError: If ``use`` has not been called.
            ValueError: If ``batch_size`` is not positive, the training set
                is empty, or inputs and targets differ in length.
        """
        if self.loss is None or self.loss_prime is None:
            raise RuntimeError(
                'No loss configured: call use(loss, loss_prime) before fit()'
            )
        if batch_size <= 0:
            raise ValueError('batch_size must be a positive integer')

        # Accept plain sequences too; fancy indexing below needs arrays.
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        samples = len(X_train)
        if samples == 0:
            raise ValueError('X_train must contain at least one sample')
        if len(y_train) != samples:
            raise ValueError(
                'X_train and y_train must have the same number of samples'
            )

        for epoch in range(epochs):
            print(f'Epoch {epoch+1}/{epochs}')
            # Shuffle inputs and targets together at the start of each epoch.
            indices = np.arange(samples)
            np.random.shuffle(indices)
            X_train = X_train[indices]
            y_train = y_train[indices]

            epoch_loss = 0.0
            for start in range(0, samples, batch_size):
                X_batch = X_train[start:start + batch_size]
                y_batch = y_train[start:start + batch_size]

                # Forward pass.
                output = self._forward(X_batch)

                # Accumulate the loss weighted by the number of samples.
                epoch_loss += self.loss(y_batch, output) * len(X_batch)

                # Backward pass: propagate the gradient from last to first.
                output_gradient = self.loss_prime(y_batch, output)
                for layer in reversed(self.layers):
                    output_gradient = layer.backward(output_gradient, learning_rate)

            loss = epoch_loss / samples
            print(f'Loss: {loss}')

# Load the dataset
digits = load_digits()
X = digits.data
y = digits.target

# One-hot encode the output labels. The number of classes is derived from
# the labels instead of being hard-coded, so the encoding and the output
# layer below always agree with the data.
n_classes = int(y.max()) + 1
y_one_hot = np.zeros((y.size, n_classes))
y_one_hot[np.arange(y.size), y] = 1

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y_one_hot, test_size=0.3, random_state=42
)

# FCLayer draws its initial parameters from NumPy's global RNG and
# Network.fit shuffles with it; seed it so runs are reproducible, matching
# the fixed random_state used for the split above.
np.random.seed(42)

# Build the neural network
entrada_dim = X_train.shape[1]
model = Network()

# Define the regularisation strength and the layers
lambda_reg = 0.001
model.add(FCLayer(entrada_dim, 256, lambda_reg=lambda_reg))  # Layer with L2 regularisation
model.add(ActivationLayer(sigmoid, sigmoid_prime))
model.add(FCLayer(256, 256, lambda_reg=lambda_reg))
model.add(ActivationLayer(sigmoid, sigmoid_prime))
model.add(FCLayer(256, n_classes, lambda_reg=lambda_reg))
model.add(ActivationLayer(sigmoid, sigmoid_prime))

# Select the loss function
model.use(mse, mse_prime)

# Train the network using mini-batches
batch_size = 32  # Hyperparameter: mini-batch size
model.fit(X_train, y_train, epochs=20, learning_rate=0.1, batch_size=batch_size)

# Make predictions
y_hat = model.predict(X_test)

# Convert network outputs and one-hot targets back to class labels
y_hat = [np.argmax(sample_output) for sample_output in y_hat]
y_test_labels = [np.argmax(one_hot_row) for one_hot_row in y_test]

# Evaluate the model
print('Matriz de confusión:')
print(confusion_matrix(y_test_labels, y_hat))
print(f'Exactitud del modelo: {accuracy_score(y_test_labels, y_hat)}')
