<a href="https://colab.research.google.com/github/EdwSanA/DPro_Tareas/blob/main/Red_neuronal_profunda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# ===============================
# Scratch Deep Neural Network - Full Implementation
# Problemas 1 al 9
# ===============================

import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# OneHotEncoder compatible (scikit-learn >=1.2 y <1.2)
try:
    from sklearn.preprocessing import OneHotEncoder
    OHE_KW = {"handle_unknown": "ignore", "sparse_output": False}
except TypeError:
    from sklearn.preprocessing import OneHotEncoder
    OHE_KW = {"handle_unknown": "ignore", "sparse": False}

np.random.seed(42)

# ==================================================
# [Problema 1] Capa totalmente conectada (FC)
# ==================================================
class FC:
    """
    Capa totalmente conectada: n_nodes1 -> n_nodes2
    """
    def __init__(self, n_nodes1, n_nodes2, initializer, optimizer):
        self.W = initializer.W(n_nodes1, n_nodes2)
        self.B = initializer.B(n_nodes2)
        self.optimizer = optimizer
        # grad cache
        self.dW = None
        self.dB = None
        self.X  = None

    def forward(self, X):
        self.X = X
        return X @ self.W + self.B

    def backward(self, dA):
        # gradientes promediados por batch
        m = self.X.shape[0]
        self.dW = (self.X.T @ dA) / m
        self.dB = dA.mean(axis=0)
        dZ = dA @ self.W.T
        # actualizar parámetros vía optimizador
        self = self.optimizer.update(self)
        return dZ

# ==================================================
# [Problema 2] Inicialización Simple
# ==================================================
class SimpleInitializer:
    def __init__(self, sigma=0.01):
        self.sigma = sigma
    def W(self, n_nodes1, n_nodes2):
        return self.sigma * np.random.randn(n_nodes1, n_nodes2)
    def B(self, n_nodes2):
        return np.zeros(n_nodes2)

# ==================================================
# [Problema 6] Xavier y He Initializers
# ==================================================
class XavierInitializer:
    def W(self, n_nodes1, n_nodes2):
        sigma = 1.0 / np.sqrt(n_nodes1)
        return sigma * np.random.randn(n_nodes1, n_nodes2)
    def B(self, n_nodes2):
        return np.zeros(n_nodes2)

class HeInitializer:
    def W(self, n_nodes1, n_nodes2):
        sigma = np.sqrt(2.0 / n_nodes1)
        return sigma * np.random.randn(n_nodes1, n_nodes2)
    def B(self, n_nodes2):
        return np.zeros(n_nodes2)

# ==================================================
# [Problema 3] Optimizadores: SGD y [Problema 7] AdaGrad
# ==================================================
class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr
    def update(self, layer):
        layer.W -= self.lr * layer.dW
        layer.B -= self.lr * layer.dB
        return layer

class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h_W = None
        self.h_B = None
    def update(self, layer):
        if self.h_W is None:
            self.h_W = np.zeros_like(layer.W)
            self.h_B = np.zeros_like(layer.B)
        self.h_W += layer.dW * layer.dW
        self.h_B += layer.dB * layer.dB
        layer.W -= self.lr * layer.dW / (np.sqrt(self.h_W) + 1e-7)
        layer.B -= self.lr * layer.dB / (np.sqrt(self.h_B) + 1e-7)
        return layer

# ==================================================
# [Problema 4] Funciones de Activación + [Problema 5] ReLU
# ==================================================
class Tanh:
    def __init__(self):
        self.Z = None
    def forward(self, X):
        self.Z = np.tanh(X)
        return self.Z
    def backward(self, dA):
        return dA * (1.0 - self.Z**2)

class ReLU:
    def __init__(self):
        self.mask = None
    def forward(self, X):
        self.mask = X > 0
        return np.maximum(0, X)
    def backward(self, dA):
        return dA * self.mask

class Softmax:
    """
    Softmax; su backward incluye Cross-Entropy:
        dA = (softmax - y_true) / batch_size
    """
    def __init__(self):
        self.Z = None  # probabilidades
    def forward(self, X):
        Xs = X - np.max(X, axis=1, keepdims=True)
        ex = np.exp(Xs)
        self.Z = ex / np.sum(ex, axis=1, keepdims=True)
        return self.Z
    def backward(self, Z, Y_true):
        m = Y_true.shape[0]
        return (Z - Y_true) / m

# ==================================================
# [Problema 8] ScratchDeepNeuralNetrowkClassifier
# ==================================================
class ScratchDeepNeuralNetrowkClassifier:
    def __init__(self, n_epochs=10, batch_size=64, verbose=True):
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.verbose = verbose
        self.layers = []
        self.train_loss = []
        self.val_loss = []

    def add_fc(self, input_dim, output_dim, activation="relu",
               initializer="simple", optimizer="sgd", lr=0.01, sigma=0.01):
        # Inicializador
        if initializer == "simple":
            init = SimpleInitializer(sigma)
        elif initializer == "xavier":
            init = XavierInitializer()
        elif initializer == "he":
            init = HeInitializer()
        else:
            init = SimpleInitializer(sigma)

        # Optimizador
        if optimizer == "sgd":
            opt = SGD(lr)
        elif optimizer == "adagrad":
            opt = AdaGrad(lr)
        else:
            opt = SGD(lr)

        self.layers.append(FC(input_dim, output_dim, init, opt))

        # Activación posterior
        if activation == "relu":
            self.layers.append(ReLU())
        elif activation == "tanh":
            self.layers.append(Tanh())
        elif activation == "softmax":
            self.layers.append(Softmax())
        else:
            raise ValueError("Activación desconocida")

    @staticmethod
    def _cross_entropy(y_true, y_prob):
        y_prob = np.clip(y_prob, 1e-7, 1 - 1e-7)
        return -np.mean(np.sum(y_true * np.log(y_prob), axis=1))

    def _forward_all(self, X):
        A = X
        for layer in self.layers:
            A = layer.forward(A)
        return A  # probabilidades si el último es Softmax

    def _backward_all(self, y_true):
        # Última capa debe ser Softmax
        assert isinstance(self.layers[-1], Softmax)
        dA = self.layers[-1].backward(self.layers[-1].Z, y_true)
        for layer in reversed(self.layers[:-1]):
            dA = layer.backward(dA)

    def fit(self, X, y, X_val=None, y_val=None):
        n = X.shape[0]
        for epoch in range(self.n_epochs):
            # barajar cada época
            idx = np.random.permutation(n)
            Xs, ys = X[idx], y[idx]
            # mini-batches
            for i in range(0, n, self.batch_size):
                Xb = Xs[i:i+self.batch_size]
                yb = ys[i:i+self.batch_size]
                probs = self._forward_all(Xb)
                self._backward_all(yb)

            # métricas por época
            probs_tr = self._forward_all(X)
            loss_tr = self._cross_entropy(y, probs_tr)
            self.train_loss.append(loss_tr)

            if X_val is not None and y_val is not None:
                probs_va = self._forward_all(X_val)
                loss_va = self._cross_entropy(y_val, probs_va)
                self.val_loss.append(loss_va)
                if self.verbose:
                    acc = (np.argmax(probs_va, axis=1) ==
                           np.argmax(y_val, axis=1)).mean()
                    print(f"Época {epoch+1:02d}/{self.n_epochs} | "
                          f"Loss tr {loss_tr:.4f} | Loss va {loss_va:.4f} | Acc va {acc:.4f}")
            elif self.verbose:
                print(f"Época {epoch+1:02d}/{self.n_epochs} | Loss tr {loss_tr:.4f}")

    def predict(self, X):
        probs = self._forward_all(X)
        return np.argmax(probs, axis=1)

    def predict_proba(self, X):
        return self._forward_all(X)

# ==================================================
# [Problema 9] Entrenamiento y Validación con MNIST
# ==================================================
print("Descargando MNIST (OpenML)...")
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist.data.astype(np.float32) / 255.0
y = mnist.target.astype(int)

# Para que sea rápido en CPU, usamos 12k ejemplos (ajusta si quieres)
X_train, X_valid, y_train, y_valid = train_test_split(
    X[:12000], y[:12000], test_size=0.2, random_state=42, stratify=y[:12000]
)

enc = OneHotEncoder(**OHE_KW)
Y_train = enc.fit_transform(y_train.reshape(-1, 1))
Y_valid = enc.transform(y_valid.reshape(-1, 1))

def run_and_report(cfg_name, layers_spec, epochs=10, batch=64, verbose=True):
    net = ScratchDeepNeuralNetrowkClassifier(n_epochs=epochs, batch_size=batch, verbose=verbose)
    # construir arquitectura según spec [(in,out,act,init,opt,lr)]
    for (inp, outp, act, init, opt, lr) in layers_spec:
        net.add_fc(inp, outp, activation=act, initializer=init, optimizer=opt, lr=lr)
    net.fit(X_train, Y_train, X_valid, Y_valid)
    y_pred = net.predict(X_valid)
    acc = (y_pred == y_valid).mean()
    print(f"[{cfg_name}] Accuracy validación: {acc:.4f}")
    return acc

# Configuración A: ReLU + He + AdaGrad
cfgA = [
    (784, 256, "relu",    "he",     "adagrad", 0.05),
    (256, 128, "relu",    "he",     "adagrad", 0.05),
    (128, 10,  "softmax", "xavier", "adagrad", 0.05),
]

# Configuración B: Tanh + Xavier + SGD
cfgB = [
    (784, 256, "tanh",    "xavier", "sgd",     0.1),
    (256, 128, "tanh",    "xavier", "sgd",     0.1),
    (128, 10,  "softmax", "xavier", "sgd",     0.1),
]

print("\n=== Entrenando configuraciones ===")
accA = run_and_report("A (ReLU/He/AdaGrad)", cfgA, epochs=8, batch=128, verbose=True)
accB = run_and_report("B (Tanh/Xavier/SGD)", cfgB, epochs=8, batch=128, verbose=True)

print("\nResumen:")
print(f"A: {accA:.4f} | B: {accB:.4f}")


Descargando MNIST (OpenML)...

=== Entrenando configuraciones ===
Época 01/8 | Loss tr 0.2482 | Loss va 0.2768 | Acc va 0.9137
Época 02/8 | Loss tr 0.1168 | Loss va 0.1648 | Acc va 0.9558
Época 03/8 | Loss tr 0.0915 | Loss va 0.1570 | Acc va 0.9575
Época 04/8 | Loss tr 0.0623 | Loss va 0.1501 | Acc va 0.9575
Época 05/8 | Loss tr 0.0396 | Loss va 0.1352 | Acc va 0.9604
Época 06/8 | Loss tr 0.0265 | Loss va 0.1279 | Acc va 0.9654
Época 07/8 | Loss tr 0.0235 | Loss va 0.1319 | Acc va 0.9604
Época 08/8 | Loss tr 0.0136 | Loss va 0.1292 | Acc va 0.9679
[A (ReLU/He/AdaGrad)] Accuracy validación: 0.9679
Época 01/8 | Loss tr 2.2159 | Loss va 2.2142 | Acc va 0.2213
Época 02/8 | Loss tr 2.1477 | Loss va 2.1440 | Acc va 0.2896
Época 03/8 | Loss tr 2.0829 | Loss va 2.0774 | Acc va 0.3821
Época 04/8 | Loss tr 2.0213 | Loss va 2.0140 | Acc va 0.4604
Época 05/8 | Loss tr 1.9623 | Loss va 1.9534 | Acc va 0.5171
Época 06/8 | Loss tr 1.9059 | Loss va 1.8953 | Acc va 0.5533
Época 07/8 | Loss tr 1.8519 | 