<a href="https://colab.research.google.com/github/SILVIAIRENE/Data-Scientist-Machine-Learning-Engineer-Introductory-Course/blob/master/SimpleConv1d_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ============================================================================
# CNN 1D DESDE CERO - IMPLEMENTACIÓN COMPLETA TODOS LOS PROBLEMAS 1-8
# ============================================================================

import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Start-up banner, printed once when this cell/script begins executing.
print("🚀 CNN 1D DESDE CERO - IMPLEMENTACIÓN COMPLETA")
print("=" * 80)

# ============================================================================
# [Problema 1] Creación de clase SimpleConv1d (1 canal)
# ============================================================================

class SimpleConv1d:
    """
    Single-channel 1D convolution layer (one input channel, one output channel).

    The filter slides over the input with stride 1 and no padding, so an
    input of length N produces an output of length ``N - filter_size + 1``.

    Parameters
    ----------
    filter_size : int
        Length of the filter.
    learning_rate : float
        Step size used by ``update_weights``.
    initializer : str
        Weight initialisation scheme. ``'xavier'`` draws from
        N(0, 1 / filter_size), ``'he'`` from N(0, 2 / filter_size); any
        other value falls back to a standard deviation of 0.01.
    """

    def __init__(self, filter_size, learning_rate=0.01, initializer='xavier'):
        self.filter_size = filter_size
        self.learning_rate = learning_rate
        self.initializer = initializer
        self._initialize_weights()

    def _initialize_weights(self):
        """Draw the initial filter weights and zero the bias and gradients."""
        if self.initializer == 'xavier':
            # Glorot: var = 2 / (fan_in + fan_out). With a single channel
            # fan_in == fan_out == filter_size, hence 1 / filter_size.
            std = np.sqrt(1.0 / self.filter_size)
        elif self.initializer == 'he':
            # He: var = 2 / fan_in.
            std = np.sqrt(2.0 / self.filter_size)
        else:
            std = 0.01

        self.w = np.random.normal(0, std, self.filter_size)
        self.b = np.zeros(1)

        # Gradient buffers, filled by backward().
        self.dw = np.zeros_like(self.w)
        self.db = np.zeros_like(self.b)

    def forward(self, x):
        """
        Forward pass: ``a[i] = sum_s x[i + s] * w[s] + b``.

        Parameters
        ----------
        x : ndarray, shape (input_size,)
            Layer input. It is cached for the backward pass.

        Returns
        -------
        a : ndarray, shape (input_size - filter_size + 1,)
            Layer output (float).

        Raises
        ------
        ValueError
            If the input is shorter than the filter.
        """
        x = np.asarray(x)
        if len(x) < self.filter_size:
            raise ValueError(
                f"input length {len(x)} is shorter than filter_size {self.filter_size}"
            )
        self.x = x

        # 'valid' cross-correlation is exactly the sliding dot product above.
        a = np.correlate(x, self.w, mode='valid') + self.b[0]
        self.a = np.asarray(a, dtype=float)
        return self.a

    def backward(self, delta_a):
        """
        Backward pass.

        Stores the parameter gradients in ``self.dw`` and ``self.db`` and
        returns the gradient with respect to the input cached by ``forward``.

        Parameters
        ----------
        delta_a : ndarray, shape (output_size,)
            Gradient of the loss with respect to the layer output.

        Returns
        -------
        delta_x : ndarray, shape (input_size,)
            Gradient of the loss with respect to the layer input.

        Raises
        ------
        ValueError
            If ``delta_a`` does not have the length of the last output.
        """
        delta_a = np.asarray(delta_a)
        expected = len(self.x) - self.filter_size + 1
        if len(delta_a) != expected:
            raise ValueError(
                f"delta_a has length {len(delta_a)}, expected {expected}"
            )

        # dL/dw_s = sum_i delta_a[i] * x[i + s]
        self.dw = np.correlate(self.x, delta_a, mode='valid').astype(float)

        # dL/db = sum_i delta_a[i]
        self.db = np.array([np.sum(delta_a)], dtype=float)

        # dL/dx_j = sum_s delta_a[j - s] * w[s]  (a full convolution)
        delta_x = np.convolve(delta_a, self.w, mode='full').astype(float)
        return delta_x

    def update_weights(self):
        """Apply one gradient-descent step using the stored gradients."""
        # Rebind instead of using `-=` so that integer-valued weights
        # (e.g. assigned manually in a test) are promoted to float rather
        # than raising a casting error.
        self.w = self.w - self.learning_rate * self.dw
        self.b = self.b - self.learning_rate * self.db

# Progress marker: printed as soon as the class above has been defined; no test has run yet.
print("✅ [Problema 1] Clase SimpleConv1d implementada")

# ============================================================================
# [Problema 2] Función para calcular tamaño de salida después de convolución 1D
# ============================================================================

def calculate_output_size(input_size, filter_size, padding=0, stride=1):
    """
    Return the number of output positions of a 1D convolution.

    Implements ``N_out = (N_in + 2*P - F) // S + 1`` using floor division.

    Parameters
    ----------
    input_size : int
        Length of the input (N_in).
    filter_size : int
        Length of the filter (F).
    padding : int
        Number of padded elements on each side (P).
    stride : int
        Step between consecutive filter positions (S).

    Returns
    -------
    output_size : int
        Length of the output (N_out).
    """
    # Distance the filter can travel once the padded input is accounted for.
    travel = input_size + 2 * padding - filter_size
    steps = travel // stride
    return steps + 1

# Progress marker: printed once the function above has been defined.
print("✅ [Problema 2] Función calculate_output_size implementada")

# ============================================================================
# [Problema 3] Experimento con matrices pequeñas
# ============================================================================

def test_simple_conv1d():
    """Check SimpleConv1d forward/backward against a hand-computed example."""
    rule = "=" * 60
    print("\n" + rule)
    print("[Problema 3] Experimento con matrices pequeñas")
    print(rule)

    # Fixture taken from the exercise statement.
    x = np.array([1, 2, 3, 4])
    w = np.array([3, 5, 7])
    b = np.array([1])

    for label, value in (("Entrada x", x), ("Pesos w", w), ("Sesgo b", b)):
        print(f"{label}: {value}")

    # Build the layer and overwrite its random parameters with the fixture.
    conv = SimpleConv1d(filter_size=3)
    conv.w, conv.b = w.copy(), b.copy()

    # Forward pass. By hand:
    #   a[0] = 1*3 + 2*5 + 3*7 + 1 = 35
    #   a[1] = 2*3 + 3*5 + 4*7 + 1 = 50
    a = conv.forward(x)
    print("Salida esperada: [35, 50]")
    print(f"Salida obtenida: {a}")
    assert np.allclose(a, [35, 50]), "Error en propagación hacia adelante"

    # Backward pass with a fixed upstream gradient.
    delta_a = np.array([10, 20])
    print(f"\nGradiente entrada delta_a: {delta_a}")
    delta_x = conv.backward(delta_a)

    print("Gradiente sesgo esperado: [30]")
    print(f"Gradiente sesgo obtenido: {conv.db}")

    print("Gradiente pesos esperado: [50, 80, 110]")
    print(f"Gradiente pesos obtenido: {conv.dw}")

    print("Gradiente entrada esperado: [30, 110, 170, 140]")
    print(f"Gradiente entrada obtenido: {delta_x}")

    # Verify all three gradients.
    assert np.allclose(conv.db, [30]), "Error en gradiente del sesgo"
    assert np.allclose(conv.dw, [50, 80, 110]), "Error en gradiente de pesos"
    assert np.allclose(delta_x, [30, 110, 170, 140]), "Error en gradiente de entrada"

    print("\n✅ Todas las pruebas pasaron correctamente!")

# Progress marker: the test function is only defined here; it is run later from main().
print("✅ [Problema 3] Función de prueba test_simple_conv1d implementada")

# ============================================================================
# [Problema 4] Clase Conv1d - Múltiples canales
# ============================================================================

class Conv1d:
    """
    1D convolution layer with multiple input/output channels, stride,
    zero padding and mini-batch support.

    Gradients produced by ``backward`` are summed over the mini-batch
    (not averaged).

    Parameters
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    filter_size : int
        Length of each filter.
    stride : int
        Step between consecutive filter positions.
    padding : int
        Number of zeros added on each side of the input.
    learning_rate : float
        Step size used by ``update_weights``.
    """

    def __init__(self, in_channels, out_channels, filter_size, stride=1,
                 padding=0, learning_rate=0.01):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.filter_size = filter_size
        self.stride = stride
        self.padding = padding
        self.learning_rate = learning_rate

        # Padded input of the last forward() call, always stored with a
        # leading batch axis; None until forward() has been called.
        self._x_batch = None

        self._initialize_weights()

    def _initialize_weights(self):
        """Initialise weights with Xavier/Glorot normal and zero the bias."""
        # Weights: (out_channels, in_channels, filter_size)
        fan_in = self.in_channels * self.filter_size
        fan_out = self.out_channels * self.filter_size
        std = np.sqrt(2.0 / (fan_in + fan_out))

        self.w = np.random.normal(0, std, (self.out_channels, self.in_channels, self.filter_size))
        self.b = np.zeros(self.out_channels)

        # Gradient buffers, filled by backward().
        self.dw = np.zeros_like(self.w)
        self.db = np.zeros_like(self.b)

    def _apply_padding(self, x):
        """Zero-pad the last axis of ``x`` by ``self.padding`` on both sides."""
        if self.padding == 0:
            return x

        # Only the feature axis (last one) is padded, whatever the rank.
        pad_width = [(0, 0)] * (x.ndim - 1) + [(self.padding, self.padding)]
        return np.pad(x, pad_width, mode='constant', constant_values=0)

    def _window_index(self, padded_size):
        """Return an (output_size, filter_size) index array of filter windows."""
        output_size = (padded_size - self.filter_size) // self.stride + 1
        if output_size < 1:
            raise ValueError(
                f"padded input length {padded_size} is shorter than "
                f"filter_size {self.filter_size}"
            )
        starts = np.arange(output_size) * self.stride
        return starts[:, None] + np.arange(self.filter_size)[None, :]

    def forward(self, x):
        """
        Forward pass.

        Parameters
        ----------
        x : ndarray
            Input of shape (input_size,) for a single one-channel sample,
            (in_channels, input_size) for a single sample, or
            (batch_size, in_channels, input_size) for a mini-batch.

        Returns
        -------
        a : ndarray
            Output of shape (out_channels, output_size) for a single
            sample, or (batch_size, out_channels, output_size) for a batch.

        Raises
        ------
        ValueError
            If the input rank or channel count is wrong, or the padded
            input is shorter than the filter.
        """
        x = np.asarray(x)
        if x.ndim == 1:
            # A bare vector is treated as one sample with one channel.
            x = x.reshape(1, -1)
        if x.ndim not in (2, 3):
            raise ValueError(f"expected a 1D, 2D or 3D input, got {x.ndim}D")
        if x.shape[-2] != self.in_channels:
            raise ValueError(
                f"expected {self.in_channels} input channels, got {x.shape[-2]}"
            )

        is_batch = x.ndim == 3
        self.x_original = x

        x_padded = self._apply_padding(x)
        self.x_padded = x_padded

        # Cache the whole (padded) batch: backward() needs every sample,
        # not just the last one.
        x_batch = x_padded if is_batch else x_padded[None]
        self._x_batch = x_batch

        idx = self._window_index(x_batch.shape[2])
        windows = x_batch[:, :, idx]  # (batch, in_channels, output_size, filter_size)

        # Contract over (in_channels, filter_size) -> (batch, output_size, out_channels)
        a = np.tensordot(windows, self.w, axes=([1, 3], [1, 2]))
        a = a.transpose(0, 2, 1) + self.b[None, :, None]
        a = np.asarray(a, dtype=float)

        if not is_batch:
            a = a[0]
        self.a = a
        return a

    def backward(self, delta_a):
        """
        Backward pass.

        Stores the batch-summed parameter gradients in ``self.dw`` and
        ``self.db`` and returns the gradient with respect to the input of
        the last ``forward`` call.

        Parameters
        ----------
        delta_a : ndarray
            Gradient of the loss with respect to the output, of shape
            (out_channels, output_size) or
            (batch_size, out_channels, output_size).

        Returns
        -------
        delta_x : ndarray
            Gradient with respect to the (unpadded) input; it has a leading
            batch axis exactly when ``delta_a`` has one.

        Raises
        ------
        RuntimeError
            If called before ``forward``.
        ValueError
            If ``delta_a`` does not match the shape of the last output.
        """
        x_batch = getattr(self, '_x_batch', None)
        if x_batch is None:
            raise RuntimeError("forward() must be called before backward()")

        delta_a = np.asarray(delta_a)
        if delta_a.ndim == 3:
            is_batch, delta = True, delta_a
        elif delta_a.ndim == 2:
            is_batch, delta = False, delta_a[None]
        else:
            raise ValueError(f"expected a 2D or 3D gradient, got {delta_a.ndim}D")

        idx = self._window_index(x_batch.shape[2])
        output_size = idx.shape[0]
        if delta.shape[1] != self.out_channels or delta.shape[2] != output_size:
            raise ValueError(
                f"delta_a has shape {delta_a.shape}, expected "
                f"(..., {self.out_channels}, {output_size})"
            )

        n = delta.shape[0]
        if x_batch.shape[0] != n:
            if x_batch.shape[0] != 1:
                raise ValueError(
                    f"delta_a batch size {n} does not match the cached "
                    f"forward batch size {x_batch.shape[0]}"
                )
            # A single cached sample is shared by every gradient in the batch.
            x_batch = np.broadcast_to(x_batch, (n,) + x_batch.shape[1:])

        windows = x_batch[:, :, idx]  # (batch, in_channels, output_size, filter_size)

        # dL/dw[k, c, f] = sum_{b, o} delta[b, k, o] * windows[b, c, o, f]
        self.dw = np.asarray(
            np.tensordot(delta, windows, axes=([0, 2], [0, 2])), dtype=float
        )
        # dL/db[k] = sum_{b, o} delta[b, k, o]
        self.db = np.asarray(delta.sum(axis=(0, 2)), dtype=float)

        # Scatter the output gradient back onto the padded input, one filter
        # tap at a time: tap f touches positions f, f + stride, ...
        delta_x_padded = np.zeros(x_batch.shape, dtype=float)
        span = (output_size - 1) * self.stride + 1
        for f in range(self.filter_size):
            contrib = np.tensordot(delta, self.w[:, :, f], axes=([1], [0]))  # (batch, output, in_ch)
            delta_x_padded[:, :, f:f + span:self.stride] += contrib.transpose(0, 2, 1)

        # Drop the gradient that fell on the zero padding.
        if self.padding > 0:
            delta_x = delta_x_padded[:, :, self.padding:-self.padding]
        else:
            delta_x = delta_x_padded

        return delta_x if is_batch else delta_x[0]

    def update_weights(self):
        """Apply one gradient-descent step using the stored gradients."""
        # Rebind instead of `-=` so manually assigned integer parameters are
        # promoted to float rather than raising a casting error.
        self.w = self.w - self.learning_rate * self.dw
        self.b = self.b - self.learning_rate * self.db

def test_conv1d_multiple_channels():
    """Check the Conv1d forward pass on a 2-channel input with 3 output channels."""
    rule = "=" * 60
    print("\n" + rule)
    print("[Problema 4] Experimento con múltiples canales")
    print(rule)

    # Fixture: two input channels of four features, all-ones filters.
    x = np.array([[1, 2, 3, 4], [2, 3, 4, 5]])  # shape (2, 4)
    w = np.ones((3, 2, 3))  # shape (3, 2, 3)
    b = np.array([1, 2, 3])  # shape (3,)

    print(f"Entrada x shape: {x.shape}")
    print(f"Entrada x:\n{x}")
    print(f"Pesos w shape: {w.shape}")
    print(f"Sesgo b: {b}")

    # Build the layer and replace its random parameters with the fixture.
    conv = Conv1d(in_channels=2, out_channels=3, filter_size=3)
    conv.w, conv.b = w.copy(), b.copy()

    a = conv.forward(x)
    print(f"Salida shape: {a.shape}")
    print(f"Salida obtenida:\n{a}")

    # With unit weights each output is the sum of both channel windows plus
    # the bias: (1+2+3 + 2+3+4) = 15 and (2+3+4 + 3+4+5) = 21.
    expected_a = np.array([[16, 22], [17, 23], [18, 24]])
    print(f"Salida esperada:\n{expected_a}")

    assert np.allclose(a, expected_a), "Error en convolución con múltiples canales"
    print("✅ Prueba de múltiples canales pasó correctamente!")

# Progress marker: printed once the class and its test function above have been defined.
print("✅ [Problema 4] Clase Conv1d con múltiples canales implementada")

# ============================================================================
# [Problema 5] Implementación de padding
# ============================================================================

def test_padding():
    """Check that padding=1 with a size-3 filter preserves the input length."""
    rule = "=" * 60
    print("\n" + rule)
    print("[Problema 5] Experimento con padding")
    print(rule)

    x = np.array([[1, 2, 3, 4]])  # 1 channel, 4 features

    # Two otherwise identical layers, without and with padding.
    conv_no_pad = Conv1d(in_channels=1, out_channels=1, filter_size=3, padding=0)
    conv_with_pad = Conv1d(in_channels=1, out_channels=1, filter_size=3, padding=1)

    # Give both layers the same deterministic parameters.
    for layer in (conv_no_pad, conv_with_pad):
        layer.w = np.ones((1, 1, 3))
        layer.b = np.zeros(1)

    a_no_pad = conv_no_pad.forward(x)
    a_with_pad = conv_with_pad.forward(x)

    print(f"Entrada: {x}")
    print(f"Sin padding - salida shape: {a_no_pad.shape}, valores: {a_no_pad}")
    print(f"Con padding - salida shape: {a_with_pad.shape}, valores: {a_with_pad}")

    # With padding=1 the output must be as long as the input.
    assert a_with_pad.shape[1] == x.shape[1], "Error en padding"
    print("✅ Prueba de padding pasó correctamente!")

# Progress marker: printed once the padding test above has been defined (not run).
print("✅ [Problema 5] Implementación de padding completada")

# ============================================================================
# [Problema 6] Manejo de minilotes
# ============================================================================

def test_batch_processing():
    """Check that Conv1d.forward keeps the batch dimension of a 3D input."""
    rule = "=" * 60
    print("\n" + rule)
    print("[Problema 6] Experimento con minilotes")
    print(rule)

    # Random mini-batch: 3 samples, 2 channels, 5 features.
    batch_size, in_channels, input_size = 3, 2, 5
    x_batch = np.random.randn(batch_size, in_channels, input_size)

    conv = Conv1d(in_channels=2, out_channels=3, filter_size=3)
    a_batch = conv.forward(x_batch)

    print(f"Entrada batch shape: {x_batch.shape}")
    print(f"Salida batch shape: {a_batch.shape}")

    # One output per input sample.
    assert a_batch.shape[0] == batch_size, "Error en procesamiento de batch"
    print("✅ Prueba de minilotes pasó correctamente!")

# Progress marker: printed once the mini-batch test above has been defined (not run).
print("✅ [Problema 6] Manejo de minilotes implementado")

# ============================================================================
# [Problema 7] Cualquier número de pasos (stride)
# ============================================================================

def test_stride():
    """Check the Conv1d output length for strides 1, 2 and 3."""
    rule = "=" * 60
    print("\n" + rule)
    print("[Problema 7] Experimento con diferentes strides")
    print(rule)

    x = np.array([[1, 2, 3, 4, 5, 6]])  # 1 channel, 6 features

    for stride in (1, 2, 3):
        conv = Conv1d(in_channels=1, out_channels=1, filter_size=3, stride=stride)
        conv.w, conv.b = np.ones((1, 1, 3)), np.zeros(1)

        a = conv.forward(x)
        # Reference length from the closed-form formula.
        expected_size = calculate_output_size(6, 3, 0, stride)

        print(f"Stride {stride}: entrada {x.shape[1]} -> salida {a.shape[1]} (esperado: {expected_size})")
        assert a.shape[1] == expected_size, f"Error en stride {stride}"

    print("✅ Prueba de stride pasó correctamente!")

# Progress marker: printed once the stride test above has been defined (not run).
print("✅ [Problema 7] Cualquier número de pasos implementado")

# ============================================================================
# [Problema 8] CNN completa para MNIST
# ============================================================================

class ReLU:
    """Element-wise rectified linear unit: ``max(0, x)``."""

    def forward(self, x):
        """Return ``max(0, x)`` and cache which inputs were positive."""
        x = np.asarray(x)
        self.input_shape = x.shape
        # Only the sign pattern is needed for the backward pass; caching the
        # boolean mask avoids keeping a full copy of the input.
        self.mask = x > 0
        return np.maximum(0, x)

    def backward(self, delta_a):
        """
        Propagate ``delta_a`` through the activation.

        The gradient passes where the cached forward input was positive and
        is zeroed elsewhere. ``delta_a`` may have any shape that broadcasts
        against the cached input shape.

        Raises
        ------
        RuntimeError
            If called before ``forward``; without the forward input there is
            no valid mask (the sign of the gradient is not a substitute).
        """
        mask = getattr(self, 'mask', None)
        if mask is None:
            raise RuntimeError("forward() must be called before backward()")
        return delta_a * mask

class FullyConnected:
    """Dense (affine) layer computing ``x @ w + b``."""

    def __init__(self, input_size, output_size, learning_rate=0.01):
        self.input_size, self.output_size = input_size, output_size
        self.learning_rate = learning_rate

        # Xavier/Glorot normal initialisation for the weights, zero bias.
        glorot_std = np.sqrt(2.0 / (input_size + output_size))
        self.w = np.random.normal(0, glorot_std, (input_size, output_size))
        self.b = np.zeros(output_size)

        # Gradient buffers, overwritten by backward().
        self.dw = np.zeros_like(self.w)
        self.db = np.zeros_like(self.b)

    def forward(self, x):
        """Cache the input and return the affine transform of it."""
        self.x = x
        projected = np.matmul(x, self.w)
        return projected + self.b

    def backward(self, delta_a):
        """Store weight/bias gradients and return the gradient w.r.t. the input."""
        self.dw = np.matmul(self.x.T, delta_a)
        self.db = np.sum(delta_a, axis=0)
        delta_x = np.matmul(delta_a, self.w.T)
        return delta_x

    def update_weights(self):
        """Apply one in-place gradient-descent step."""
        rate = self.learning_rate
        self.w -= rate * self.dw
        self.b -= rate * self.db

class Softmax:
    """Row-wise softmax, paired with a cross-entropy gradient in ``backward``."""

    def forward(self, x):
        """Return softmax probabilities along axis 1 and cache them."""
        # Subtracting the row maximum keeps exp() from overflowing.
        shifted = x - np.max(x, axis=1, keepdims=True)
        numerators = np.exp(shifted)
        normaliser = np.sum(numerators, axis=1, keepdims=True)
        self.a = numerators / normaliser
        return self.a

    def backward(self, y_true):
        """Return ``probabilities - y_true`` using the cached forward output."""
        probabilities = self.a
        return probabilities - y_true

class Scratch1dCNNClassifier:
    """
    1D CNN classifier built from the scratch layers defined in this file.

    Architecture: a stack of (Conv1d -> ReLU) blocks, a flatten step, a
    stack of (FullyConnected -> ReLU) blocks, a final FullyConnected output
    layer and a softmax. The fully connected layers are created lazily on
    the first forward pass, once the flattened conv output size is known.

    Parameters
    ----------
    conv_filters : sequence of int
        Number of output channels of each convolutional layer.
    conv_filter_sizes : sequence of int
        Filter length of each convolutional layer (paired with
        ``conv_filters``). Each layer is padded with ``filter_size // 2``.
    fc_sizes : sequence of int
        Width of each hidden fully connected layer.
    n_classes : int
        Number of output classes.
    learning_rate : float
        Learning rate passed to every trainable layer.
    """

    def __init__(self, conv_filters=(16, 32), conv_filter_sizes=(7, 5),
                 fc_sizes=(64,), n_classes=10, learning_rate=0.01):
        # Immutable defaults, copied into lists so instances never share state.
        self.conv_filters = list(conv_filters)
        self.conv_filter_sizes = list(conv_filter_sizes)
        self.fc_sizes = list(fc_sizes)
        self.n_classes = n_classes
        self.learning_rate = learning_rate

        self.layers = []
        self.fc_layers_objects = []
        # (channels, length) of the conv stack output in the last forward
        # pass; backward() needs it to un-flatten the gradient.
        self._conv_output_shape = None
        self.build_model()

        # Training history, one entry per epoch.
        self.train_losses = []
        self.val_losses = []
        self.train_accuracies = []
        self.val_accuracies = []

    def build_model(self):
        """Create the convolutional part of the network and the softmax."""
        in_channels = 1

        for filters, filter_size in zip(self.conv_filters, self.conv_filter_sizes):
            conv_layer = Conv1d(
                in_channels=in_channels,
                out_channels=filters,
                filter_size=filter_size,
                padding=filter_size // 2,
                learning_rate=self.learning_rate
            )
            self.layers.extend([conv_layer, ReLU()])
            in_channels = filters

        self.fc_initialized = False
        self.softmax = Softmax()

    def _initialize_fc_layers(self, input_size):
        """Create the fully connected layers for a flattened input of ``input_size``."""
        if self.fc_initialized:
            return

        prev_size = input_size

        for fc_size in self.fc_sizes:
            fc_layer = FullyConnected(prev_size, fc_size, self.learning_rate)
            self.fc_layers_objects.extend([fc_layer, ReLU()])
            prev_size = fc_size

        # Output layer (logits); softmax is applied separately.
        output_layer = FullyConnected(prev_size, self.n_classes, self.learning_rate)
        self.fc_layers_objects.append(output_layer)

        self.fc_initialized = True

    def forward(self, x):
        """
        Forward pass.

        Parameters
        ----------
        x : ndarray
            Either (batch_size, n_features), reshaped to one channel, or
            (batch_size, channels, n_features).

        Returns
        -------
        ndarray, shape (batch_size, n_classes)
            Class probabilities.
        """
        if len(x.shape) == 2:
            x = x.reshape(x.shape[0], 1, -1)

        current_output = x

        # Convolutional blocks
        for layer in self.layers:
            current_output = layer.forward(current_output)

        # Flatten, remembering the shape so backward() can undo it.
        batch_size = current_output.shape[0]
        self._conv_output_shape = tuple(current_output.shape[1:])
        flat_size = current_output.shape[1] * current_output.shape[2]

        if not self.fc_initialized:
            self._initialize_fc_layers(flat_size)

        current_output = current_output.reshape(batch_size, -1)

        # Fully connected blocks
        for layer in self.fc_layers_objects:
            current_output = layer.forward(current_output)

        return self.softmax.forward(current_output)

    def backward(self, y_true):
        """
        Backward pass for the batch processed by the last ``forward`` call.

        Parameters
        ----------
        y_true : ndarray, shape (batch_size, n_classes)
            One-hot encoded targets.

        Raises
        ------
        RuntimeError
            If called before ``forward``.
        """
        if self._conv_output_shape is None:
            raise RuntimeError("forward() must be called before backward()")

        delta = self.softmax.backward(y_true)

        for layer in reversed(self.fc_layers_objects):
            delta = layer.backward(delta)

        # Un-flatten with the shape recorded during forward(). Re-running the
        # layers on a dummy input here would overwrite the activations they
        # cached for this batch and corrupt the gradients.
        delta = delta.reshape((delta.shape[0],) + self._conv_output_shape)

        for layer in reversed(self.layers):
            delta = layer.backward(delta)

    def update_weights(self):
        """Apply one gradient step on every trainable layer."""
        for layer in self.layers + self.fc_layers_objects:
            # Activation layers have no parameters and no update_weights().
            if hasattr(layer, 'update_weights'):
                layer.update_weights()

    def cross_entropy_loss(self, y_true, y_pred):
        """Mean cross-entropy over the batch; predictions are clipped away from 0 and 1."""
        batch_size = y_true.shape[0]
        y_pred = np.clip(y_pred, 1e-7, 1 - 1e-7)
        return -np.sum(y_true * np.log(y_pred)) / batch_size

    def accuracy(self, y_true, y_pred):
        """Fraction of samples whose arg-max prediction matches the one-hot target."""
        y_true_labels = np.argmax(y_true, axis=1)
        y_pred_labels = np.argmax(y_pred, axis=1)
        return np.mean(y_true_labels == y_pred_labels)

    def fit(self, X_train, y_train, X_val=None, y_val=None, epochs=20, batch_size=16, verbose=True):
        """
        Train with mini-batch gradient descent.

        Every sample is used once per epoch (the last batch may be smaller).
        Epoch metrics are sample-weighted averages over the batches whose
        forward pass succeeded. A failing batch is reported and skipped. A
        failing validation pass is reported and recorded as NaN so that the
        history lists stay aligned with the epochs.

        Parameters
        ----------
        X_train, y_train : ndarray
            Training inputs and one-hot targets.
        X_val, y_val : ndarray, optional
            Validation inputs and one-hot targets.
        epochs : int
            Number of passes over the training data.
        batch_size : int
            Mini-batch size.
        verbose : bool
            Print metrics every 5 epochs.
        """
        n_samples = X_train.shape[0]
        # Ceiling division so the trailing partial batch is not dropped.
        n_batches = max(1, -(-n_samples // batch_size))
        has_val = X_val is not None and y_val is not None

        print(f"🚀 Iniciando entrenamiento...")
        print(f"📊 {n_samples} muestras, {n_batches} batches por época")

        for epoch in range(epochs):
            loss_total = 0.0
            acc_total = 0.0
            n_seen = 0

            # Shuffle
            indices = np.random.permutation(n_samples)
            X_shuffled = X_train[indices]
            y_shuffled = y_train[indices]

            for i in range(n_batches):
                start_idx = i * batch_size
                end_idx = min((i + 1) * batch_size, n_samples)

                X_batch = X_shuffled[start_idx:end_idx]
                y_batch = y_shuffled[start_idx:end_idx]
                n_batch = X_batch.shape[0]
                if n_batch == 0:
                    continue

                try:
                    y_pred = self.forward(X_batch)
                    loss = self.cross_entropy_loss(y_batch, y_pred)
                    acc = self.accuracy(y_batch, y_pred)

                    loss_total += loss * n_batch
                    acc_total += acc * n_batch
                    n_seen += n_batch

                    self.backward(y_batch)
                    self.update_weights()

                except Exception as e:
                    # Best effort: report the failing batch and keep training.
                    print(f"⚠️ Error en batch {i}: {e}")
                    continue

            if n_seen > 0:
                epoch_loss = loss_total / n_seen
                epoch_acc = acc_total / n_seen
            else:
                # No batch succeeded: there is no meaningful metric to record.
                epoch_loss = epoch_acc = float('nan')

            self.train_losses.append(epoch_loss)
            self.train_accuracies.append(epoch_acc)

            # Validation
            val_loss = val_acc = None
            if has_val:
                try:
                    val_pred = self.predict_proba(X_val)
                    val_loss = self.cross_entropy_loss(y_val, val_pred)
                    val_acc = self.accuracy(y_val, val_pred)
                except Exception as e:
                    print(f"⚠️ Error en validación (época {epoch + 1}): {e}")
                    val_loss = val_acc = float('nan')

                self.val_losses.append(val_loss)
                self.val_accuracies.append(val_acc)

            if verbose and (epoch + 1) % 5 == 0:
                message = (f'Epoch {epoch+1:2d}/{epochs} - '
                           f'Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
                if has_val:
                    message += f' - Val Loss: {val_loss:.4f} Val Acc: {val_acc:.4f}'
                print(message)

    def predict_proba(self, X):
        """Return class probabilities for ``X``."""
        return self.forward(X)

    def predict(self, X):
        """Return the predicted class index for each sample in ``X``."""
        proba = self.predict_proba(X)
        return np.argmax(proba, axis=1)

    def plot_learning_curves(self):
        """Plot the recorded training (and, if present, validation) loss and accuracy."""
        if len(self.train_losses) == 0:
            print("⚠️ No hay datos para graficar")
            return

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

        epochs = range(1, len(self.train_losses) + 1)
        # Validation history may be shorter (e.g. an earlier fit() call had
        # no validation data), so it gets its own x-axis.
        val_epochs = range(1, len(self.val_losses) + 1)

        # Loss
        ax1.plot(epochs, self.train_losses, 'b-', label='Train Loss', linewidth=2)
        if self.val_losses:
            ax1.plot(val_epochs, self.val_losses, 'r-', label='Validation Loss', linewidth=2)
        ax1.set_title('Pérdida')
        ax1.set_xlabel('Época')
        ax1.set_ylabel('Cross Entropy Loss')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Accuracy
        ax2.plot(epochs, self.train_accuracies, 'b-', label='Train Accuracy', linewidth=2)
        if self.val_accuracies:
            ax2.plot(range(1, len(self.val_accuracies) + 1), self.val_accuracies,
                     'r-', label='Validation Accuracy', linewidth=2)
        ax2.set_title('Precisión')
        ax2.set_xlabel('Época')
        ax2.set_ylabel('Accuracy')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

def train_cnn_on_mnist():
    """
    Train the scratch 1D CNN on a small MNIST subset and report the results.

    Loads MNIST, flattens and scales the images, one-hot encodes the labels,
    holds out 20% for validation, then trains on the first 2000 training
    samples and evaluates on the first 400 validation samples. Learning
    curves, a classification report and a confusion matrix are shown.

    Returns
    -------
    Scratch1dCNNClassifier or None
        The trained model, or None if any step raised (the traceback is
        printed in that case).
    """
    rule = "=" * 80
    print("\n" + rule)
    print("[Problema 8] ENTRENAMIENTO CNN 1D EN MNIST")
    print(rule)

    try:
        # Load
        print("📥 Cargando datos MNIST...")
        (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

        # Flatten each image to a vector and scale pixels to [0, 1].
        print("🔄 Preprocesando datos...")
        X_train_flat = train_images.reshape(train_images.shape[0], -1).astype(np.float32) / 255.0
        X_test_flat = test_images.reshape(test_images.shape[0], -1).astype(np.float32) / 255.0

        # One-hot encode the labels.
        encoder = OneHotEncoder(sparse_output=False)
        y_train_onehot = encoder.fit_transform(train_labels.reshape(-1, 1))
        y_test_onehot = encoder.transform(test_labels.reshape(-1, 1))

        # Stratified train/validation split.
        X_fit, X_val, y_fit, y_val = train_test_split(
            X_train_flat, y_train_onehot,
            test_size=0.2, random_state=42, stratify=train_labels
        )

        print("📊 Datos completos:")
        print(f"   • Entrenamiento: {X_fit.shape}")
        print(f"   • Validación: {X_val.shape}")

        # Small subset so the pure-NumPy training finishes in reasonable time.
        n_train, n_val = 2000, 400
        X_train_small, y_train_small = X_fit[:n_train], y_fit[:n_train]
        X_val_small, y_val_small = X_val[:n_val], y_val[:n_val]

        print("🔬 Muestra de demostración:")
        print(f"   • Entrenamiento: {X_train_small.shape}")
        print(f"   • Validación: {X_val_small.shape}")

        # Model
        print("\n🏗️ Creando modelo CNN 1D...")
        model = Scratch1dCNNClassifier(
            conv_filters=[8, 16],
            conv_filter_sizes=[5, 3],
            fc_sizes=[32],
            n_classes=10,
            learning_rate=0.01
        )

        # Training
        print("\n🚀 Iniciando entrenamiento...")
        model.fit(
            X_train_small, y_train_small,
            X_val_small, y_val_small,
            epochs=15, batch_size=32, verbose=True
        )

        # Evaluation on the validation subset.
        print("\n📊 Evaluando modelo...")
        val_pred = model.predict(X_val_small)
        val_true = np.argmax(y_val_small, axis=1)
        val_accuracy = np.mean(val_pred == val_true)

        print(f"🎯 Precisión final: {val_accuracy:.4f}")

        # Visualisations
        model.plot_learning_curves()

        print("\n📋 Reporte de clasificación:")
        print(classification_report(val_true, val_pred))

        # Confusion matrix heat map.
        cm = confusion_matrix(val_true, val_pred)
        class_ticks = range(10)
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_ticks, yticklabels=class_ticks)
        plt.title('Matriz de Confusión - CNN 1D')
        plt.ylabel('Etiqueta Verdadera')
        plt.xlabel('Etiqueta Predicha')
        plt.show()

        print("\n🎉 ¡ENTRENAMIENTO COMPLETADO!")
        print(f"🏆 Precisión final: {val_accuracy:.1%}")

        return model

    except Exception as e:
        # Top-level boundary: report the failure and let main() carry on.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return None

# Progress marker: printed once the classifier and training function are defined; nothing has been trained yet.
print("✅ [Problema 8] Clasificador CNN 1D completo implementado")

# ============================================================================
# FUNCIÓN PRINCIPAL - EJECUTA TODOS LOS PROBLEMAS 1-8
# ============================================================================

def main():
    """
    Run the experiments for problems 3-7, then train the MNIST model (problem 8).

    Returns
    -------
    The value returned by ``train_cnn_on_mnist`` (the trained model, or None
    if training failed).
    """
    rule = "=" * 80
    print("\n🚀 EJECUTANDO TODOS LOS PROBLEMAS 1-8")
    print(rule)

    # Problems 3-7, in order: small arrays, multiple channels, padding,
    # mini-batches, stride.
    experiments = (
        test_simple_conv1d,
        test_conv1d_multiple_channels,
        test_padding,
        test_batch_processing,
        test_stride,
    )
    for run_experiment in experiments:
        run_experiment()

    print("\n" + rule)
    print("🎯 TODOS LOS EXPERIMENTOS BÁSICOS (1-7) COMPLETADOS")
    print(rule)

    # Problem 8: full CNN on MNIST.
    model = train_cnn_on_mnist()

    if not model:
        print("\n⚠️ Problemas 1-7 completados, error en Problema 8")
    else:
        print("\n✅ ÉXITO TOTAL: TODOS LOS PROBLEMAS 1-8 COMPLETADOS")

    return model

# Run the complete pipeline (problems 1-8) only when this file is executed
# as a script, not when it is imported.
if __name__ == "__main__":
    model = main()

🚀 CNN 1D DESDE CERO - IMPLEMENTACIÓN COMPLETA
✅ [Problema 1] Clase SimpleConv1d implementada
✅ [Problema 2] Función calculate_output_size implementada
✅ [Problema 3] Función de prueba test_simple_conv1d implementada
✅ [Problema 4] Clase Conv1d con múltiples canales implementada
✅ [Problema 5] Implementación de padding completada
✅ [Problema 6] Manejo de minilotes implementado
✅ [Problema 7] Cualquier número de pasos implementado
✅ [Problema 8] Clasificador CNN 1D completo implementado

🚀 EJECUTANDO TODOS LOS PROBLEMAS 1-8

[Problema 3] Experimento con matrices pequeñas
Entrada x: [1 2 3 4]
Pesos w: [3 5 7]
Sesgo b: [1]
Salida esperada: [35, 50]
Salida obtenida: [35. 50.]

Gradiente entrada delta_a: [10 20]
Gradiente sesgo esperado: [30]
Gradiente sesgo obtenido: [30]
Gradiente pesos esperado: [50, 80, 110]
Gradiente pesos obtenido: [ 50  80 110]
Gradiente entrada esperado: [30, 110, 170, 140]
Gradiente entrada obtenido: [ 30 110 170 140]

✅ Todas las pruebas pasaron correctamente!

[Pr