# Importaciones y Dependencias



In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.special import erf  # Para calcular la función de error

# Funciones de Activación

In [None]:
def sigmoid(z):
    """Compute the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: Scalar or array of pre-activation values.

    Returns:
        The sigmoid of ``z`` (a NumPy scalar for scalar input, otherwise an
        array with the shape of ``z``).
    """
    # Clipping keeps np.exp from overflowing for very negative inputs.
    z = np.clip(z, -500, 500)
    # The denominator is always >= 1, so no zero-division guard is needed;
    # the former in-place guard also raised TypeError for scalar inputs.
    return 1 / (1 + np.exp(-z))

def sigmoid_derivative(z):
    """Return the sigmoid derivative ``s * (1 - s)`` where ``s = sigmoid(z)``.

    Args:
        z: Pre-activation values accepted by ``sigmoid``.
    """
    activated = sigmoid(z)
    return activated * (1 - activated)

def softmax(z):
    """Compute a numerically stable softmax.

    A 1-D input is treated as a single score vector; any other input is
    normalised along axis 1 (one distribution per row for a 2-D matrix).

    Args:
        z: Array of scores (1-D vector or 2-D matrix of shape
            ``(samples, classes)``).

    Returns:
        Array of the same shape as ``z`` whose entries along the normalised
        axis sum to 1.
    """
    z = np.asarray(z)
    axis = 0 if z.ndim == 1 else 1
    # Shift by the maximum of each distribution (not the global maximum):
    # every row then contains exp(0) = 1, so the denominator can never
    # underflow to zero and no division guard is required.
    shifted = z - np.max(z, axis=axis, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=axis, keepdims=True)

def tanh(z):
    """Apply the hyperbolic tangent element-wise to ``z``."""
    activated = np.tanh(z)
    return activated

def tanh_derivative(z):
    """Return ``1 - tanh(z)^2``, the derivative of tanh at ``z``."""
    activated = np.tanh(z)
    return 1 - activated ** 2

def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and ``z``."""
    floor = 0
    return np.maximum(floor, z)

def relu_derivative(z):
    """Return 1 where ``z`` is strictly positive and 0 elsewhere."""
    is_positive = z > 0
    return np.where(is_positive, 1, 0)

def leaky_relu(z, alpha=0.01):
    """Leaky ReLU: ``z`` where positive, ``alpha * z`` otherwise.

    Args:
        z: Pre-activation values.
        alpha: Slope applied to non-positive inputs.
    """
    leaked = alpha * z
    return np.where(z > 0, z, leaked)

def leaky_relu_derivative(z, alpha=0.01):
    """Derivative of leaky ReLU: 1 where ``z > 0``, ``alpha`` elsewhere.

    Args:
        z: Pre-activation values.
        alpha: Slope used for non-positive inputs.
    """
    is_positive = z > 0
    return np.where(is_positive, 1, alpha)

def elu(z, alpha=1.0):
    """Exponential linear unit: ``z`` if positive, else ``alpha * (exp(z) - 1)``.

    Args:
        z: Pre-activation values.
        alpha: Scale of the negative saturation branch.

    Returns:
        Element-wise ELU of ``z``.
    """
    # np.where evaluates both branches for every element, so the exponential
    # is only fed the non-positive part of z; this avoids overflow warnings
    # for large positive inputs whose exponential would be discarded anyway.
    # expm1 keeps precision for values close to zero.
    negative_branch = alpha * np.expm1(np.minimum(z, 0))
    return np.where(z > 0, z, negative_branch)

def elu_derivative(z, alpha=1.0):
    """Derivative of ELU: 1 where ``z > 0``, ``alpha * exp(z)`` elsewhere.

    Args:
        z: Pre-activation values.
        alpha: Scale of the negative branch.

    Returns:
        Element-wise derivative of ELU at ``z``.
    """
    # Clamp to the non-positive range before exponentiating: np.where
    # evaluates both branches, and exp of a large positive value would
    # overflow even though that result is discarded.
    negative_branch = alpha * np.exp(np.minimum(z, 0))
    return np.where(z > 0, 1, negative_branch)

def swish(z):
    """Swish activation: ``z`` multiplied by ``sigmoid(z)``."""
    gate = sigmoid(z)
    return z * gate

def swish_derivative(z):
    """Derivative of swish: ``s + z * s * (1 - s)`` with ``s = sigmoid(z)``."""
    gate = sigmoid(z)
    slope_term = z * gate * (1 - gate)
    return gate + slope_term

def gelu(z):
    """GELU activation in its exact erf form: ``0.5 * z * (1 + erf(z / sqrt(2)))``."""
    erf_term = 1 + erf(z / np.sqrt(2))
    return 0.5 * z * erf_term

def gelu_derivative(z):
    """Derivative of the erf-based GELU.

    Sum of the ``0.5 * (1 + erf(z / sqrt(2)))`` term and
    ``z * exp(-z^2 / 2) / sqrt(2 * pi)``.
    """
    cdf_term = 0.5 * (1 + erf(z / np.sqrt(2)))
    density_term = (z / np.sqrt(2 * np.pi)) * np.exp(-0.5 * z**2)
    return cdf_term + density_term


# Funciones de Optimización

In [None]:
def gradient_descent_update(weights, grads, learning_rate):
    """Return ``weights`` moved one plain gradient-descent step.

    Args:
        weights: Current parameter values.
        grads: Gradient of the loss with respect to ``weights``.
        learning_rate: Step size.

    Returns:
        ``weights - learning_rate * grads`` as a new value.
    """
    step = learning_rate * grads
    return weights - step

def adam_update(weights, grads, v, s, t, beta1, beta2, learning_rate, epsilon):
    """Apply one Adam step and return the new weights and moment estimates.

    Args:
        weights: Current parameter values.
        grads: Gradient of the loss with respect to ``weights``.
        v: Running first-moment (mean) estimate of the gradients.
        s: Running second-moment (squared gradient) estimate.
        t: 1-based step counter used for bias correction.
        beta1: Decay rate of the first moment.
        beta2: Decay rate of the second moment.
        learning_rate: Step size.
        epsilon: Small constant added to the denominator for stability.

    Returns:
        Tuple ``(weights, v, s)`` with the updated values. The input arrays
        are not modified.

    Raises:
        ValueError: If ``t`` is smaller than 1, which would make the bias
            correction divide by zero.
    """
    if t < 1:
        raise ValueError("t must be >= 1 for Adam bias correction.")

    # Update the first- and second-order moment estimates.
    v = beta1 * v + (1 - beta1) * grads
    s = beta2 * s + (1 - beta2) * (grads ** 2)

    # Bias correction compensates for the zero initialisation of v and s.
    v_hat = v / (1 - beta1 ** t)
    s_hat = s / (1 - beta2 ** t)

    # Build a new array instead of using ``-=`` so the caller's array is not
    # mutated, matching gradient_descent_update.
    weights = weights - learning_rate * v_hat / (np.sqrt(s_hat) + epsilon)
    return weights, v, s

def rmsprop_update(weights, grads, cache, rho, learning_rate, epsilon):
    """Apply one RMSprop step and return the new weights and gradient cache.

    Args:
        weights: Current parameter values.
        grads: Gradient of the loss with respect to ``weights``.
        cache: Running average of squared gradients.
        rho: Decay rate of the running average.
        learning_rate: Step size.
        epsilon: Small constant added to the denominator for stability.

    Returns:
        Tuple ``(weights, cache)`` with the updated values. The input arrays
        are not modified.
    """
    # Exponential moving average of the squared gradients.
    cache = rho * cache + (1 - rho) * (grads ** 2)

    # Build a new array instead of using ``-=`` so the caller's array is not
    # mutated, matching gradient_descent_update.
    weights = weights - learning_rate * grads / (np.sqrt(cache) + epsilon)
    return weights, cache

# Carga y Preprocesamiento de Datos

In [None]:
def normalizar_caracteristicas(X):
    """Standardise features to zero mean and unit standard deviation.

    Statistics are computed along axis 0 (per column for a 2-D matrix).

    Args:
        X: Feature array exposing ``mean``/``std`` (e.g. a NumPy array).

    Returns:
        ``(X - mean) / std``; features with zero standard deviation are
        divided by 1 instead, so they come out as zeros rather than NaN.
    """
    media = X.mean(axis=0)
    desviacion = X.std(axis=0)
    # np.where instead of masked assignment: it also works when the
    # statistic is a scalar (1-D input), where item assignment would fail.
    desviacion = np.where(desviacion == 0, 1, desviacion)
    return (X - media) / desviacion

def one_hot(y, num_classes):
    """One-hot encode integer class labels.

    Args:
        y: Integer label(s) used to index the rows of an identity matrix.
        num_classes: Number of classes (length of each encoded row).

    Returns:
        The rows of the ``num_classes`` x ``num_classes`` identity matrix
        selected by ``y``.
    """
    identity = np.eye(num_classes)
    return identity[y]

def load_csv(file_path):
    """Load a CSV dataset and split it into features and integer labels.

    The file must contain a ``variety`` column holding one of ``Setosa``,
    ``Versicolor`` or ``Virginica``; every other column is used as a feature.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        Tuple ``(X, y)``: the feature matrix and the integer-encoded labels
        (0, 1, 2 in the order listed above).

    Raises:
        ValueError: If the ``variety`` column contains a value outside the
            known label set.
    """
    df = pd.read_csv(file_path)
    label_to_int = {'Setosa': 0, 'Versicolor': 1, 'Virginica': 2}
    labels = df['variety'].map(label_to_int)

    # Series.map turns unknown labels into NaN; fail loudly here instead of
    # letting NaN labels break the one-hot encoding later on.
    unmapped = labels.isna()
    if unmapped.any():
        unknown = sorted(df.loc[unmapped, 'variety'].astype(str).unique())
        raise ValueError(f"Unknown labels in 'variety' column: {unknown}")

    # Select the label column by name so the split does not depend on it
    # being the last column of the file.
    X = df.drop(columns='variety').values
    y = labels.astype(int).values
    return X, y

def train_val_test_split(X, y, train_size=0.7, val_size=0.15, test_size=0.15):
    """Randomly split ``X`` and ``y`` into train, validation and test sets.

    Args:
        X: Feature array indexed along its first axis.
        y: Label array aligned with the rows of ``X``.
        train_size: Fraction of samples assigned to the training set.
        val_size: Fraction of samples assigned to the validation set.
        test_size: Fraction of samples assigned to the test set.

    Returns:
        Tuple ``(X_train, X_val, X_test, y_train, y_val, y_test)``. Train and
        validation sizes are truncated to integers; the test set receives
        all remaining samples.

    Raises:
        ValueError: If the three fractions do not add up to 1.
    """
    # Tolerant comparison: exact equality rejects valid fractions such as
    # 0.7 + 0.2 + 0.1 because of floating-point rounding. A real exception
    # is used because assert statements are stripped under ``python -O``.
    if not np.isclose(train_size + val_size + test_size, 1.0):
        raise ValueError("La suma de los tamaños debe ser 1.")

    n_samples = X.shape[0]
    indices = np.random.permutation(n_samples)
    train_end = int(n_samples * train_size)
    val_end = train_end + int(n_samples * val_size)

    train_indices = indices[:train_end]
    val_indices = indices[train_end:val_end]
    test_indices = indices[val_end:]
    return X[train_indices], X[val_indices], X[test_indices], y[train_indices], y[val_indices], y[test_indices]


# Definición de la Clase Optimizer

In [None]:
class Optimizer:
    """Dispatches parameter updates to gradient descent, Adam or RMSprop.

    Weights are passed as a dict keyed by parameter name (e.g. ``'W1'``) and
    gradients as a dict keyed by the same name prefixed with ``'d'`` (e.g.
    ``'dW1'``). Per-parameter optimizer state is created lazily on the first
    update of each parameter.
    """

    def __init__(self, optimizer_type, learning_rate=0.01, beta1=0.9, beta2=0.999, rho=0.9, epsilon=1e-8):
        """Store the hyperparameters and create empty optimizer state.

        Args:
            optimizer_type: ``'gd'``, ``'adam'`` or ``'rmsprop'``.
            learning_rate: Step size shared by all optimizers.
            beta1: Adam first-moment decay rate.
            beta2: Adam second-moment decay rate.
            rho: RMSprop decay rate.
            epsilon: Stability constant added to denominators.
        """
        self.optimizer_type = optimizer_type
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.rho = rho
        self.epsilon = epsilon
        self.t = 0  # Adam step counter
        self.v = {}  # Adam first-moment estimates, keyed by parameter name
        self.s = {}  # Adam second-moment estimates, keyed by parameter name
        self.cache = {}  # RMSprop squared-gradient averages

    def update(self, weights, grads):
        """Apply one update step with the configured optimizer.

        Args:
            weights: Dict of parameter arrays.
            grads: Dict of gradients keyed by ``'d' + parameter name``.

        Returns:
            The ``weights`` dict holding the updated parameters.

        Raises:
            ValueError: If ``optimizer_type`` is not a supported name
                (previously this silently returned ``None``).
        """
        if self.optimizer_type == 'gd':  # Gradient Descent
            return self.gradient_descent(weights, grads)
        if self.optimizer_type == 'adam':  # Adam optimizer
            return self.adam(weights, grads)
        if self.optimizer_type == 'rmsprop':  # RMSprop
            return self.rmsprop(weights, grads)
        raise ValueError(f"Unknown optimizer type: {self.optimizer_type!r}")

    def gradient_descent(self, weights, grads):
        """Update every parameter in place with plain gradient descent."""
        for name in weights:
            weights[name] -= self.learning_rate * grads['d' + name]
        return weights

    def adam(self, weights, grads):
        """Update every parameter with Adam, advancing the step counter once."""
        self.t += 1
        for name in weights:
            grad = grads['d' + name]
            weights[name], self.v[name], self.s[name] = adam_update(
                weights[name],
                grad,
                # Moments start at zero the first time a parameter is seen.
                self.v.get(name, np.zeros_like(grad)),
                self.s.get(name, np.zeros_like(grad)),
                self.t,
                self.beta1,
                self.beta2,
                self.learning_rate,
                self.epsilon,
            )
        return weights

    def rmsprop(self, weights, grads):
        """Update every parameter with RMSprop."""
        for name in weights:
            grad = grads['d' + name]
            weights[name], self.cache[name] = rmsprop_update(
                weights[name],
                grad,
                # The cache starts at zero the first time a parameter is seen.
                self.cache.get(name, np.zeros_like(grad)),
                self.rho,
                self.learning_rate,
                self.epsilon,
            )
        return weights


# Definición de la Clase NeuralNetwork

In [None]:
class NeuralNetwork:
    """Fully connected network with one hidden layer and a softmax output.

    The hidden layer uses the activation supplied at construction time and
    training minimises the cross-entropy loss with full-batch updates
    delegated to an ``Optimizer``.
    """

    def __init__(self, input_size, hidden_size, output_size, activation, activation_derivative, optimizer_type):
        """Create the network and initialise its parameters.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            activation: Hidden-layer activation, called on the pre-activation.
            activation_derivative: Derivative of ``activation``, also called
                on the pre-activation.
            optimizer_type: Optimizer name forwarded to ``Optimizer``.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.activation = activation
        self.activation_derivative = activation_derivative
        self.optimizer = Optimizer(optimizer_type)
        self.weights = self._initialize_weights()

        # Zero-filled buffers shaped like the weights. They are not read by
        # any method of this class (the Optimizer keeps its own state) and
        # are kept only so existing attribute access keeps working.
        self.v = self._zeros_like_weights()
        self.s = self._zeros_like_weights()
        self.cache = self._zeros_like_weights()
        self.t = 0  # Time-step counter for Adam

    def _zeros_like_weights(self):
        """Return a dict of zero arrays with the same keys/shapes as the weights."""
        return {name: np.zeros_like(value) for name, value in self.weights.items()}

    def _initialize_weights(self):
        """Return small random weight matrices and zero bias rows."""
        W1 = np.random.randn(self.input_size, self.hidden_size) * 0.01
        b1 = np.zeros((1, self.hidden_size))
        W2 = np.random.randn(self.hidden_size, self.output_size) * 0.01
        b2 = np.zeros((1, self.output_size))
        return {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

    @staticmethod
    def _cross_entropy(y_one_hot, y_pred):
        """Mean cross-entropy between one-hot targets and predicted probabilities."""
        # The 1e-8 offset keeps log() finite when a probability is exactly 0.
        return -np.sum(y_one_hot * np.log(y_pred + 1e-8)) / y_one_hot.shape[0]

    def forward_propagation(self, X):
        """Run a forward pass.

        Args:
            X: Input matrix with one sample per row.

        Returns:
            Tuple ``(A2, cache)`` where ``A2`` holds the softmax outputs and
            ``cache`` is ``(Z1, A1, Z2, A2)`` for use in back-propagation.
        """
        # Look parameters up by name rather than relying on dict ordering.
        W1, b1 = self.weights['W1'], self.weights['b1']
        W2, b2 = self.weights['W2'], self.weights['b2']
        Z1 = np.dot(X, W1) + b1
        A1 = self.activation(Z1)
        Z2 = np.dot(A1, W2) + b2
        A2 = softmax(Z2)
        return A2, (Z1, A1, Z2, A2)

    def backward_propagation(self, X, y, cache):
        """Compute gradients of the mean cross-entropy loss.

        Args:
            X: Input matrix used in the forward pass.
            y: One-hot encoded targets.
            cache: ``(Z1, A1, Z2, A2)`` returned by ``forward_propagation``.

        Returns:
            Dict with keys ``'dW1'``, ``'db1'``, ``'dW2'`` and ``'db2'``.
        """
        W2 = self.weights['W2']
        Z1, A1, Z2, A2 = cache
        m = X.shape[0]

        # Softmax combined with cross-entropy yields this simple output error.
        dZ2 = A2 - y
        dW2 = np.dot(A1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m

        dA1 = np.dot(dZ2, W2.T)
        # The derivative functions expect the pre-activation Z1; evaluating
        # them at the activated values A1 gives wrong gradients.
        dZ1 = dA1 * self.activation_derivative(Z1)
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        return {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2}

    def update_weights(self, grads, learning_rate):
        """Update the weights with the configured optimizer.

        Args:
            grads: Gradient dict produced by ``backward_propagation``.
            learning_rate: Step size to use for this update.
        """
        # Forward the requested step size; otherwise the optimizer would
        # keep its construction-time default and ignore this argument.
        self.optimizer.learning_rate = learning_rate
        self.weights = self.optimizer.update(self.weights, grads)

    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``."""
        y_pred, _ = self.forward_propagation(X)
        return np.argmax(y_pred, axis=1)

    def train(self, X, y, X_val, y_val, epochs, learning_rate, patience):
        """Train with full-batch updates and early stopping on validation loss.

        Args:
            X: Training inputs.
            y: Integer training labels.
            X_val: Validation inputs.
            y_val: Integer validation labels.
            epochs: Maximum number of epochs.
            learning_rate: Step size passed to ``update_weights``.
            patience: Number of epochs without validation improvement
                tolerated before stopping.

        Returns:
            Tuple of four lists: training loss, training accuracy,
            validation loss and validation accuracy, one entry per epoch.
        """
        y_one_hot = one_hot(y, self.output_size)
        y_val_one_hot = one_hot(y_val, self.output_size)

        loss_history = []
        accuracy_history = []
        val_loss_history = []
        val_accuracy_history = []

        best_val_loss = float('inf')  # Lowest validation loss seen so far
        epochs_without_improvement = 0

        for epoch in range(epochs):
            # Training metrics are measured before this epoch's update.
            y_pred, cache = self.forward_propagation(X)
            loss = self._cross_entropy(y_one_hot, y_pred)
            loss_history.append(loss)

            accuracy = calculate_accuracy(y, np.argmax(y_pred, axis=1))
            accuracy_history.append(accuracy)

            # Validation metrics.
            y_val_pred, _ = self.forward_propagation(X_val)
            val_loss = self._cross_entropy(y_val_one_hot, y_val_pred)
            val_loss_history.append(val_loss)

            val_accuracy = calculate_accuracy(y_val, np.argmax(y_val_pred, axis=1))
            val_accuracy_history.append(val_accuracy)

            stop, best_val_loss, epochs_without_improvement = early_stopping(
                val_loss, best_val_loss, patience, epochs_without_improvement
            )
            if stop:
                print(f"Parada temprana en la época {epoch + 1}. Mejor pérdida de validación: {best_val_loss:.4f}")
                break

            grads = self.backward_propagation(X, y_one_hot, cache)
            self.update_weights(grads, learning_rate)

            # Report progress every 100 epochs.
            if epoch % 100 == 0:
                print(f"Época {epoch}, Pérdida: {loss:.4f}, Precisión: {accuracy:.2f}%, "
                      f"Pérdida Validación: {val_loss:.4f}, Precisión Validación: {val_accuracy:.2f}%")

        return loss_history, accuracy_history, val_loss_history, val_accuracy_history


# Función de Precisión y Early Stopping

In [None]:
def calculate_accuracy(y_true, y_pred):
    """Return the percentage of predictions that equal the true labels.

    Args:
        y_true: Array of true labels.
        y_pred: Array of predicted labels aligned with ``y_true``.

    Returns:
        Accuracy as a percentage (fraction of matches times 100).
    """
    n_samples = y_true.shape[0]
    n_hits = np.sum(y_true == y_pred)
    return (n_hits / n_samples) * 100

def early_stopping(val_loss, best_val_loss, patience, epochs_without_improvement):
    """Decide whether training should stop based on the validation loss.

    Args:
        val_loss: Validation loss of the current epoch.
        best_val_loss: Lowest validation loss seen so far.
        patience: Number of non-improving epochs tolerated.
        epochs_without_improvement: Current count of non-improving epochs.

    Returns:
        Tuple ``(stop, best_val_loss, epochs_without_improvement)`` with the
        stop flag and the updated tracking values.
    """
    improved = val_loss < best_val_loss
    if improved:
        # New best loss: adopt it and reset the stagnation counter.
        return False, val_loss, 0

    stalled_epochs = epochs_without_improvement + 1
    should_stop = stalled_epochs >= patience
    return should_stop, best_val_loss, stalled_epochs

# Entrenamiento y Pruebas

In [None]:
# Activation functions paired with their derivatives, keyed by display name.
activation_functions = {
    'sigmoid': (sigmoid, sigmoid_derivative),
    'tanh': (tanh, tanh_derivative),
    'relu': (relu, relu_derivative),
    'leaky_relu': (leaky_relu, leaky_relu_derivative),
    'elu': (elu, elu_derivative),
    'swish': (swish, swish_derivative),
    'gelu': (gelu, gelu_derivative)
}

# Hyperparameter grid: optimizers, learning rates and hidden-layer sizes.
optimizers = ['gd', 'adam', 'rmsprop']
learning_rates = [0.001, 0.01, 0.1]
hidden_sizes = [50, 100]

# Load and standardise the data, then split it 80/10/10.
X, y = load_csv("iris.csv")
X = normalizar_caracteristicas(X)
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(X, y, train_size=0.8, val_size=0.1, test_size=0.1)

# Train one network per combination of activation, optimizer, learning rate
# and hidden-layer size, plotting the training curves of each run.
for activation_name, (activation, activation_derivative) in activation_functions.items():
    for optimizer in optimizers:
        for learning_rate in learning_rates:
            for hidden_size in hidden_sizes:
                print(f"\nEntrenando con optimizador: {optimizer}, función de activación: {activation_name}, "
                      f"tasa de aprendizaje: {learning_rate}, tamaño de capa oculta: {hidden_size}")

                # Build a fresh network for this configuration.
                nn = NeuralNetwork(input_size=4, hidden_size=hidden_size, output_size=3, activation=activation, activation_derivative=activation_derivative, optimizer_type=optimizer)

                # Train it, keeping the per-epoch metric histories.
                loss_history, accuracy_history, val_loss_history, val_accuracy_history = nn.train(
                    X_train, y_train, X_val, y_val, epochs=10000, learning_rate=learning_rate, patience=1000
                )

                # One figure with two panels: training loss and training accuracy.
                fig, ax = plt.subplots(1, 2, figsize=(20, 5))

                ax[0].plot(loss_history, label='Pérdida', color='red')
                ax[0].set_title(f"Pérdida durante el Entrenamiento\n({optimizer}, {activation_name})\n"
                                f"lr={learning_rate}, hidden_size={hidden_size}")
                ax[0].set_xlabel("Épocas")
                ax[0].set_ylabel("Pérdida")
                ax[0].legend()

                ax[1].plot(accuracy_history, label='Precisión', color='blue')
                ax[1].set_title(f"Precisión durante el Entrenamiento\n({optimizer}, {activation_name})\n"
                                f"lr={learning_rate}, hidden_size={hidden_size}")
                ax[1].set_xlabel("Épocas")
                ax[1].set_ylabel("Precisión (%)")
                ax[1].legend()

                plt.tight_layout()
                plt.show()
                # Release the figure explicitly: the grid produces 126 of
                # them, and unclosed figures accumulate in memory on
                # backends where show() does not dispose of them.
                plt.close(fig)

                # Accuracy on the training set for this configuration.
                y_pred_train = nn.predict(X_train)
                train_accuracy = calculate_accuracy(y_train, y_pred_train)
                print(f"Precisión en el conjunto de entrenamiento con {optimizer}, {activation_name}, "
                      f"tasa de aprendizaje: {learning_rate}, tamaño de capa oculta: {hidden_size}: {train_accuracy:.2f}%")


Output hidden; open in https://colab.research.google.com to view.