# Perceptrón multicapa para regresión con 2 capas ocultas

In [2]:
#librerias
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

Funciones

## Clasificación

In [14]:
# Sigmoid activation
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-z)))`` through
    ``np.logaddexp`` so that large negative inputs saturate to 0 instead of
    overflowing inside ``exp(-z)``.

    Args:
        z: Scalar or array of pre-activations.

    Returns:
        Sigmoid of ``z`` with the same shape as the input.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z))
    return np.exp(-np.logaddexp(0., -z))

# One-hot encoding
def int_to_onehot(y, num_labels):
    """Encode integer class labels as one-hot rows.

    Args:
        y: 1-D array-like of integer labels, each in ``[0, num_labels)``.
        num_labels: Number of columns (classes) in the encoding.

    Returns:
        Float array of shape ``(len(y), num_labels)`` with a single 1 per row.
    """
    labels = np.asarray(y)
    num_rows = labels.shape[0]

    onehot = np.zeros((num_rows, num_labels))
    # Pair every row index with its label and set all entries in one assignment
    onehot[np.arange(num_rows), labels] = 1

    return onehot
# Cost function
def mse_loss(targets, probas, num_labels=10):
    """Mean squared error between one-hot targets and predicted outputs.

    ``targets`` holds integer labels; they are one-hot encoded with
    ``num_labels`` columns and compared element-wise against ``probas``.
    The mean is taken over every entry of the resulting array.
    """
    encoded = int_to_onehot(targets, num_labels=num_labels)
    squared_error = (encoded - probas) ** 2
    return np.mean(squared_error)

# Accuracy
def accuracy(targets, predicted_labels):
    """Return the fraction of positions where the prediction equals the target."""
    matches = predicted_labels == targets
    return np.mean(matches)
# Minibatch generator
def minibatch_generator(X, y, minibatch_size=100):
    """Yield shuffled ``(X_batch, y_batch)`` pairs of exactly ``minibatch_size`` rows.

    Row indices are shuffled with NumPy's global random state. Only full
    batches are produced: leftover rows that do not fill a batch are not
    yielded, and nothing is yielded when there are fewer rows than
    ``minibatch_size``.
    """
    num_rows = X.shape[0]
    order = np.arange(num_rows)
    np.random.shuffle(order)

    # Start offsets of every full batch
    batch_starts = range(0, num_rows - minibatch_size + 1, minibatch_size)
    for lo in batch_starts:
        hi = lo + minibatch_size
        picked = order[lo:hi]
        yield X[picked], y[picked]
# Model with two hidden layers
class NeuralNetMLP:
    """Multilayer perceptron with two sigmoid hidden layers and a sigmoid output layer.

    Weight matrices are stored as ``(units_out, units_in)`` and are drawn from
    a normal distribution with mean 0 and standard deviation 0.1; biases start
    at zero.
    """

    def __init__(self, num_features, num_hidden_1, num_hidden_2, num_classes, random_seed=123):
        super().__init__()

        self.num_classes = num_classes

        rng = np.random.RandomState(random_seed)

        def draw_weights(units_out, units_in):
            # Small random initial weights, shape (units_out, units_in)
            return rng.normal(loc=0.0, scale=0.1, size=(units_out, units_in))

        # The draws happen in the order h1, h2, out, so a given seed always
        # produces the same three matrices.
        self.weight_h1 = draw_weights(num_hidden_1, num_features)
        self.bias_h1 = np.zeros(num_hidden_1)

        self.weight_h2 = draw_weights(num_hidden_2, num_hidden_1)
        self.bias_h2 = np.zeros(num_hidden_2)

        self.weight_out = draw_weights(num_classes, num_hidden_2)
        self.bias_out = np.zeros(num_classes)

    def forward(self, x):
        """Run a forward pass and return ``(a_h1, a_h2, a_out)``.

        Each layer computes ``sigmoid(input @ W.T + b)``; the activations of
        both hidden layers are returned because ``backward`` needs them.
        """
        # First hidden layer
        a_h1 = sigmoid(np.dot(x, self.weight_h1.T) + self.bias_h1)

        # Second hidden layer
        a_h2 = sigmoid(np.dot(a_h1, self.weight_h2.T) + self.bias_h2)

        # Output layer
        a_out = sigmoid(np.dot(a_h2, self.weight_out.T) + self.bias_out)

        return a_h1, a_h2, a_out

    def backward(self, x, a_h1, a_h2, a_out, y, lambda_):
        """Backpropagate and return the weight and bias gradients of every layer.

        ``y`` holds integer labels and is one-hot encoded internally.
        ``lambda_ * W`` is added to each weight gradient (L2 penalty); the
        bias gradients are not regularised.

        Returns:
            Tuple ``(dW_out, db_out, dW_h2, db_h2, dW_h1, db_h1)``.
        """
        targets = int_to_onehot(y, self.num_classes)
        batch_size = y.shape[0]

        # Output layer: squared-error derivative times the sigmoid derivative
        grad_a_out = 2. * (a_out - targets) / batch_size
        delta_out = grad_a_out * (a_out * (1. - a_out))

        grad_w_out = np.dot(delta_out.T, a_h2)
        grad_w_out += lambda_ * self.weight_out
        grad_b_out = np.sum(delta_out, axis=0)

        # Second hidden layer: push delta_out back through the output weights
        delta_h2 = np.dot(delta_out, self.weight_out) * (a_h2 * (1. - a_h2))

        grad_w_h2 = np.dot(delta_h2.T, a_h1)
        grad_w_h2 += lambda_ * self.weight_h2
        grad_b_h2 = np.sum(delta_h2, axis=0)

        # First hidden layer: push delta_h2 back through the second-layer weights
        delta_h1 = np.dot(delta_h2, self.weight_h2) * (a_h1 * (1. - a_h1))

        grad_w_h1 = np.dot(delta_h1.T, x)
        grad_w_h1 += lambda_ * self.weight_h1
        grad_b_h1 = np.sum(delta_h1, axis=0)

        return (grad_w_out, grad_b_out,
                grad_w_h2, grad_b_h2,
                grad_w_h1, grad_b_h1)
### Compute MSE and accuracy with L2 regularization
def compute_mse_and_acc(nnet, X, y, lambda_, minibatch_size=100, num_labels=10):
    """Evaluate a classifier batch by batch.

    Args:
        nnet: Model exposing ``forward`` (returning three values, the last
            being the output activations) and the attributes ``weight_h1``,
            ``weight_h2`` and ``weight_out``.
        X: Feature matrix.
        y: Integer class labels.
        lambda_: L2 regularization strength.
        minibatch_size: Rows per evaluation batch. Only full batches are
            produced by ``minibatch_generator``, so trailing rows that do not
            fill a batch are left out of the metrics.
        num_labels: Number of classes used for the one-hot targets.

    Returns:
        ``(mse, acc)``: the mean of the per-batch MSE values plus the L2
        penalty, and the fraction of correctly classified examples.

    Raises:
        ValueError: If no batch could be formed (fewer rows than
            ``minibatch_size``).
    """
    # The penalty depends only on the weights, so compute it once up front
    l2_term = (lambda_ / 2.0) * (
        np.sum(nnet.weight_h1**2) +
        np.sum(nnet.weight_h2**2) +
        np.sum(nnet.weight_out**2)
    )

    batch_losses = []
    correct_pred, num_examples = 0, 0

    for features, targets in minibatch_generator(X, y, minibatch_size):
        # Forward pass; only the output activations are needed here
        _, _, probas = nnet.forward(features)
        predicted_labels = np.argmax(probas, axis=1)

        onehot_targets = int_to_onehot(targets, num_labels=num_labels)
        batch_losses.append(np.mean((onehot_targets - probas)**2))

        correct_pred += (predicted_labels == targets).sum()
        num_examples += targets.shape[0]

    if not batch_losses:
        raise ValueError(
            'No full minibatch available: the dataset has fewer rows than '
            'minibatch_size.')

    # Adding the penalty once to the averaged loss equals adding it per batch
    mse = sum(batch_losses) / len(batch_losses) + l2_term
    acc = correct_pred / num_examples
    return mse, acc

# Train the model
def train(model, X_train, y_train, X_valid, y_valid, num_epochs, lambda_,
          minibatch_size=100, learning_rate=0.1,):
    """Train a classifier with minibatch gradient descent and log metrics per epoch.

    Args:
        model: Network exposing ``forward``, ``backward`` and the
            ``weight_*`` / ``bias_*`` arrays for both hidden layers and the
            output layer.
        X_train, y_train: Training features and integer labels.
        X_valid, y_valid: Validation features and integer labels.
        num_epochs: Number of passes over the training data.
        lambda_: L2 regularization strength, forwarded to ``model.backward``
            and to the evaluation helper.
        minibatch_size: Rows per minibatch.
        learning_rate: Step size of each parameter update.

    Returns:
        ``(loss, valid_loss, epoch_train_acc, epoch_valid_acc)``: one entry
        per epoch; accuracies are percentages.
    """
    loss = []
    valid_loss = []
    epoch_train_acc = []
    epoch_valid_acc = []

    # Evaluate with the model's own class count instead of relying on the
    # evaluation helper's default of 10 labels.
    num_labels = getattr(model, 'num_classes', 10)

    for e in range(num_epochs):

        # Iterate over minibatches
        minibatch_gen = minibatch_generator(
            X_train, y_train, minibatch_size)

        for X_train_mini, y_train_mini in minibatch_gen:

            #### Compute outputs ####
            a_h1, a_h2, a_out = model.forward(X_train_mini)

            #### Compute gradients ####
            (d_loss__d_w_out, d_loss__d_b_out,
             d_loss__d_w_h2, d_loss__d_b_h2,
             d_loss__d_w_h1, d_loss__d_b_h1) = model.backward(
                X_train_mini, a_h1, a_h2, a_out, y_train_mini, lambda_)

            #### Update weights ####
            # First hidden layer
            model.weight_h1 -= learning_rate * d_loss__d_w_h1
            model.bias_h1 -= learning_rate * d_loss__d_b_h1

            # Second hidden layer
            model.weight_h2 -= learning_rate * d_loss__d_w_h2
            model.bias_h2 -= learning_rate * d_loss__d_b_h2

            # Output layer
            model.weight_out -= learning_rate * d_loss__d_w_out
            model.bias_out -= learning_rate * d_loss__d_b_out

        #### Epoch Logging ####
        train_mse, train_acc = compute_mse_and_acc(
            model, X_train, y_train, lambda_, minibatch_size,
            num_labels=num_labels)
        valid_mse, valid_acc = compute_mse_and_acc(
            model, X_valid, y_valid, lambda_, minibatch_size,
            num_labels=num_labels)
        train_acc, valid_acc = train_acc * 100, valid_acc * 100
        epoch_train_acc.append(train_acc)
        epoch_valid_acc.append(valid_acc)
        loss.append(train_mse)
        valid_loss.append(valid_mse)
        print(f'Epoch: {e+1:03d}/{num_epochs:03d} '
              f'| Train MSE: {train_mse:.3f} '
              f'| Valid MSE: {valid_mse:.3f} '
              f'| Train Acc: {train_acc:.2f}% '
              f'| Valid Acc: {valid_acc:.2f}%')

    return loss, valid_loss, epoch_train_acc, epoch_valid_acc


## Regresión

In [30]:
# ReLU activation used by the regression model
def relu(z):
    """Return ``max(0, z)`` element-wise."""
    floor = 0
    return np.maximum(floor, z)

# Derivative of ReLU
def relu_derivative(z):
    """Return 1.0 where ``z`` is strictly positive and 0.0 elsewhere (including at 0)."""
    is_positive = z > 0
    return is_positive.astype(float)

# MSE cost function for regression
def mse_loss_regression(targets, predictions):
    """Return the mean squared error between targets and predictions.

    When both inputs hold the same number of elements but differ in shape
    (for example ``(n,)`` targets against ``(n, 1)`` network outputs), the
    predictions are reshaped to the targets' shape first. Without that step
    the subtraction would broadcast to an ``(n, n)`` matrix and return a
    meaningless value.

    Args:
        targets: Array-like of true values.
        predictions: Array-like of predicted values.

    Returns:
        Mean of the squared element-wise differences.
    """
    targets = np.asarray(targets)
    predictions = np.asarray(predictions)

    if targets.shape != predictions.shape and targets.size == predictions.size:
        predictions = predictions.reshape(targets.shape)

    return np.mean((targets - predictions) ** 2)

# Minibatch generator
def minibatch_generator(X, y, minibatch_size=100):
    """Yield shuffled, full-size ``(X_batch, y_batch)`` pairs.

    The row order is shuffled with NumPy's global random state. Rows left
    over after the last full batch are not yielded.
    """
    shuffled = np.arange(X.shape[0])
    np.random.shuffle(shuffled)

    stop = shuffled.shape[0] - minibatch_size + 1
    for first in range(0, stop, minibatch_size):
        rows = shuffled[first:first + minibatch_size]
        yield X[rows], y[rows]

# Multilayer perceptron model for regression
class NeuralNetMLPRegression:
    """Multilayer perceptron with two ReLU hidden layers and one linear output unit.

    Weight matrices are stored as ``(units_out, units_in)`` and are drawn from
    a normal distribution with mean 0 and standard deviation 0.1; biases start
    at zero.
    """

    def __init__(self, num_features, num_hidden_1, num_hidden_2, random_seed=123):
        super().__init__()

        rng = np.random.RandomState(random_seed)

        def draw_weights(units_out, units_in):
            # Small random initial weights, shape (units_out, units_in)
            return rng.normal(loc=0.0, scale=0.1, size=(units_out, units_in))

        # Draw order h1, h2, out keeps the initialisation reproducible per seed
        self.weight_h1 = draw_weights(num_hidden_1, num_features)
        self.bias_h1 = np.zeros(num_hidden_1)

        self.weight_h2 = draw_weights(num_hidden_2, num_hidden_1)
        self.bias_h2 = np.zeros(num_hidden_2)

        # Single output unit
        self.weight_out = draw_weights(1, num_hidden_2)
        self.bias_out = np.zeros(1)

    def forward(self, x):
        """Run a forward pass and return ``(a_h1, a_h2, z_out)``.

        The output layer applies no activation, so ``z_out`` is the raw
        linear output with one column.
        """
        # First hidden layer
        a_h1 = relu(np.dot(x, self.weight_h1.T) + self.bias_h1)

        # Second hidden layer
        a_h2 = relu(np.dot(a_h1, self.weight_h2.T) + self.bias_h2)

        # Output layer (linear)
        z_out = np.dot(a_h2, self.weight_out.T) + self.bias_out
        return a_h1, a_h2, z_out

    def backward(self, x, a_h1, a_h2, output, y, lambda_):
        """Backpropagate and return the weight and bias gradients of every layer.

        ``lambda_ * W`` is added to each weight gradient (L2 penalty); the
        bias gradients are not regularised.

        Returns:
            Tuple ``(dW_out, db_out, dW_h2, db_h2, dW_h1, db_h1)``.
        """
        # Output layer. NOTE(review): the residual is used as-is, without a
        # 2 / batch_size factor, so these gradients are summed over the batch
        # (gradient of 0.5 * sum of squared errors, not of the mean).
        residual = output - y.reshape(-1, 1)

        grad_w_out = np.dot(residual.T, a_h2)
        grad_w_out += lambda_ * self.weight_out
        grad_b_out = np.sum(residual, axis=0)

        # Second hidden layer. The ReLU mask is taken from the activation,
        # which is positive exactly where the pre-activation is positive.
        delta_h2 = np.dot(residual, self.weight_out) * relu_derivative(a_h2)

        grad_w_h2 = np.dot(delta_h2.T, a_h1)
        grad_w_h2 += lambda_ * self.weight_h2
        grad_b_h2 = np.sum(delta_h2, axis=0)

        # First hidden layer
        delta_h1 = np.dot(delta_h2, self.weight_h2) * relu_derivative(a_h1)

        grad_w_h1 = np.dot(delta_h1.T, x)
        grad_w_h1 += lambda_ * self.weight_h1
        grad_b_h1 = np.sum(delta_h1, axis=0)

        return (grad_w_out, grad_b_out,
                grad_w_h2, grad_b_h2,
                grad_w_h1, grad_b_h1)

    def predict(self, X):
        """Return the network output for ``X`` as a single-column array."""
        # The continuous outputs are used directly as predictions
        *_, predictions = self.forward(X)
        return predictions
# Training loop for the regression model
def train_regression(model, X_train, y_train, X_valid, y_valid, num_epochs, lambda_,
                      minibatch_size=100, learning_rate=0.01):
    """Train the regression network with minibatch gradient descent.

    After every epoch the MSE over the full training and validation sets is
    computed, stored and printed.

    Returns:
        ``(loss, valid_loss)``: per-epoch training and validation MSE lists.
    """
    loss, valid_loss = [], []

    for e in range(num_epochs):
        for batch_X, batch_y in minibatch_generator(X_train, y_train, minibatch_size):
            a_h1, a_h2, batch_out = model.forward(batch_X)

            (grad_w_out, grad_b_out,
             grad_w_h2, grad_b_h2,
             grad_w_h1, grad_b_h1) = model.backward(
                batch_X, a_h1, a_h2, batch_out, batch_y, lambda_)

            # Gradient-descent step, applied in place to each parameter array
            parameter_updates = (
                (model.weight_h1, grad_w_h1),
                (model.bias_h1, grad_b_h1),
                (model.weight_h2, grad_w_h2),
                (model.bias_h2, grad_b_h2),
                (model.weight_out, grad_w_out),
                (model.bias_out, grad_b_out),
            )
            for parameter, gradient in parameter_updates:
                parameter -= learning_rate * gradient

        # Evaluate on the complete training and validation sets
        train_output = model.forward(X_train)[2]
        valid_output = model.forward(X_valid)[2]

        # Flatten the single-column outputs so they line up with the targets
        train_mse = mse_loss_regression(y_train, train_output.flatten())
        valid_mse = mse_loss_regression(y_valid, valid_output.flatten())

        loss.append(train_mse)
        valid_loss.append(valid_mse)

        print(f'Epoch: {e + 1:03d}/{num_epochs:03d} '
              f'| Train MSE: {train_mse:.5f} '
              f'| Valid MSE: {valid_mse:.5f}')

    return loss, valid_loss

Conjunto de datos

In [31]:
from sklearn.datasets import fetch_california_housing
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [5]:
# Load the California housing dataset through scikit-learn's fetcher
data = fetch_california_housing()

In [6]:
# Feature table with the regression target (median house value) appended as a column
df = pd.DataFrame(data.data, columns=data.feature_names).assign(
    MedHouseValue=data.target,
)

# Show the first rows of the DataFrame
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseValue
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column
df.describe()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseValue
count,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0
mean,3.870671,28.639486,5.429,1.096675,1425.476744,3.070655,35.631861,-119.569704,2.068558
std,1.899822,12.585558,2.474173,0.473911,1132.462122,10.38605,2.135952,2.003532,1.153956
min,0.4999,1.0,0.846154,0.333333,3.0,0.692308,32.54,-124.35,0.14999
25%,2.5634,18.0,4.440716,1.006079,787.0,2.429741,33.93,-121.8,1.196
50%,3.5348,29.0,5.229129,1.04878,1166.0,2.818116,34.26,-118.49,1.797
75%,4.74325,37.0,6.052381,1.099526,1725.0,3.282261,37.71,-118.01,2.64725
max,15.0001,52.0,141.909091,34.066667,35682.0,1243.333333,41.95,-114.31,5.00001


In [8]:
# Feature matrix and target vector as provided by the dataset object
X = data.data
y = data.target

In [9]:
# Display the raw feature matrix and target vector
X,y

(array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]))

In [22]:
# Split into training, validation and test sets.
# First hold out 20% of all rows for testing, then take 20% of the remaining
# rows for validation: roughly 64% train / 16% validation / 20% test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [23]:
# Standardise the features. The scaler is fitted on the training split only
# and then reused for the test and validation splits, so statistics from the
# held-out data never reach the training features.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_valid = scaler.transform(X_valid)

In [12]:
# scikit-learn baseline with the same layout as the hand-written network
mlp = MLPRegressor(
    hidden_layer_sizes=(100, 50),  # 2 hidden layers with 100 and 50 neurons
    activation='relu',            # ReLU activation for the hidden layers
    max_iter=1000,                # Higher iteration limit for more stable training
    alpha=0.0001,                 # L2 regularization (to limit overfitting)
    random_state=42
)
mlp.fit(X_train, y_train)

In [13]:
# Score the scikit-learn baseline on the held-out test split
y_pred = mlp.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"R² Score: {r2:.4f}")

Mean Squared Error (MSE): 0.2710
R² Score: 0.7932


## Probar modelo propio

In [40]:
# Build the hand-written network: one input per feature column, 100 and 50 hidden units
model = NeuralNetMLPRegression(
    num_features=X_train.shape[1],
    num_hidden_1=100,
    num_hidden_2=50,
)

In [41]:
# Train for 100 epochs, keeping the per-epoch training and validation MSE
train_loss, valid_loss = train_regression(
    model,
    X_train, y_train,
    X_valid, y_valid,
    num_epochs=100,
    lambda_=0.001,
    learning_rate=0.001,
)

Epoch: 001/100 | Train MSE: 0.46504 | Valid MSE: 0.50814
Epoch: 002/100 | Train MSE: 0.40701 | Valid MSE: 0.43047
Epoch: 003/100 | Train MSE: 0.37620 | Valid MSE: 0.40482
Epoch: 004/100 | Train MSE: 0.36538 | Valid MSE: 0.40420
Epoch: 005/100 | Train MSE: 0.37411 | Valid MSE: 0.40231
Epoch: 006/100 | Train MSE: 0.35904 | Valid MSE: 0.39869
Epoch: 007/100 | Train MSE: 0.33426 | Valid MSE: 0.36911
Epoch: 008/100 | Train MSE: 0.33469 | Valid MSE: 0.35899
Epoch: 009/100 | Train MSE: 0.33658 | Valid MSE: 0.38009
Epoch: 010/100 | Train MSE: 0.37795 | Valid MSE: 0.42097
Epoch: 011/100 | Train MSE: 0.31524 | Valid MSE: 0.34654
Epoch: 012/100 | Train MSE: 0.33222 | Valid MSE: 0.35799
Epoch: 013/100 | Train MSE: 0.30923 | Valid MSE: 0.33862
Epoch: 014/100 | Train MSE: 0.30240 | Valid MSE: 0.33112
Epoch: 015/100 | Train MSE: 0.31152 | Valid MSE: 0.34336
Epoch: 016/100 | Train MSE: 0.29748 | Valid MSE: 0.32853
Epoch: 017/100 | Train MSE: 0.33012 | Valid MSE: 0.37104
Epoch: 018/100 | Train MSE: 0.3

In [42]:
# Predict on the test split with the hand-written network.
# NOTE(review): predict() returns the single-column forward output, so y_pred
# is presumably (n, 1) while y_test is 1-D — confirm the metrics below accept that.
y_pred = model.predict(X_test)

In [43]:
# Test-set metrics for the hand-written network
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

for report_line in (f"Mean Squared Error (MSE): {mse:.4f}",
                    f"R² Score: {r2:.4f}"):
    print(report_line)

Mean Squared Error (MSE): 0.2797
R² Score: 0.7866
