# Predicción de Contaminación MP10 con Perceptrón Mejorado

Este notebook implementa un perceptrón simple para regresión con técnicas avanzadas como:
- Batch Normalization
- Gradient Clipping
- Weight Decay
- Warm Restarts (cosine annealing LR)

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import math

In [2]:
# Cargar datos
df = pd.read_csv("AllData1.csv")
X = df.drop(columns=["mp10_val"]).values
y = df["mp10_val"].values.reshape(-1, 1)

# Escalado
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

# División
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

## Perceptron simple

In [7]:
# Modelo Perceptrón con mejoras
class PerceptronRegresionAvanzado:
  def __init__(self, input_size, lr=0.01, lambda_wd=1e-4, clip_value=1.0, eta_min=0.001, eta_max=0.1, T=50):
    self.W = np.random.randn(input_size, 1) * np.sqrt(2. / input_size)
    self.b = np.zeros((1,))
    self.lr = lr
    self.lambda_wd = lambda_wd
    self.clip_value = clip_value
    self.eta_min = eta_min
    self.eta_max = eta_max
    self.T = T
    self.gamma = np.ones((1,))
    self.beta = np.zeros((1,))

  def batch_norm(self, z):
    mu = np.mean(z, axis=0)
    sigma = np.std(z, axis=0)
    z_norm = (z - mu) / (sigma + 1e-8)
    return self.gamma * z_norm + self.beta

  def forward(self, X):
    z = np.dot(X, self.W) + self.b
    z_bn = self.batch_norm(z)
    return z_bn

  def backward(self, X, y_true, y_pred, t):
    m = X.shape[0]
    grad = (y_pred - y_true) / m

    dW = np.dot(X.T, grad)
    db = np.sum(grad)

    # Weight decay
    dW += self.lambda_wd * self.W

    # Clipping
    dW = np.clip(dW, -self.clip_value, self.clip_value)
    db = np.clip(db, -self.clip_value, self.clip_value)

    # Cosine annealing LR
    lr_t = self.eta_min + 0.5 * (self.eta_max - self.eta_min) * (1 + math.cos(math.pi * t / self.T))

    # Actualizar
    self.W -= lr_t * dW
    self.b -= lr_t * db

  def train(self, X, y, epochs=100):
    for epoch in range(epochs):
      y_pred = self.forward(X)
      loss = np.mean((y_pred - y) ** 2)
      self.backward(X, y, y_pred, epoch % self.T)
      if epoch % 10 == 0:
        print(f"Época {epoch}: MSE={loss:.4f}")

  def predict(self, X):
    return self.forward(X)

In [8]:
# Entrenamiento del modelo
modelo = PerceptronRegresionAvanzado(input_size=X.shape[1])
modelo.train(X_train, y_train, epochs=100)

Época 0: MSE=2.4471
Época 10: MSE=1.5986
Época 20: MSE=1.3326
Época 30: MSE=1.2981
Época 40: MSE=1.3027
Época 50: MSE=1.3055
Época 60: MSE=1.6429
Época 70: MSE=1.5526
Época 80: MSE=1.5035
Época 90: MSE=1.3802


In [10]:
# Evaluación final
y_pred_test = modelo.predict(X_test)
mse_test = np.mean((y_pred_test - y_test) ** 2)
mse_real = np.mean((scaler_y.inverse_transform(y_pred_test) - scaler_y.inverse_transform(y_test)) ** 2)
print(f"MSE (escalado): {mse_test:.4f}")
print(f"MSE real (μg/m³): {mse_real:.4f}")

MSE (escalado): 1.7072
MSE real (μg/m³): 114.4546


## Perceptron Multicapa

In [11]:
# MLP con 2 capas ocultas y mejoras avanzadas
def relu(x): return np.maximum(0, x)
def relu_derivada(x): return (x > 0).astype(float)

class MLP_Regresion_Mejorado:
  def __init__(self, capas, lr=0.01, lambda_wd=1e-4, clip_value=1.0, eta_min=0.001, eta_max=0.1, T=50):
    self.capas = capas
    self.lr = lr
    self.lambda_wd = lambda_wd
    self.clip_value = clip_value
    self.eta_min = eta_min
    self.eta_max = eta_max
    self.T = T

    self.W = []
    self.b = []
    self.gamma = []
    self.beta = []

    for i in range(len(capas) - 1):
      self.W.append(np.random.randn(capas[i], capas[i+1]) * np.sqrt(2. / capas[i]))
      self.b.append(np.zeros((1, capas[i+1])))
      self.gamma.append(np.ones((1, capas[i+1])))
      self.beta.append(np.zeros((1, capas[i+1])))

  def batch_norm(self, z, gamma, beta):
    mu = np.mean(z, axis=0)
    sigma = np.std(z, axis=0)
    z_norm = (z - mu) / (sigma + 1e-8)
    return gamma * z_norm + beta

  def forward(self, X):
    self.a = [X]
    self.z = []
    for i in range(len(self.W) - 1):
      z_i = np.dot(self.a[-1], self.W[i]) + self.b[i]
      z_bn = self.batch_norm(z_i, self.gamma[i], self.beta[i])
      a_i = relu(z_bn)
      self.z.append(z_bn)
      self.a.append(a_i)
    z_final = np.dot(self.a[-1], self.W[-1]) + self.b[-1]
    self.z.append(z_final)
    self.a.append(z_final)
    return z_final

  def backward(self, y_true, t):
    m = y_true.shape[0]
    grad = (self.a[-1] - y_true) / m

    for i in reversed(range(len(self.W))):
      dW = np.dot(self.a[i].T, grad)
      db = np.sum(grad, axis=0, keepdims=True)

      dW += self.lambda_wd * self.W[i]
      dW = np.clip(dW, -self.clip_value, self.clip_value)
      db = np.clip(db, -self.clip_value, self.clip_value)

      lr_t = self.eta_min + 0.5 * (self.eta_max - self.eta_min) * (1 + math.cos(math.pi * t / self.T))
      self.W[i] -= lr_t * dW
      self.b[i] -= lr_t * db

      if i != 0:
        grad = np.dot(grad, self.W[i].T) * relu_derivada(self.z[i-1])

  def train(self, X, y, epochs=100):
    for epoch in range(epochs):
      y_pred = self.forward(X)
      loss = np.mean((y_pred - y) ** 2)
      self.backward(y, epoch % self.T)
      if epoch % 10 == 0:
        print(f"Época {epoch}: MSE={loss:.4f}")

  def predict(self, X):
    return self.forward(X)

In [12]:
# Entrenar el modelo
mlp = MLP_Regresion_Mejorado(capas=[X.shape[1], 32, 16, 1])
mlp.train(X_train, y_train, epochs=100)

Época 0: MSE=1.8933
Época 10: MSE=0.8015
Época 20: MSE=0.6687
Época 30: MSE=0.6198
Época 40: MSE=0.6022
Época 50: MSE=0.5988
Época 60: MSE=0.5379
Época 70: MSE=0.4979
Época 80: MSE=0.4750
Época 90: MSE=0.4656


In [14]:
# Evaluación
y_pred_test = mlp.predict(X_test)
mse_test = np.mean((y_pred_test - y_test) ** 2)
mse_real = np.mean((scaler_y.inverse_transform(y_pred_test) - scaler_y.inverse_transform(y_test)) ** 2)
print(f" MSE (escalado): {mse_test:.4f}")
print(f" MSE real (μg/m³): {mse_real:.4f}")

 MSE (escalado): 1.0516
 MSE real (μg/m³): 70.5021
