In [1]:
import numpy as np
import scipy.io as sp
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import math


In [2]:
#modificacion 

# Normalización y división de datos
def normalize_and_split_data(inputs, targets):
    scaler_X = RobustScaler()
    scaler_y = RobustScaler()
    inputs = scaler_X.fit_transform(inputs)
    targets = scaler_y.fit_transform(targets)
    inputs_train, inputs_test, targets_train, targets_test = train_test_split(inputs, targets, random_state=1, test_size=0.4)
    return inputs_train, inputs_test, targets_train, targets_test, scaler_X, scaler_y



In [3]:

def model(A, THETA):
    """
    Multivariate Linear Regression Model, Yh = A*THETA
    The matrix A is sometimes called the design matrix.
    """
    return A.dot(THETA)

In [4]:
# Design Matrix
def designMatrix(Tau,X):
    q,_ = X.shape
    for p in range(q):
        M = powerVector(Tau,X[p,:])
        if p == 0:
            A = M
            continue
        A = np.vstack((A,M))
    return A

# Power Vector M
# def powerVector(Tau, V):
#     if V.size == 0 or Tau == 0:
#         return np.array([1.0])
#     Z = V[:-1]
#     W = V[-1]
#     terms = []
#     for k in range(Tau + 1):
#         sub_terms = powerVector(Tau - k, Z)
#         for term in sub_terms:
#             terms.append(term * (W ** k))
#     return np.array(terms)

def powerVector(Tau, V):
    if V.size == 0 or Tau == 0:
        return np.array([[1.0]])  # Asegurar 2D
    Z = V[:-1]
    W = V[-1]
    terms = []
    for k in range(Tau + 1):
        sub_terms = powerVector(Tau - k, Z)
        for term in sub_terms.flatten():  # Manejar sub_terms 2D
            terms.append(term * (W ** k))
    return np.array([terms])  # Devolver como 2D (1, n_terms)


In [5]:
#Polynomial parameter number
# def polyParamsNumber(n,tau):
#     s = 0
#     for l in range(tau+1):
#         #val = np.math.factorial(l+n-1)/np.math.factorial(n-1)
#         val = math.factorial(l + n - 1) / (math.factorial(n - 1) * math.factorial(l))
#         val = val/np.math.factorial(l)
#         s = s + val
#     return int(s)


# def polyParamsNumber(n,tau):
#     s = 0
#     for l in range(tau+1):
#         val = math.factorial(l + n - 1) / (math.factorial(n - 1) * math.factorial(l))
#         val = val / math.factorial(l)  # Usar math en lugar de np.math
#         s += val
#     return int(s)

def polyParamsNumber(n, tau):
    return int(math.comb(n + tau, tau))


In [6]:

def loss(Y_true, Y_pred, THETA, lambda_param):
    """ Mean Squared Error """
    E = Y_true - Y_pred
    SSE = np.square(np.linalg.norm(E, 'fro'))
    Reg = (lambda_param/(2*E.shape[0]))*np.square(np.linalg.norm(THETA[1:,:], 'fro'))
    MSE = SSE/(2*E.shape[0]) + Reg
    return MSE

In [7]:

def gradient(A,E,THETA,lambda_param):
    """ MSE Gradient """
    SSEGrad = -1.0*(A.T).dot(E)
    MSEGrad = SSEGrad/E.shape[0]
    MSEGrad[1:,:] = MSEGrad[1:,:] + (lambda_param/E.shape[0])*THETA[1:,:]
    return MSEGrad



In [8]:
def gdx_optimization(
    X,
    Y,
    tau,
    lambda_param=0.0,
    maxEpochs=100,
    show=10,
    batch_size=16,
    learning_rate=0.01,
    momentum=0.9,
    lr_dec=0.5,
    lr_inc=1.05,
    max_perf_inc=1.04,
    stopping_threshold=1e-6,
):
    """Optimizador GDX con manejo de tamaño de lote variable"""
    n = X.shape[1]
    m = Y.shape[1]
    rho = polyParamsNumber(n, tau)
    #rho me esta dando porbelmas 
    THETA = np.random.randn(rho, m) *0.01 #reduce la escala de la inicalizaciones de los pesos
    delta_THETA = np.zeros((rho, m))
    lr = learning_rate
    q = X.shape[0]
    previous_loss = np.inf

    for epoch in range(maxEpochs + 1):
        THETA_prev = THETA.copy()
        current_loss = np.inf

        if batch_size < q:
            indices = np.random.permutation(q)
            X = X[indices, :]
            Y = Y[indices, :]

        n_batches = q // batch_size
        residual = q % batch_size
        total_batches = n_batches + 1 if residual != 0 else n_batches

        for batch_idx in range(total_batches):
            start = batch_idx * batch_size
            end = start + batch_size
            if batch_idx == total_batches - 1 and residual != 0:
                end = start + residual

            X_batch = X[start:end, :]
            Y_batch = Y[start:end, :]
            # A_batch = designMatrix(tau, X_batch)
            # Y_pred = model(A_batch, THETA)
            # E_batch = Y_batch - Y_pred

            A_batch = designMatrix(tau, X_batch)  # (batch_size, rho)
            Y_pred = model(A_batch, THETA)  # (batch_size, m)
            E_batch = Y_batch - Y_pred  # (batch_size, m)
            Grad = gradient(A_batch, E_batch, THETA, lambda_param)
            #evitar desbordamienteos 
            Grad = np.clip(Grad, -1e3, 1e3)
            delta_THETA = momentum * delta_THETA - (1 - momentum) * lr * Grad
            THETA += delta_THETA

        A = designMatrix(tau, X)
        Yh = model(A, THETA)
        current_loss = loss(Y, Yh, THETA, lambda_param)

        if current_loss > previous_loss * max_perf_inc:
            THETA = THETA_prev
            lr *= lr_dec
        elif current_loss < previous_loss:
            lr *= lr_inc

        if epoch % show == 0:
            print(f"Epoch {epoch}: Loss={current_loss:.3e}, lr={lr:.2e}")
        if abs(previous_loss - current_loss) < stopping_threshold:
            break
        previous_loss = current_loss

    return THETA


In [9]:
# Load dataset
mat = sp.loadmat('engine_dataset.mat')
#print(mat.keys())
inputs  = mat['engineInputs'].T
targets = mat['engineTargets'].T


In [10]:
# Train and Test Split Data modificacion
#inputs_train, inputs_test, targets_train, targets_test = train_test_split(inputs, targets, random_state=1, test_size=0.4)
inputs_train, inputs_test, targets_train, targets_test, scaler_X, scaler_y = normalize_and_split_data(inputs, targets)


In [11]:

# Train and Test Data
# Train Data
xTrain = inputs_train
tTrain = targets_train
# Test Data
xTest = inputs_test
tTest = targets_test


In [12]:
# mini batch
# Find the optimal parameters m and b with RMSprop
tau = 1
lambda_param = 0.01

THETA = gdx_optimization(
    xTrain,
    tTrain,
    tau=tau,
    lambda_param=lambda_param,
    maxEpochs=5000,
    show=100,
    batch_size=32,
    learning_rate=1e-3,
    momentum=0.9,          
    lr_dec=0.5,           
    lr_inc=1.05,
    max_perf_inc=1.04,
    stopping_threshold=1e-8,
)



Epoch 0: Loss=4.070e-01, lr=1.05e-03
Epoch 100: Loss=1.020e-01, lr=7.32e-02
Epoch 200: Loss=1.040e-01, lr=7.25e-01
Epoch 300: Loss=1.030e-01, lr=5.46e-01
Epoch 400: Loss=1.021e-01, lr=4.11e-01
Epoch 500: Loss=1.026e-01, lr=5.09e-01
Epoch 600: Loss=1.021e-01, lr=4.22e-01
Epoch 700: Loss=1.023e-01, lr=3.33e-01
Epoch 800: Loss=1.026e-01, lr=4.55e-01
Epoch 900: Loss=1.036e-01, lr=6.85e-01
Epoch 1000: Loss=1.022e-01, lr=4.91e-01
Epoch 1100: Loss=1.032e-01, lr=7.04e-01
Epoch 1200: Loss=1.037e-01, lr=8.30e-01
Epoch 1300: Loss=1.031e-01, lr=5.67e-01
Epoch 1400: Loss=1.024e-01, lr=8.12e-01
Epoch 1500: Loss=1.021e-01, lr=5.28e-01
Epoch 1600: Loss=1.023e-01, lr=3.11e-01
Epoch 1700: Loss=1.030e-01, lr=8.93e-01
Epoch 1800: Loss=1.023e-01, lr=5.53e-01
Epoch 1900: Loss=1.052e-01, lr=4.58e-01
Epoch 2000: Loss=1.048e-01, lr=6.26e-01
Epoch 2100: Loss=1.026e-01, lr=9.42e-01
Epoch 2200: Loss=1.057e-01, lr=7.09e-01
Epoch 2300: Loss=1.032e-01, lr=7.96e-01
Epoch 2400: Loss=1.040e-01, lr=8.52e-01
Epoch 2500: 

In [13]:

#ONLINE (1 a 1)
"""
tau = 1
lambda_param = 0.1
THETA = gdx_optimization(
    xTrain,
    tTrain,
    tau=tau,
    lambda_param=lambda_param,
    maxEpochs=10000,
    show=500,
    batch_size=1,
    learning_rate=1e-7,
    momentum=0.8,
    lr_dec=0.5,
    lr_inc=1.02,
    max_perf_inc=1.02,
    stopping_threshold=1e-6,
)
"""

'\ntau = 1\nlambda_param = 0.1\nTHETA = gdx_optimization(\n    xTrain,\n    tTrain,\n    tau=tau,\n    lambda_param=lambda_param,\n    maxEpochs=10000,\n    show=500,\n    batch_size=1,\n    learning_rate=1e-7,\n    momentum=0.8,\n    lr_dec=0.5,\n    lr_inc=1.02,\n    max_perf_inc=1.02,\n    stopping_threshold=1e-6,\n)\n'

In [14]:
"""
#lote
tau = 1
lambda_param = 0.1
THETA = gdx_optimization(
    xTrain,
    tTrain,
    tau=tau,
    lambda_param=lambda_param,
    maxEpochs=10000,
    show=500,
    batch_size=xTrain.shape[0],
    learning_rate=1e-7,
    momentum=0.8,          
    lr_dec=0.5,           
    lr_inc=1.02,
    max_perf_inc=1.02,
    stopping_threshold=1e-6,
) """


'\n#lote\ntau = 1\nlambda_param = 0.1\nTHETA = gdx_optimization(\n    xTrain,\n    tTrain,\n    tau=tau,\n    lambda_param=lambda_param,\n    maxEpochs=10000,\n    show=500,\n    batch_size=xTrain.shape[0],\n    learning_rate=1e-7,\n    momentum=0.8,          \n    lr_dec=0.5,           \n    lr_inc=1.02,\n    max_perf_inc=1.02,\n    stopping_threshold=1e-6,\n) '

In [15]:
# Make predictions
# Train data
A_train = designMatrix(tau,xTrain)
outputTrain = model(A_train,THETA)
# Test data
A_test = designMatrix(tau,xTest)
outputTest = model(A_test,THETA)


In [16]:

#desnormalizacion predicciones 
outputTrain_descaled = scaler_y.inverse_transform(outputTrain)
outputTest_descaled = scaler_y.inverse_transform(outputTest)


In [17]:
# desnormlaizar reales

targets_train_descaled = scaler_y.inverse_transform(targets_train).reshape(-1, 1)
targets_test_descaled = scaler_y.inverse_transform(targets_test).reshape(-1, 1)

In [18]:

# R2 for raw train data
R2_train = r2_score(targets_train_descaled, outputTrain_descaled.reshape(-1, 1))
print(R2_train)

0.8175166700243921


In [19]:
# MSE for raw train data
MSE_train = mean_squared_error(targets_train_descaled, outputTrain_descaled.reshape(-1, 1))
print(MSE_train)

47590.92020642195


In [20]:
# R2 for raw test data
R2_test = r2_score(targets_test_descaled, outputTest_descaled.reshape(-1, 1))
print(R2_test)

0.818961371109651


In [21]:
# MSE for raw test data
MSE_test = mean_squared_error(targets_test_descaled, outputTest_descaled.reshape(-1, 1))
print(MSE_test)

51171.802336793946


In [22]:
THETA

array([[-0.01427245, -0.16533663],
       [ 0.97275226,  0.81377015],
       [ 0.02376489, -0.21209691]])