In [30]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import itertools


In [31]:
df = pd.read_csv("./data/importacion.csv", parse_dates=["Fecha"])
df.set_index("Fecha", inplace=True)

In [32]:
# Preparacion gasolina regular
regular_series = df["Gasolina regular"]

regular_split = int(len(regular_series) * 0.7)
regular_train = regular_series.iloc[:regular_split] 
regular_test = regular_series.iloc[regular_split:]

regular_scaler = MinMaxScaler()
regular_train_scaled = regular_scaler.fit_transform(regular_train.values.reshape(-1, 1))
regular_test_scaled = regular_scaler.transform(regular_test.values.reshape(-1, 1))

In [33]:
# Preparacion gasolina superior
super_series = df["Gasolina superior"]

super_split = int(len(regular_series) * 0.7)
super_train = super_series.iloc[:super_split] 
super_test = super_series.iloc[super_split:]

super_scaler = MinMaxScaler()
super_train_scaled = super_scaler.fit_transform(super_train.values.reshape(-1, 1))
super_test_scaled = super_scaler.transform(super_test.values.reshape(-1, 1))

In [34]:
# Preparacion gasolina diesel
diesel_series = df["Diesel alto azufre"]

diesel_split = int(len(diesel_series) * 0.7)
diesel_train = diesel_series.iloc[:diesel_split]
diesel_test = diesel_series.iloc[diesel_split:]

diesel_scaler = MinMaxScaler()
diesel_train_scaled = diesel_scaler.fit_transform(diesel_train.values.reshape(-1, 1))
diesel_test_scaled = diesel_scaler.transform(diesel_test.values.reshape(-1,1))

In [35]:
def create_sequences(data, window=12):
    X, y = [], []
    for i in range(window, len(data)):
        X.append(data[i-window:i])
        y.append(data[i])
    return np.array(X), np.array(y)


In [36]:
# Para Gasolina regular
window_size = 12
X_reg_train, y_reg_train = create_sequences(regular_train_scaled, window=window_size)
X_reg_test,  y_reg_test  = create_sequences(regular_test_scaled,  window=window_size)

# Para Gasolina superior
X_sup_train, y_sup_train = create_sequences(super_train_scaled, window=window_size)
X_sup_test,  y_sup_test  = create_sequences(super_test_scaled,  window=window_size)

# Para Gasolina diesel
x_dis_train, y_dis_train = create_sequences(diesel_train_scaled, window=window_size)
x_dis_test, y_dis_test = create_sequences(diesel_test_scaled, window=window_size)

In [37]:
# Redimensiona al formato [muestras, timesteps, features]
# Regular
X_reg_train = X_reg_train.reshape(-1, window_size, 1)
X_reg_test  = X_reg_test.reshape(-1,  window_size, 1)

# Superior
X_sup_train = X_sup_train.reshape(-1, window_size, 1)
X_sup_test  = X_sup_test.reshape(-1,  window_size, 1)

# Diesel
x_dis_train = x_dis_train.reshape(-1, window_size, 1)
x_dis_test = x_dis_test.reshape(-1, window_size, 1)

In [38]:
# Convierte NumPy → Torch Tensor

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Gasolina regular
X_reg_train_t = torch.tensor(X_reg_train, dtype=torch.float32).to(device)
y_reg_train_t = torch.tensor(y_reg_train, dtype=torch.float32).to(device)
X_reg_test_t  = torch.tensor(X_reg_test,  dtype=torch.float32).to(device)
y_reg_test_t  = torch.tensor(y_reg_test,  dtype=torch.float32).to(device)

# Gasolina superior
X_sup_train_t = torch.tensor(X_sup_train, dtype=torch.float32).to(device)
y_sup_train_t = torch.tensor(y_sup_train, dtype=torch.float32).to(device)
X_sup_test_t  = torch.tensor(X_sup_test,  dtype=torch.float32).to(device)
y_sup_test_t  = torch.tensor(y_sup_test,  dtype=torch.float32).to(device)

# Gasolina diesel
x_dis_train_t = torch.tensor(x_dis_train, dtype=torch.float32).to(device)
y_dis_train_t = torch.tensor(y_dis_train, dtype=torch.float32).to(device)
x_dis_test_t = torch.tensor(x_dis_test, dtype=torch.float32).to(device)
y_dis_test_t = torch.tensor(y_dis_test, dtype=torch.float32).to(device)


In [39]:
# Dataset y DataLoader
batch_size = 32

# Gasolina regular
train_ds = TensorDataset(X_reg_train_t, y_reg_train_t)
test_ds  = TensorDataset(X_reg_test_t,  y_reg_test_t)

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_ds,  batch_size=batch_size)

# Gasolina superior
train_ds_sup = TensorDataset(X_sup_train_t, y_sup_train_t)
test_ds_sup  = TensorDataset(X_sup_test_t,  y_sup_test_t)

train_loader_sup = DataLoader(train_ds_sup, batch_size=64, shuffle=True)
test_loader_sup  = DataLoader(test_ds_sup,  batch_size=64)

# Gasolina diesel
train_ds_dis = TensorDataset(x_dis_train_t, y_dis_train_t)
test_ds_dis = TensorDataset(x_dis_test_t, y_dis_test_t)

train_loader_dis = DataLoader(train_ds_dis, batch_size=64, shuffle=True)
test_loader_dis = DataLoader(test_ds_dis, batch_size=64)

In [40]:

# Define dos arquitecturas de LSTM
class LSTMModelA(nn.Module):
    def __init__(self, input_size=1, hidden_size=50, dropout_rate=0.2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(hidden_size, 1)
        
    def forward(self, x):
        out, _ = self.lstm(x)
        last = out[:, -1, :]
        y = self.dropout(last)
        return self.fc(y)

In [41]:
class LSTMModelB(nn.Module):
    def __init__(self, input_size=1, hidden_sizes=(100,50)):
        super().__init__()
        self.lstm1 = nn.LSTM(input_size,  hidden_sizes[0], batch_first=True)
        self.dropout1 = nn.Dropout(0.3)
        self.lstm2 = nn.LSTM(hidden_sizes[0], hidden_sizes[1], batch_first=True)
        self.fc     = nn.Linear(hidden_sizes[1], 1)
        
    def forward(self, x):
        out, _ = self.lstm1(x)
        out     = self.dropout1(out)
        out, _ = self.lstm2(out)
        last    = out[:, -1, :]
        return self.fc(last)

In [42]:
# Bucle de entrenamiento y validación
def train_model(model, train_loader, test_loader, epochs=50, lr=1e-3):
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    best_loss = float('inf')
    patience, counter = 5, 0
    
    for epoch in range(1, epochs+1):
        # Training
        model.train()
        running = 0.0
        for xb, yb in train_loader:
            optimizer.zero_grad()
            preds = model(xb).squeeze()
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()
            running += loss.item() * xb.size(0)
        train_loss = running / len(train_loader.dataset)
        
        # Validation
        model.eval()
        running = 0.0
        with torch.no_grad():
            for xb, yb in test_loader:
                preds = model(xb).squeeze()
                running += criterion(preds, yb).item() * xb.size(0)
        val_loss = running / len(test_loader.dataset)
        
        print(f"Epoch {epoch} — Train MSE: {train_loss:.4f} — Val MSE: {val_loss:.4f}")
        
        if val_loss < best_loss:
            best_loss = val_loss
            counter = 0
            torch.save(model.state_dict(), "best_modelA.pth")
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping.")
                break
    
    model.load_state_dict(torch.load("best_modelA.pth"))
    return model, best_loss

In [43]:
param_grid = {
    'hidden_size': [50, 100],
    'dropout_rate': [0.2, 0.3],
    'lr': [1e-3, 1e-4],
    'batch_size': [32, 64],
}
results = []

In [44]:
for hs, dr, lr, bs in itertools.product(
        param_grid['hidden_size'],
        param_grid['dropout_rate'],
        param_grid['lr'],
        param_grid['batch_size']
    ):
    print(f"Probando hs={hs}, dr={dr}, lr={lr}, bs={bs}…")

    # Redefine modelo con estos hiperparámetros
    model = LSTMModelA(input_size=1, hidden_size=hs, dropout_rate=dr)

    # DataLoaders con nuevo batch_size
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=bs)
    
    # Entrena y captura el val_loss
    _, val_loss = train_model(model, train_loader, test_loader, epochs=30, lr=lr)
    results.append({
        'hidden_size': hs,
        'dropout_rate': dr,
        'lr': lr,
        'batch_size': bs,
        'val_loss': val_loss
    })


Probando hs=50, dr=0.2, lr=0.001, bs=32…
Epoch 1 — Train MSE: 0.1633 — Val MSE: 1.1606
Epoch 2 — Train MSE: 0.1167 — Val MSE: 0.9447
Epoch 3 — Train MSE: 0.0741 — Val MSE: 0.6761
Epoch 4 — Train MSE: 0.0527 — Val MSE: 0.4197
Epoch 5 — Train MSE: 0.0575 — Val MSE: 0.4568


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 6 — Train MSE: 0.0511 — Val MSE: 0.6214
Epoch 7 — Train MSE: 0.0533 — Val MSE: 0.6681
Epoch 8 — Train MSE: 0.0542 — Val MSE: 0.6310
Epoch 9 — Train MSE: 0.0510 — Val MSE: 0.5377
Early stopping.
Probando hs=50, dr=0.2, lr=0.001, bs=64…
Epoch 1 — Train MSE: 0.1708 — Val MSE: 1.1474
Epoch 2 — Train MSE: 0.1378 — Val MSE: 1.0041
Epoch 3 — Train MSE: 0.1053 — Val MSE: 0.8574
Epoch 4 — Train MSE: 0.0777 — Val MSE: 0.7009
Epoch 5 — Train MSE: 0.0601 — Val MSE: 0.5539


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 6 — Train MSE: 0.0525 — Val MSE: 0.4404
Epoch 7 — Train MSE: 0.0534 — Val MSE: 0.3808
Epoch 8 — Train MSE: 0.0555 — Val MSE: 0.3969
Epoch 9 — Train MSE: 0.0547 — Val MSE: 0.4390
Epoch 10 — Train MSE: 0.0516 — Val MSE: 0.4962
Epoch 11 — Train MSE: 0.0505 — Val MSE: 0.5005
Epoch 12 — Train MSE: 0.0503 — Val MSE: 0.5217
Early stopping.
Probando hs=50, dr=0.2, lr=0.0001, bs=32…
Epoch 1 — Train MSE: 0.1087 — Val MSE: 1.1120
Epoch 2 — Train MSE: 0.1059 — Val MSE: 1.0865
Epoch 3 — Train MSE: 0.1003 — Val MSE: 1.0617
Epoch 4 — Train MSE: 0.0958 — Val MSE: 1.0383
Epoch 5 — Train MSE: 0.0912 — Val MSE: 1.0171
Epoch 6 — Train MSE: 0.0878 — Val MSE: 0.9966
Epoch 7 — Train MSE: 0.0849 — Val MSE: 0.9755
Epoch 8 — Train MSE: 0.0814 — Val MSE: 0.9558
Epoch 9 — Train MSE: 0.0782 — Val MSE: 0.9348
Epoch 10 — Train MSE: 0.0756 — Val MSE: 0.9149
Epoch 11 — Train MSE: 0.0728 — Val MSE: 0.8964
Epoch 12 — Train MSE: 0.0710 — Val MSE: 0.8774
Epoch 13 — Train MSE: 0.0676 — Val MSE: 0.8583
Epoch 14 — Trai

In [45]:
df_results = pd.DataFrame(results)
print(df_results.sort_values('val_loss').head())

    hidden_size  dropout_rate      lr  batch_size  val_loss
9           100           0.2  0.0010          64  0.212905
4            50           0.3  0.0010          32  0.324617
13          100           0.3  0.0010          64  0.356820
1            50           0.2  0.0010          64  0.380771
10          100           0.2  0.0001          32  0.384292


In [46]:
# Mejores hiperparametros
best_params_reg = {
    'hidden_size': 100,
    'dropout_rate': 0.3,
    'lr': 1e-3,
    'batch_size': 64
}

### Mejor modelo para gasolina regular

In [47]:
# Definir modelo con estos hieperparametros
model_reg = LSTMModelA(
    input_size=1, 
    hidden_size=best_params_reg['hidden_size'],
    dropout_rate=best_params_reg['dropout_rate']
)

train_loader_reg = DataLoader(train_ds, batch_size=best_params_reg['batch_size'], shuffle=True)
test_loader_reg  = DataLoader(test_ds,  batch_size=best_params_reg['batch_size'])


# Entrena y captura la mejor pérdida de validación
model_reg, best_loss_reg = train_model(
    model_reg,
    train_loader_reg,
    test_loader_reg,
    epochs=50,
    lr=best_params_reg['lr']
)
print(f"Mejor Val MSE (regular): {best_loss_reg:.4f}")

Epoch 1 — Train MSE: 0.1642 — Val MSE: 1.1269
Epoch 2 — Train MSE: 0.1171 — Val MSE: 0.9335
Epoch 3 — Train MSE: 0.0806 — Val MSE: 0.7179
Epoch 4 — Train MSE: 0.0559 — Val MSE: 0.4862
Epoch 5 — Train MSE: 0.0531 — Val MSE: 0.3736
Epoch 6 — Train MSE: 0.0584 — Val MSE: 0.4330


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 7 — Train MSE: 0.0514 — Val MSE: 0.5512
Epoch 8 — Train MSE: 0.0504 — Val MSE: 0.6294
Epoch 9 — Train MSE: 0.0538 — Val MSE: 0.6631
Epoch 10 — Train MSE: 0.0555 — Val MSE: 0.6551
Early stopping.
Mejor Val MSE (regular): 0.3736


In [48]:

# Predicciones
model_reg.eval()
preds_reg = []
with torch.no_grad():
    for xb, _ in test_loader_reg:
        preds_reg.extend(model_reg(xb).squeeze().cpu().numpy())
preds_reg = np.array(preds_reg).reshape(-1,1)

# Inversión de escala
preds_reg_inv = regular_scaler.inverse_transform(preds_reg)
y_reg_true    = regular_test.values[window_size:]

# Metricas
mae_reg  = mean_absolute_error(y_reg_true, preds_reg_inv)
rmse_reg = np.sqrt(mean_squared_error(y_reg_true, preds_reg_inv))
print(f"Gasolina Regular: \n\nMAE: {mae_reg:.4f} \nRMSE: {rmse_reg:.4f}")


Gasolina Regular: 

MAE: 342398.5285 
RMSE: 364936.5787


### Mejor modelo para gasolina Superior

In [49]:
model_sup = LSTMModelA(input_size=1, hidden_size=100, dropout_rate=0.3)

model_sup, best_loss_sup = train_model(
    model_sup,
    train_loader_sup,
    test_loader_sup,
    epochs=50,
    lr=1e-3
)
print(f"Mejor Val MSE (superior): {best_loss_sup:.4f}")

Epoch 1 — Train MSE: 0.1540 — Val MSE: 0.3782
Epoch 2 — Train MSE: 0.1023 — Val MSE: 0.2804
Epoch 3 — Train MSE: 0.0644 — Val MSE: 0.1830
Epoch 4 — Train MSE: 0.0456 — Val MSE: 0.1199
Epoch 5 — Train MSE: 0.0446 — Val MSE: 0.1366
Epoch 6 — Train MSE: 0.0439 — Val MSE: 0.1611
Epoch 7 — Train MSE: 0.0456 — Val MSE: 0.1759


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 8 — Train MSE: 0.0458 — Val MSE: 0.1675
Epoch 9 — Train MSE: 0.0452 — Val MSE: 0.1612
Early stopping.
Mejor Val MSE (superior): 0.1199


In [50]:
# Predicciones
model_sup.eval()
preds_sup = []
with torch.no_grad():
    for xb, _ in test_loader_sup:
        preds_sup.extend(model_sup(xb).squeeze().cpu().numpy())
preds_sup = np.array(preds_sup).reshape(-1,1)

# Inversión de escala
preds_sup_inv = super_scaler.inverse_transform(preds_sup)
y_sup_true    = super_test.values[window_size:]

# Métricas
mae_sup  = mean_absolute_error(y_sup_true, preds_sup_inv)
rmse_sup = np.sqrt(mean_squared_error(y_sup_true, preds_sup_inv))
print(f"Gasolina Superior:  \n\nMAE: {mae_sup:.4f} \nRMSE: {rmse_sup:.4f}")


Gasolina Superior:  

MAE: 179196.5457 
RMSE: 209866.8716


### Mejor modelo para Diesel

In [52]:
model_dis = LSTMModelA(input_size=1, hidden_size=100, dropout_rate=0.3)

model_dis, best_loss_dis = train_model(
    model_dis,
    train_loader_dis,
    test_loader_dis,
    epochs=50,
    lr=1e-3
)
print(f"Mejor Val MSE (diesel): {best_loss_dis:.4f}")

Epoch 1 — Train MSE: 0.2631 — Val MSE: 0.0033
Epoch 2 — Train MSE: 0.1783 — Val MSE: 0.0208
Epoch 3 — Train MSE: 0.1031 — Val MSE: 0.0745
Epoch 4 — Train MSE: 0.0342 — Val MSE: 0.2312
Epoch 5 — Train MSE: 0.0443 — Val MSE: 0.2057
Epoch 6 — Train MSE: 0.0263 — Val MSE: 0.1285
Early stopping.
Mejor Val MSE (diesel): 0.0033


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


In [53]:
# Predicciones
model_dis.eval()
preds_dis = []
with torch.no_grad():
    for xb, _ in test_loader_dis:
        preds_dis.extend(model_dis(xb).squeeze().cpu().numpy())
preds_dis = np.array(preds_dis).reshape(-1, 1)

# Inversion de escala
preds_dis_inv = diesel_scaler.inverse_transform(preds_dis)
y_dis_true = diesel_test.values[window_size:]

# Metricas
mae_dis = mean_squared_error(y_dis_true, preds_dis_inv)
rmse_dis = np.sqrt(mean_squared_error(y_dis_true, preds_dis_inv))
print(f"Gasolina Diesel: \n\nMae: {mae_dis:.4f} \nRMSE: {rmse_dis:.4f}")

Gasolina Diesel: 

Mae: 8460750124.5191 
RMSE: 91982.3359


### Comparacion modelos Laboratorio 1 vs Laboratorio 2

##### __Laboratorio 1 – Modelos ARIMA__
- Gasolina Regular

    Modelo usado para predicción de los últimos 36 meses: ARIMA(45,1,20)

    MAE: 102 164.24 

    RMSE: 124 108.98 

- Gasolina Superior

    Modelo usado para predicción de los últimos 36 meses: ARIMA(45,1,20)
    
    MAE: 93 291.85 

    RMSE: 130 298.21 

- Gasolina Diesel

    Sobre la ultima serie filtrada hasta diciembre 2017: ARIMA(45,1,20)

    MAE: 182 448.53

    RMSE: 228 476.56

##### __Laboratorio 2 – Modelos LSTM__
- Gasolina Regular

    MAE: 361 909.9981

    RMSE: 383 606.3430



- Gasolina Superior

    MAE: 116 476.7082

    RMSE: 155 397.9649



- Gasolina Diesel: 

    MAE: 8460750124.5191

    RMSE: 91982.3359


Para comparar los modelos usamos dos criterios claros sobre la serie original:

* **MAE** (error absoluto medio), que nos dice cuánto, en promedio, se equivocan las predicciones.
* **RMSE** (raíz del error cuadrático medio), que penaliza más los errores grandes.


En **Gasolina Regular**, el LSTM obtuvo un **MAE = 361 910** y un **RMSE = 383 606**, mientras que el ARIMA(45,1,20) del laboratorio 1 logró un **MAE = 102 164** y **RMSE = 124 109** . Es decir, el ARIMA se quedó mucho más cerca de los valores reales, con errores alrededor de un tercio de los del LSTM. Por eso, para esta serie el ARIMA sigue siendo el ganador claro.

En **Gasolina Superior**, el LSTM bajó el error respecto al caso anterior (MAE = 116 477, RMSE = 155 398), pero el ARIMA(45,1,20) todavía lo superó con un **MAE = 93 292** y **RMSE = 130 298** . Aunque la diferencia es menor que en la serie regular, de nuevo el ARIMA entrega predicciones más ajustadas.

Para **Gasolina Diésel** vemos un comportamiento curioso: el LSTM reportó un **RMSE = 91 982**, que técnicamente es mejor que el **228 477** del ARIMA, pero su **MAE = 8 460 750 124** es colosal, lo que indica que hubo predicciones completamente fuera de escala. En cambio, el ARIMA(45,1,20) mantuvo ambos errores en rangos razonables (MAE = 182 449, RMSE = 228 477) . Esto sugiere que el LSTM, al no capturar bien la dinámica de la serie diésel, produjo picos de error muy grandes que no reflejan la realidad.

**Conclusión**:

* **ARIMA(45,1,20)** es el mejor modelo para las tres series cuando se mide con MAE y RMSE en la escala original.
* El **LSTM**, aunque prometedor en otros contextos, necesita más ajustes (arquitectura, datos de entrada o exógenas) para alcanzar a los modelos clásicos en estas series de gasolina.
