# LSTM

In [16]:
print(df["Instalacion"].unique())


[]


In [17]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objects as go

# --- 1. Cargar datos ---
df=pd.read_excel("base_datos_energia_hdd_consumo.xlsx")
inst = "M218807"
df = df[df["Instalacion"] == inst].sort_values("Date")

# --- 2. Escalado ---
scaler = MinMaxScaler()
scaled = scaler.fit_transform(df[["HDD", "Demanda"]])
df[["HDD", "Demanda"]] = scaled

# --- 3. Crear secuencias de entrenamiento ---
def create_sequences(data, seq_len=7):
    X, y = [], []
    for i in range(len(data) - seq_len):
        X.append(data[i:i+seq_len, 0])  # HDD como input
        y.append(data[i+seq_len, 1])    # energía como output
    return np.array(X), np.array(y)

data = df[["HDD", "Demanda"]].values
X, y = create_sequences(data, seq_len=7)

# Fechas asociadas a cada valor predicho
fechas = df["Date"].iloc[7:].reset_index(drop=True)

# Dar forma para LSTM: [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))

# --- 4. Definir modelo LSTM ---
model = Sequential([
    LSTM(64, input_shape=(X.shape[1], 1), return_sequences=False),
    Dense(32, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')

# --- 5. Entrenamiento ---
model.fit(X, y, epochs=50, batch_size=16, validation_split=0.1, verbose=1)

# --- 6. Predicción ---
y_pred = model.predict(X)

# --- 7. Desescalar para comparar ---
pred = np.zeros((len(y_pred), 2))
pred[:, 1] = y_pred[:, 0]
orig = np.zeros((len(y), 2))
orig[:, 1] = y

inv_pred = scaler.inverse_transform(pred)[:, 1]
inv_orig = scaler.inverse_transform(orig)[:, 1]

# --- 8. Crear series temporales por día ---
# Como los datos son diarios, usamos las fechas directamente (no por hora)
real_series = pd.Series(inv_orig, index=fechas)
pred_series = pd.Series(inv_pred, index=fechas)

# --- 9. Visualización interactiva con Plotly ---
trace_real = go.Scatter(
    x=real_series.index, y=real_series.values,
    mode='lines+markers', name='Real', line=dict(color='blue')
)
trace_pred = go.Scatter(
    x=pred_series.index, y=pred_series.values,
    mode='lines+markers', name='Predicho', line=dict(color='red')
)

fig = go.Figure(data=[trace_real, trace_pred])

fig.update_layout(
    title=f'Predicción diaria de energía ({inst})',
    xaxis_title='Fecha',
    yaxis_title='Demanda',
    template='plotly_white',
    hovermode='x unified'
)

fig.show()


  super().__init__(**kwargs)


Epoch 1/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 0.0884 - val_loss: 0.0376
Epoch 2/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0325 - val_loss: 0.0306
Epoch 3/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0247 - val_loss: 0.0291
Epoch 4/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0221 - val_loss: 0.0305
Epoch 5/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0204 - val_loss: 0.0260
Epoch 6/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0219 - val_loss: 0.0254
Epoch 7/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0203 - val_loss: 0.0242
Epoch 8/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0248 - val_loss: 0.0223
Epoch 9/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [19]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np


df=pd.read_excel("base_datos_energia_hdd_consumo.xlsx")
# --- 1. Filtrar instalación y preparar serie ---
inst = "M18807"
df_inst = df[df["Instalacion"] == inst].sort_values("Date")

# Asegurarse de que la columna Date sea datetime
df_inst["Date"] = pd.to_datetime(df_inst["Date"])

# Usamos la columna "Demanda" como serie temporal
serie = df_inst.set_index("Date")["Demanda"]

# --- 2. División entrenamiento / test ---
train_size = int(len(serie) * 0.8)
train, test = serie.iloc[:train_size], serie.iloc[train_size:]

# --- 3. Entrenar modelo ARIMA ---
# ARIMA(p, d, q): ajusta estos parámetros si quieres afinar
model = ARIMA(train, order=(5, 1, 2))
model_fit = model.fit()

print(model_fit.summary())

# --- 4. Predicciones ---
forecast = model_fit.forecast(steps=len(test))
forecast.index = test.index  # alineamos las fechas

# --- 5. Métricas ---
mae = mean_absolute_error(test, forecast)
rmse = mean_squared_error(test, forecast, squared=False)
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")

# --- 6. Visualización ---
plt.figure(figsize=(12,5))
plt.plot(train.index, train, label="Entrenamiento", color="gray")
plt.plot(test.index, test, label="Real (Test)", color="blue")
plt.plot(forecast.index, forecast, label="Predicción (ARIMA)", color="red")
plt.title(f"Predicción diaria de energía ({inst}) - ARIMA(5,1,2)")
plt.xlabel("Fecha")
plt.ylabel("Demanda")
plt.legend()
plt.show()



A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


Too few observations to estimate starting parameters for ARMA and trend. All parameters except for variances will be set to zeros.


invalid value encountered in scalar divide



LinAlgError: Schur decomposition solver error.

# Biomasa

In [None]:
import statsmodels.formula.api as smf

df = pd.read_excel("base_datos_biomasa.xlsx")

modelo = smf.ols("ConsumoBiomasa ~ DemandaEnergetica + C(Instalacion)", data=df).fit()
print(modelo.summary())
