# Regresión lineal

In [None]:
from sklearn.linear_model import LinearRegression

# Baseline: ordinary least-squares fit of boiler energy on heating degree days.
# `df` is expected to be defined by an earlier cell (not visible here).
# X stays two-dimensional (a one-column frame), y is the target column.
X = df.loc[:, ["HDD"]]
y = df.loc[:, "EnergiaCaldera(MW-hr)"]

model = LinearRegression()
model = model.fit(X, y)  # fit() hands back the fitted estimator


In [None]:
import statsmodels.formula.api as smf

# OLS of boiler energy on HDD with one categorical level per installation.
# The energy column is named "EnergiaCaldera(MW-hr)" (that is how df is indexed
# elsewhere in this file). The formula parser treats bare names as Python
# expressions, so a name containing parentheses and a hyphen has to be quoted
# with Q("...") to be looked up as a column.
# NOTE(review): assumes df was not renamed to a plain "EnergiaCaldera" column
# in a cell outside this view - confirm.
model = smf.ols(
    'Q("EnergiaCaldera(MW-hr)") ~ HDD + C(Instalacion)', data=df
).fit()
print(model.summary())


In [None]:
# Quadratic HDD term, stored as its own column so the formula can reference it.
df["HDD2"] = df["HDD"] ** 2

# Same model as the linear OLS plus the squared term. The response column is
# "EnergiaCaldera(MW-hr)"; it must be wrapped in Q("...") because the formula
# parser would otherwise read the parentheses and hyphen as Python syntax.
# NOTE(review): assumes `smf` (statsmodels.formula.api) was imported by an
# earlier cell and that df still uses the "(MW-hr)" column name - confirm.
model = smf.ols(
    'Q("EnergiaCaldera(MW-hr)") ~ HDD + HDD2 + C(Instalacion)', data=df
).fit()


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Non-linear baseline on the same X / y used by the linear-regression cell
# (both are expected to exist already). 200 trees; the fixed random_state
# keeps repeated runs reproducible.
model = RandomForestRegressor(random_state=42, n_estimators=200)
model.fit(X, y)


In [None]:
import statsmodels.formula.api as smf

# Mixed-effects model: fixed effect for HDD, observations grouped by
# installation via `groups`.
# The response column is "EnergiaCaldera(MW-hr)" (as indexed elsewhere in this
# file); Q("...") quotes it so the formula parser does not interpret the
# parentheses and hyphen as Python syntax.
# NOTE(review): assumes df still carries the "(MW-hr)" column name - confirm.
model = smf.mixedlm(
    'Q("EnergiaCaldera(MW-hr)") ~ HDD', df, groups=df["Instalacion"]
).fit()


## LSTM

In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objects as go

# --- 1. Load data ---
# Keep the rows of a single installation, ordered by date so that the sliding
# windows built afterwards follow the time axis.
inst = "m218807"
df = pd.read_excel("base_datos_energia_hdd.xlsx")
df = df.loc[df["Instalacion"] == inst].sort_values(by="Date")

# --- 2. Scaling ---
# One MinMaxScaler is fitted on both columns at once (HDD first, energy second)
# and the scaled values overwrite the originals in df. The fitted `scaler` is
# kept so predictions can be mapped back to the original units.
scaler = MinMaxScaler()
scaled = scaler.fit_transform(df[["HDD", "EnergiaCaldera(MW-hr)"]])
df[["HDD", "EnergiaCaldera(MW-hr)"]] = scaled

# --- 3. Build training sequences ---
def create_sequences(data, seq_len=7):
    """Slice a 2-column array into sliding input windows and next-step targets.

    For every start position ``i`` the input is column 0 of rows
    ``i .. i + seq_len - 1`` and the target is column 1 of row ``i + seq_len``.

    Args:
        data: 2-D array; column 0 is the input feature (HDD) and column 1 is
            the target (energy).
        seq_len: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_windows, seq_len)`` and
        ``y`` has shape ``(n_windows,)``, with
        ``n_windows = len(data) - seq_len``. When ``data`` has no more than
        ``seq_len`` rows, ``X`` is returned with shape ``(0, seq_len)`` so
        callers can still read ``X.shape[1]``.
    """
    n_windows = len(data) - seq_len
    if n_windows <= 0:
        # np.array([]) would be rank 1 and break the caller's reshape;
        # keep the two-dimensional layout even when there are no windows.
        return np.empty((0, seq_len)), np.empty((0,))

    starts = range(n_windows)
    X = np.array([data[i:i + seq_len, 0] for i in starts])  # HDD as input
    y = np.array([data[i + seq_len, 1] for i in starts])    # energy as output
    return X, y

# Window length in days. It is used both to build the sequences and to align
# the dates with the targets, so it is defined once to keep the two in sync.
SEQ_LEN = 7

data = df[["HDD", "EnergiaCaldera(MW-hr)"]].values
X, y = create_sequences(data, seq_len=SEQ_LEN)

# Dates associated with each predicted value: the first target is the row that
# follows the first window, i.e. position SEQ_LEN.
fechas = df["Date"].iloc[SEQ_LEN:].reset_index(drop=True)

# Shape for the LSTM: [samples, timesteps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))

# --- 4. Define the LSTM model ---
# A single LSTM layer that returns only its last output, followed by a small
# dense head producing one value per window.
model = Sequential([
    LSTM(64, input_shape=(X.shape[1], 1), return_sequences=False),
    Dense(32, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')

# --- 5. Training ---
model.fit(X, y, epochs=50, batch_size=16, validation_split=0.1, verbose=1)

# --- 6. Prediction ---
# NOTE: predictions are made on the same windows passed to fit(), so the
# comparison plotted below is in-sample.
y_pred = model.predict(X)

# --- 7. Undo the scaling for comparison ---
# `scaler` was fitted on two columns (HDD, energy), so inverse_transform needs
# two columns as well: column 0 is zero padding and is discarded, column 1
# carries the energy values.
pred = np.zeros((len(y_pred), 2))
pred[:, 1] = y_pred[:, 0]
orig = np.zeros((len(y), 2))
orig[:, 1] = y

inv_pred = scaler.inverse_transform(pred)[:, 1]
inv_orig = scaler.inverse_transform(orig)[:, 1]

# --- 8. Build the daily time series ---
# The data are daily, so the dates are used directly as the index (no hours).
real_series = pd.Series(inv_orig, index=fechas)
pred_series = pd.Series(inv_pred, index=fechas)

# --- 9. Interactive Plotly visualisation ---
trace_real = go.Scatter(
    x=real_series.index, y=real_series.values,
    mode='lines+markers', name='Real', line=dict(color='blue')
)
trace_pred = go.Scatter(
    x=pred_series.index, y=pred_series.values,
    mode='lines+markers', name='Predicho', line=dict(color='red')
)

fig = go.Figure(data=[trace_real, trace_pred])

fig.update_layout(
    title=f'Predicción diaria de energía ({inst})',
    xaxis_title='Fecha',
    yaxis_title='Energía Caldera (MW-hr)',
    template='plotly_white',
    hovermode='x unified'
)

fig.show()


# Biomasa

In [None]:
import statsmodels.formula.api as smf

# Biomass data set; this replaces whatever `df` held before.
# `pd` is expected to be imported by an earlier cell.
df = pd.read_excel("base_datos_biomasa.xlsx")

# OLS of biomass consumption on energy demand, with the installation entered
# as a categorical term; the fitted summary is printed.
modelo = smf.ols(
    formula="ConsumoBiomasa ~ DemandaEnergetica + C(Instalacion)",
    data=df,
).fit()
print(modelo.summary())
