In [35]:
import pandas as pd
import numpy as np
import datetime as dt

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

### Preparación de datos

In [None]:
# Load the ESIOS D+1 forecast export (one row per indicator, date and hour).
df_esios = pd.read_csv('data_training/esios_previsiones_d+1.csv')

# Build the timestamp of the START of each period: the calendar date plus
# (Hour - 1) hours.
period_date = pd.to_datetime(df_esios['Date'])
period_offset = pd.to_timedelta(df_esios['Hour'] - 1, unit='h')
df_esios['Datetime'] = period_date + period_offset

df_esios.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17280 entries, 0 to 17279
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   indicator_id  17280 non-null  int64         
 1   Date          17280 non-null  object        
 2   Hour          17280 non-null  int64         
 3   geo_id        17280 non-null  int64         
 4   value         17280 non-null  float64       
 5   Datetime      17280 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(1), int64(3), object(1)
memory usage: 810.1+ KB


In [28]:
# Keep only the columns needed for modelling and, when a (Datetime, indicator)
# pair appears more than once, keep its first occurrence.
df_esios_clean = (
    df_esios
    .loc[:, ['Datetime', 'indicator_id', 'value']]
    .drop_duplicates(subset=['Datetime', 'indicator_id'], keep='first')
)

# Long -> wide: one row per timestamp, one column per indicator id.
df_esios_pivot = df_esios_clean.pivot(index='Datetime', columns='indicator_id', values='value')
df_esios_pivot = df_esios_pivot.reset_index()

df_esios_pivot

indicator_id,Datetime,600,1775,1777,1779
0,2025-01-01 00:00:00,134.49,23530.8,2608.3,0.0
1,2025-01-01 01:00:00,131.59,22670.3,2750.5,0.0
2,2025-01-01 02:00:00,131.49,21420.5,2936.8,0.0
3,2025-01-01 03:00:00,131.42,20285.0,3065.3,0.0
4,2025-01-01 04:00:00,120.49,19470.5,3157.8,0.0
...,...,...,...,...,...
4315,2025-06-29 20:00:00,111.98,31241.5,4842.3,4534.8
4316,2025-06-29 21:00:00,129.45,32164.5,4481.8,632.8
4317,2025-06-29 22:00:00,154.52,32269.3,4226.0,0.0
4318,2025-06-29 23:00:00,133.61,30043.3,3804.3,0.0


In [29]:
# Indicator id -> readable column name.
dicc_indicators = {1775: 'demanda', 1777: 'gen_eolica', 1779: 'gen_fotovoltaica', 600: 'MD'}

# Derive calendar features from the timestamp, rename the indicator columns
# and finally move the timestamp into the index. Everything is done on a new
# frame, so `df_esios_pivot` is left untouched.
df_input = (
    df_esios_pivot
    .assign(
        Year=lambda frame: frame['Datetime'].dt.year,
        Month=lambda frame: frame['Datetime'].dt.month,
        Day_of_Week=lambda frame: frame['Datetime'].dt.dayofweek,
        Hour=lambda frame: frame['Datetime'].dt.hour,
    )
    .rename(columns=dicc_indicators)
    .set_index('Datetime')
)
df_input

indicator_id,MD,demanda,gen_eolica,gen_fotovoltaica,Year,Month,Day_of_Week,Hour
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-01-01 00:00:00,134.49,23530.8,2608.3,0.0,2025,1,2,0
2025-01-01 01:00:00,131.59,22670.3,2750.5,0.0,2025,1,2,1
2025-01-01 02:00:00,131.49,21420.5,2936.8,0.0,2025,1,2,2
2025-01-01 03:00:00,131.42,20285.0,3065.3,0.0,2025,1,2,3
2025-01-01 04:00:00,120.49,19470.5,3157.8,0.0,2025,1,2,4
...,...,...,...,...,...,...,...,...
2025-06-29 20:00:00,111.98,31241.5,4842.3,4534.8,2025,6,6,20
2025-06-29 21:00:00,129.45,32164.5,4481.8,632.8,2025,6,6,21
2025-06-29 22:00:00,154.52,32269.3,4226.0,0.0,2025,6,6,22
2025-06-29 23:00:00,133.61,30043.3,3804.3,0.0,2025,6,6,23


### Split train - test

In [44]:
# Chronological hold-out split: every hour up to and including `split_date`
# goes to the training set, every later hour to the test set. No shuffled
# random split is performed, because it would place hours from the test
# period into the training set of a time-ordered target.
split_date = dt.datetime(2025, 5, 31, 23, 0)

# Features are all columns except the target 'MD'.
X = df_input.drop(columns=['MD'])
y = df_input['MD']

# A single boolean mask over the datetime index guarantees that train and
# test are disjoint and together cover every row, whatever the spacing of
# the timestamps.
is_train = X.index <= split_date
X_train, y_train = X.loc[is_train], y.loc[is_train]
X_test, y_test = X.loc[~is_train], y.loc[~is_train]

In [46]:
# Random-forest regressor with a fixed seed, using all available cores.
forest = RandomForestRegressor(n_estimators=500, random_state=42, n_jobs=-1)

# Two-step pipeline: standardise the features, then fit the forest.
model = Pipeline(steps=[
    ('scaler', StandardScaler()),  # feature scaling
    ('regressor', forest),
])
model.fit(X_train, y_train)

# Evaluate on the held-out test set.
y_pred = model.predict(X_test)
print(f"MAE: {mean_absolute_error(y_test, y_pred):.2f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.2f}")

MAE: 39.09
RMSE: 49.07
