### Entrenamiento, Predicción y Evaluación de un modelo LSTM

#### **Importación de Datos**

In [17]:
import requests
import pandas as pd
import numpy as np
from io import BytesIO

In [18]:
# Load the Zone 1 power-consumption dataset from the project's GitHub repository.
DATA_GITHUB_URL = 'https://raw.githubusercontent.com/DCajiao/Time-series-forecast-of-energy-consumption-in-Tetouan-City/refs/heads/main/data/zone1_power_consumption_of_tetouan_city.csv'

# Download the raw CSV. The timeout keeps the cell from hanging indefinitely on a
# stalled connection, and raise_for_status() surfaces HTTP errors (e.g. a 404)
# here instead of letting an error page be parsed as if it were the dataset.
response = requests.get(DATA_GITHUB_URL, timeout=30)
response.raise_for_status()

# Parse the downloaded bytes as a comma-separated file.
df = pd.read_csv(BytesIO(response.content), sep=',')

# Use the timestamp column as the index. errors="coerce" turns any value that
# cannot be parsed into NaT rather than raising.
df["datetime"] = pd.to_datetime(df["datetime"], errors="coerce")
df = df.set_index("datetime")

# Minimal validation: fail early if a required column is absent.
expected_cols = {"temperature","humidity","general_diffuse_flows","zone_1"}
missing = expected_cols - set(df.columns)
if missing:
    raise ValueError(f"Faltan columnas en el dataset: {missing}. "
                     f"Columnas disponibles: {df.columns.tolist()}")

# The most common gap between consecutive timestamps approximates the sampling rate.
print("\nFrecuencia aproximada:", df.index.to_series().diff().mode().iloc[0])
print("Filas totales:", len(df))


Frecuencia aproximada: 0 days 00:10:00
Filas totales: 52416


In [19]:
# Display the loaded frame; the notebook renders a truncated preview of it.
df

Unnamed: 0_level_0,temperature,humidity,wind_speed,general_diffuse_flows,zone_1
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-01 00:00:00,6.559,73.8,0.083,0.051,34055.69620
2017-01-01 00:10:00,6.414,74.5,0.083,0.070,29814.68354
2017-01-01 00:20:00,6.313,74.5,0.080,0.062,29128.10127
2017-01-01 00:30:00,6.121,75.0,0.083,0.091,28228.86076
2017-01-01 00:40:00,5.921,75.7,0.081,0.048,27335.69620
...,...,...,...,...,...
2017-12-30 23:10:00,7.010,72.4,0.080,0.040,31160.45627
2017-12-30 23:20:00,6.947,72.6,0.082,0.051,30430.41825
2017-12-30 23:30:00,6.900,72.8,0.086,0.084,29590.87452
2017-12-30 23:40:00,6.758,73.0,0.080,0.066,28958.17490


In [20]:
# Show the earliest and latest timestamps present in the index.
df.index.min(), df.index.max()

(Timestamp('2017-01-01 00:00:00'), Timestamp('2017-12-30 23:50:00'))

#### **Enriquecimiento del Dataset con Variables Calculadas**

In [21]:
# Boolean flag for Saturdays and Sundays (pandas dayofweek: Monday=0 ... Sunday=6).
df['is_weekend'] = df.index.dayofweek >= 5

# Moroccan public holidays for 2017, keyed by "YYYY/MM/DD" with a descriptive label.
# NOTE(review): only these nine dates are listed; confirm whether holidays that
# follow the lunar calendar were left out on purpose.
festivos_marruecos = {
    "2017/01/01": "Año Nuevo cristiano",
    "2017/01/11": "Aniversario del Manifiesto de la Independencia",
    "2017/05/01": "Día del Trabajador",
    "2017/07/30": "Día de la Coronación / Fiesta del Trono",
    "2017/08/14": "Conmemoración de la lealtad del Oued Eddahab",
    "2017/08/20": "Aniversario de la Revolución del Rey y el Pueblo",
    "2017/08/21": "Fiesta de la Juventud",
    "2017/11/06": "Aniversario de la Marcha Verde",
    "2017/11/18": "Día de la Independencia",
}

# Boolean flag for rows whose calendar date (time of day stripped by normalize())
# matches one of the holiday dates above.
holiday_dates = pd.to_datetime(list(festivos_marruecos))
df['is_holiday'] = df.index.normalize().isin(holiday_dates)


In [22]:
# Derive calendar features from the DatetimeIndex: hour of day, day of month and
# month number. No year column is added here.
for calendar_part in ('hour', 'day', 'month'):
    df[calendar_part] = getattr(df.index, calendar_part)

df.columns

Index(['temperature', 'humidity', 'wind_speed', 'general_diffuse_flows',
       'zone_1', 'is_weekend', 'is_holiday', 'hour', 'day', 'month'],
      dtype='object')

#### **Exportación de Datos**

In [23]:
# Write the enriched frame to CSV; the path is relative to the notebook's
# working directory.
df_path = '../data/enriched_zone1_power_consumption_of_tetouan_city.csv'
df.to_csv(path_or_buf=df_path)