In [1]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from math import pi

from sklearn.linear_model import RidgeCV
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error

from pathlib import Path
import datetime as dt
import os

from google.cloud import bigquery
from google.oauth2 import service_account

import numpy as np, pandas as pd
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error
from statsmodels.stats.diagnostic import acorr_ljungbox

In [2]:
# BigQuery project and authentication settings.
project_id = "enersinc-tbsg-bq"

# Location of the service-account JSON key.
# The Windows path is written as a raw string: in a normal literal "\B" and "\e"
# are invalid escape sequences, which newer Python versions warn about and plan
# to reject. The optional BQ_KEY_PATH environment variable lets another machine
# point at its own key file without editing the notebook; when it is not set,
# the original path is used.
key_path = os.environ.get("BQ_KEY_PATH", r"C:\BigQuery\eramirez-tbsg.json")

# Load the credentials from the JSON key file.
credentials = service_account.Credentials.from_service_account_file(key_path)

# Create the BigQuery client used by the query cells.
client = bigquery.Client(project=project_id, credentials=credentials)

In [3]:
# Date range (YYYY-MM-DD strings) interpolated into the BigQuery filter;
# the query compares with >= and <=, so both ends are inclusive.
FechaIni, FechaFin = '2024-01-01', '2025-10-31'

In [4]:
# Select every column of `tbsg.public_ddem` for operation dates inside
# [FechaIni, FechaFin].
query = rf"""
select * from `enersinc-tbsg-bq`.tbsg.public_ddem
where fechaoperacion>='{FechaIni}' and fechaoperacion <='{FechaFin}'
"""

# Submit the query, then materialise the result set as a DataFrame.
query_job = client.query(query)
df_DemIni = query_job.to_dataframe()



In [5]:
# Support for tagging each date with its day of week / day type; a holiday is
# treated the same as a Sunday by the classification function defined below.
import holidays
co_holidays = holidays.Colombia()  # Colombian holiday calendar, queried with `fecha in co_holidays`

def typedays(row,tipo):
    """Classify the date stored under ``row['fecha']``.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by ``'fecha'`` whose value offers ``weekday()``
        and ``day`` (for example a DataFrame row).
    tipo : str
        ``'WeekDay'``   -> ``weekday()`` value, 0 (Monday) .. 6 (Sunday).
        ``'WeekMonth'`` -> 1-based count of 7-day blocks since the 1st of the month.
        ``'DayType'``   -> 1 for Monday-Friday, 2 for Saturday, 3 for Sunday
                           or any date found in ``co_holidays``.

    Returns
    -------
    int or None
        The requested classification; ``None`` for any other ``tipo``.
    """
    if tipo == 'WeekDay':
        return row['fecha'].weekday()

    if tipo == 'WeekMonth':
        return 1 + (row['fecha'].day - 1) // 7

    if tipo == 'DayType':
        fecha = row['fecha']
        # The holiday check comes first so a holiday on a weekday or Saturday still maps to 3.
        if fecha in co_holidays:
            return 3
        # Saturday -> 2, Sunday -> 3, every other weekday -> 1.
        return {5: 2, 6: 3}.get(fecha.weekday(), 1)

    return None
          

In [6]:
# Names removed from the analysis before any aggregation.
# NOTE(review): the list pairs 'ECUADOR220' with 'SubArea Ecuador230'; confirm both
# spellings exist in the source table, since any name that is neither excluded here
# nor listed in KEPT_SUBAREAS below is folded into 'Interior'.
EXCLUDED_NAMES = ['Total','ECUADOR138','ECUADOR220','COROZO','CUATRIC','SubArea Venezuela_Corozo','SubArea Venezuela_Cuatricentenario',
                  'SubArea Ecuador138', 'SubArea Ecuador230']

# Names kept as their own subarea (short code or long form); everything else becomes 'Interior'.
KEPT_SUBAREAS = ['ATLANTIC', 'BOLIVAR', 'GCM','CERROMAT', 'CORDOSUC', 'SubArea Atlantico','SubArea GCM',
                 'SubArea Cerromatoso','SubArea Bolivar','SubArea Cordoba_Sucre']

# Short codes are normalised to the long 'SubArea ...' form.
SUBAREA_ALIASES = {'ATLANTIC': 'SubArea Atlantico', 'BOLIVAR': 'SubArea Bolivar', 'GCM': 'SubArea GCM',
                   'CERROMAT': 'SubArea Cerromatoso', 'CORDOSUC': 'SubArea Cordoba_Sucre'}

HOURS = list(range(1, 25))

# Explicit copy of the filtered rows so the column assignment below does not
# write into a view of df_DemIni.
data = df_DemIni[~df_DemIni['nombre'].isin(EXCLUDED_NAMES)].copy()
data['nombre'] = (
    data['nombre']
    .where(data['nombre'].isin(KEPT_SUBAREAS), 'Interior')
    .replace(SUBAREA_ALIASES)
)

# Wide (hora1..hora24) -> long (one row per date, name and period), then total
# demand per date / subarea / period.
data = (
    data
    .rename(columns={f'hora{h}': h for h in HOURS})
    .melt(id_vars=['fechaoperacion', 'nombre'],
          value_vars=HOURS,
          var_name='periodo',
          value_name='demand')
    .rename(columns={'nombre': 'subarea', 'fechaoperacion': 'fecha'})
    .groupby(['fecha', 'subarea', 'periodo'])[['demand']].sum().round(2)
    .reset_index()
    .sort_values(by=['fecha', 'periodo', 'subarea'])
)

# Day type and day of week depend only on the date, so map over the 'fecha'
# column instead of building a full row object for every record.
data['day_osf'] = data['fecha'].map(lambda fecha: typedays({'fecha': fecha}, tipo='DayType'))
data['day_w'] = data['fecha'].map(lambda fecha: typedays({'fecha': fecha}, tipo='WeekDay'))

# Convert the dates once and reuse the result for both calendar fields.
fechas = pd.to_datetime(data['fecha'])
data['mes'] = fechas.dt.month
data['year'] = fechas.dt.year

data.head(7)


Unnamed: 0,fecha,subarea,periodo,demand,day_osf,day_w,mes,year
0,2024-01-01,Interior,1,4903.62,3,0,1,2024
24,2024-01-01,SubArea Atlantico,1,568.75,3,0,1,2024
48,2024-01-01,SubArea Bolivar,1,563.39,3,0,1,2024
72,2024-01-01,SubArea Cerromatoso,1,228.61,3,0,1,2024
96,2024-01-01,SubArea Cordoba_Sucre,1,532.93,3,0,1,2024
120,2024-01-01,SubArea GCM,1,542.02,3,0,1,2024
1,2024-01-01,Interior,2,4691.94,3,0,1,2024


In [7]:
# Keys that identify one demand profile point, and the months compared between years.
PROFILE_KEYS = ['subarea', 'periodo', 'day_osf', 'day_w']
REFERENCE_MONTHS = [8, 9, 10]


def mean_demand_by_month(df, anio, columna, meses=REFERENCE_MONTHS):
    """Average demand of one year per profile key and month.

    Parameters
    ----------
    df : DataFrame with columns 'year', 'mes', 'demand' and the PROFILE_KEYS.
    anio : value compared against the 'year' column.
    columna : name given to the averaged 'demand' column in the result.
    meses : months (values of 'mes') to keep; defaults to REFERENCE_MONTHS.

    Returns
    -------
    DataFrame with PROFILE_KEYS + ['mes'] and the mean demand under ``columna``.
    """
    seleccion = df[(df['year'] == anio) & (df['mes'].isin(meses))]
    return (
        seleccion
        .groupby(PROFILE_KEYS + ['mes'])[['demand']].mean()
        .reset_index()
        .rename(columns={'demand': columna})
    )


df_2024 = mean_demand_by_month(data, 2024, 'dem2024')
df_2025 = mean_demand_by_month(data, 2025, 'dem2025')

# Keep only the key/month combinations present in both years.
df_merge = df_2024.merge(df_2025, on=PROFILE_KEYS + ['mes'], how='inner')

# Percentage change of 2025 relative to 2024.
df_merge['Cambio'] = 100*(df_merge['dem2025']-df_merge['dem2024'])/df_merge['dem2024']

# Average the monthly changes for each profile key.
df_cambio = df_merge.groupby(PROFILE_KEYS)[['Cambio']].mean().reset_index()

# Mean absolute change per subarea and period.
promedio_abs = (
    df_cambio.groupby(['subarea', 'periodo'])['Cambio']
    .apply(lambda x: x.abs().mean())
    .rename('promedio_abs')
    .reset_index()
)

# Attach that mean and clip each change to [-promedio_abs, +promedio_abs]:
# values already inside the band are kept as they are, only larger magnitudes
# are limited to the band edge.
# NOTE(review): the earlier comment said the change is *replaced* by +/- the mean
# according to its sign; the code has always clipped instead. Confirm which is intended.
df_cambio = df_cambio.merge(promedio_abs, on=['subarea', 'periodo'], how='left')
df_cambio['CambioMod'] = df_cambio['Cambio'].clip(lower=-df_cambio['promedio_abs'], upper=df_cambio['promedio_abs'])

df_cambio

Unnamed: 0,subarea,periodo,day_osf,day_w,Cambio,promedio_abs,CambioMod
0,Interior,1,1,0,1.435180,2.190529,1.435180
1,Interior,1,1,1,2.459191,2.190529,2.190529
2,Interior,1,1,2,2.077411,2.190529,2.077411
3,Interior,1,1,3,2.816843,2.190529,2.190529
4,Interior,1,1,4,2.047564,2.190529,2.047564
...,...,...,...,...,...,...,...
1147,SubArea GCM,24,1,3,10.774783,13.491627,10.774783
1148,SubArea GCM,24,1,4,11.603116,13.491627,11.603116
1149,SubArea GCM,24,2,5,14.225884,13.491627,13.491627
1150,SubArea GCM,24,3,0,17.302756,13.491627,13.491627


In [None]:
from datetime import date


def same_day_previous_year(fecha):
    """Return the same month/day one calendar year earlier as a ``date``.

    29 February has no counterpart in the preceding (non-leap) year, so it is
    mapped to 28 February instead of raising ``ValueError``.
    """
    dia_anterior = 28 if (fecha.month == 2 and fecha.day == 29) else fecha.day
    return date(fecha.year - 1, fecha.month, dia_anterior)


def same_weekday_previous_year(fecha):
    """Return the date in last year's matching ISO week with the same weekday.

    The ISO week is taken from ``same_day_previous_year(fecha)``. The ISO *year*
    of that date is used together with its week number: around New Year the ISO
    year differs from the calendar year, and pairing the week with the calendar
    year yields a date about a year off or an invalid week 53.
    """
    iso_anterior = same_day_previous_year(fecha).isocalendar()
    dia_semana = fecha.isocalendar()[2]
    return date.fromisocalendar(iso_anterior[0], iso_anterior[1], dia_semana)


# First day of the new period.
fecha_ini_new = pd.to_datetime('2025-11-10')

year = fecha_ini_new.year
mes = fecha_ini_new.month
dia = fecha_ini_new.day
iso_new = fecha_ini_new.isocalendar()
week = iso_new[1]   # ISO week number of the new start date
diaw = iso_new[2]   # ISO weekday, 1 (Monday) .. 7 (Sunday)

# Same calendar day one year earlier, and the ISO week it falls in.
fecha_new = pd.to_datetime(same_day_previous_year(fecha_ini_new))
weekold = fecha_new.isocalendar()[1]

# Historical window: starts on the matching weekday of that ISO week and spans
# `semanas` full weeks (both ends inclusive).
# NOTE(review): for 2025-11-10 (ISO week 46) this gives 2024-11-04 (ISO week 45);
# the later 364-day shift of the results maps that start to 2025-11-03, one week
# before fecha_ini_new. Confirm that this offset is intended.
fecha_ini = same_weekday_previous_year(fecha_ini_new)
semanas = 4
fecha_fin = fecha_ini + dt.timedelta(days=7*semanas-1)

In [21]:
# Display the start date of the historical window used to filter the demand data.
fecha_ini

datetime.date(2024, 11, 4)

In [9]:
# Restrict the demand history to the [fecha_ini, fecha_fin] window.
en_ventana = (data.fecha >= fecha_ini) & (data.fecha <= fecha_fin)
df_Dem = data[en_ventana].sort_values(by=['fecha', 'subarea', 'periodo'], ascending=[True, True, True])
l_col = list(df_Dem.columns)

# Attach the clipped year-over-year change that matches each row's profile keys.
claves_cambio = ['subarea', 'periodo', 'day_osf', 'day_w']
df_Dem = df_Dem.merge(df_cambio, on=claves_cambio, how='left')[l_col + ['CambioMod']]
l_col = list(df_Dem.columns)

# Fallback value: mean change over the window per subarea, period and day type,
# used where the exact weekday combination had no match in df_cambio.
claves_imputar = ['subarea', 'periodo', 'day_osf']
df_imputar = (
    df_Dem.groupby(claves_imputar)['CambioMod'].mean()
    .reset_index()
    .rename(columns={'CambioMod': 'CambioModAgre'})
)

df_Dem = df_Dem.merge(df_imputar, on=claves_imputar, how='left')[l_col + ['CambioModAgre']]
df_Dem['CambioMod'] = df_Dem['CambioMod'].fillna(df_Dem['CambioModAgre'])

# Drop the helper column again.
df_Dem = df_Dem[l_col]

# Report whether any change factor is still missing after the imputation.
n_missing = df_Dem['CambioMod'].isna().sum()
print("Hay valores faltantes en 'CambioMod'?:", n_missing > 0)
print("Cantidad de valores faltantes en 'CambioMod':", n_missing)
if n_missing > 0:
    print("Ejemplos de filas con missing en 'CambioMod':")
    display(df_Dem[df_Dem['CambioMod'].isna()].head())
df_Dem

Hay valores faltantes en 'CambioMod'?: False
Cantidad de valores faltantes en 'CambioMod': 0


Unnamed: 0,fecha,subarea,periodo,demand,day_osf,day_w,mes,year,CambioMod
0,2024-11-04,Interior,1,5129.25,3,0,11,2024,2.190529
1,2024-11-04,Interior,2,4924.77,3,0,11,2024,2.334120
2,2024-11-04,Interior,3,4795.49,3,0,11,2024,2.542240
3,2024-11-04,Interior,4,4733.13,3,0,11,2024,2.474304
4,2024-11-04,Interior,5,4765.84,3,0,11,2024,2.489205
...,...,...,...,...,...,...,...,...,...
4027,2024-12-01,SubArea GCM,20,759.60,3,6,12,2024,11.029737
4028,2024-12-01,SubArea GCM,21,776.57,3,6,12,2024,11.226004
4029,2024-12-01,SubArea GCM,22,723.53,3,6,12,2024,13.821799
4030,2024-12-01,SubArea GCM,23,713.56,3,6,12,2024,13.551838


In [10]:
# Scale the historical demand by the percentage change (CambioMod is in percent).
factor_ajuste = (df_Dem['CambioMod'] / 100) + 1
df_Dem['demandMod'] = df_Dem['demand'].mul(factor_ajuste).round(2)
df_Dem

Unnamed: 0,fecha,subarea,periodo,demand,day_osf,day_w,mes,year,CambioMod,demandMod
0,2024-11-04,Interior,1,5129.25,3,0,11,2024,2.190529,5241.61
1,2024-11-04,Interior,2,4924.77,3,0,11,2024,2.334120,5039.72
2,2024-11-04,Interior,3,4795.49,3,0,11,2024,2.542240,4917.40
3,2024-11-04,Interior,4,4733.13,3,0,11,2024,2.474304,4850.24
4,2024-11-04,Interior,5,4765.84,3,0,11,2024,2.489205,4884.47
...,...,...,...,...,...,...,...,...,...,...
4027,2024-12-01,SubArea GCM,20,759.60,3,6,12,2024,11.029737,843.38
4028,2024-12-01,SubArea GCM,21,776.57,3,6,12,2024,11.226004,863.75
4029,2024-12-01,SubArea GCM,22,723.53,3,6,12,2024,13.821799,823.53
4030,2024-12-01,SubArea GCM,23,713.56,3,6,12,2024,13.551838,810.26


In [74]:
# Export the adjusted demand with its original (historical) dates.
df_Dem2024Mod = df_Dem[['fecha', 'subarea', 'periodo', 'demandMod']].copy()
# The aggregated 'Interior' group is written out under the label 'SubAntioquia'.
df_Dem2024Mod['subarea'] = df_Dem2024Mod['subarea'].replace({'Interior': 'SubAntioquia'})
df_Dem2024Mod.to_csv('DemandaModificada2024.csv')

In [11]:
# Export the adjusted demand with its dates moved forward into the new period.
df_DemFinal = df_Dem[['fecha', 'subarea', 'periodo', 'demandMod']].copy()
# The aggregated 'Interior' group is written out under the label 'SubAntioquia'.
df_DemFinal['subarea'] = df_DemFinal['subarea'].replace({'Interior': 'SubAntioquia'})
# 364 days is exactly 52 weeks, so every shifted date keeps its weekday.
# NOTE(review): with a window starting 2024-11-04 the shifted dates start on
# 2025-11-03, while fecha_ini_new is 2025-11-10; confirm the one-week offset is intended.
df_DemFinal['fecha'] = pd.to_datetime(df_DemFinal['fecha']) + pd.Timedelta(days=364)
df_DemFinal.to_csv('DemandaModificada2025.csv')
