In [10]:
import pandas as pd
import numpy as np
import sys
from prophet import Prophet
import os
import json

In [11]:
# Extend the module search path so the utils_* modules imported below can be
# resolved (presumably they live in that sibling directory).
# NOTE(review): the folder is spelled '../scr' — confirm this is the real
# directory name and not a typo for '../src'.
sys.path.append('../scr')

# Project-local helpers used by the cells below: filtering/preprocessing,
# table download, and error metrics.
from utils_clean import filtrar_ub_semanal_iso,preprocess_data_prophet
from utils_data import downloadTable
from utils_metrics import errorMetrics

Index(['date', 'price', 'priceId', 'priceName', 'currency', 'currencyId',
       'priceType', 'source'],
      dtype='object')

In [None]:
# Query for the global prices table. The price is divided by 2.204 in SQL.
# NOTE(review): 2.204 looks like a lb/kg conversion factor — confirm the units.
# The computed column is explicitly aliased back to `price`: without an alias
# BigQuery returns it under an auto-generated name (`f0_`), so a fresh download
# would not contain the `price` column seen in the cached table.
queryPrices = """
SELECT date, price/2.204 AS price, priceId, priceName, currency, currencyId,priceType, source FROM `desarrollo-444913.globalPrices.prices`
"""
# Use forceDownload=True to refresh the table instead of reading the cached copy.
df = downloadTable(queryPrices,"globalPricesTable",forceDownload=False)
dfUbMensual = filtrar_ub_semanal_iso(df)

# Train: everything up to and including the last day of 2023. The cutoff is
# 2023-12-31 (not 12-30) so that an observation dated on the 31st is not
# silently dropped from both splits. Adjust the string to match your index.
train = dfUbMensual.loc[:'2023-12-31']
# Test: the full calendar year 2024.
test = dfUbMensual.loc['2024-01-01':'2024-12-31']



Folder 'bigqueryDatabases' already exists.
Reading bigqueryDatabases\globalPricesTable.csv from local CSV.


In [13]:
# Run both splits through the same Prophet preprocessing helper (presumably it
# yields the `ds`/`y` columns that the modelling cells read — confirm in utils_clean).
# NOTE: the names are rebound in place, so re-running this cell feeds the
# already-preprocessed frames back into the helper.
train, test = (preprocess_data_prophet(split) for split in (train, test))

# Prophet horizonte de 12 meses

In [14]:
# Baseline: fit Prophet once on the training split, forecast the whole test
# period in one shot, and persist the log, metrics and predictions.
date = pd.Timestamp.now().strftime('%Y-%m-%d_%H-%M-%S')
dir_checkpoint = f"""results/Prophet_Base_semanal_un_año_{date}/"""
horizonte = 12

model = Prophet()
model.add_country_holidays(country_name='US')
model.fit(train)

# Forecast at exactly the timestamps of the test split. Rebuilding the grid with
# a fixed frequency (previously freq='MS') only lines up when every test date is
# a month start; otherwise predictions and actuals differ in length and the
# metrics/result table below are misaligned.
future = test[["ds"]].reset_index(drop=True)
forecast = model.predict(future)
pred = forecast[["ds","yhat"]]
metric = errorMetrics(test["y"].to_numpy(), pred["yhat"].to_numpy())

# Create the output folder (no error if it already exists).
os.makedirs(dir_checkpoint, exist_ok=True)

logDict = {"modelo": "base semanal 12 meses"}
with open(os.path.join(dir_checkpoint, 'log.json'), 'w') as f:
    json.dump(logDict, f, indent=4)
pd.DataFrame.from_dict(metric).to_csv(os.path.join(dir_checkpoint, 'metricas.csv'), index=False)

# concat(axis=1) aligns on the index, so put the actuals on the same 0..n-1
# index as the forecast before pairing them row by row.
result = pd.concat([test["y"].reset_index(drop=True), pred["yhat"]], axis=1)
result.columns = ['real', 'pred']
result.to_csv(os.path.join(dir_checkpoint, 'predicciones.csv'), index=True)

16:41:03 - cmdstanpy - INFO - Chain [1] start processing
16:41:03 - cmdstanpy - INFO - Chain [1] done processing


In [15]:
# Display the table of actual ('real') vs forecast ('pred') values built above.
result

Unnamed: 0,real,pred
0,14.352076,12.974508
1,13.999337,13.086134
2,13.930443,13.553508
3,13.822967,13.595585
4,13.737538,13.894498
5,12.965921,13.40436
6,12.103364,13.215582
7,11.863611,12.74063
8,11.816763,12.618949
9,11.918727,12.244699


# Prophet: one-step-ahead forecast, refitting the model at every step

In [None]:
# One-step-ahead evaluation with refitting: for every test timestamp, Prophet is
# refit on the training split plus all test observations that precede it, and a
# single-step forecast is produced. Log, metrics and predictions are then saved.
date = pd.Timestamp.now().strftime('%Y-%m-%d_%H-%M-%S')
dir_checkpoint = f"""results/Prophet_oneStepReFitting_{date}/"""
horizonte = 12
traincopy = train.copy()

step_forecasts = []
for step in range(len(test)):
    if step > 0:
        # Reveal the previous test observation to the model. The slice is
        # positional, so it works whatever index `test` carries.
        traincopy = pd.concat([traincopy, test.iloc[step - 1:step]], ignore_index=True)

    # A Prophet instance can only be fit once, hence a fresh model per step.
    model = Prophet()
    model.add_country_holidays(country_name='US')
    model.fit(traincopy)

    future = pd.DataFrame({'ds': [test["ds"].iloc[step]]})
    forecast = model.predict(future)
    step_forecasts.append(forecast[["ds","yhat"]])

# Stack the per-step forecasts into one frame. Previously the concat result was
# discarded and `pred` held only the last one-row forecast, so the metrics and
# the saved predictions did not cover the test set.
pred = pd.concat(step_forecasts, ignore_index=True)
metric = errorMetrics(test["y"].to_numpy(), pred["yhat"].to_numpy())

# Create the output folder (no error if it already exists).
os.makedirs(dir_checkpoint, exist_ok=True)

logDict = {"modelo": "base refitting"}
with open(os.path.join(dir_checkpoint, 'log.json'), 'w') as f:
    json.dump(logDict, f, indent=4)
pd.DataFrame.from_dict(metric).to_csv(os.path.join(dir_checkpoint, 'metricas.csv'), index=False)

# concat(axis=1) aligns on the index, so put the actuals on the same 0..n-1
# index as the stacked forecasts before pairing them row by row.
result = pd.concat([test["y"].reset_index(drop=True), pred["yhat"]], axis=1)
result.columns = ['real', 'pred']
result.to_csv(os.path.join(dir_checkpoint, 'predicciones.csv'), index=True)