In [1]:
import pandas as pd
import numpy as np
import sys
from prophet import Prophet
import os
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Make the project's helper modules importable. The path is relative to the
# kernel's working directory, so the notebook has to be launched from its own folder.
# NOTE(review): confirm the folder really is named '../scr' and not '../src'.
sys.path.append('../scr')

from utils_clean import filtrar_ub_semanal_iso,preprocess_data_prophet
from utils_data import downloadTable
from utils_metrics import errorMetrics

In [3]:
# Query for the global price table. `price` is divided by 2.204 in SQL
# (presumably a lb/kg unit conversion -- TODO confirm the intended unit).
queryPrices = """
SELECT date, price/2.204 as price, priceId, priceName, currency, currencyId,priceType, source FROM `desarrollo-444913.globalPrices.prices`
"""
# Pass forceDownload=True to refresh the table instead of reusing the local CSV copy.
df = downloadTable(queryPrices,"globalPricesTable",forceDownload=False)
# NOTE(review): despite the "Mensual" (monthly) name, the displayed result is a
# weekly series indexed by `iso_monday`.
dfUbMensual = filtrar_ub_semanal_iso(df)
# Train: every observation up to and including 2023-12-30 (label-based slice).
train = dfUbMensual.loc[:'2023-12-30']   # Adjust the string to match your index
# Test: calendar year 2024 (the displayed data ends at 2024-10-21).
test = dfUbMensual.loc['2024-01-01':'2024-12-31']



Folder 'bigqueryDatabases' already exists.
Reading bigqueryDatabases\globalPricesTable.csv from local CSV.


In [4]:
test

iso_monday
2024-01-01    5.926667
2024-01-08    6.376793
2024-01-15    6.776906
2024-01-22    6.851927
2024-01-29    6.626864
2024-02-05    6.476821
2024-02-12    6.451814
2024-02-19    6.301772
2024-02-26    6.176737
2024-03-04    6.151730
2024-03-11    6.251758
2024-03-18    6.376793
2024-03-25    6.501828
2024-04-01    6.426807
2024-04-08    6.301772
2024-04-15    6.226751
2024-04-22    6.201744
2024-04-29    6.201744
2024-05-06    6.251758
2024-05-13    6.251758
2024-05-20    6.251758
2024-05-27    6.176737
2024-06-03    6.001688
2024-06-10    5.901660
2024-06-17    5.901660
2024-06-24    5.726610
2024-07-01    5.651589
2024-07-08    5.551561
2024-07-15    5.451533
2024-07-22    5.401519
2024-07-29    5.401519
2024-08-05    5.401519
2024-08-12    5.401519
2024-08-19    5.401519
2024-08-26    5.326498
2024-09-02    5.251477
2024-09-09    5.351505
2024-09-16    5.451533
2024-09-23    5.401519
2024-09-30    5.351505
2024-10-07    5.351505
2024-10-14    5.351505
2024-10-21    5.401519


In [5]:
# Convert both splits into the frame layout Prophet consumes (the next output
# shows the resulting `ds` / `y` columns).
# NOTE: this rebinds `train` and `test`, so the cell is not idempotent --
# re-running it feeds already-preprocessed frames back into
# preprocess_data_prophet. Re-run the split cell first.
train = preprocess_data_prophet(train)
test = preprocess_data_prophet(test)

In [6]:
test

Unnamed: 0,ds,y
0,2024-01-01,5.926667
1,2024-01-08,6.376793
2,2024-01-15,6.776906
3,2024-01-22,6.851927
4,2024-01-29,6.626864
5,2024-02-05,6.476821
6,2024-02-12,6.451814
7,2024-02-19,6.301772
8,2024-02-26,6.176737
9,2024-03-04,6.15173


# Prophet horizonte de 12 meses

In [7]:
# Baseline experiment: fit Prophet once on `train`, forecast every date in
# `test` in a single shot, and store metrics + predictions in a timestamped
# results folder.
date = pd.Timestamp.now().strftime('%Y-%m-%d_%H-%M-%S')
dir_checkpoint = f"""results/Prophet_Base_semanal_un_año_{date}/"""
# Nominal horizon in months. NOTE: not used below -- the forecast simply covers
# whatever dates `test` contains.
horizonte = 12

model = Prophet()
model.add_country_holidays(country_name='US')
model.fit(train)

# First / last test dates; kept as notebook variables, not needed by the forecast.
start_date = test.iloc[0]["ds"]
end_date = test.iloc[-1]["ds"]

future = pd.DataFrame({'ds': test["ds"]})
forecast = model.predict(future)
pred = forecast[["ds","yhat"]]
# Observed and predicted values are paired by position.
metric = errorMetrics(test["y"].to_numpy(), pred["yhat"].to_numpy())

# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(dir_checkpoint, exist_ok=True)

logDict = {"modelo": "base semanal 12 meses"}
with open(os.path.join(dir_checkpoint, 'log.json'), 'w') as f:
    json.dump(logDict, f, indent=4)
pd.DataFrame.from_dict(metric).to_csv(os.path.join(dir_checkpoint, 'metricas.csv'), index=False)

# Pair real/pred by position, exactly like the metrics above, instead of
# relying on `test` and the forecast frame sharing the same index labels.
result = pd.DataFrame(
    {'real': test["y"].to_numpy(), 'pred': pred["yhat"].to_numpy()},
    index=test.index,
)
result.to_csv(os.path.join(dir_checkpoint, 'predicciones.csv'), index=True)

17:09:03 - cmdstanpy - INFO - Chain [1] start processing
17:09:04 - cmdstanpy - INFO - Chain [1] done processing


# Prophet: one-step-ahead forecast, refitting the model at every step

In [15]:
# Walk-forward experiment: for every test date, refit Prophet on all data
# observed so far and forecast that single date. Metrics + predictions are
# stored in a timestamped results folder.
date = pd.Timestamp.now().strftime('%Y-%m-%d_%H-%M-%S')
dir_checkpoint = f"""results/Prophet_oneStepReFitting_{date}/"""
# NOTE: not used below -- every forecast here is exactly one step ahead.
horizonte = 12
traincopy = train.copy()

pred = []
# Iterate by position rather than by index label, so the "previous row" lookup
# stays correct even if `test` does not carry a default 0..n-1 index.
for step, ds in enumerate(test["ds"]):
    if step > 0:
        # Reveal the previous test observation before refitting. Slicing
        # (rather than test.iloc[step - 1]) keeps it a one-row DataFrame.
        traincopy = pd.concat([traincopy, test.iloc[step - 1:step]], ignore_index=True)

    # A Prophet object can only be fit once, so build a fresh model per step.
    model = Prophet()
    model.add_country_holidays(country_name='US')
    model.fit(traincopy)

    future = pd.DataFrame({'ds': [ds]})
    forecast = model.predict(future)
    pred.append(forecast[["ds","yhat"]])

# Every piece is a one-row frame indexed 0; ignore_index renumbers them 0..n-1.
pred = pd.concat(pred, ignore_index=True)
# Observed and predicted values are paired by position.
metric = errorMetrics(test["y"].to_numpy(), pred["yhat"].to_numpy())

# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(dir_checkpoint, exist_ok=True)

logDict = {"modelo": "base refitting"}
with open(os.path.join(dir_checkpoint, 'log.json'), 'w') as f:
    json.dump(logDict, f, indent=4)
pd.DataFrame.from_dict(metric).to_csv(os.path.join(dir_checkpoint, 'metricas.csv'), index=False)

# Pair real/pred by position, exactly like the metrics above, instead of
# relying on `test` and `pred` sharing the same index labels.
result = pd.DataFrame(
    {'real': test["y"].to_numpy(), 'pred': pred["yhat"].to_numpy()},
    index=test.index,
)
result.to_csv(os.path.join(dir_checkpoint, 'predicciones.csv'), index=True)

17:15:19 - cmdstanpy - INFO - Chain [1] start processing
17:15:19 - cmdstanpy - INFO - Chain [1] done processing
17:15:19 - cmdstanpy - INFO - Chain [1] start processing
17:15:19 - cmdstanpy - INFO - Chain [1] done processing
17:15:19 - cmdstanpy - INFO - Chain [1] start processing
17:15:19 - cmdstanpy - INFO - Chain [1] done processing
17:15:19 - cmdstanpy - INFO - Chain [1] start processing
17:15:20 - cmdstanpy - INFO - Chain [1] done processing
17:15:20 - cmdstanpy - INFO - Chain [1] start processing
17:15:20 - cmdstanpy - INFO - Chain [1] done processing
17:15:20 - cmdstanpy - INFO - Chain [1] start processing
17:15:20 - cmdstanpy - INFO - Chain [1] done processing
17:15:20 - cmdstanpy - INFO - Chain [1] start processing
17:15:20 - cmdstanpy - INFO - Chain [1] done processing
17:15:21 - cmdstanpy - INFO - Chain [1] start processing
17:15:21 - cmdstanpy - INFO - Chain [1] done processing
17:15:21 - cmdstanpy - INFO - Chain [1] start processing
17:15:21 - cmdstanpy - INFO - Chain [1]

In [14]:
pred

Unnamed: 0,ds,yhat
0,2024-01-01,6.553125
0,2024-01-08,6.321482
0,2024-01-15,6.478093
0,2024-01-22,6.617276
0,2024-01-29,6.672281
0,2024-02-05,6.677592
0,2024-02-12,6.619924
0,2024-02-19,6.650578
0,2024-02-26,6.738618
0,2024-03-04,6.80086


In [13]:
test

Unnamed: 0,ds,y
0,2024-01-01,5.926667
1,2024-01-08,6.376793
2,2024-01-15,6.776906
3,2024-01-22,6.851927
4,2024-01-29,6.626864
5,2024-02-05,6.476821
6,2024-02-12,6.451814
7,2024-02-19,6.301772
8,2024-02-26,6.176737
9,2024-03-04,6.15173
