In [1]:
# Imports and import-path setup; the model list itself is built in the next cell
from os import getcwd, listdir
from os.path import isfile, join, abspath
import sys

import polars as pl

# Make the parent directory importable so the `shared` package import below resolves.
sys.path.append(abspath(join(getcwd(), '..')))

from shared.forecasters import DirectMultihorizonForecaster

In [2]:
MODELS_PATH = "./models"

# Collect the pickled model files, newest first. The file names appear to end
# in a YYYYMMDD stamp, so a reverse lexicographic sort puts the latest on top.
models = [
    entry
    for entry in listdir(MODELS_PATH)
    if entry.endswith(".pkl") and isfile(join(MODELS_PATH, entry))
]
models.sort(reverse=True)

# File name (not a full path) of the newest model, or None when none exist.
path_to_latest_model = models[0] if models else None

In [3]:
# Show which model file was selected.
path_to_latest_model

'demand_forecaster_20250717.pkl'

In [4]:
# open the saved model

import joblib

# Fail with a clear message instead of the opaque TypeError that
# join(MODELS_PATH, None) raises when no .pkl file was found.
if path_to_latest_model is None:
    raise FileNotFoundError(f"No .pkl model files found in {MODELS_PATH!r}")

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
forecaster: DirectMultihorizonForecaster = joblib.load(join(MODELS_PATH, path_to_latest_model))

In [5]:
# Load the three row-aligned training tables: model inputs, the per-row
# dates, and the targets (alignment is assumed by the cells that follow).
x_train, c_train, y_train = (
    pl.read_parquet(parquet_file)
    for parquet_file in (
        "../../data/favorita_dataset/output/train_input.parquet",
        "../../data/favorita_dataset/output/train_dates.parquet",
        "../../data/favorita_dataset/output/train_target.parquet",
    )
)

In [6]:
# Store product_group as a polars Categorical.
# NOTE(review): presumably the dtype the forecaster was trained with - confirm.
product_group_as_categorical = pl.col("product_group").cast(pl.Categorical)
x_train = x_train.with_columns(product_group_as_categorical)

In [7]:
from datetime import date, timedelta as td

# Reference day for the forecast; parsed directly so `today` is only ever a date.
today = date.fromisoformat('2016-08-15')

# Inclusive (start, end) window containing just the reference day.
date_interval = (today, today)

In [8]:
def split_by_date(x_train, c_train, date_interval):
    """Return the rows of ``x_train`` whose date falls inside ``date_interval``.

    The ``c_date`` column of ``c_train`` is attached to ``x_train`` by row
    position, used to filter, and dropped again, so the result has the same
    columns as ``x_train``.

    Args:
        x_train: polars DataFrame of model inputs.
        c_train: polars DataFrame with a ``c_date`` column, row-aligned with
            ``x_train``.
        date_interval: bounds unpacked into ``Expr.is_between`` (here a
            ``(start, end)`` pair).

    Returns:
        The filtered DataFrame without the helper ``c_date`` column.
    """
    row_dates = c_train.get_column("c_date")
    inside_interval = pl.col.c_date.is_between(*date_interval)
    dated_inputs = x_train.with_columns(row_dates)
    return dated_inputs.filter(inside_interval).drop("c_date")

In [9]:
# Model inputs restricted to the rows dated within date_interval.
x = split_by_date(x_train, c_train, date_interval)

In [10]:
# Forecast for the selected rows, then map every column through
# round(exp(v) - 1) and store it as Int32. The column names keep their
# "log_units_sold" suffix even though the values are no longer in log space.
log_forecast = forecaster.predict(x)
predictions_df = log_forecast.with_columns(
    (pl.all().exp() - 1).round().cast(pl.Int32)
)

In [15]:
# Inspect the per-horizon forecasts.
predictions_df

pred_h1_log_units_sold,pred_h2_log_units_sold,pred_h3_log_units_sold,pred_h4_log_units_sold,pred_h5_log_units_sold,pred_h6_log_units_sold,pred_h7_log_units_sold
i32,i32,i32,i32,i32,i32,i32
20,23,22,23,26,23,21
29,27,24,27,30,26,27
18,20,17,21,24,21,19
17,14,14,16,20,19,16
25,24,22,24,28,24,25
…,…,…,…,…,…,…
23,25,21,25,31,31,26
16,19,16,20,23,22,19
33,22,19,23,27,28,27
18,14,13,17,20,19,16


In [11]:
# Forecast over the full training inputs, converted with the same
# round(exp(v) - 1) -> Int32 mapping applied to the single-day forecast.
train_log_forecast = forecaster.predict(x_train)
f = train_log_forecast.with_columns(
    (pl.all().exp() - 1).round().cast(pl.Int32)
)

In [17]:
# Residuals on the training set: targets (converted with the same
# round(exp(v) - 1) -> Int32 mapping) minus forecasts, summarised per horizon.
# A positive value means the model forecast less than the target.
y_train_units = y_train.with_columns(
    (pl.all().exp() - 1).round().cast(pl.Int32)
)
(y_train_units - f).describe()

statistic,h1_log_units_sold,h2_log_units_sold,h3_log_units_sold,h4_log_units_sold,h5_log_units_sold,h6_log_units_sold,h7_log_units_sold
str,f64,f64,f64,f64,f64,f64,f64
"""count""",46039.0,46039.0,46039.0,46039.0,46039.0,46039.0,46039.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",0.56072,2.76963,4.530919,5.229892,5.673581,5.342188,3.984926
"""std""",6.297499,18.730769,20.657535,21.687886,22.353822,22.052324,20.209575
"""min""",-41.0,-100.0,-82.0,-80.0,-81.0,-72.0,-96.0
"""25%""",-2.0,-4.0,-4.0,-4.0,-5.0,-4.0,-4.0
"""50%""",0.0,1.0,1.0,1.0,1.0,1.0,1.0
"""75%""",2.0,7.0,8.0,9.0,10.0,9.0,8.0
"""max""",516.0,2617.0,2614.0,2620.0,2608.0,2629.0,2628.0


In [18]:
# Inspect the forecasts computed over the full training inputs.
f

pred_h1_log_units_sold,pred_h2_log_units_sold,pred_h3_log_units_sold,pred_h4_log_units_sold,pred_h5_log_units_sold,pred_h6_log_units_sold,pred_h7_log_units_sold
i32,i32,i32,i32,i32,i32,i32
31,25,33,33,26,25,24
18,39,45,33,30,33,31
33,37,28,25,29,26,25
31,26,25,28,25,27,27
25,23,26,24,26,26,25
…,…,…,…,…,…,…
22,21,26,21,21,22,17
16,24,21,20,22,18,21
22,20,19,20,19,21,22
21,19,20,18,21,23,22


In [19]:
# NOTE(review): the saved output of this cell shows f64 values, while the
# earlier display of predictions_df shows rounded i32 values - the stored
# outputs come from different kernel states. Restart and run all to confirm.
predictions_df

pred_h1_log_units_sold,pred_h2_log_units_sold,pred_h3_log_units_sold,pred_h4_log_units_sold,pred_h5_log_units_sold,pred_h6_log_units_sold,pred_h7_log_units_sold
f64,f64,f64,f64,f64,f64,f64
9.126813,7.200412,6.445803,6.444699,7.49126,7.779622,7.605497
9.305963,7.200412,6.445803,6.444699,7.49126,7.779622,7.580261
8.257113,7.200412,6.445803,6.444699,7.49126,7.779622,6.839527
9.176438,7.200412,6.445803,6.444699,7.49126,7.779622,7.307474
8.807521,7.200412,6.445803,6.444699,7.49126,7.779622,7.438648
…,…,…,…,…,…,…
7.946477,6.972132,6.380788,6.298425,7.49126,7.837717,6.727163
8.042556,6.972132,6.380788,6.298425,7.49126,7.837717,7.121872
8.51257,6.972132,6.380788,6.298425,7.49126,7.779622,6.906885
9.116579,6.972132,5.742553,5.651673,7.462812,7.779622,7.570501


In [None]:
# Calendar dates covered by the forecast, starting at `today`.
# NOTE(review): h=0 maps the first horizon column to `today` itself - confirm
# that pred_h1 is not meant to be `today + 1`.
prediction_range = [today + td(days=h) for h in range(forecaster.horizons)]

# Format the dates in Python. Casting the list literal with .cast(pl.String)
# targets the list itself rather than its elements, so dp_date would not be a
# list column that explode() can expand alongside dp_mean.
prediction_dates = [day.isoformat() for day in prediction_range]

# One list of dates and one list of forecasts per input row, exploded together
# into one output row per (product, store, date).
# NOTE(review): this overwrites predictions_df with a differently shaped
# frame, so the cell cannot be re-run without re-running the predict cell first.
predictions_df = x.select(
    pl.col.product_id.alias("dp_p_id"),
    pl.col.store_id.alias("dp_s_id"),
    pl.lit(prediction_dates).alias("dp_date"),
    pl.concat_list(predictions_df).alias("dp_mean")
).explode("dp_date", "dp_mean")

In [1]:
from requests import post

# Base URL that upload_json prepends to every endpoint (localhost, port 8000).
API_URI = "http://localhost:8000"

def upload_json(data, endpoint, timeout=60):
    """POST ``data`` as JSON to ``{API_URI}/{endpoint}`` and return the decoded reply.

    The request URL and the response status code are always printed.

    Args:
        data: JSON-serialisable payload sent as the request body.
        endpoint: path appended to ``API_URI`` (without a leading slash).
        timeout: seconds handed to ``requests`` as the request timeout;
            pass ``None`` to wait indefinitely.

    Returns:
        The parsed JSON body when the server answers 200. For any other
        status the response text is printed and an empty dict is returned.

    Raises:
        requests.exceptions.RequestException: on connection failures or when
            the timeout expires.
    """
    url = f"{API_URI}/{endpoint}"
    headers = {"Content-Type": "application/json"}
    # Without a timeout, requests blocks forever on an unresponsive server.
    post_response = post(url, json=data, headers=headers, timeout=timeout)
    print(url, post_response.status_code)
    if post_response.status_code != 200:
        print("Error:", post_response.text)
        return {}
    return post_response.json()

In [None]:
# Send every prediction row to the "demandpredictions" endpoint and keep the reply.
dp_ids = upload_json(predictions_df.to_dicts(), "demandpredictions")

http://localhost:8000/demandpredictions 200


{'demandpredictions': [1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  57,
  58,
  59,
  60,
  61,
  62,
  63,
  64,
  65,
  66,
  67,
  68,
  69,
  70,
  71,
  72,
  73,
  74,
  75,
  76,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  84,
  85,
  86,
  87,
  88,
  89,
  90,
  91,
  92,
  93,
  94,
  95,
  96,
  97,
  98,
  99,
  100,
  101,
  102,
  103,
  104,
  105,
  106,
  107,
  108,
  109,
  110,
  111,
  112,
  113,
  114,
  115,
  116,
  117,
  118,
  119,
  120,
  121,
  122,
  123,
  124,
  125,
  126,
  127,
  128,
  129,
  130,
  131,
  132,
  133,
  134,
  135,
  136,
  137,
  138,
  139,
  140,
  141,
  142,
  143,
  144,
  145,
  146,
  147,
  148,
  149,
  150,
  151,
  152,
  153,
  154,
  155,
  

In [2]:
import sys
from datetime import date, timedelta
from pathlib import Path

import click
import polars as pl

# Add the parent directory (services) to the path
# NOTE(review): current_file and services_dir are not used by the visible
# code; the insert below hard-codes ".." instead - confirm they can be removed.
current_file = Path("predict.py")
services_dir = current_file.parent.parent
sys.path.insert(0, "..")

from shared.s3config import get_s3_params

In [3]:
# Read the stored demand predictions for 2016-08-15 from S3.
# NOTE(review): presumably get_s3_params returns the base path and storage
# options for that date - verify against shared.s3config.
s3_path, s3_storage_options = get_s3_params("2016-08-15")
s3_demand_predictions_path = s3_path + "/demand_predictions.parquet"
df_demand_predictions = pl.read_parquet(
    s3_demand_predictions_path, storage_options=s3_storage_options
)

In [7]:
# Cast dp_date to String before the rows are turned into dicts for upload.
dp_date_as_text = pl.col("dp_date").cast(pl.String)
predictions_df = df_demand_predictions.with_columns(dp_date_as_text)

In [8]:
# Upload the predictions loaded from S3 to the "demandpredictions" endpoint.
dp_ids = upload_json(predictions_df.to_dicts(), "demandpredictions")

http://localhost:8000/demandpredictions 200
