In [1]:
# Regularly fetch a new batch of features, compute predictions, and save them to the feature store.
# 

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import sys
import os

# Put the parent of the current working directory on the import path so the
# `src` package can be imported from this notebook.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(parent_dir)
import src.config as config

In [4]:
from datetime import datetime, timedelta

import pandas as pd

from src.data_utils import transform_ts_data_info_features
from src.inference import get_feature_store

# Reference "now" as a timezone-aware UTC timestamp; the cell that stamps the
# prediction hour reuses this value.
current_date = pd.Timestamp.now(tz="Etc/UTC")
feature_store = get_feature_store()

# History window to read: from 29 days ago up to one hour ago.
fetch_data_to = current_date - timedelta(hours=1)
fetch_data_from = current_date - timedelta(days=29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")

feature_view = feature_store.get_feature_view(
    name=config.FEATURE_VIEW_NAME, version=config.FEATURE_VIEW_VERSION
)

# Query one extra day on each side, then trim to the exact window below.
ts_data = feature_view.get_batch_data(
    start_time=fetch_data_from - timedelta(days=1),
    end_time=fetch_data_to + timedelta(days=1),
)

in_window = ts_data["pickup_hour"].between(fetch_data_from, fetch_data_to)
ts_data = (
    ts_data.loc[in_window]
    # The sorted, re-indexed frame must be assigned back: sort_values and
    # reset_index return new frames, so calling them without assignment
    # leaves ts_data unsorted.
    .sort_values(["pickup_location_id", "pickup_hour"])
    .reset_index(drop=True)
)
# Drop the timezone so pickup_hour becomes naive before feature building.
ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(None)

# window_size = 24 * 28 rows per window (28 days if rows are hourly -- TODO confirm
# against transform_ts_data_info_features); step_size is passed through as-is.
features = transform_ts_data_info_features(ts_data, window_size=24 * 28, step_size=23)

2025-03-03 02:16:11,999 INFO: Initializing external client
2025-03-03 02:16:11,999 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-03 02:16:12,932 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215651
Fetching data from 2025-02-02 07:16:11.999039+00:00 to 2025-03-03 06:16:11.999039+00:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (4.83s) from Hopsworks, using Hopsworks Feature Query Service.   Reading data from Hopsworks, using Hopsworks Feature Query Service...   Reading data from Hopsworks, using Hopsworks Feature Query Service.   


In [5]:
from src.inference import load_model_from_registry

# Load the model through the project helper. Per the recorded output, this
# opens a fresh Hopsworks session and downloads a single model artifact.
model = load_model_from_registry()

2025-03-03 02:16:27,484 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-03 02:16:27,486 INFO: Initializing external client
2025-03-03 02:16:27,486 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-03 02:16:28,262 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215651
Downloading model artifact (0 dirs, 1 files)... DONE

In [6]:
from src.inference import get_model_predictions
# Run the loaded model on the feature frame built earlier in the notebook.
# The displayed result further down shows pickup_location_id and
# predicted_demand columns.
predictions = get_model_predictions(model, features)

In [7]:
# Tag every prediction with the hour it applies to: the notebook's reference
# timestamp rounded up to the hour boundary.
predictions = predictions.assign(pickup_hour=current_date.ceil("h"))
predictions

Unnamed: 0,pickup_location_id,predicted_demand,pickup_hour
0,263,93.0,2025-03-03 08:00:00+00:00
1,210,0.0,2025-03-03 08:00:00+00:00
2,216,1.0,2025-03-03 08:00:00+00:00
3,147,0.0,2025-03-03 08:00:00+00:00
4,217,1.0,2025-03-03 08:00:00+00:00
...,...,...,...
246,67,0.0,2025-03-03 08:00:00+00:00
247,62,1.0,2025-03-03 08:00:00+00:00
248,86,0.0,2025-03-03 08:00:00+00:00
249,142,163.0,2025-03-03 08:00:00+00:00


In [8]:
from src.inference import get_feature_store

# Obtain a feature store handle, then fetch the prediction feature group,
# creating it if it does not exist yet. Rows are keyed by location and hour,
# with pickup_hour also serving as the event time.
prediction_store = get_feature_store()
feature_group = prediction_store.get_or_create_feature_group(
    name=config.FEATURE_GROUP_MODEL_PREDICTION,
    version=1,
    description="Predictions from LGBM Model",
    primary_key=["pickup_location_id", "pickup_hour"],
    event_time="pickup_hour",
)

2025-03-03 02:16:43,778 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-03 02:16:43,780 INFO: Initializing external client
2025-03-03 02:16:43,781 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-03 02:16:44,599 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215651


In [9]:
# Upload the predictions without waiting for the materialization job to finish.
insert_options = {"wait_for_job": False}
feature_group.insert(predictions, write_options=insert_options)

Uploading Dataframe: 100.00% |██████████| Rows 251/251 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: taxi_hourly_model_prediction_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1215651/jobs/named/taxi_hourly_model_prediction_1_offline_fg_materialization/executions


(Job('taxi_hourly_model_prediction_1_offline_fg_materialization', 'SPARK'),
 None)