In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os

# Make the project root (the parent of the notebook's working directory)
# importable so that the `src` package resolves.
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)
import src.config as config

In [3]:
from src.inference import get_feature_store

In [4]:
from datetime import datetime, timedelta
import pandas as pd  

# Anchor everything on the current time as a tz-aware UTC timestamp.
current_date = pd.Timestamp.now(tz="Etc/UTC")
feature_store = get_feature_store()

# Window of time-series data to keep: from 29 days ago up to one hour ago.
fetch_data_to = current_date - timedelta(hours=1)
fetch_data_from = current_date - timedelta(days=29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")

feature_view = feature_store.get_feature_view(
    name=config.FEATURE_VIEW_NAME,
    version=config.FEATURE_VIEW_VERSION,
)

# Query with one extra day of padding on each side, then trim the result
# back to the exact [fetch_data_from, fetch_data_to] window.
one_day = timedelta(days=1)
ts_data = feature_view.get_batch_data(
    start_time=fetch_data_from - one_day,
    end_time=fetch_data_to + one_day,
)
in_window = ts_data.pickup_hour.between(fetch_data_from, fetch_data_to)
ts_data = ts_data[in_window]

2025-03-04 14:42:15,025 INFO: Initializing external client
2025-03-04 14:42:15,026 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-04 14:42:15,663 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215651
Fetching data from 2025-02-03 19:42:15.025011+00:00 to 2025-03-04 18:42:15.025011+00:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (4.90s) from Hopsworks, using Hopsworks Feature Query Service.   Reading data from Hopsworks, using Hopsworks Feature Query Service...   Reading data from Hopsworks, using Hopsworks Feature Query Service.   


In [5]:
# Preview the series ordered by location, then by hour, with a fresh 0..n-1 index.
ts_data.sort_values(by=["pickup_location_id", "pickup_hour"], ignore_index=True)

Unnamed: 0,pickup_hour,pickup_location_id,rides
0,2025-02-03 20:00:00+00:00,2,0
1,2025-02-03 21:00:00+00:00,2,0
2,2025-02-03 22:00:00+00:00,2,0
3,2025-02-03 23:00:00+00:00,2,0
4,2025-02-04 00:00:00+00:00,2,0
...,...,...,...
174440,2025-03-04 14:00:00+00:00,263,146
174441,2025-03-04 15:00:00+00:00,263,126
174442,2025-03-04 16:00:00+00:00,263,113
174443,2025-03-04 17:00:00+00:00,263,117


In [6]:
# Inspect column dtypes and non-null counts of the fetched data.
ts_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 174445 entries, 0 to 180719
Data columns (total 3 columns):
 #   Column              Non-Null Count   Dtype                  
---  ------              --------------   -----                  
 0   pickup_hour         174445 non-null  datetime64[us, Etc/UTC]
 1   pickup_location_id  174445 non-null  int32                  
 2   rides               174445 non-null  int32                  
dtypes: datetime64[us, Etc/UTC](1), int32(2)
memory usage: 4.0 MB


In [7]:
# Drop the timezone from pickup_hour so the column becomes tz-naive,
# keeping the same wall-clock values.
naive_pickup_hour = ts_data["pickup_hour"].dt.tz_localize(None)
ts_data["pickup_hour"] = naive_pickup_hour

In [8]:
# Re-check dtypes after the timezone was removed from pickup_hour.
ts_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 174445 entries, 0 to 180719
Data columns (total 3 columns):
 #   Column              Non-Null Count   Dtype         
---  ------              --------------   -----         
 0   pickup_hour         174445 non-null  datetime64[us]
 1   pickup_location_id  174445 non-null  int32         
 2   rides               174445 non-null  int32         
dtypes: datetime64[us](1), int32(2)
memory usage: 4.0 MB


In [18]:
from src.data_utils import transform_ts_data_info_features
# Turn the time series into a tabular feature set. window_size is 672
# (28 days x 24 hours); step_size=23 is passed through unchanged and is
# presumably the stride between consecutive windows -- confirm in src.data_utils.
# NOTE(review): this cell carries execution count 18, i.e. it was last run out
# of order; re-run the notebook top to bottom before trusting the outputs.
features = transform_ts_data_info_features(
    ts_data,
    window_size=28 * 24,
    step_size=23,
)

In [10]:
# Display the feature table produced by transform_ts_data_info_features.
features

Unnamed: 0,rides_t-672,rides_t-671,rides_t-670,rides_t-669,rides_t-668,rides_t-667,rides_t-666,rides_t-665,rides_t-664,rides_t-663,...,rides_t-8,rides_t-7,rides_t-6,rides_t-5,rides_t-4,rides_t-3,rides_t-2,rides_t-1,pickup_location_id,pickup_hour
0,32,48,92,76,8,155,62,60,80,6,...,39,53,2,16,131,16,100,57,263,2025-02-24 23:00:00
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,210,2025-02-27 05:00:00
2,3,0,0,0,0,2,0,2,0,1,...,1,0,0,1,3,1,0,2,216,2025-02-09 20:00:00
3,0,0,0,0,0,0,1,0,0,0,...,1,0,0,0,0,0,0,1,147,2025-02-13 02:00:00
4,3,2,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,217,2025-02-05 21:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
246,0,0,1,0,0,0,2,2,0,1,...,0,3,0,1,0,0,0,0,62,2025-02-05 17:00:00
247,0,0,0,0,0,4,0,1,0,0,...,0,3,0,0,0,0,0,0,86,2025-02-09 23:00:00
248,243,130,148,572,217,231,173,141,160,338,...,209,241,152,223,261,5,198,138,142,2025-02-08 08:00:00
249,6,5,22,5,23,2,6,1,14,28,...,10,10,4,3,11,4,1,2,74,2025-02-24 03:00:00


In [11]:
from src.inference import load_batch_of_features_from_store
# Refresh the reference timestamp (tz-aware UTC) and load the feature batch
# for it from the feature store.
# NOTE: this rebinds `features`, replacing the table built from ts_data earlier.
current_date = pd.Timestamp.now(tz="Etc/UTC")
features = load_batch_of_features_from_store(current_date)

2025-03-04 14:42:30,113 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-04 14:42:30,116 INFO: Initializing external client
2025-03-04 14:42:30,116 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-04 14:42:30,660 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215651
Fetching data from 2025-02-03 19:42:30.113573+00:00 to 2025-03-04 18:42:30.113573+00:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (4.44s) from Hopsworks, using Hopsworks Feature Query Service.   Reading data from Hopsworks, using Hopsworks Feature Query Service...   Reading data from Hopsworks, using Hopsworks Feature Query Service.   


In [12]:
# Show the timestamp that was used to load the feature batch.
current_date

Timestamp('2025-03-04 19:42:30.113573+0000', tz='Etc/UTC')

In [13]:
# Display the feature batch returned by load_batch_of_features_from_store.
features

Unnamed: 0,rides_t-672,rides_t-671,rides_t-670,rides_t-669,rides_t-668,rides_t-667,rides_t-666,rides_t-665,rides_t-664,rides_t-663,...,rides_t-8,rides_t-7,rides_t-6,rides_t-5,rides_t-4,rides_t-3,rides_t-2,rides_t-1,pickup_location_id,pickup_hour
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,2,2025-03-03 20:00:00
1,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,3,2025-03-03 20:00:00
2,3,3,6,2,3,6,2,0,1,1,...,1,2,2,3,0,1,1,0,4,2025-03-03 20:00:00
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,6,2025-03-03 20:00:00
4,1,2,0,0,0,0,1,0,0,1,...,8,3,4,1,3,4,1,5,7,2025-03-03 20:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
246,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,259,2025-03-03 20:00:00
247,0,1,0,0,0,0,0,0,0,0,...,4,3,2,5,1,6,1,1,260,2025-03-03 20:00:00
248,17,16,11,3,1,3,0,0,2,2,...,25,36,32,26,33,43,45,24,261,2025-03-03 20:00:00
249,34,22,4,6,1,3,1,1,3,19,...,79,71,82,97,87,82,103,65,262,2025-03-03 20:00:00


In [14]:
from src.inference import load_model_from_registry

# Load the model via the project's registry helper (src.inference).
model = load_model_from_registry()

2025-03-04 14:42:41,165 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-04 14:42:41,167 INFO: Initializing external client
2025-03-04 14:42:41,167 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-04 14:42:41,828 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215651
Downloading model artifact (0 dirs, 1 files)... DONE

In [15]:
from src.inference import get_model_predictions
# Run the loaded model on the feature batch to get the predictions table.
predictions = get_model_predictions(model, features)

In [16]:
# Display the predictions returned by get_model_predictions.
predictions

Unnamed: 0,pickup_location_id,predicted_demand
0,2,0.0
1,3,0.0
2,4,1.0
3,6,0.0
4,7,2.0
...,...,...
246,259,0.0
247,260,1.0
248,261,16.0
249,262,37.0


In [17]:
# IDs of the 10 pickup locations with the highest predicted demand, highest first.
top_10_by_demand = predictions.sort_values(by="predicted_demand", ascending=False).head(10)
top_10_by_demand["pickup_location_id"].values

array([161, 132, 138, 162, 237, 230, 163, 236, 142, 234], dtype=int32)