In [15]:
# 📂 Imports and Setup
import sys
import os

# Add parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

# 🛠 Project imports
import src.config as config
from src.citi_interface import (
    get_feature_store,
    load_model_from_local
)

from datetime import datetime, timedelta
import pandas as pd
import pytz

# ==============================
# Hopsworks connection
# ==============================

# Handle to the feature store; every read below goes through it.
feature_store = get_feature_store()

# ==============================
# Fetch Citi Bike data (January to March 2025)
# ==============================

# Fixed, timezone-aware (UTC) bounds of the reporting window.
# Both ends are inclusive in the filter applied further down.
fetch_data_from = pd.Timestamp("2025-01-01 00:00:00", tz="Etc/UTC")
fetch_data_to = pd.Timestamp("2025-03-31 23:59:59", tz="Etc/UTC")

print(f"üìÖ Fetching Citi Bike data from {fetch_data_from} to {fetch_data_to}...")

feature_view = feature_store.get_feature_view(
    name=config.FEATURE_VIEW_NAME,
    version=config.FEATURE_VIEW_VERSION,
)

# Ask for one extra day on each side of the window; the exact bounds are
# enforced by the filter below.
_one_day = timedelta(days=1)
ts_data = feature_view.get_batch_data(
    start_time=fetch_data_from - _one_day,
    end_time=fetch_data_to + _one_day,
)

# Keep only rows whose hour_ts lies inside [fetch_data_from, fetch_data_to],
# then order them by station and hour and renumber the index from 0.
_in_window = ts_data["hour_ts"].between(fetch_data_from, fetch_data_to)
ts_data = (
    ts_data.loc[_in_window]
    .sort_values(by=["start_station_id", "hour_ts"])
    .reset_index(drop=True)
)

print(f"‚úÖ Timeseries data shape after filtering: {ts_data.shape}")

# Nothing to predict on: stop here with a success exit code.
if ts_data.empty:
    print("‚ö†Ô∏è No Citi Bike data available in Feature View for the selected period. Exiting.")
    sys.exit(0)

# ==============================
# Load trained model
# ==============================

model = load_model_from_local()

# ==============================
# Prepare features for prediction
# ==============================

# Columns that are never passed to the model.
non_feature_cols = ["hour_ts", "start_station_name", "time_of_day"]

# Column layout handed to the model, in this exact order:
# 9 calendar/rolling columns + 678 lag columns + "target_ride_count" = 688.
# NOTE(review): "target_ride_count" is listed as a model *input* here. If the
# model was not trained with that column as a feature, this is target leakage
# (or, when the column is absent, a constant-0 feature) -- confirm against the
# training code before trusting the predictions.
trained_features = [
    "hour", "hour_sin", "hour_cos", "day_of_week", "is_holiday_or_weekend",
    "month", "is_peak_hour", "day_of_year", "ride_count_roll3"
] + [f"ride_count_lag_{i}" for i in range(1, 679)] + ["target_ride_count"]

# Start from the fetched data minus the non-feature columns
# (errors="ignore": it is fine if some of them are not present).
X = ts_data.drop(columns=non_feature_cols, errors="ignore")

# Report every expected column that the fetched data does not provide.
missing_cols = [col for col in trained_features if col not in X.columns]
for col in missing_cols:
    print(f"‚ö†Ô∏è Missing column: {col}. Filling with 0.")

# One reindex both selects/orders the expected columns and creates the missing
# ones filled with 0. This replaces inserting the missing columns one at a
# time, which is slow on a frame this wide and fragments it.
X = X.reindex(columns=trained_features, fill_value=0)

# Confirm the final feature matrix shape.
print(f"‚úÖ Final feature shape for prediction: {X.shape}")

# ==============================
# Predict ride counts
# ==============================

# The column assignment further down requires model.predict to return exactly
# one value per row of X (and therefore per row of ts_data).
predictions = model.predict(X)

# ==============================
# Build prediction results
# ==============================

# All three columns are attached by position, so the result no longer depends
# on ts_data carrying a default 0..n-1 index.
results = pd.DataFrame()
results["start_station_id"] = ts_data["start_station_id"].values

# Convert the hourly timestamps to New York local time. tz_convert needs
# timezone-aware input -- NOTE(review): hour_ts is presumably UTC, confirm.
# "America/New_York" observes daylight saving time, so values are EDT rather
# than EST while DST is in effect; the "_est" suffix is only a label.
# reset_index(drop=True) makes the assignment positional: an index-aligned
# assignment would silently produce NaT for any label missing from results.
results["hour_ts_est"] = (
    pd.to_datetime(ts_data["hour_ts"])
    .dt.tz_convert("America/New_York")
    .reset_index(drop=True)
)

# Predicted ride counts, in the same row order as ts_data.
results["predicted_ride_count"] = predictions

print(f"‚úÖ Predictions completed. Shape: {results.shape}")


# ==============================
# Top 5 rows by predicted demand
# ==============================

# Each row of results is one (station, hour) pair. Rank all of them by
# predicted ride count, highest first, and keep the first five.
_ranked = results.sort_values(by="predicted_ride_count", ascending=False)
top_5_locations = _ranked.head(5)

_display_cols = ["start_station_id", "hour_ts_est", "predicted_ride_count"]
print("\nüèÜ Top 5 Stations by Predicted Demand:")
print(top_5_locations[_display_cols])


2025-05-11 03:23:48,745 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-11 03:23:48,754 INFO: Initializing external client
2025-05-11 03:23:48,755 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-11 03:23:49,349 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1231002
üìÖ Fetching Citi Bike data from 2025-01-01 00:00:00+00:00 to 2025-03-31 23:59:59+00:00...
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (10.25s) 
‚úÖ Timeseries data shape after filtering: (10071, 686)
‚úÖ Loaded model from C:\Users\MD\Desktop\citi\models\lgbmhyper.pkl
‚ö†Ô∏è Missing column: ride_count_lag_673. Filling with 0.
‚ö†Ô∏è Missing column: ride_count_lag_674. Filling with 0.
‚ö†Ô∏è Missing column: ride_count_lag_675. Filling with 0.
‚ö†Ô∏è Missing column: ride_count_lag_676. Filling with 0.
‚ö†Ô∏è Missing column: ride_count_lag_677. Filling with 0.
‚ö†Ô∏è Missing column: ride_coun