In [40]:
import subprocess
from datetime import datetime
from typing import Optional

import pandas as pd

from feast import FeatureStore
from feast.data_source import PushMode

In [41]:
# Shared FeatureStore handle used by every later cell; the repo path is
# relative to the directory the notebook is run from.
store = FeatureStore(repo_path="flight_feature_repo/feature_repo")

In [56]:
def fetch_historical_features_entity_df(
    store: FeatureStore, searchdate: Optional[datetime] = None
):
    """Print the head of the historical ``flight_stats`` features for three flights.

    Builds an entity dataframe with one row per sample flight ID, each stamped
    with ``searchdate``, passes it to ``store.get_historical_features`` together
    with the feature references listed below, converts the result with
    ``to_df()`` and prints its ``head()``.

    Args:
        store: Feature store to query.
        searchdate: Event timestamp applied to every entity row. When omitted
            (or ``None``), ``datetime.now()`` is used, so callers may invoke the
            function with only ``store``.

    Returns:
        None. The resulting dataframe is only printed.
    """
    if searchdate is None:
        searchdate = datetime.now()

    flight_ids = ["WN_3609", "WN_3610", "WN_3611"]
    entity_df = pd.DataFrame.from_dict(
        {
            "flight_ID": flight_ids,
            # One timestamp per entity row; derived from the ID list so the two
            # columns cannot drift out of sync.
            "event_timestamp": [searchdate] * len(flight_ids),
        }
    )

    training_df = store.get_historical_features(
        entity_df=entity_df,
        features=[
            "flight_stats:Distance",
            "flight_stats:CRSElapsedTime",
            "flight_stats:DayOfWeek",
            "flight_stats:Month",
            "flight_stats:WeatherDelay",
            "flight_stats:NASDelay",
        ],
    ).to_df()
    print(training_df.head())

In [46]:
def fetch_online_features(store, source: str = ""):
    """Fetch online features for three sample flights and print them by name.

    ``source`` selects what is requested from ``store``:

    * ``"feature_service"`` -> feature service ``flight_prediction_v1``
    * ``"advanced_feature_service"`` -> feature service ``flight_prediction_v2``
    * ``"push"`` -> feature service ``flight_prediction_v3``
    * anything else (including the default ``""``) -> an explicit two-feature list

    The response is converted with ``to_dict()`` and printed one
    ``name  :  values`` line per key, in sorted key order. Nothing is returned.
    """
    # (source value, feature service name) pairs, checked in order.
    service_by_source = (
        ("feature_service", "flight_prediction_v1"),
        ("advanced_feature_service", "flight_prediction_v2"),
        ("push", "flight_prediction_v3"),
    )
    service_name = next(
        (service for key, service in service_by_source if source == key),
        None,
    )

    if service_name is None:
        # No matching service: request individual feature references instead.
        features_to_fetch = [
            "flight_stats:Distance",
            "flight_stats:WeatherDelay",
        ]
    else:
        features_to_fetch = store.get_feature_service(service_name)

    entity_rows = [
        {"flight_ID": flight_id}
        for flight_id in ("WN_3609", "WN_3610", "WN_3611")
    ]

    response = store.get_online_features(
        features=features_to_fetch,
        entity_rows=entity_rows,
    )
    returned_features = response.to_dict()

    for feature_name, feature_values in sorted(returned_features.items()):
        print(feature_name, " : ", feature_values)

In [47]:
# Pass the as-of timestamp explicitly; 2024-09-01 is the event_timestamp shown
# in this cell's recorded output.
fetch_historical_features_entity_df(store, datetime(2024, 9, 1))

+----------------------------+------------+-------------+
| Merge columns              | left dtype | right dtype |
+----------------------------+------------+-------------+
| ('flight_ID', 'flight_ID') | object     | string      |
+----------------------------+------------+-------------+
Cast dtypes explicitly to avoid unexpected results.


  flight_ID           event_timestamp  Distance  CRSElapsedTime  DayOfWeek  \
0   WN_3609 2024-09-01 00:00:00+00:00     619.0           105.0          7   
1   WN_3610 2024-09-01 00:00:00+00:00    1670.0           250.0          7   

   Month  WeatherDelay  NASDelay  
0      9           NaN       NaN  
1      9           NaN       NaN  


In [48]:
# `datetime` is already imported in the notebook's first cell, so it is not
# re-imported here.
# Materialization window: 2024-09-01 through 2024-09-30 (naive datetimes).
start_date = datetime(2024, 9, 1)
end_date = datetime(2024, 9, 30)

store.materialize(start_date=start_date, end_date=end_date)

Materializing [1m[32m2[0m feature views from [1m[32m2024-09-01 00:00:00+00:00[0m to [1m[32m2024-09-30 00:00:00+00:00[0m into the [1m[32msqlite[0m online store.

[1m[32mflight_stats[0m:


100%|███████████████████████████████████████████████████████| 21956/21956 [00:09<00:00, 2204.61it/s]


[1m[32mflight_stats_fresh[0m:


100%|███████████████████████████████████████████████████████| 21956/21956 [00:07<00:00, 2979.28it/s]


In [49]:
# No `source` argument: fetch_online_features falls back to its explicit
# feature list (flight_stats:Distance and flight_stats:WeatherDelay).
fetch_online_features(store)

Distance  :  [392.0, 787.0, 879.0]
WeatherDelay  :  [None, 0.0, None]
flight_ID  :  ['WN_3609', 'WN_3610', 'WN_3611']


In [50]:
# source="feature_service" makes fetch_online_features fetch through the
# "flight_prediction_v1" feature service.
print("\n--- Online features retrieved through feature service v1 ---")
fetch_online_features(store, source="feature_service")


--- Online features retrieved through feature service v1 ---
CRSElapsedTime  :  [80.0, 145.0, 150.0]
DayOfWeek  :  [7, 7, 6]
Distance  :  [392.0, 787.0, 879.0]
Month  :  [9, 9, 9]
flight_ID  :  ['WN_3609', 'WN_3610', 'WN_3611']


In [51]:
# source="advanced_feature_service" makes fetch_online_features fetch through
# the "flight_prediction_v2" feature service.
print("\n--- Online features retrieved through feature service v2 ---")
fetch_online_features(store, source="advanced_feature_service")


--- Online features retrieved through feature service v2 ---
ArrDelay  :  [-19.0, 53.0, -6.0]
CRSElapsedTime  :  [80.0, 145.0, 150.0]
CarrierDelay  :  [None, 45.0, None]
DayOfWeek  :  [7, 7, 6]
DepDelay  :  [-6.0, 45.0, 7.0]
Dest  :  ['ICT', 'MCO', 'PHX']
Distance  :  [392.0, 787.0, 879.0]
LateAircraftDelay  :  [None, 0.0, None]
Month  :  [9, 9, 9]
NASDelay  :  [None, 8.0, None]
Origin  :  ['STL', 'BWI', 'DAL']
Quarter  :  [3, 3, 3]
SecurityDelay  :  [None, 0.0, None]
WeatherDelay  :  [None, 0.0, None]
flight_ID  :  ['WN_3609', 'WN_3610', 'WN_3611']


In [52]:
# source="push" makes fetch_online_features fetch through the
# "flight_prediction_v3" feature service.
print("\n--- Online features retrieved using feature service v3 (with push source) ---")
fetch_online_features(store, source="push")


--- Online features retrieved using feature service v3 (with push source) ---
CRSElapsedTime  :  [80.0, 145.0, 150.0]
DayOfWeek  :  [7, 7, 6]
DepDelay  :  [-6.0, 45.0, 7.0]
Dest  :  ['ICT', 'MCO', 'PHX']
Distance  :  [392.0, 787.0, 879.0]
Month  :  [9, 9, 9]
NASDelay  :  [None, 8.0, None]
Origin  :  ['STL', 'BWI', 'DAL']
Quarter  :  [3, 3, 3]
WeatherDelay  :  [None, 0.0, None]
flight_ID  :  ['WN_3609', 'WN_3610', 'WN_3611']


In [53]:
print("\n--- Simulate a stream event ingestion ---")

# Single-row frame for flight WN_3609, stamped with the current local time.
# Column order is kept as-is because it determines the printed layout.
event_df = pd.DataFrame(
    {
        # Identity and timestamp
        "flight_ID": ["WN_3609"],
        "FlightDate": [datetime.now()],
        # Route and schedule
        "Origin": ["ABQ"],
        "Dest": ["AUS"],
        "Distance": [619.0],
        "CRSElapsedTime": [95.0],
        "DayOfWeek": [7],
        "Month": [9],
        "Quarter": [3],
        # Delay fields
        "DepDelay": [15.0],
        "WeatherDelay": [10.0],
        "NASDelay": [0.0],
        "SecurityDelay": [0.0],
        "LateAircraftDelay": [0.0],
        "ArrDelay": [25.0],
        "CarrierDelay": [0.0],
    }
)
print(event_df)

# Send the row to the "flight_stats_push_source" push source, targeting both
# the online and offline stores.
store.push("flight_stats_push_source", event_df, to=PushMode.ONLINE_AND_OFFLINE)


--- Simulate a stream event ingestion ---
  flight_ID                 FlightDate Origin Dest  Distance  CRSElapsedTime  \
0   WN_3609 2025-01-20 19:03:20.453291    ABQ  AUS     619.0            95.0   

   DayOfWeek  Month  Quarter  DepDelay  WeatherDelay  NASDelay  SecurityDelay  \
0          7      9        3      15.0          10.0       0.0            0.0   

   LateAircraftDelay  ArrDelay  CarrierDelay  
0                0.0      25.0           0.0  


In [54]:
# Same "push" lookup as before the stream event, repeated so the values for
# WN_3609 can be compared against the earlier output.
print("\n--- Online features again with updated values from stream push ---")
fetch_online_features(store, source="push")


--- Online features again with updated values from stream push ---
CRSElapsedTime  :  [95.0, 145.0, 150.0]
DayOfWeek  :  [7, 7, 6]
DepDelay  :  [15.0, 45.0, 7.0]
Dest  :  ['AUS', 'MCO', 'PHX']
Distance  :  [619.0, 787.0, 879.0]
Month  :  [9, 9, 9]
NASDelay  :  [0.0, 8.0, None]
Origin  :  ['ABQ', 'BWI', 'DAL']
Quarter  :  [3, 3, 3]
WeatherDelay  :  [10.0, 0.0, None]
flight_ID  :  ['WN_3609', 'WN_3610', 'WN_3611']


In [36]:
# Load the flights parquet file from the feature repo's data directory so its
# schema can be inspected directly with pandas.
flight_pd = pd.read_parquet("flight_feature_repo/feature_repo/data/flights.parquet")

In [37]:
 # List the column names of the flights parquet file.
 flight_pd.columns.tolist()

['flight_ID',
 'FlightDate',
 'Origin',
 'Dest',
 'Distance',
 'CRSElapsedTime',
 'DayOfWeek',
 'Month',
 'Quarter',
 'DepDelay',
 'CarrierDelay',
 'WeatherDelay',
 'NASDelay',
 'SecurityDelay',
 'LateAircraftDelay',
 'ArrDelay']

In [59]:
# Historical lookup as of the current time, run after the stream push above.
fetch_historical_features_entity_df(store, datetime.now())

+----------------------------+------------+-------------+
| Merge columns              | left dtype | right dtype |
+----------------------------+------------+-------------+
| ('flight_ID', 'flight_ID') | object     | string      |
+----------------------------+------------+-------------+
Cast dtypes explicitly to avoid unexpected results.


  flight_ID                  event_timestamp  Distance  CRSElapsedTime  \
0   WN_3609 2025-01-20 19:07:04.080467+00:00     619.0            95.0   

   DayOfWeek  Month  WeatherDelay  NASDelay  
0          7      9          10.0       0.0  
