In [1]:
from datetime import datetime
import pandas as pd

from feast import FeatureStore

# Entity dataframe handed to get_historical_features below.
# Note: see https://docs.feast.dev/getting-started/concepts/feature-retrieval for
# more details on how to retrieve for all entities in the offline store instead
entity_df = pd.DataFrame.from_dict(
    {
        # entity's join key -> entity values
        "driver_id": [1001, 1002, 1003],
        # "event_timestamp" (reserved key) -> timestamps
        "event_timestamp": [
            datetime(2021, 4, 12, 10, 59, 42),
            datetime(2021, 4, 12, 8, 12, 10),
            datetime(2021, 4, 12, 16, 40, 26),
        ],
        # (optional) label name -> label values. Feast does not process these
        "label_driver_reported_satisfaction": [1, 5, 3],
    }
)

store = FeatureStore(repo_path="./good_mallard")

# Fetch the three driver_hourly_stats features for the entity rows above and
# convert the result to a pandas DataFrame.
training_df = store.get_historical_features(
    entity_df=entity_df,
    features=[
        "driver_hourly_stats:conv_rate",
        "driver_hourly_stats:acc_rate",
        "driver_hourly_stats:avg_daily_trips",
    ],
).to_df()

print("----- Feature schema -----\n")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print(): doing so appended a stray "None" line.
training_df.info()

print()
print("----- Example features -----\n")
print(training_df.head())

Hive Session ID = e110350d-0ed7-4bd9-a4fc-0ad84b6e4561
                                                                                

----- Feature schema -----

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 6 columns):
 #   Column                              Non-Null Count  Dtype         
---  ------                              --------------  -----         
 0   driver_id                           3 non-null      int64         
 1   event_timestamp                     3 non-null      datetime64[ns]
 2   label_driver_reported_satisfaction  3 non-null      int64         
 3   conv_rate                           3 non-null      float32       
 4   acc_rate                            3 non-null      float32       
 5   avg_daily_trips                     3 non-null      int32         
dtypes: datetime64[ns](1), float32(2), int32(1), int64(2)
memory usage: 236.0 bytes
None

----- Example features -----

   driver_id     event_timestamp  label_driver_reported_satisfaction  \
0       1001 2021-04-12 10:59:42                                   1   
1       1002 2021-04-12 08:12:10

In [2]:
# Offline inferencing: request the same three features again, this time with
# every entity row stamped with the current UTC time.

entity_df["event_timestamp"] = pd.to_datetime("now", utc=True)

inference_feature_refs = [
    "driver_hourly_stats:conv_rate",
    "driver_hourly_stats:acc_rate",
    "driver_hourly_stats:avg_daily_trips",
]
retrieval_job = store.get_historical_features(
    entity_df=entity_df,
    features=inference_feature_refs,
)
# The name `training_df` is rebound here to the freshly retrieved frame.
training_df = retrieval_job.to_df()

print("\n----- Example features -----\n")
print(training_df.head())

                                                                                


----- Example features -----

   driver_id            event_timestamp  label_driver_reported_satisfaction  \
0       1001 2022-11-28 12:27:02.362484                                   1   
1       1002 2022-11-28 12:27:02.362484                                   5   
2       1003 2022-11-28 12:27:02.362484                                   3   

   conv_rate  acc_rate  avg_daily_trips  
0   0.193120  0.052746              802  
1   0.965542  0.295209              967  
2   0.551666  0.524331              388  


In [5]:
%%bash
# Ingest data into the online store, materializing up to the current UTC time.
# Stop immediately if the repo directory is missing so that feast is never run
# from the wrong working directory.
cd good_mallard || exit 1
CURRENT_TIME=$(date -u +"%Y-%m-%dT%H:%M:%S")
# Quote the expansion so the timestamp is always passed as a single argument.
feast materialize-incremental "$CURRENT_TIME"

Materializing [1m[32m1[0m feature views to [1m[32m2022-11-28 12:33:41+00:00[0m into the [1m[32msqlite[0m online store.

[1m[32mdriver_hourly_stats[0m from [1m[32m2022-11-21 12:33:43+00:00[0m to [1m[32m2022-11-28 12:33:41+00:00[0m:
Pulling latest features from spark offline store


Hive Session ID = 4c7306cc-f8ac-42cb-9c08-4706eb473955
100%|█████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 55.03it/s]


In [6]:
# Fetching feature vectors for inference from the online store
from feast import FeatureStore
from pprint import pprint

store = FeatureStore(repo_path="./good_mallard")

online_feature_refs = [
    "driver_hourly_stats:conv_rate",
    "driver_hourly_stats:acc_rate",
    "driver_hourly_stats:avg_daily_trips",
]
# One dict per entity, shaped as {join_key: entity_value}
driver_rows = [{"driver_id": driver_id} for driver_id in (1004, 1005)]

online_response = store.get_online_features(
    features=online_feature_refs,
    entity_rows=driver_rows,
)
feature_vector = online_response.to_dict()

pprint(feature_vector)


{'acc_rate': [0.4139282703399658, 0.6532202959060669],
 'avg_daily_trips': [950, 969],
 'conv_rate': [0.8150131106376648, 0.4830891788005829],
 'driver_id': [1004, 1005]}




In [10]:
# Using a feature service to fetch online features

from feast import FeatureStore
from pprint import pprint

# Initialize the feature store
feature_store = FeatureStore(repo_path="./good_mallard")

# Look the service up by name; it is passed below in place of a feature list.
feature_service = feature_store.get_feature_service("driver_activity_v1")

# One dict per entity, shaped as {join_key: entity_value}
service_entity_rows = [{"driver_id": 1004}, {"driver_id": 1005}]

# NOTE(review): the saved output of this cell shows None for every feature
# value, unlike the direct feature-reference lookup for the same drivers.
# Cause is not visible from this notebook — confirm the service definition and
# that the online store was materialized before this cell ran.
feature_vector = feature_store.get_online_features(
    features=feature_service,
    entity_rows=service_entity_rows,
).to_dict()
pprint(feature_vector)

{'acc_rate': [None, None],
 'avg_daily_trips': [None, None],
 'conv_rate': [None, None],
 'driver_id': [1004, 1005]}


In [None]:
%%bash
# Step 4: Browse your features with the Web UI (experimental)
# Stop immediately if the repo directory is missing rather than launching the
# UI from the wrong working directory.
cd good_mallard || exit 1
feast ui