## Импорты и инициализация

In [1]:
from pathlib import Path

from feast import FeatureStore
import pandas as pd

In [2]:
feast_path = Path("feast")
data_path = Path("feast") / "data" / "driver_stats.parquet"

In [4]:
store = FeatureStore(repo_path=feast_path)

## Проверка Pandas

In [5]:
df = pd.read_parquet(data_path)
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1808 entries, 0 to 1807
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype              
---  ------           --------------  -----              
 0   event_timestamp  1808 non-null   datetime64[ns, UTC]
 1   driver_id        1808 non-null   int64              
 2   conv_rate        1808 non-null   float32            
 3   acc_rate         1808 non-null   float32            
 4   avg_daily_trips  1808 non-null   int32              
 5   created          1808 non-null   datetime64[us]     
dtypes: datetime64[ns, UTC](1), datetime64[us](1), float32(2), int32(1), int64(1)
memory usage: 63.7 KB


Unnamed: 0,event_timestamp,driver_id,conv_rate,acc_rate,avg_daily_trips,created
0,2024-10-17 12:07:08.228578+00:00,1001,1.0,1.0,1000,2024-10-17 12:07:08.228581
1,2024-10-02 11:00:00+00:00,1005,0.429879,0.194598,582,2024-10-17 11:30:07.072000
2,2024-10-02 12:00:00+00:00,1005,0.230119,0.642878,551,2024-10-17 11:30:07.072000
3,2024-10-02 13:00:00+00:00,1005,0.1286,0.674187,38,2024-10-17 11:30:07.072000
4,2024-10-02 14:00:00+00:00,1005,0.400603,0.473636,583,2024-10-17 11:30:07.072000


## Проверка доступности Feast

In [None]:
print(
    f"FeatureView:         {', '.join(fv.name for fv in store.list_batch_feature_views())}"
)
print(
    f"OnDemandFeatureView: {', '.join(odfv.name for odfv in store.list_on_demand_feature_views())}"
)

FeatureView:         driver_activity, driver_stats
OnDemandFeatureView: activity_score


## Запрос исторических признаков (offline)

In [None]:
# Выбор первых трех водителей и их последних записей
entity_df = pd.DataFrame(df.driver_id.unique()[:3], columns=["driver_id"])
entity_df["event_timestamp"] = df.groupby("driver_id")[
    "event_timestamp"
].transform("max")
entity_df

Unnamed: 0,driver_id,event_timestamp
0,1001,2024-10-17 12:07:08.228578+00:00
1,1005,2024-10-17 10:00:00+00:00
2,1004,2024-10-17 10:00:00+00:00


In [None]:
feature_list = [
    "driver_stats:conv_rate",
    "driver_stats:acc_rate",
    "driver_stats:avg_daily_trips",
    "driver_activity:event_timestamp",
    "driver_activity:created",
    "activity_score:activity_score",
]

# Получаем исторические признаки
historical_features = store.get_historical_features(
    entity_df=entity_df,
    features=feature_list,
)

# Конвертируем в DataFrame
training_df = historical_features.to_df()
training_df



Unnamed: 0,driver_id,event_timestamp,conv_rate,acc_rate,avg_daily_trips,created,activity_score
0,1004,2024-10-17 10:00:00+00:00,0.347947,0.661438,610,2024-10-17 11:30:07.072000+00:00,0.785397
1,1005,2024-10-17 10:00:00+00:00,0.530015,0.752123,228,2024-10-17 11:30:07.072000+00:00,2.301972
2,1001,2024-10-17 12:07:08.228578+00:00,1.0,1.0,1000,2024-10-17 12:07:08.228581+00:00,1.612667


## Online-запрос

In [None]:
entity_rows = [{"driver_id": int(id)} for id in entity_df.driver_id]

In [38]:
store.get_online_features(features=feature_list, entity_rows=entity_rows)



<feast.online_response.OnlineResponse at 0x1c51a9e2f90>

А вот тут я так и не понял, что я делал не так. У меня в базе данных online всего 5 строк с какой-то ерундой. Ни собственный ни искуственный мозги мне не помогли разобраться, что не так. Так что отправляю как есть, может Вы натолкнёте на правильное решение :)