In [34]:
import feast
import pandas as pd

In [35]:
from feast import FeatureStore, FeatureService, RepoConfig

In [36]:
repo_config = RepoConfig(
    project = 'default',
    registry = 'data/registry.db',
    provider = 'local',
    core_url = 'http://127.0.0.1:6566'
)



In [37]:
fs = FeatureStore(repo_path = '/Users/annamalaivr/feast/demo_store/feature_repo/')

In [38]:
fs.get_feature_view('iris_fv')

<FeatureView(name = iris_fv, entities = ['irisid'], ttl = 2 days, 0:00:00, stream_source = None, batch_source = {
  "type": "BATCH_FILE",
  "timestampField": "event_timestamp",
  "fileOptions": {
    "uri": "/Users/annamalaivr/Downloads/predictors_df.parquet"
  },
  "name": "iris_stats_source"
}, entity_columns = [irisid-Int64], features = [sepal_length-Float64, sepal_width-Float64, petal_length-Float64, petal_width-Float64], description = , tags = {}, owner = , projection = FeatureViewProjection(name='iris_fv', name_alias=None, desired_features=[], features=[sepal_length-Float64, sepal_width-Float64, petal_length-Float64, petal_width-Float64], join_key_map={}), created_timestamp = 2024-06-08 18:00:47.802609, last_updated_timestamp = 2024-06-08 18:00:47.802609, online = True, materialization_intervals = [])>

In [39]:
entity_df = pd.read_parquet('target_df.parquet')

In [40]:
training_df = fs.get_historical_features(
    entity_df = entity_df,
    features = [
        "iris_fv:sepal_length",
        "iris_fv:sepal_width",
        "iris_fv:petal_length",
        "iris_fv:petal_width"
    ]
)

In [43]:
train_df = training_df.to_df()

In [73]:
train_df

Unnamed: 0,event_timestamp,irisid,species,sepal_length,sepal_width,petal_length,petal_width
0,2024-01-11 21:15:55.321761+00:00,0,Iris-setosa,5.1,3.5,1.4,0.2
1,2024-01-12 21:15:55.321761+00:00,1,Iris-setosa,4.9,3.0,1.4,0.2
2,2024-01-13 21:15:55.321761+00:00,2,Iris-setosa,4.7,3.2,1.3,0.2
3,2024-01-14 21:15:55.321761+00:00,3,Iris-setosa,4.6,3.1,1.5,0.2
4,2024-01-15 21:15:55.321761+00:00,4,Iris-setosa,5.0,3.6,1.4,0.2
...,...,...,...,...,...,...,...
145,2024-06-04 21:15:55.321761+00:00,145,Iris-virginica,6.7,3.0,5.2,2.3
146,2024-06-05 21:15:55.321761+00:00,146,Iris-virginica,6.3,2.5,5.0,1.9
147,2024-06-06 21:15:55.321761+00:00,147,Iris-virginica,6.5,3.0,5.2,2.0
148,2024-06-07 21:15:55.321761+00:00,148,Iris-virginica,6.2,3.4,5.4,2.3


In [42]:
from feast.infra.offline_stores.file_source import SavedDatasetFileStorage

In [46]:
dataset = fs.create_saved_dataset(from_ = training_df,
                                 name='iris_dataset',
                                 storage = SavedDatasetFileStorage("/Users/annamalaivr/feast/demo_store/feature_repo/data/iris_dataset.parquet"))



In [47]:
import sklearn

In [48]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [50]:
df = train_df[['sepal_length','sepal_width','petal_length','petal_width','species']]

In [51]:
df.head(3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa


In [52]:
encoder = LabelEncoder()

In [53]:
df['species'] = encoder.fit_transform(df['species'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['species'] = encoder.fit_transform(df['species'])


In [54]:
X = df.iloc[:,:-1].values
y = df.iloc[:,-1].values

In [55]:
X_train, X_test, y_train, y_test = train_test_split(X,y, train_size=0.80)

In [56]:
model = LogisticRegression()

In [57]:
model.fit(X_train,y_train)

In [58]:
ans = model.predict(X_test)

In [59]:
from sklearn.metrics import accuracy_score, confusion_matrix

In [60]:
accuracy_score(ans,y_test)

0.9333333333333333

In [61]:
confusion_matrix(ans, y_test)

array([[ 8,  0,  0],
       [ 0,  9,  1],
       [ 0,  1, 11]])

In [63]:
from datetime import datetime

fs.materialize_incremental(end_date=datetime.now())

Materializing [1m[32m2[0m feature views to [1m[32m2024-06-08 23:40:06+05:30[0m into the [1m[32msqlite[0m online store.

[1m[32miris_fv[0m from [1m[32m2024-06-06 18:10:06+05:30[0m to [1m[32m2024-06-08 23:40:06+05:30[0m:


100%|████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 516.18it/s]


[1m[32mtarget_fv[0m from [1m[32m2024-06-06 18:10:06+05:30[0m to [1m[32m2024-06-09 05:10:06+05:30[0m:


100%|███████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 1834.78it/s]


# Online Feature Store

In [64]:
from feast import FeatureStore

In [65]:
store = FeatureStore(repo_path='/Users/annamalaivr/feast/demo_store/feature_repo/')

In [67]:
store.list_feature_views()

[<FeatureView(name = iris_fv, entities = ['irisid'], ttl = 2 days, 0:00:00, stream_source = None, batch_source = {
   "type": "BATCH_FILE",
   "timestampField": "event_timestamp",
   "fileOptions": {
     "uri": "/Users/annamalaivr/Downloads/predictors_df.parquet"
   },
   "name": "iris_stats_source"
 }, entity_columns = [irisid-Int64], features = [sepal_length-Float64, sepal_width-Float64, petal_length-Float64, petal_width-Float64], description = , tags = {}, owner = , projection = FeatureViewProjection(name='iris_fv', name_alias=None, desired_features=[], features=[sepal_length-Float64, sepal_width-Float64, petal_length-Float64, petal_width-Float64], join_key_map={}), created_timestamp = 2024-06-08 18:00:47.802609, last_updated_timestamp = 2024-06-08 18:10:06.566934, online = True, materialization_intervals = [(datetime.datetime(2024, 6, 6, 18, 10, 6, 407802, tzinfo=<UTC>), datetime.datetime(2024, 6, 8, 23, 40, 6, 402713, tzinfo=<UTC>))])>,
 <FeatureView(name = target_fv, entities = 

In [77]:
feast_features = [
        "iris_fv:sepal_length",
        "iris_fv:sepal_width",
        "iris_fv:petal_length",
    ]

In [91]:
today = datetime.today()
today

datetime.datetime(2024, 6, 8, 23, 50, 24, 131401)

In [97]:
features = store.get_online_features(
  features= feast_features,
  entity_rows = [{"event_timestamp":today}]
).to_dict()

EntityNotFoundException: Entity event_timestamp does not exist in project demo_store

In [88]:
rows = pd.DataFrame.from_dict(features)

In [89]:
rows

Unnamed: 0,irisid,sepal_width,petal_length,sepal_length
0,148,3.4,5.4,6.2
1,147,3.0,5.2,6.5
