# Install feast

In [14]:
!pip install feast==0.29.0







# Initialize the feast project with project name 'my_project'


In [47]:
# Scaffold a new Feast repository for the demo in a folder named 'my_project'
!feast init my_project


Creating a new Feast repository in c:\Users\fabri\OneDrive\Documents\Demo\demo_feast\my_project.



#### When you go to 'my_project' you can see the file structure:


In [None]:
'''
feature_repo
├── data
│   ├── driver_stats.parquet       ---- sample data
│   ├── online_store.db            ---- online database for low latency serving
│   └── registry.db                ---- shareable registry database where metadata is stored about the features and entities when they are registered and updated
├── example_repo.py 	           ---- definition of the pipeline functions (Feature views)	 

└── feature_store.yaml             ---- configuration files (link to databases, connection string) 

'''

#### Create an application using a feast feature store:

The application simply displays data sourced from a feature view.
Next, you will update the definitions in the existing feature repo and see the change reflected in the application.

In [None]:
# Install streamlit
!pip install streamlit

#### Now make changes in example_repo.py

In [None]:
%%writefile my_project/feature_repo/example_repo.py
# This is an example feature definition file with changes made to the file
import os

from datetime import timedelta
import pandas as pd
from feast import (
    Entity,
    Field,
    FeatureView,
    FileSource,
    PushSource,
    RequestSource,
)
from pathlib import Path
from feast.feature_service import FeatureService
from feast.on_demand_feature_view import on_demand_feature_view
from feast.types import Int64, Float32, Float64

# Resolve the data directory relative to this definition file instead of the
# current working directory, so the parquet path stays correct regardless of
# the directory the Feast CLI is invoked from.
DATA_DIR = Path(__file__).resolve().parent / "data"
# The driver entity plays the role of a primary key: features are fetched by
# joining on the "driver_id" column.
driver = Entity(
    join_keys=["driver_id"],
    name="driver",
)

# Batch source backed by a local parquet file, which is convenient for local
# development. A production setup would normally point at a data warehouse
# such as BigQuery instead; see the Feast documentation for details.
driver_stats_source = FileSource(
    path=str(Path(DATA_DIR, "driver_stats.parquet")),
    name="driver_hourly_stats_source",
    created_timestamp_column="created",
    timestamp_field="event_timestamp",
)

# The parquet data holds a driver_id column, timestamps and three feature
# columns. This feature view makes those features available for online serving.
driver_stats_fv = FeatureView(
    # Feature view names must be unique within a single project.
    name="driver_hourly_stats",
    source=driver_stats_source,
    entities=[driver],
    # The schema lists the features to materialize into the store; the same
    # names are used as references when building a training dataset or when
    # serving features.
    schema=[
        Field(name="conv_rate", dtype=Float32),
        Field(name="acc_rate", dtype=Float32),
        Field(name="avg_daily_trips", dtype=Int64),
    ],
    ttl=timedelta(days=1),
    online=True,
    # User-defined key/value tags attached to this feature view.
    tags={"team": "driver_performance"},
)

# Push source: lets data be pushed into Feast (offline, online or both),
# with the file source above as its batch source.
driver_stats_push_source = PushSource(
    batch_source=driver_stats_source,
    name="driver_stats_push_source",
)

# Request source: describes values that only exist at request time
# (for example, fields carried by the user-initiated HTTP request).
input_request = RequestSource(
    schema=[
        Field(name="val_to_add", dtype=Int64),
        Field(name="val_to_add_2", dtype=Int64),
    ],
    name="vals_to_add",
)


# Define an on demand feature view which can generate new features based on
# existing feature views and RequestSource features
@on_demand_feature_view(
    sources=[driver_stats_fv, input_request],
    schema=[
        Field(name="conv_rate_plus_val1", dtype=Float64),
        Field(name="conv_rate_plus_val2", dtype=Float64),
    ],
)
def transformed_conv_rate(inputs: pd.DataFrame) -> pd.DataFrame:
    """Add the request-time values to the driver's conversion rate.

    Args:
        inputs: DataFrame holding the ``conv_rate`` column together with the
            request-time columns ``val_to_add`` and ``val_to_add_2``.

    Returns:
        A new DataFrame with two columns, ``conv_rate_plus_val1`` and
        ``conv_rate_plus_val2``, each being ``conv_rate`` plus the matching
        request value.
    """
    df = pd.DataFrame()
    # The output features are named "*_plus_*", so the values must be added.
    df["conv_rate_plus_val1"] = inputs["conv_rate"] + inputs["val_to_add"]
    df["conv_rate_plus_val2"] = inputs["conv_rate"] + inputs["val_to_add_2"]
    return df


# Feature services group the features that make up a model version.
driver_activity_v1 = FeatureService(
    features=[
        # Sub-selection: only conv_rate from the driver stats feature view.
        driver_stats_fv[["conv_rate"]],
        # Whole view: every feature of the on-demand feature view.
        transformed_conv_rate,
    ],
    name="driver_activity_v1",
)
driver_activity_v2 = FeatureService(
    features=[driver_stats_fv, transformed_conv_rate],
    name="driver_activity_v2",
)

#### Create a Streamlit app to visualize the changes you make to the feature repo

In [66]:
%%writefile my_project/streamlit_app.py
import pandas as pd
import streamlit as st
from feast import FeatureStore
from datetime import datetime
from pathlib import Path
import os

# Locate the feature repo next to this script. Building the path from the
# script's parent directory avoids rewriting the whole path string, which
# would misfire if "streamlit_app.py" appeared anywhere else in the path.
repo_dir = Path(__file__).resolve().parent / "feature_repo"


# Entity dataframe: the rows (entity keys + timestamps) to fetch features for.
entity_df = pd.DataFrame.from_dict(
    {
        # entity's join key -> entity values
        "driver_id": [1001, 1002, 1003],
        # "event_timestamp" (reserved key) -> timestamps
        "event_timestamp": [
            datetime(2021, 4, 12, 10, 59, 42),
            datetime(2021, 4, 12, 8, 12, 10),
            datetime(2021, 4, 12, 16, 40, 26),
        ],
        # (optional) label name -> label values. Feast does not process these
        "label_driver_reported_satisfaction": [1, 5, 3],
        # values we're using for an on-demand transformation
        "val_to_add": [1, 2, 3],
        "val_to_add_2": [10, 20, 30],
    }
)

store = FeatureStore(repo_path=str(repo_dir))

# Feature references in "<feature view>:<feature>" form.
feature_refs = [
    "driver_hourly_stats:conv_rate",
    "driver_hourly_stats:acc_rate",
    "driver_hourly_stats:avg_daily_trips",
    "transformed_conv_rate:conv_rate_plus_val1",
    "transformed_conv_rate:conv_rate_plus_val2",
]

training_df = store.get_historical_features(
    entity_df=entity_df,
    features=feature_refs,
).to_df()

# Show the first rows of the retrieved features once the button is clicked.
if st.button("Click me"):
    st.write(training_df.head(3))

UsageError: %%writefile is a cell magic, but the cell body is empty.


In [61]:
# Register the updated definitions in the Feast registry; the command is run
# from inside the feature repo directory
!cd my_project/feature_repo && feast apply

Updated feature view driver_hourly_stats
	batch_source: type: BATCH_FILE
timestamp_field: "datetime"
created_timestamp_column: "created"
file_options {
  uri: "c:\\Users\\fabri\\OneDrive\\Documents\\Demo\\demo_feast\\my_project\\feature_repo\\data\\driver_stats.parquet"
}
data_source_class_type: "feast.infra.offline_stores.file_source.FileSource"

  schema = ParquetDataset(path).schema.to_arrow_schema()



name: "driver_hourly_stats_source"
 -> type: BATCH_FILE
timestamp_field: "event_timestamp"
created_timestamp_column: "created"
file_options {
  uri: "c:\\Users\\fabri\\OneDrive\\Documents\\Demo\\demo_feast\\my_project\\feature_repo\\data\\driver_stats.parquet"
}
data_source_class_type: "feast.infra.offline_stores.file_source.FileSource"
name: "driver_hourly_stats_source"


No changes to infrastructure


#### See the changes on the app:

In [64]:
# Launch the Streamlit app locally. NOTE(review): the cell appears to keep
# running until the server is interrupted (the recorded output is ^C).
!streamlit run my_project/streamlit_app.py

^C
