In [1]:
import feast
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("IRIS.csv")

In [3]:
df.head(5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
df.reset_index(inplace=True)

In [5]:
df.rename(columns={'index':'irisid'}, inplace=True)

In [6]:
from datetime import datetime, timedelta

In [7]:
timestamp = pd.date_range(end = pd.Timestamp.now(), periods = len(df), freq='D').to_frame(name='event_timestamp', index=False)

In [7]:
# event_timestamp = datetime.now() - timedelta(weeks=1)
# created_timestamp = datetime.now()

# df['created_timestamp'] = created_timestamp
# df['event_timestamp'] = event_timestamp

In [8]:
timestamp

Unnamed: 0,event_timestamp
0,2024-01-11 21:15:55.321761
1,2024-01-12 21:15:55.321761
2,2024-01-13 21:15:55.321761
3,2024-01-14 21:15:55.321761
4,2024-01-15 21:15:55.321761
...,...
145,2024-06-04 21:15:55.321761
146,2024-06-05 21:15:55.321761
147,2024-06-06 21:15:55.321761
148,2024-06-07 21:15:55.321761


In [9]:
df = pd.concat(objs = [df, timestamp], axis=1)

In [10]:
df.head(5)

Unnamed: 0,irisid,sepal_length,sepal_width,petal_length,petal_width,species,event_timestamp
0,0,5.1,3.5,1.4,0.2,Iris-setosa,2024-01-11 21:15:55.321761
1,1,4.9,3.0,1.4,0.2,Iris-setosa,2024-01-12 21:15:55.321761
2,2,4.7,3.2,1.3,0.2,Iris-setosa,2024-01-13 21:15:55.321761
3,3,4.6,3.1,1.5,0.2,Iris-setosa,2024-01-14 21:15:55.321761
4,4,5.0,3.6,1.4,0.2,Iris-setosa,2024-01-15 21:15:55.321761


In [21]:
predictors_df = df[['event_timestamp','sepal_length','petal_length','sepal_width','petal_width', 'irisid']]

In [22]:
predictors_df

Unnamed: 0,event_timestamp,sepal_length,petal_length,sepal_width,petal_width,irisid
0,2024-01-11 21:15:55.321761,5.1,1.4,3.5,0.2,0
1,2024-01-12 21:15:55.321761,4.9,1.4,3.0,0.2,1
2,2024-01-13 21:15:55.321761,4.7,1.3,3.2,0.2,2
3,2024-01-14 21:15:55.321761,4.6,1.5,3.1,0.2,3
4,2024-01-15 21:15:55.321761,5.0,1.4,3.6,0.2,4
...,...,...,...,...,...,...
145,2024-06-04 21:15:55.321761,6.7,5.2,3.0,2.3,145
146,2024-06-05 21:15:55.321761,6.3,5.0,2.5,1.9,146
147,2024-06-06 21:15:55.321761,6.5,5.2,3.0,2.0,147
148,2024-06-07 21:15:55.321761,6.2,5.4,3.4,2.3,148


In [23]:
target_df = df[["event_timestamp", "irisid", "species"]]

In [24]:
target_df.head(3)

Unnamed: 0,event_timestamp,irisid,species
0,2024-01-11 21:15:55.321761,0,Iris-setosa
1,2024-01-12 21:15:55.321761,1,Iris-setosa
2,2024-01-13 21:15:55.321761,2,Iris-setosa


In [25]:
predictors_df.to_parquet('predictors_df.parquet')
target_df.to_parquet('target_df.parquet')

In [26]:
predictors_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   event_timestamp  150 non-null    datetime64[ns]
 1   sepal_length     150 non-null    float64       
 2   petal_length     150 non-null    float64       
 3   sepal_width      150 non-null    float64       
 4   petal_width      150 non-null    float64       
 5   irisid           150 non-null    int64         
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 7.2 KB


In [27]:
target_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   event_timestamp  150 non-null    datetime64[ns]
 1   irisid           150 non-null    int64         
 2   species          150 non-null    object        
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 3.6+ KB


In [16]:
from feast import Entity
from feast import FeatureView, FileSource, Field

In [17]:
from feast.types import Int32, Int64, Float32, String

In [16]:
plant = Entity(name='irisid', join_keys=['irisid'])

In [17]:
iris_stats_batch_source = FileSource(name='iris_stats_source',
                                    path='iris.parquet',
                                    timestamp_field='event_timestamp')

In [18]:
iris_fv = FeatureView(
    name='iris_fv',
    entities=[plant],
    schema= [
        Field(name='sepal_length', dtype=Float32),
        Field(name='sepal_width', dtype=Float32),
        Field(name='petal_length', dtype=Float32),
        Field(name='petal_width', dtype=Float32),
    ],
    source= iris_stats_batch_source
)

In [19]:
from feast import FeatureStore, RepoConfig

In [20]:
repo_config = RepoConfig(
    project='default',
    registry = 'data/registry.db',
    provider = 'local',
    core_url = 'http://127.0.0.1:6566'
)



In [23]:
fs = FeatureStore(config=repo_config)

In [24]:
fs.config

RepoConfig(project='default', provider='local', registry_config='data/registry.db', online_config='sqlite', offline_config='file', batch_engine_config='local', feature_server=None, flags=None, repo_path=None, entity_key_serialization_version=1, coerce_tz_aware=True, core_url='http://127.0.0.1:6566')

In [25]:
fs.apply(plant)

In [26]:
fs.apply(iris_fv)

In [27]:
featureview = fs.get_feature_view(name='iris_fv')

In [28]:
featureview

<FeatureView(name = iris_fv, entities = ['irisid'], ttl = 0:00:00, stream_source = None, batch_source = {
  "type": "BATCH_FILE",
  "timestampField": "event_timestamp",
  "fileOptions": {
    "uri": "iris.parquet"
  },
  "name": "iris_stats_source"
}, entity_columns = [irisid-Int64], features = [sepal_length-Float32, sepal_width-Float32, petal_length-Float32, petal_width-Float32], description = , tags = {}, owner = , projection = FeatureViewProjection(name='iris_fv', name_alias=None, desired_features=[], features=[sepal_length-Float32, sepal_width-Float32, petal_length-Float32, petal_width-Float32], join_key_map={}), created_timestamp = 2024-06-08 09:49:09.621961, last_updated_timestamp = 2024-06-08 09:49:09.621961, online = True, materialization_intervals = [])>