## Preparing the dataset for Feast

In [1]:
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta

In [2]:
# Read the iris CSV stored under data/ into the working DataFrame.
iris_csv_path = "data/iris.csv"
df = pd.read_csv(iris_csv_path)

In [3]:
# Give every row a 1-based string key ("flower_1", "flower_2", ...); this is
# the column later used as the join key for the `flower` entity.
row_numbers = range(1, len(df) + 1)
df["unique_identifier"] = [f"flower_{n}" for n in row_numbers]

In [4]:
# Stamp each row with its own event time, one minute apart, counting backwards
# from the moment this cell runs (row 0 gets the most recent timestamp).
#
# The reference time is timezone-aware UTC on purpose: a naive local
# `datetime.now()` carries no offset, so a consumer that assumes UTC would
# shift every event by the machine's UTC offset.
now = pd.Timestamp.now(tz="UTC")

# Vectorized: subtract 0, 1, 2, ... minutes from the reference time in one go.
df["event_timestamp"] = now - pd.to_timedelta(np.arange(len(df)), unit="min")

In [5]:
# Move the entity key and event timestamp to the front, keeping every other
# column in its existing order.
# The remaining columns are chosen by name rather than by position, so
# re-running this cell leaves the frame unchanged instead of duplicating the
# leading columns and dropping trailing ones.
leading_columns = ["unique_identifier", "event_timestamp"]
remaining_columns = [col for col in df.columns if col not in leading_columns]
df = df[leading_columns + remaining_columns]

In [6]:
# Integer label for each species name: setosa -> 0, versicolor -> 1, virginica -> 2.
species_names = ("setosa", "versicolor", "virginica")
species_to_id = {name: label for label, name in enumerate(species_names)}

# Add the numeric label column derived from the species column.
df["flower_id"] = df["species"].map(species_to_id)

In [7]:
# Preview the first five rows to check the new columns and their order.
df.head(n=5)

Unnamed: 0,unique_identifier,event_timestamp,sepal_length,sepal_width,petal_length,petal_width,species,flower_id
0,flower_1,2025-06-17 14:06:33.757091,5.1,3.5,1.4,0.2,setosa,0
1,flower_2,2025-06-17 14:05:33.757091,4.9,3.0,1.4,0.2,setosa,0
2,flower_3,2025-06-17 14:04:33.757091,4.7,3.2,1.3,0.2,setosa,0
3,flower_4,2025-06-17 14:03:33.757091,4.6,3.1,1.5,0.2,setosa,0
4,flower_5,2025-06-17 14:02:33.757091,5.0,3.6,1.4,0.2,setosa,0


In [8]:
# Save the prepared frame as CSV, leaving out the pandas row index.
prepared_csv_path = "data/iris_feast_dataset.csv"
df.to_csv(prepared_csv_path, index=False)

## Initializing Feast

In [9]:
!pip install "protobuf<6.0.0" > {os.devnull} 2>&1

In [10]:
!pip install feast > {os.devnull} 2>&1

In [11]:
# Print the installed Feast SDK version to confirm the CLI is available.
!feast version 

Feast SDK Version: "0.49.0"


In [12]:
# Scaffold a new Feast repository named iris_feast_project under the current
# working directory.
# NOTE(review): presumably not safe to re-run once the directory exists - confirm.
!feast init iris_feast_project


Creating a new Feast repository in [1m[32m/home/jupyter/iris_feast_project[0m.



In [13]:
cd iris_feast_project/feature_repo

/home/jupyter/iris_feast_project/feature_repo


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [14]:
pwd

'/home/jupyter/iris_feast_project/feature_repo'

In [15]:
# Write the prepared frame as Parquet into the feature repository's data/ folder
# (presumably the file the feature view's source points at - confirm).
parquet_path = 'data/df.parquet'
df.to_parquet(parquet_path)

## Applying Feast

In [16]:
# Register Jupyter's checkpoint folder in .feastignore.
# The entry is added only when it is missing, so re-running this cell does not
# pile up duplicate lines the way an unconditional `echo ... >>` append does.
ignore_entry = ".ipynb_checkpoints/"
with open(".feastignore", "a+") as ignore_file:
    ignore_file.seek(0)  # "a+" opens positioned at the end; rewind to read
    existing_text = ignore_file.read()
    if ignore_entry not in existing_text.splitlines():
        # Start on a fresh line if the file does not already end with one.
        if existing_text and not existing_text.endswith("\n"):
            ignore_file.write("\n")
        ignore_file.write(ignore_entry + "\n")

In [17]:
# Register the feature definitions found in the current repository.
# NOTE(review): the recorded output creates the entity `flower` and the feature
# view `df_feature_view`, but no cell in this notebook defines them -
# presumably the definitions file in feature_repo was edited by hand; confirm.
!feast apply

  flower = Entity(name="flower", join_keys=["unique_identifier"])
No project found in the repository. Using project name iris_feast_project defined in feature_store.yaml
Applying changes for project iris_feast_project
Created project [1m[32miris_feast_project[0m
Created entity [1m[32mflower[0m
Created feature view [1m[32mdf_feature_view[0m

Created sqlite table [1m[32miris_feast_project_df_feature_view[0m



## Generating Training data from the offline store

In [18]:
from feast import FeatureStore

# Open the feature repository located in the current working directory.
store = FeatureStore(repo_path='.')

# Entity frame for the lookup: join key, event time, and the label to carry along.
target_df = df[["unique_identifier", "event_timestamp", "flower_id"]]

# Fully qualified "<feature view>:<feature>" references for the four measurements.
measurement_names = ("sepal_length", "sepal_width", "petal_length", "petal_width")
feature_refs = [f"df_feature_view:{name}" for name in measurement_names]

training_data = store.get_historical_features(
    entity_df=target_df,
    features=feature_refs,
)



In [19]:
# Convert the historical-feature result to a pandas DataFrame and display it.
training_data.to_df()

Unnamed: 0,unique_identifier,event_timestamp,flower_id,sepal_length,sepal_width,petal_length,petal_width
0,flower_150,2025-06-17 11:37:33.757091+00:00,2,5.9,3.0,5.1,1.8
1,flower_149,2025-06-17 11:38:33.757091+00:00,2,6.2,3.4,5.4,2.3
2,flower_148,2025-06-17 11:39:33.757091+00:00,2,6.5,3.0,5.2,2.0
3,flower_147,2025-06-17 11:40:33.757091+00:00,2,6.3,2.5,5.0,1.9
4,flower_146,2025-06-17 11:41:33.757091+00:00,2,6.7,3.0,5.2,2.3
...,...,...,...,...,...,...,...
145,flower_5,2025-06-17 14:02:33.757091+00:00,0,5.0,3.6,1.4,0.2
146,flower_4,2025-06-17 14:03:33.757091+00:00,0,4.6,3.1,1.5,0.2
147,flower_3,2025-06-17 14:04:33.757091+00:00,0,4.7,3.2,1.3,0.2
148,flower_2,2025-06-17 14:05:33.757091+00:00,0,4.9,3.0,1.4,0.2


## Model Training 

In [20]:
!pip uninstall -y numpy scipy scikit-learn > {os.devnull} 2>&1
!pip cache purge  > {os.devnull} 2>&1
!pip install --no-cache-dir numpy scipy scikit-learn > {os.devnull} 2>&1

In [21]:
# Dependencies for the training step
from feast import FeatureStore
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from joblib import dump

# Open the feature repository in the current working directory
store = FeatureStore(repo_path=".")

# Turn the historical-feature result retrieved earlier into a DataFrame
training_df = training_data.to_df()

# Label column and the four measurement columns
y = training_df['flower_id']
X = training_df[["sepal_length", "sepal_width", "petal_length", "petal_width"]]

# Stratified train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

# Alphabetical column order, used for both fitting and predicting so the model
# always sees the features in the same positions
feature_order = sorted(X.columns)

# Fit a logistic regression classifier on the training split
reg = LogisticRegression(max_iter=200)
reg.fit(X=X_train[feature_order], y=y_train)

# Score the held-out split and report the accuracy
y_pred = reg.predict(X_test[feature_order])
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Serialize the fitted model; joblib returns the list of files it wrote
dump(reg, "feast_iris_model.bin")



Model Accuracy: 0.97


['feast_iris_model.bin']