## Importing Libraries

In [1]:
!pip install feast['gcp'] numpy pandas scikit-learn -q

In [2]:
!pip install feast --upgrade -q

In [3]:
%rm -rf iris_feature_store

## GCP Configuration

In [4]:
PROJECT_ID = "neural-mantra-461520-m0"
TABLE_ID = "Offline_Store.Iris"
BUCKET_ID = "mlops-course-neural-mantra-461520-m0"
OFFLINE_STORE_NAME = "Iris_Offline_store"
ONLINE_STORE_NAME = "Iris_Online_Store"

## Prepare Iris Data

In [5]:
from datetime import datetime
import pandas as pd
from sklearn.datasets import load_iris

# Load the iris dataset from sklearn
iris = pd.read_csv("./data/iris.csv")

# Define the start date
start_date = pd.to_datetime('2025-01-01')

# Calculate the number of rows
num_rows = len(iris)

# Generate event timestamps starting from '2025-01-01'
# We use 'start=start_date' and 'periods=num_rows' with 'freq="D"'
event_timestamps = pd.date_range(start=start_date, periods=num_rows, freq='D')

# Add the 'event_timestamp' column to the DataFrame
iris['event_timestamp'] = event_timestamps

# Encode the 'species' column to create 'species_id'
iris['species_id'] = pd.factorize(iris['species'])[0]

## Create Entity DataFrame

In [6]:
entity = iris[["species_id", "event_timestamp"]]

entity.to_csv("./data/entity.csv")

entity.head(5)

Unnamed: 0,species_id,event_timestamp
0,0,2025-01-01
1,0,2025-01-02
2,0,2025-01-03
3,0,2025-01-04
4,0,2025-01-05


## Upload to BigQuery Database

In [7]:
# Creating the BigQuery source table
import pandas_gbq

# Define table schema for BigQuery
table_schema = [
    {'name': 'sepal_length', 'type': 'FLOAT'},
    {'name': 'sepal_width',  'type': 'FLOAT'},
    {'name': 'petal_length', 'type': 'FLOAT'},
    {'name': 'petal_width',  'type': 'FLOAT'},
    {'name': 'species',      'type': 'STRING'},
    {'name': 'event_timestamp', 'type': 'TIMESTAMP'},
    {'name': 'species_id', 'type': 'INTEGER'}
]

# Upload dataframe to BigQuery
# if_exists='replace' will overwrite existing table
pandas_gbq.to_gbq(
    iris,
    TABLE_ID,
    project_id=PROJECT_ID,
    if_exists='replace',
    table_schema=table_schema
)

100%|██████████| 1/1 [00:00<00:00, 6374.32it/s]


## Initialize Feast Project

In [8]:
!feast init -m iris_feature_store -t gcp


Creating a new Feast repository in [1m[32m/home/jupyter/iris_feature_store[0m.



Update Project Files

In [9]:
%cd iris_feature_store/feature_repo

/home/jupyter/iris_feature_store/feature_repo


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [10]:
!rm -rf .ipynb_checkpoints

In [11]:
!feast apply

No project found in the repository. Using project name iris_feature_store defined in feature_store.yaml
Applying changes for project iris_feature_store
Deploying infrastructure for [1m[32miris_features[0m


## Model Training

In [12]:
!pwd

/home/jupyter/iris_feature_store/feature_repo


In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from pandas.plotting import parallel_coordinates
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn import metrics
from feast import FeatureStore

store = FeatureStore(repo_path=".")

entity = pd.read_csv("../../data/entity.csv", parse_dates=["event_timestamp"])

hist_data = store.get_historical_features(
    entity_df=entity,
    features=store.get_feature_service("iris_model")
).to_df()

hist_data.head(5)



Unnamed: 0.1,Unnamed: 0,species_id,event_timestamp,sepal_length,sepal_width,petal_length,petal_width,species
0,11,0,2025-01-12 00:00:00+00:00,4.8,3.4,1.6,0.2,setosa
1,29,0,2025-01-30 00:00:00+00:00,4.7,3.2,1.6,0.2,setosa
2,33,0,2025-02-03 00:00:00+00:00,5.5,4.2,1.4,0.2,setosa
3,41,0,2025-02-11 00:00:00+00:00,4.5,2.3,1.3,0.3,setosa
4,46,0,2025-02-16 00:00:00+00:00,5.1,3.8,1.6,0.2,setosa


In [14]:
train, test = train_test_split( hist_data, test_size = 0.4, stratify = hist_data['species'], random_state = 42)
X_train = train[['sepal_length','sepal_width','petal_length','petal_width']]
y_train = train.species
X_test = test[['sepal_length','sepal_width','petal_length','petal_width']]
y_test = test.species

In [15]:
mod_dt = DecisionTreeClassifier(max_depth = 3, random_state = 1)
mod_dt.fit(X_train,y_train)
prediction=mod_dt.predict(X_test)
print('The accuracy of the Decision Tree is',"{:.3f}".format(metrics.accuracy_score(prediction,y_test)))

The accuracy of the Decision Tree is 0.983


## Materialize

In [16]:
!feast materialize 2025-01-01 2025-01-15

Materializing [1m[32m1[0m feature views from [1m[32m2025-01-01 00:00:00+00:00[0m to [1m[32m2025-01-15 00:00:00+00:00[0m into the [1m[32mdatastore[0m online store.

[1m[32miris_features[0m:
100%|█████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.89it/s]


In [17]:
online_data = store.get_online_features(
    features=[
        "iris_features:sepal_length",
        "iris_features:sepal_width",
        "iris_features:petal_length",
        "iris_features:petal_width",
        "iris_features:species"
    ],
    entity_rows=[
        {"species_id": 0}
    ],
).to_df()

online_data


Unnamed: 0,species_id,sepal_width,species,petal_length,petal_width,sepal_length
0,0,4.0,setosa,1.2,0.2,5.8


In [18]:
mod_dt.predict(online_data[["sepal_length", "sepal_width", "petal_length", "petal_width"]])

array(['setosa'], dtype=object)

In [20]:
import joblib

joblib.dump(mod_dt, "../../artifacts/feast/model.joblib")

['../../artifacts/feast/model.joblib']