In [3]:
import joblib
from feast import FeatureStore
import os

# --- DEFINE ABSOLUTE PATHS ---
# This is the root of your project
BASE_DIR = "/home/jupyter/"

# Define absolute paths for your model files
MODEL_PATH = os.path.join(BASE_DIR, "models/model.joblib")
ENCODER_PATH = os.path.join(BASE_DIR, "models/label_encoder.joblib")

# This is the correct, nested path to your Feast repo
FEAST_REPO_PATH = os.path.join(BASE_DIR, "feature_repo/feature_repo")

# 1. Pull the DVC-tracked model
# We still run this from the notebook's location,
# but we need to ensure the notebook is in /home/jupyter/
print("Pulling models with DVC...")
!dvc pull models/model.joblib models/label_encoder.joblib -f
print("Pull complete.")

# 2. Load model and encoder using ABSOLUTE paths
print("Loading models...")
model = joblib.load(MODEL_PATH)
le = joblib.load(ENCODER_PATH)
print("Models loaded.")

# 3. Connect to the feature store using the CORRECT ABSOLUTE path
print("Connecting to Feature Store...")
store = FeatureStore(repo_path=FEAST_REPO_PATH)
print("Model, encoder, and feature store loaded.")

Pulling models with DVC...
[31mERROR[39m: failed to pull data from the cloud - 'models/model.joblib' does not exist as an output or a stage name in 'dvc.yaml': 'dvc.yaml' does not exist
[0mPull complete.
Loading models...
Models loaded.
Connecting to Feature Store...
Model, encoder, and feature store loaded.


In [4]:
# These are the IDs we want to predict *right now*
# (Your augmented data only had '1001', let's add a hypothetical '1002')
entity_rows = [
    {"iris_id": 1001},
    # {"iris_id": 1002}, # This would return 'None' as it's not in the store
]

# Define the features we need
feature_names = [
    "iris_features:sepal_length",
    "iris_features:sepal_width",
    "iris_features:petal_length",
    "iris_features:petal_width",
]

# This is the MAGIC!
# Feast fetches the LATEST values from the 'online store' (SQLite)
print("Fetching online features...")
online_features = store.get_online_features(
    features=feature_names,
    entity_rows=entity_rows
).to_dict()

print(online_features)

Fetching online features...
{'iris_id': [1001], 'sepal_width': [2.359999895095825], 'petal_width': [1.090000033378601], 'petal_length': [3.8399999141693115], 'sepal_length': [5.449999809265137]}


In [5]:
# Re-format the features for scikit-learn
import pandas as pd
features_df = pd.DataFrame.from_dict(online_features)
X_inference = features_df[
    ["sepal_length", "sepal_width", "petal_length", "petal_width"]
]

# Run prediction
predictions_encoded = model.predict(X_inference)
predictions_labels = le.inverse_transform(predictions_encoded)

print(f"--- Inference Results for Iris ID: {features_df['iris_id'].values[0]} ---")
print(f"Features: {X_inference.iloc[0].to_dict()}")
print(f"Predicted Species: {predictions_labels[0]}")

--- Inference Results for Iris ID: 1001 ---
Features: {'sepal_length': 5.449999809265137, 'sepal_width': 2.359999895095825, 'petal_length': 3.8399999141693115, 'petal_width': 1.090000033378601}
Predicted Species: versicolor
