# Feast Feature Store Validation

This notebook validates the Feast feature store integration.

In [None]:
import os
from datetime import datetime, timedelta, timezone

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from feast import FeatureStore

# Notebook-wide plot defaults: seaborn 'whitegrid' style, default figure size (12, 8)
sns.set_style('whitegrid')
plt.rc('figure', figsize=(12, 8))

## 1. Initialize Feast Feature Store

In [None]:
# Open the Feast feature store rooted at the feature repository directory.
# The path is relative, so it is resolved against the notebook's working directory.
repo_path = "../infra/feast/feature_repo"
fs = FeatureStore(repo_path=repo_path)

print("Initialized Feast feature store at", repo_path)

## 2. Apply Feature Definitions

First, let's apply the feature definitions to register them in the feature store.

In [None]:
# Apply feature definitions: run the `feast apply` CLI from inside the feature
# repo directory. IPython substitutes the Python variable repo_path into the
# command before handing it to the shell.
!cd {repo_path} && feast apply

## 3. List Feature Views, Entities, and Feature Services

In [None]:
# Print a short summary (description, entities, schema fields, TTL) for every
# feature view registered in the store.
feature_views = fs.list_feature_views()

print("Feature Views:")
for view in feature_views:
    print(f"- {view.name}: {view.description}")
    print(f"  Entities: {list(view.entities)}")
    field_names = [field.name for field in view.schema]
    print(f"  Features: {field_names}")
    print(f"  TTL: {view.ttl}")
    print()

In [None]:
# List entities registered in the store
entities = fs.list_entities()

print("Entities:")
for entity in entities:
    # Feast's Entity object exposes a single `join_key`; `join_keys` is only the
    # constructor argument. Read whichever attribute exists so the cell does
    # not fail with AttributeError.
    join_keys = getattr(entity, "join_keys", None)
    if join_keys is None:
        join_keys = [entity.join_key]

    print(f"- {entity.name}: {entity.description}")
    print(f"  Value Type: {entity.value_type}")
    print(f"  Join Keys: {join_keys}")
    print()

In [None]:
# List feature services registered in the store
feature_services = fs.list_feature_services()

print("Feature Services:")
for service in feature_services:
    # A FeatureService has no public `features` attribute; its contents are
    # exposed as feature view projections, each carrying its own feature list.
    # Print them as "<feature view>:<feature>" references, the same format
    # used when requesting features elsewhere in this notebook.
    feature_refs = [
        f"{projection.name_to_use()}:{feature.name}"
        for projection in service.feature_view_projections
        for feature in projection.features
    ]

    print(f"- {service.name}: {service.description}")
    print(f"  Features: {feature_refs}")
    print()

## 4. Materialize Features

Let's materialize the features to the online store.

In [None]:
# Materialize features
end_date = datetime.now()
start_date = end_date - timedelta(days=7)

print(f"Materializing features from {start_date} to {end_date}")
!cd {repo_path} && feast materialize {start_date.isoformat()} {end_date.isoformat()}

## 5. Retrieve Features

Now, let's retrieve features from the feature store.

In [None]:
# Entity keys for the online lookups: one {"symbol": ...} row per ticker
entity_rows = [{"symbol": ticker} for ticker in ("AAPL", "MSFT", "GOOGL")]

In [None]:
# Retrieve technical indicators from the online store
technical_features = [
    "technical_indicators:ma_5",
    "technical_indicators:ma_15",
    "technical_indicators:ma_60",
    "technical_indicators:rsi_14",
    "technical_indicators:atr_14"
]

# Reset before the attempt so that a failed retrieval (or a re-run after an
# earlier success) leaves None instead of an undefined or stale DataFrame for
# the visualization step, which explicitly handles None.
technical_df = None
try:
    technical_df = fs.get_online_features(
        features=technical_features,
        entity_rows=entity_rows
    ).to_df()

    print("Technical Indicators:")
    display(technical_df)
except Exception as e:
    # Best-effort: report the failure and let the rest of the notebook run.
    print(f"Error retrieving technical indicators: {e}")

In [None]:
# Retrieve fundamental indicators from the online store
fundamental_features = [
    "fundamental_indicators:price_to_earnings",
    "fundamental_indicators:debt_to_equity",
    "fundamental_indicators:earnings_surprise",
    "fundamental_indicators:pe_category",
    "fundamental_indicators:avg_sentiment",
    "fundamental_indicators:mention_count"
]

# Reset before the attempt so that a failed retrieval (or a re-run after an
# earlier success) leaves None instead of an undefined or stale DataFrame for
# the visualization step, which explicitly handles None.
fundamental_df = None
try:
    fundamental_df = fs.get_online_features(
        features=fundamental_features,
        entity_rows=entity_rows
    ).to_df()

    print("Fundamental Indicators:")
    display(fundamental_df)
except Exception as e:
    # Best-effort: report the failure and let the rest of the notebook run.
    print(f"Error retrieving fundamental indicators: {e}")

In [None]:
# Retrieve streaming features from the online store
streaming_features = [
    "streaming_features:vwap_1m",
    "streaming_features:vwap_5m",
    "streaming_features:volatility_1m",
    "streaming_features:volatility_5m",
    "streaming_features:momentum_1m",
    "streaming_features:momentum_5m"
]

# Reset before the attempt so that a failed retrieval (or a re-run after an
# earlier success) leaves None instead of an undefined or stale DataFrame for
# the visualization step, which explicitly handles None.
streaming_df = None
try:
    streaming_df = fs.get_online_features(
        features=streaming_features,
        entity_rows=entity_rows
    ).to_df()

    print("Streaming Features:")
    display(streaming_df)
except Exception as e:
    # Best-effort: report the failure and let the rest of the notebook run.
    print(f"Error retrieving streaming features: {e}")

## 6. Retrieve Combined Features

Let's retrieve a selection of features from all three feature views (technical, fundamental, and streaming) in a single online request.

In [None]:
# Retrieve a cross-section of features from all three feature views in one
# online request. The references are listed explicitly; no feature service is
# used here.
combined_features = [
    "technical_indicators:ma_5",
    "technical_indicators:rsi_14",
    "fundamental_indicators:price_to_earnings",
    "fundamental_indicators:avg_sentiment",
    "streaming_features:vwap_1m",
    "streaming_features:volatility_1m"
]

# Reset before the attempt so a failed retrieval (or a re-run after an earlier
# success) leaves None rather than an undefined or stale DataFrame.
combined_df = None
try:
    combined_df = fs.get_online_features(
        features=combined_features,
        entity_rows=entity_rows
    ).to_df()

    print("Combined Features:")
    display(combined_df)
except Exception as e:
    # Best-effort: report the failure and let the rest of the notebook run.
    print(f"Error retrieving combined features: {e}")

## 7. Historical Feature Retrieval

Let's retrieve historical features for training a model.

In [None]:
# Create an entity DataFrame for historical feature retrieval: one row per
# symbol, all sharing the same event timestamp (24 hours ago).
# The timestamp is timezone-aware UTC rather than naive local time, so the
# lookup instant does not depend on the machine's local UTC offset.
symbols = ["AAPL", "MSFT", "GOOGL"]
as_of = datetime.now(timezone.utc) - timedelta(days=1)

entity_df = pd.DataFrame(
    {
        "symbol": symbols,
        "event_timestamp": [as_of] * len(symbols)
    }
)

In [None]:
# Retrieve historical features for the rows in entity_df
historical_feature_refs = [
    "technical_indicators:ma_5",
    "technical_indicators:rsi_14",
    "fundamental_indicators:price_to_earnings",
    "streaming_features:vwap_1m"
]

# Reset before the attempt so a failed retrieval (or a re-run after an earlier
# success) leaves None rather than an undefined or stale DataFrame.
historical_df = None
try:
    historical_features = fs.get_historical_features(
        entity_df=entity_df,
        features=historical_feature_refs
    )

    historical_df = historical_features.to_df()

    print("Historical Features:")
    display(historical_df)
except Exception as e:
    # Best-effort: report the failure and let the rest of the notebook run.
    print(f"Error retrieving historical features: {e}")

## 8. Feature Statistics and Visualization

In [None]:
# Visualize feature distributions
def visualize_features(df, title, entity_col='symbol'):
    """Draw one bar chart per numeric feature column, grouped by entity.

    Charts are laid out in a grid of at most three columns. A column with at
    most one distinct value gets a "No variation" placeholder instead of a
    chart.

    Args:
        df: DataFrame of retrieved features, or None. Must contain
            ``entity_col`` plus at least one numeric column to be plotted.
        title: Figure title; also used in the printed status messages.
        entity_col: Name of the column holding the entity key shown on the
            x-axis. Defaults to 'symbol'.

    Returns:
        None. When there is nothing to plot (None/empty frame, no numeric
        columns, or ``entity_col`` missing) a message is printed and the
        function returns without creating a figure.
    """
    if df is None or len(df) == 0:
        print(f"No data available for {title}")
        return

    # Numeric feature columns only; the entity key is the grouping axis, not a feature.
    numeric_cols = [
        col
        for col in df.select_dtypes(include=[np.number]).columns
        if col != entity_col
    ]

    if not numeric_cols:
        print(f"No numeric features available for {title}")
        return

    # Without the entity column seaborn cannot group the bars; report it here
    # instead of failing inside the plotting call.
    if entity_col not in df.columns:
        print(f"Column '{entity_col}' not found in data for {title}")
        return

    entity_label = entity_col.replace('_', ' ').title()

    # Grid layout: up to 3 charts per row, rows by ceiling division.
    n_cols = min(3, len(numeric_cols))
    n_rows = (len(numeric_cols) + n_cols - 1) // n_cols

    # squeeze=False always yields a 2-D array of axes, so no special case is
    # needed for a single subplot.
    fig, axes = plt.subplots(
        n_rows, n_cols, figsize=(15, n_rows * 4), squeeze=False
    )
    axes = axes.ravel()

    # Plot each feature
    for ax, col in zip(axes, numeric_cols):
        ax.set_title(f'{col} by {entity_label}')
        if df[col].nunique() > 1:  # Only plot if there's variation
            sns.barplot(x=entity_col, y=col, data=df, ax=ax)
            ax.set_title(f'{col} by {entity_label}')
            ax.set_xlabel(entity_label)
            ax.set_ylabel(col)
            ax.tick_params(axis='x', rotation=45)
        else:
            # Axes-relative coordinates keep the note centred regardless of data limits.
            ax.text(0.5, 0.5, f"No variation in {col}",
                    horizontalalignment='center', verticalalignment='center',
                    transform=ax.transAxes)
            ax.axis('off')

    # Hide unused subplots
    for ax in axes[len(numeric_cols):]:
        ax.axis('off')

    fig.suptitle(title, fontsize=16)
    # Leave headroom at the top so the suptitle does not overlap the subplots.
    fig.tight_layout(rect=[0, 0, 1, 0.97])
    plt.show()

In [None]:
# Plot each retrieved feature group in turn. Every group is attempted
# independently: a failure is printed and the remaining groups still run.
# The frames are looked up lazily (via the lambdas) so that a missing variable
# is reported for its own group only.
plot_jobs = [
    ("Technical Indicators", lambda: technical_df),
    ("Fundamental Indicators", lambda: fundamental_df),
    ("Streaming Features", lambda: streaming_df),
]

for plot_title, get_frame in plot_jobs:
    try:
        visualize_features(get_frame(), plot_title)
    except Exception as err:
        print(f"Error visualizing {plot_title.lower()}: {err}")

## 9. Summary and Findings

### Feast Integration
- Successfully registered feature views, entities, and feature services
- Materialized features to the online store
- Retrieved features for online serving and historical training

### Feature Consistency
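- The same feature references are requested from both the online store (sections 5–6) and the offline store (section 7)
- Feature values should be consistent across the two retrieval methods; this notebook displays both results but does not assert equality, so compare the outputs above to confirm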

### Feature Coverage
- Technical indicators provide trend and volatility information
- Fundamental indicators provide valuation and sentiment context
- Streaming features provide real-time market signals

### Next Steps
- Use these features to train machine learning models
- Implement a model serving layer that retrieves features from Feast
- Set up automated feature pipelines for continuous updates