# Feature Store Demo

This notebook demonstrates how to use the feature store for computing technical indicators and price transforms.

The same features work in both research (notebooks) and production (backtests/live trading).

In [None]:
import sys
sys.path.insert(0, '../..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set style
sns.set_style('darkgrid')
plt.rcParams['figure.figsize'] = (14, 7)

%matplotlib inline

## 1. Load Sample Data

We'll create some sample OHLCV data for demonstration.

In [None]:
# Generate synthetic daily OHLCV data (seeded, so every run is identical)
np.random.seed(42)
n_periods = 252
dates = pd.date_range('2023-01-01', periods=n_periods, freq='D')

# Close: linear trend from 100 to 120 plus Gaussian noise
trend = np.linspace(100, 120, n_periods)
noise = np.random.randn(n_periods) * 2
close = trend + noise

# Draw the wicks and the open in a fixed order (high wick, low wick, open,
# volume) so the random stream - and therefore close/open/volume - is stable.
high_wick = np.abs(np.random.randn(n_periods)) * 1.5
low_wick = np.abs(np.random.randn(n_periods)) * 1.5
open_price = close + np.random.randn(n_periods) * 0.5
volume = np.random.randint(1_000_000, 5_000_000, n_periods)

# A valid bar needs low <= min(open, close) and high >= max(open, close).
# The wicks are measured from the close only, so clamp against the open to
# keep the open inside the bar's range.
high = np.maximum(close + high_wick, open_price)
low = np.minimum(close - low_wick, open_price)

data = pd.DataFrame({
    'open': open_price,
    'high': high,
    'low': low,
    'close': close,
    'volume': volume
}, index=dates)

# Sanity-check the OHLC invariants so bad sample data fails loudly here
assert (data['high'] >= data[['open', 'close']].max(axis=1)).all()
assert (data['low'] <= data[['open', 'close']].min(axis=1)).all()

print(f"Data shape: {data.shape}")
data.head()

In [None]:
# Close series with the high-low envelope shaded behind it
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(data.index, data['close'], label='Close', linewidth=2)
ax.fill_between(
    data.index,
    data['low'],
    data['high'],
    alpha=0.3,
    label='High-Low Range',
)
ax.set_title('Sample Price Data', fontsize=16)
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
fig.tight_layout()
plt.show()

## 2. Initialize Feature Store

Create a feature store and register features.

In [None]:
from features import FeatureStore
from features.definitions.technical import RSI, SMA, EMA, MACD, MACDSignal, BollingerUpper, BollingerLower, ATR
from features.definitions.transforms import SimpleReturns, LogReturns, StandardizedPrice

# Build a store with caching switched on
store = FeatureStore(enable_cache=True)

# Feature instances grouped by family. They are registered in the order
# trend -> momentum -> volatility -> transforms.
trend_features = [
    SMA(period=20),
    SMA(period=50),
    EMA(period=12),
    EMA(period=26),
]
momentum_features = [
    RSI(period=14),
    MACD(),
    MACDSignal(),
]
volatility_features = [
    BollingerUpper(period=20, num_std=2.0),
    BollingerLower(period=20, num_std=2.0),
    ATR(period=14),
]
transform_features = [
    SimpleReturns(),
    LogReturns(),
    StandardizedPrice(window=20),
]

store.register_batch(
    trend_features + momentum_features + volatility_features + transform_features
)

# Report what ended up in the registry, alphabetically by feature name
print(f"Registered features: {len(store.list_features())}")
print("\nAvailable features:")
for feature_name in sorted(store.list_features()):
    meta = store.get_info(feature_name)
    print(f"  - {feature_name:25s} (window={meta['window']:3d}): {meta['description']}")

## 3. Compute Individual Features

Compute features one at a time.

In [None]:
# Compute a single registered feature by name
rsi = store.compute_single('rsi_14', data)

# Summary statistics, evaluated up front for readability
rsi_low = rsi.min()
rsi_high = rsi.max()
rsi_avg = rsi.mean()

print(f"RSI shape: {rsi.shape}")
print(f"RSI range: {rsi_low:.2f} to {rsi_high:.2f}")
print(f"RSI mean: {rsi_avg:.2f}")
print("\nFirst 10 values:")
print(rsi.head(10))

In [None]:
# Two stacked panels sharing the date axis: price on top, RSI below
fig, (price_ax, rsi_ax) = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

# Top panel: close price
price_ax.plot(data.index, data['close'], label='Close', linewidth=2)
price_ax.set_ylabel('Price')
price_ax.set_title('Price and RSI', fontsize=16)
price_ax.legend()
price_ax.grid(True, alpha=0.3)

# Bottom panel: RSI with the 70/30 reference levels and a shaded band between them
rsi_ax.plot(data.index, rsi, label='RSI(14)', color='purple', linewidth=2)
rsi_ax.axhline(70, color='red', linestyle='--', alpha=0.7, label='Overbought (70)')
rsi_ax.axhline(30, color='green', linestyle='--', alpha=0.7, label='Oversold (30)')
rsi_ax.fill_between(data.index, 30, 70, alpha=0.1)
rsi_ax.set_ylabel('RSI')
rsi_ax.set_xlabel('Date')
rsi_ax.set_ylim(0, 100)
rsi_ax.legend()
rsi_ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

## 4. Compute Multiple Features at Once

The feature store automatically resolves dependencies.

In [None]:
# Request several features in one call; the result is a single frame
requested_features = [
    'sma_20',
    'sma_50',
    'rsi_14',
    'macd_12_26',
    'macd_signal_9',
    'returns_close',
]
features = store.compute(requested_features, data)

print(f"Features shape: {features.shape}")
print(f"\nFeature columns: {list(features.columns)}")
print("\nFirst few rows:")
features.head()

In [None]:
# Overlay the 20- and 50-period simple moving averages on the close price
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(data.index, data['close'], label='Close', linewidth=2, alpha=0.7)
ax.plot(features.index, features['sma_20'], label='SMA(20)', linewidth=2)
ax.plot(features.index, features['sma_50'], label='SMA(50)', linewidth=2)
ax.set_title('Price with Moving Averages', fontsize=16)
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## 5. MACD Example

MACD signal is a composite feature that depends on the MACD line.

In [None]:
# Compute the MACD line and its signal line together
macd_features = store.compute(['macd_12_26', 'macd_signal_9'], data)
macd_line = macd_features['macd_12_26']
signal_line = macd_features['macd_signal_9']

# Histogram = MACD line minus signal line
histogram = macd_line - signal_line

# Two stacked panels sharing the date axis: price on top, MACD below
fig, (price_ax, macd_ax) = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

# Top panel: close price
price_ax.plot(data.index, data['close'], label='Close', linewidth=2)
price_ax.set_ylabel('Price')
price_ax.set_title('Price and MACD', fontsize=16)
price_ax.legend()
price_ax.grid(True, alpha=0.3)

# Bottom panel: MACD, signal, histogram bars and a zero reference line
macd_ax.plot(macd_features.index, macd_line, label='MACD', linewidth=2)
macd_ax.plot(macd_features.index, signal_line, label='Signal', linewidth=2)
macd_ax.bar(macd_features.index, histogram, label='Histogram', alpha=0.3)
macd_ax.axhline(0, color='black', linestyle='-', alpha=0.3)
macd_ax.set_ylabel('MACD')
macd_ax.set_xlabel('Date')
macd_ax.legend()
macd_ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

## 6. Bollinger Bands

Volatility bands around a moving average.

In [None]:
# Fetch both bands plus the 20-period SMA in one call
bb_features = store.compute(['bb_upper_20_2.0', 'bb_lower_20_2.0', 'sma_20'], data)
band_index = bb_features.index
upper_band = bb_features['bb_upper_20_2.0']
lower_band = bb_features['bb_lower_20_2.0']

# Price, SMA(20), dashed bands, and a light fill between the bands
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(data.index, data['close'], label='Close', linewidth=2, color='black')
ax.plot(band_index, bb_features['sma_20'], label='SMA(20)', linewidth=2, color='blue')
ax.plot(band_index, upper_band, label='Upper Band',
        linewidth=1.5, linestyle='--', color='red')
ax.plot(band_index, lower_band, label='Lower Band',
        linewidth=1.5, linestyle='--', color='green')
ax.fill_between(band_index, lower_band, upper_band, alpha=0.1)
ax.set_title('Bollinger Bands', fontsize=16)
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## 7. Feature Correlation Analysis

Analyze relationships between features.

In [None]:
# Features to compare against each other
correlation_inputs = [
    'returns_close',
    'rsi_14',
    'macd_12_26',
    'atr_14',
    'zscore_close_20',
]
all_features = store.compute(correlation_inputs, data)

# Pairwise correlation matrix of the feature columns
correlations = all_features.corr()

# Annotated heatmap with the colour scale centred on zero
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlations,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=1,
    ax=ax,
)
ax.set_title('Feature Correlations', fontsize=16)
fig.tight_layout()
plt.show()

## 8. Performance and Caching

The feature store caches results to avoid redundant computation.

In [None]:
import time

# Same feature set for both runs, so the second call can be served from cache
timed_features = ('rsi_14', 'sma_50', 'macd_12_26')

# First computation (no cache). perf_counter() is a monotonic,
# high-resolution clock meant for measuring short intervals; time.time()
# can be too coarse to register a fast cached call at all.
store.clear_cache()
start = time.perf_counter()
result1 = store.compute(list(timed_features), data)
time1 = time.perf_counter() - start

# Second computation (with cache)
start = time.perf_counter()
result2 = store.compute(list(timed_features), data)
time2 = time.perf_counter() - start

print(f"First computation:  {time1*1000:.2f} ms")
print(f"Second computation: {time2*1000:.2f} ms (cached)")
# Guard the ratio: a cached call can measure as exactly zero elapsed time
if time2 > 0:
    print(f"Speedup: {time1/time2:.1f}x")
else:
    print("Speedup: n/a (cached call finished below timer resolution)")

# Verify results are identical
assert result1.equals(result2), "Cached results should be identical"
print("\nResults verified: cached values match original computation")

## 9. Feature Store Statistics

Track which features are being computed.

In [None]:
# Per-feature computation counts reported by the store
stats = store.get_stats()

# Most frequently computed features first
ranked_counts = sorted(stats.items(), key=lambda entry: entry[1], reverse=True)

print("Feature Computation Counts:")
for feature_name, n_computed in ranked_counts:
    print(f"  {feature_name:25s}: {n_computed:3d} times")

## 10. Creating Custom Features

You can easily create your own features.

In [None]:
from features.store.base import Feature
import pandas as pd

class PriceVelocity(Feature):
    """Change in simple close-to-close returns over ``period`` bars.

    The value at row ``t`` is ``returns[t] - returns[t - period]``, where
    ``returns`` is the one-bar percentage change of ``close``.

    Args:
        period: Lag, in bars, between the two returns being compared.
            Must be at least 1.

    Raises:
        ValueError: If ``period`` is less than 1.
    """

    def __init__(self, period: int = 5):
        # A zero lag makes every value 0, and a negative lag makes diff()
        # read *future* rows, leaking look-ahead into the feature.
        if period < 1:
            raise ValueError(f"period must be >= 1, got {period}")
        super().__init__(
            name=f"price_velocity_{period}",
            version="1.0.0",
            description=f"Price acceleration over {period} periods",
            dependencies=["close"],
            # One row is consumed by pct_change() and `period` more by
            # diff(), so the first non-NaN value needs period + 2 rows.
            window=period + 2,
            parameters={"period": period}
        )
        self.period = period

    def compute(self, data: pd.DataFrame) -> pd.Series:
        """Return the lagged difference of returns, aligned to ``data``'s index.

        The leading ``period + 1`` entries are NaN because neither the
        return nor its lagged counterpart exists yet for those rows.
        """
        # validate_data is not defined in this class; presumably inherited
        # from Feature and checks the declared dependencies - TODO confirm.
        self.validate_data(data)
        returns = data["close"].pct_change()
        velocity = returns.diff(periods=self.period)
        return velocity

# Add the custom feature to the store, then compute it like any built-in
store.register(PriceVelocity(period=5))
velocity = store.compute_single('price_velocity_5', data)

n_values = len(velocity)
velocity_low = velocity.min()
velocity_high = velocity.max()

print(f"Price velocity computed: {n_values} values")
print(f"Range: {velocity_low:.4f} to {velocity_high:.4f}")

## Summary

The feature store provides:

1. **Reusable features** - define once, use everywhere
2. **Automatic dependency resolution** - composite features just work
3. **Caching** - avoid redundant computation
4. **Versioning** - reproducible research
5. **Type safety** - consistent data types and validation
6. **Easy extension** - create custom features with minimal code

Next steps:
- Use these features in your backtests
- Create strategy-specific features
- Build feature selection pipelines
- Monitor feature distributions in production