# Quant Equity Alpha Platform - Quickstart

This notebook demonstrates the end-to-end workflow:
1. Data ingestion (synthetic data in this demo; EODHD in production)
2. Feature engineering
3. Model training with walk-forward CV
4. Portfolio optimization
5. Backtesting with realistic costs
6. Report generation

In [None]:
import os
import sys
import yaml
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Add the parent of the current working directory to the import path so the
# `src` package resolves. The membership check keeps the cell idempotent:
# re-running it does not keep prepending duplicate entries to sys.path.
project_root = os.path.abspath('..')
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from src.eodhd_client import EODHDClient
from src.pit_store import PITDataStore
from src.features.factors import FactorEngine, compute_forward_returns
from src.models.train import train_with_cv
from src.portfolio.optimizer import PortfolioOptimizer, PortfolioConstraints
from src.backtest.runner import Backtester, compute_backtest_summary
from src.backtest.costs import TransactionCostModel
from src.utils.clock import TradingCalendar, get_rebalance_dates
from src.reporting.report import ReportGenerator

print("✓ Imports successful")

## 1. Configuration

In [None]:
# Load config.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open('../config/defaults.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Quick test configuration: override the date window and cap the universe so
# the demo finishes quickly.
config['start_date'] = '2022-01-01'
config['end_date'] = '2023-12-31'
config['universe']['max_tickers'] = 50  # Limit for quick demo

print(f"Date range: {config['start_date']} to {config['end_date']}")
print(f"Max tickers: {config['universe']['max_tickers']}")

## 2. Data Ingestion (Simplified)

For this demo, we'll create synthetic data. In production, use `scripts/ingest.py` to fetch real EODHD data.

In [None]:
# Create synthetic data for demo.
# np.random.seed seeds NumPy's global RNG, which every np.random.* call below
# draws from, so the generated frames are reproducible on a fresh kernel.
np.random.seed(42)

SECTORS = ['Tech', 'Finance', 'Healthcare']

# Size the universe from the config rather than repeating a hard-coded count.
n_tickers = int(config['universe']['max_tickers'])
assert n_tickers > 0, "universe.max_tickers must be positive for the synthetic demo"

tickers = [f"TICK{i:02d}" for i in range(n_tickers)]
# NOTE(review): freq='D' yields calendar days, so weekends carry prices too;
# confirm the trading calendar used downstream simply skips them.
dates = pd.date_range(config['start_date'], config['end_date'], freq='D')
n_days = len(dates)

# Generate synthetic prices: one geometric random walk per ticker.
price_frames = []
for ticker in tickers:
    base_price = np.random.uniform(50, 200)
    # The sector is drawn once per ticker. Drawing it per row would make a
    # ticker hop between sectors from one day to the next, which corrupts
    # anything computed per sector.
    sector = str(np.random.choice(SECTORS))
    returns = np.random.randn(n_days) * 0.02  # 2% daily vol
    prices = base_price * np.exp(returns.cumsum())

    # Build the whole ticker history in one vectorised frame; scalar values
    # (ticker, sector) are broadcast across all dates.
    price_frames.append(pd.DataFrame({
        'date': dates,
        'ticker': ticker,
        'close': prices,
        'adj_close': prices,
        'volume': np.random.uniform(1e6, 10e6, size=n_days),
        'sector': sector,
    }))

prices_df = pd.concat(price_frames, ignore_index=True)

# Invariant: every ticker maps to exactly one sector.
assert prices_df.groupby('ticker')['sector'].nunique().eq(1).all()

# Generate synthetic fundamentals: one record per ticker per quarter end.
# The quarter range is identical for every ticker, so it is built once.
quarter_ends = pd.date_range(config['start_date'], config['end_date'], freq='Q')

fundamentals_data = []
for ticker in tickers:
    for quarter in quarter_ends:
        # NOTE(review): liabilities and assets are drawn independently, so
        # liabilities can exceed assets in some rows — confirm that is
        # acceptable for the factors computed later.
        fundamentals_data.append({
            'ticker': ticker,
            'filing_date': quarter,
            'shares_outstanding': np.random.uniform(1e9, 10e9),
            'net_income_ttm': np.random.uniform(1e9, 10e9),
            'fcf_ttm': np.random.uniform(0.5e9, 8e9),
            'ebitda_ttm': np.random.uniform(2e9, 15e9),
            'total_assets': np.random.uniform(10e9, 100e9),
            'total_liabilities': np.random.uniform(5e9, 80e9),
        })

fundamentals_df = pd.DataFrame(fundamentals_data)

print(f"Generated {len(prices_df)} price records")
print(f"Generated {len(fundamentals_df)} fundamental records")

In [None]:
# Build the point-in-time store under ../data/demo and load both demo frames
# into it.
store_options = {
    'data_dir': '../data/demo',
    'pit_lag_days': config['pit_lag_days'],
}
pit_store = PITDataStore(**store_options)

pit_store.store_prices(prices_df)
pit_store.store_fundamentals(fundamentals_df)
print("✓ Data stored in PIT store")

# Run the store's own integrity check and report its pass/fail flag. A failed
# check is only reported here; execution continues either way.
validation = pit_store.validate_pit_integrity()
pit_status = 'PASSED' if validation['passed'] else 'FAILED'
print(f"PIT validation: {pit_status}")

## 3. Feature Engineering

In [None]:
# Pull the merged price + fundamentals panel for the configured date window
date_window = {
    'start_date': config['start_date'],
    'end_date': config['end_date'],
}
df = pit_store.merge_prices_fundamentals(**date_window)

rows_loaded = len(df)
tickers_loaded = df['ticker'].nunique()
print(f"Loaded {rows_loaded} rows for {tickers_loaded} tickers")

# Factor engine configured from the feature settings
factor_engine = FactorEngine(
    winsorize_quantiles=tuple(config['features']['winsorize_quantiles']),
    min_sector_size=5,  # Relaxed for demo
)

# Factors are computed one date at a time (each group is a single day's
# cross-section), then the per-date frames are stacked back into one panel.
df = pd.concat(
    [
        factor_engine.compute_all_factors(cross_section, compute_composite=True)
        for _, cross_section in df.groupby('date')
    ],
    ignore_index=True,
)

# Attach forward returns over a 21-day horizon
df = compute_forward_returns(df, horizon_days=21)

print(f"✓ Features computed")
standardized_cols = [name for name in df.columns if name.endswith('_z')]
print(f"Available features: {standardized_cols}")

## 4. Model Training

In [None]:
# Keep only the rows whose target is populated
target_col = 'next_21d_excess_vs_sector'
has_target = df[target_col].notna()
train_df = df.loc[has_target].copy()

print(f"Training samples: {len(train_df)}")

# Use the engine's standardized feature names, restricted to the columns that
# actually exist in the training frame.
train_columns = train_df.columns
feature_cols = [
    name
    for name in factor_engine.get_feature_columns(standardized=True)
    if name in train_columns
]

print(f"Using {len(feature_cols)} features")

# Fit with walk-forward cross-validation; returns the fitted model together
# with per-fold results.
model, cv_results = train_with_cv(
    df=train_df,
    feature_cols=feature_cols,
    target_col=target_col,
    date_col='date',
    model_config=config['model'],
    cv_config=config['cv'],
)

print(f"\n✓ Model trained")
mean_test_ic = cv_results['test_ic'].mean()
print(f"Mean Test IC: {mean_test_ic:.4f}")
mean_rank_ic = cv_results['test_rank_ic'].mean()
print(f"Mean Rank IC: {mean_rank_ic:.4f}")

In [None]:
# Plot CV results: per-fold test IC, with the mean IC as a dashed reference line
fold_ic_mean = cv_results['test_ic'].mean()

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(cv_results['fold'], cv_results['test_ic'], marker='o', label='Test IC')
ax.axhline(fold_ic_mean, color='r', linestyle='--', label=f"Mean: {fold_ic_mean:.4f}")
ax.set_xlabel('Fold')
ax.set_ylabel('IC')
ax.set_title('Information Coefficient by Fold')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

## 5. Generate Predictions

In [None]:
# Score every row of the feature panel with the fitted model.
# NOTE(review): the whole frame is passed to predict (not just feature_cols),
# and the scored rows include the rows the model was trained on — confirm the
# model selects its own feature columns and that in-sample scores are
# acceptable for this demo.
alpha_scores = model.predict(df)
df['alpha_score'] = alpha_scores

print(f"✓ Predictions generated")
score_lo = df['alpha_score'].min()
score_hi = df['alpha_score'].max()
print(f"Score range: [{score_lo:.4f}, {score_hi:.4f}]")

## 6. Portfolio Optimization & Backtest

In [None]:
# Calendar and rebalance schedule over the configured window
window_start = config['start_date']
window_end = config['end_date']

calendar = TradingCalendar(window_start, window_end, exchange='US')

rebalance_dates = get_rebalance_dates(
    window_start,
    window_end,
    frequency='weekly',
    calendar=calendar,
)

print(f"Rebalance dates: {len(rebalance_dates)}")

# Portfolio constraints: five limits come from the `portfolio` config section,
# the turnover penalty from the `optimizer` section.
portfolio_cfg = config['portfolio']
portfolio_limits = {
    key: portfolio_cfg[key]
    for key in (
        'long_pct',
        'short_pct',
        'sector_max_weight',
        'single_name_max_weight',
        'gross_leverage',
    )
}
constraints = PortfolioConstraints(
    **portfolio_limits,
    turnover_penalty=config['optimizer']['turnover_penalty'],
)

optimizer = PortfolioOptimizer(constraints, use_pca_risk=False)

# Weight function.
# `prev_weights` holds the most recent optimizer output between calls; it is
# None until the first successful optimization.
prev_weights = None

def get_weights(date):
    """Return optimizer weights for the rows of `df` dated `date`.

    Reads the notebook-level `df` and `optimizer`, passes the current
    `prev_weights` to the optimizer, and then replaces `prev_weights` with a
    copy of the weights being returned.

    When `df` has no rows for `date`, an empty float Series is returned and
    `prev_weights` is left untouched.
    """
    global prev_weights

    on_date = df['date'] == date
    snapshot = df.loc[on_date].copy()
    if snapshot.empty:
        return pd.Series(dtype=float)

    # The optimizer inputs are all indexed by ticker
    by_ticker = snapshot.set_index('ticker')
    new_weights = optimizer.optimize(
        scores=by_ticker['alpha_score'],
        sectors=by_ticker['sector'],
        prices=by_ticker['adj_close'],
        prev_weights=prev_weights,
    )
    prev_weights = new_weights.copy()
    return new_weights

# Transaction-cost model parameterised from the `costs` config section
cost_cfg = config['costs']
cost_model = TransactionCostModel(
    bps_per_side=cost_cfg['bps_per_side'],
    spread_proxy_bps=cost_cfg['spread_proxy_bps'],
)

# Backtester starting from $1M, sharing the calendar built for the rebalances
backtester = Backtester(
    initial_capital=1_000_000,
    cost_model=cost_model,
    calendar=calendar,
)

# Run backtest
print("Running backtest...")
backtest_columns = ['date', 'ticker', 'adj_close', 'volume']
price_data = df[backtest_columns].copy()
demo_rebalance_dates = rebalance_dates[:20]  # Limit for demo

result = backtester.run(
    rebalance_dates=demo_rebalance_dates,
    weight_func=get_weights,
    price_data=price_data,
    execution_lag=1,
)

print("\n✓ Backtest complete")

## 7. Results Analysis

In [None]:
# Summarize the backtest result and print the table without its index column
summary = compute_backtest_summary(result)
summary_text = summary.to_string(index=False)
print(summary_text)

In [None]:
# Two stacked panels: portfolio value on top, compounded returns underneath
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

title_style = {'fontsize': 14, 'fontweight': 'bold'}
label_size = 12

# Top panel: portfolio value over time
portfolio_values = result.portfolio_values
ax1.plot(portfolio_values.index, portfolio_values.values, linewidth=2)
ax1.set_title('Portfolio Value', **title_style)
ax1.set_ylabel('Value ($)', fontsize=label_size)

# Bottom panel: growth of 1 unit, compounding the per-period returns
cum_returns = (1 + result.returns).cumprod()
ax2.plot(cum_returns.index, cum_returns.values, linewidth=2, color='green')
ax2.set_title('Cumulative Returns', **title_style)
ax2.set_ylabel('Cumulative Return', fontsize=label_size)
ax2.set_xlabel('Date', fontsize=label_size)

# Same light grid on both panels
for axis in (ax1, ax2):
    axis.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 8. Generate Full Report

For production use, run: `python scripts/report.py`

In [None]:
# Closing banner
banner = "=" * 50
print("\n" + banner)
print("QUICKSTART COMPLETE")
print(banner)

# Headline numbers read from the backtest result
final_value = result.portfolio_values.iloc[-1]
total_return_pct = result.metrics['total_return'] * 100
sharpe_ratio = result.metrics['sharpe']
print(f"\nFinal Portfolio Value: ${final_value:,.0f}")
print(f"Total Return: {total_return_pct:.2f}%")
print(f"Sharpe Ratio: {sharpe_ratio:.2f}")

# Pointers to the production scripts
print(f"\nNext steps:")
next_steps = (
    "1. Use scripts/ingest.py for real EODHD data",
    "2. Customize config/defaults.yaml",
    "3. Run full pipeline with scripts/train.py and scripts/backtest.py",
    "4. Generate comprehensive report with scripts/report.py",
)
print("\n".join(next_steps))