# Alpha Signal Development for Statistical Arbitrage

This notebook focuses on developing and testing alpha generation signals for statistical arbitrage strategies. We'll explore various signal types, including mean reversion, momentum, cross-sectional, cointegration-based, volatility, and (where data is available) fundamental signals.

## Objectives
- Develop and validate multiple alpha signal types
- Test signal performance and stability over time
- Analyze signal correlations and combine signals effectively
- Create a robust signal generation framework for production

## Signal Types to Explore
1. **Mean Reversion Signals**: Z-score, RSI, Bollinger Bands
2. **Momentum Signals**: Moving average crossovers, price momentum
3. **Cross-Sectional Signals**: Relative strength, sector rotation
4. **Cointegration Signals**: Pairs trading, spread reversion
5. **Volatility Signals**: GARCH-based, volatility breakouts
6. **Fundamental Signals**: P/E ratios, earnings surprises (if available)

---

In [None]:
# Import libraries for signal development
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Financial and statistical libraries
import yfinance as yf
from scipy import stats
from scipy.stats import zscore
from statsmodels.tsa.stattools import coint, adfuller
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Technical analysis
import talib
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Set up plotting
# The matplotlib style sheet is applied first and the seaborn palette second.
# NOTE(review): both calls presumably modify the default color cycle, so the
# later call would win -- confirm before reordering these two lines.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Visible confirmation that the import/setup cell ran to completion.
print("Libraries imported successfully!")

# Load our dataset from previous analysis
#
# The ticker universe is defined outside the try block because the synthetic
# fallback in the except branch needs `all_tickers` no matter where the
# download path fails.
tech_stocks = ['AAPL', 'MSFT', 'GOOGL', 'NVDA', 'TSLA']
financial_stocks = ['JPM', 'BAC', 'WFC', 'GS', 'MS']
energy_stocks = ['XOM', 'CVX', 'COP', 'EOG', 'SLB']
all_tickers = tech_stocks + financial_stocks + energy_stocks

try:
    # Download three years of daily bars and keep the adjusted close.
    # auto_adjust=False is passed explicitly: yfinance releases that
    # auto-adjust by default do not return an 'Adj Close' column, and the
    # resulting KeyError would silently push the notebook onto synthetic data.
    price_data = yf.download(
        all_tickers, period="3y", interval="1d", auto_adjust=False
    )['Adj Close']

    # yfinance can report failed downloads by logging and returning an empty
    # (or all-NaN) frame instead of raising; treat that as a failure so the
    # fallback below is used rather than continuing with no data.
    if price_data.dropna(how='all').empty:
        raise ValueError("yfinance returned no price data")

    # Simple daily returns; rows containing any NaN are dropped.
    returns = price_data.pct_change().dropna()

    print(f"Data loaded: {len(all_tickers)} stocks, {len(price_data)} trading days")
    print(f"Date range: {price_data.index.min()} to {price_data.index.max()}")

except Exception as e:
    # Deliberate best-effort boundary: any failure (network, missing column,
    # empty result) switches the notebook to reproducible synthetic prices.
    print(f"Error loading data: {e}")
    # Use synthetic data for development
    # NOTE: freq='D' produces calendar days, so weekends are included.
    dates = pd.date_range(start='2021-01-01', end='2024-01-01', freq='D')
    np.random.seed(42)  # fixed seed keeps the synthetic paths reproducible
    # One geometric random walk per ticker, each starting near 100.
    price_data = pd.DataFrame({
        ticker: 100 * np.exp(np.cumsum(np.random.normal(0.0005, 0.02, len(dates))))
        for ticker in all_tickers
    }, index=dates)
    returns = price_data.pct_change().dropna()
    print("Using synthetic data for development")

## 1. Mean Reversion Signals

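Mean reversion is a key concept in statistical arbitrage: prices that stray far from their recent average are expected to drift back toward it. We'll develop several mean reversion signals (Z-score, RSI, Bollinger Bands) and test their effectiveness. Throughout this section, positive signal values indicate overbought conditions (sell) and negative values indicate oversold conditions (buy).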

In [None]:
# 1.1 Z-Score Based Mean Reversion Signal
def calculate_zscore_signal(prices, window=20):
    """
    Rolling Z-score of price for mean reversion.

    Each value is the distance between the price and its ``window``-period
    rolling mean, measured in rolling standard deviations.
    Below zero reads as oversold (buy); above zero reads as overbought (sell).
    Rows before a full window is available are NaN.
    """
    roller = prices.rolling(window=window)
    deviation = prices - roller.mean()
    return deviation / roller.std()

# Z-score signal table: one '<ticker>_zscore' column per ticker, aligned to the
# price index.
zscore_signals = pd.DataFrame(
    {
        f'{ticker}_zscore': calculate_zscore_signal(price_data[ticker])
        for ticker in all_tickers[:5]  # trial run on the first 5 stocks only
    },
    index=price_data.index,
)

# 1.2 RSI-Based Mean Reversion Signal
def _rolling_mean_rsi(prices, window):
    """Compute RSI from simple rolling means of gains and losses (pandas only)."""
    delta = prices.diff()
    # The first diff is NaN; `where` maps it to 0 so it counts as "no change".
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    # A window with no losses divides by zero -> infinite RS -> RSI of 100.
    # A window with no movement at all gives 0/0 -> NaN.
    rs = gain / loss
    return 100 - (100 / (1 + rs))


def calculate_rsi_signal(prices, window=14, overbought=70, oversold=30):
    """Calculate a discrete RSI mean reversion signal (14-day default).

    Args:
        prices: pandas Series of prices.
        window: RSI lookback period.
        overbought: RSI level above which the signal is +1 (sell).
        oversold: RSI level below which the signal is -1 (buy).

    Returns:
        Integer Series on ``prices.index`` with values +1, -1 or 0. Rows
        where RSI is NaN (warm-up period, or a completely flat window in
        the fallback) compare False against both thresholds and are 0.

    TA-Lib is used when it is usable; any failure there falls back to a
    pandas implementation based on simple rolling means.
    NOTE(review): TA-Lib's RSI presumably uses a different smoothing, so
    the two paths may not produce identical values -- confirm if exact
    parity matters.
    """
    try:
        # TA-Lib only accepts float64 input, so cast explicitly; integer
        # price series would otherwise be rejected.
        rsi = talib.RSI(prices.to_numpy(dtype=float), timeperiod=window)
    except Exception:
        # Best-effort fallback (TA-Lib missing or rejecting the input).
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        rsi = _rolling_mean_rsi(prices, window).to_numpy()

    # Convert RSI to mean reversion signal
    # RSI > overbought: sell signal = +1; RSI < oversold: buy signal = -1
    signal = np.where(rsi > overbought, 1, np.where(rsi < oversold, -1, 0))
    return pd.Series(signal, index=prices.index)

# RSI signal table: one '<ticker>_rsi' column per ticker in the 5-stock trial
# subset, aligned to the price index.
rsi_signals = pd.DataFrame(
    {
        f'{ticker}_rsi': calculate_rsi_signal(price_data[ticker])
        for ticker in all_tickers[:5]
    },
    index=price_data.index,
)

# 1.3 Bollinger Bands Mean Reversion Signal
def calculate_bollinger_signal(prices, window=20, num_std=2):
    """Flag prices outside the Bollinger Bands as mean reversion signals.

    The bands sit ``num_std`` rolling standard deviations on either side of
    the ``window``-period rolling mean.

    Returns a tuple ``(signal, upper_band, lower_band)``. ``signal`` is a
    Series on ``prices.index`` holding +1 where the price is above the upper
    band (sell), -1 where it is below the lower band (buy), and 0 elsewhere,
    including the warm-up rows where the bands are still NaN.
    """
    roller = prices.rolling(window=window)
    midline = roller.mean()
    band_width = roller.std() * num_std

    upper_band = midline + band_width
    lower_band = midline - band_width

    # The first matching condition wins, mirroring a nested where().
    signal = np.select(
        [(prices > upper_band).to_numpy(), (prices < lower_band).to_numpy()],
        [1, -1],
        default=0,
    )
    return pd.Series(signal, index=prices.index), upper_band, lower_band

# Bollinger Band signals for the 5-stock trial subset, plus the band series
# (upper / lower / 20-period middle) kept per ticker for plotting.
bb_bands = {}
bb_columns = {}

for stock in all_tickers[:5]:
    close = price_data[stock]
    signal, upper, lower = calculate_bollinger_signal(close)
    bb_columns[f'{stock}_bb'] = signal
    bb_bands[stock] = dict(upper=upper, lower=lower, middle=close.rolling(20).mean())

# Assemble the signal table in one step, aligned to the price index.
bb_signals = pd.DataFrame(bb_columns, index=price_data.index)

# Summary of the three mean reversion signal tables.
print("Mean reversion signals calculated:")
print(f"Z-score signals shape: {zscore_signals.shape}")
print(f"RSI signals shape: {rsi_signals.shape}")
print(f"Bollinger Band signals shape: {bb_signals.shape}")

# Three stacked panels for the first ticker: price with Bollinger Bands,
# the rolling Z-score, and the discrete RSI / Bollinger signals.
stock_example = all_tickers[0]
fig, axes = plt.subplots(3, 1, figsize=(15, 12))
price_ax, zscore_ax, signal_ax = axes

# Panel 1: price with the band envelope shaded between lower and upper.
example_bands = bb_bands[stock_example]
upper_line = example_bands['upper']
lower_line = example_bands['lower']
price_ax.plot(price_data.index, price_data[stock_example], label='Price', linewidth=1)
price_ax.plot(upper_line.index, upper_line, label='Upper Band', alpha=0.7, linestyle='--')
price_ax.plot(lower_line.index, lower_line, label='Lower Band', alpha=0.7, linestyle='--')
price_ax.fill_between(upper_line.index, lower_line, upper_line, alpha=0.1)
price_ax.set_title(f'{stock_example} - Price and Bollinger Bands')
price_ax.legend()

# Panel 2: Z-score with +/-2 reference lines and a zero line.
zscore_ax.plot(
    zscore_signals.index,
    zscore_signals[f'{stock_example}_zscore'],
    color='orange',
    linewidth=1,
)
zscore_ax.axhline(y=2, color='red', linestyle='--', alpha=0.7, label='Overbought')
zscore_ax.axhline(y=-2, color='green', linestyle='--', alpha=0.7, label='Oversold')
zscore_ax.axhline(y=0, color='black', linestyle='-', alpha=0.5)
zscore_ax.set_title(f'{stock_example} - Z-Score Signal')
zscore_ax.set_ylabel('Z-Score')
zscore_ax.legend()

# Panel 3: the two discrete (-1 / 0 / +1) signals overlaid.
signal_ax.plot(
    rsi_signals.index,
    rsi_signals[f'{stock_example}_rsi'],
    label='RSI Signal',
    alpha=0.7,
    linewidth=2,
)
signal_ax.plot(
    bb_signals.index,
    bb_signals[f'{stock_example}_bb'],
    label='Bollinger Signal',
    alpha=0.7,
    linewidth=2,
)
signal_ax.set_title(f'{stock_example} - Combined Mean Reversion Signals')
signal_ax.set_ylabel('Signal')
signal_ax.legend()

# Same light grid on every panel.
for panel in (price_ax, zscore_ax, signal_ax):
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()