# Signal Discovery Template

This notebook helps you discover and validate trading signals.

## Workflow:
1. Load market data
2. Engineer features
3. Explore feature relationships
4. Generate candidate signals
5. Test signals with QuickBacktest
6. Analyze results

In [None]:
# Setup
import sys
sys.path.append('/Users/jacobliu/repos/projects/trading-bot')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from research.utils import (
    QuickBacktest,
    load_bars,
    load_sample_data,
    plot_equity_curve,
    plot_signals,
    plot_correlation_matrix,
    plot_feature_distributions,
    calculate_metrics,
    correlation_analysis,
    feature_importance,
)
from research.utils.data_loaders import add_features

# Jupyter settings
%matplotlib inline
%load_ext autoreload
%autoreload 2

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.precision', 4)

print("Setup complete!")

## 1. Load Data

In [None]:
# Quick-start source: sample bars for a single ticker.
data = load_sample_data('SPY', days=500)

# Alternative source: real bars (if available). Swap in for the line above:
# data = load_bars('SPY', '2022-01-01', '2024-01-01')

# If the frame carries a 'symbol' column, keep only the rows belonging to the
# symbol found on its first row.
if 'symbol' in data.columns:
    symbol = data['symbol'].iloc[0]
    data = data[data['symbol'] == symbol]

# In both cases the frame ends up indexed by timestamp and sorted on that index.
data = data.set_index('timestamp').sort_index()

first_bar, last_bar = data.index[0], data.index[-1]
print(f"Loaded {len(data)} bars")
print(f"Date range: {first_bar.date()} to {last_bar.date()}")
data.head()

## 2. Engineer Features

In [None]:
# Standard technical features supplied by the project helper.
data = add_features(data, features=['returns', 'sma', 'ema', 'rsi', 'bbands', 'volume_ma'])

# Custom features, added in one pass. Keyword order fixes the column order;
# the lambda is evaluated after `vol_20` has been attached, so it can read it.
data = data.assign(
    # Momentum: percentage change of the close over 5 and 20 rows.
    momentum_5=data['close'].pct_change(5),
    momentum_20=data['close'].pct_change(20),
    # Volatility: 20-row rolling std of returns, and that value relative to
    # its own 60-row rolling mean.
    vol_20=data['returns'].rolling(20).std(),
    vol_ratio=lambda frame: frame['vol_20'] / frame['vol_20'].rolling(60).mean(),
    # Trend flags: 1 where the *_10 column is above the *_50 column, else 0.
    sma_cross=(data['sma_10'] > data['sma_50']).astype(int),
    ema_cross=(data['ema_10'] > data['ema_50']).astype(int),
)

# Rolling windows leave NaNs in the leading rows; discard any row with a NaN.
data = data.dropna()

print(f"Features: {data.columns.tolist()}")
print(f"\nData shape after feature engineering: {data.shape}")
data.tail()

## 3. Explore Feature Distributions

In [None]:
# Columns whose distributions are drawn by the project plotting helper.
features_to_plot = [
    'returns',
    'rsi_14',
    'bb_width',
    'volume_ratio',
    'momentum_20',
    'vol_ratio',
]
plot_feature_distributions(data, features=features_to_plot)

## 4. Correlation Analysis

In [None]:
# Target: the next row's return. shift(-1) moves each value up one row, so the
# last row has no target (NaN).
data['future_returns'] = data['returns'].shift(-1)

# Candidate predictors: every numeric/boolean column except the raw OHLCV
# columns and the return columns themselves. Non-numeric columns are skipped
# because they cannot be correlated with the target; in particular, the
# data-loading step keeps the string 'symbol' column when the source frame has one.
excluded_cols = {'open', 'high', 'low', 'close', 'volume', 'returns', 'future_returns'}
numeric_cols = data.select_dtypes(include=['number', 'bool']).columns
feature_cols = [col for col in numeric_cols if col not in excluded_cols]

# Project helper; `threshold` is passed through unchanged (0.02).
corr_df = correlation_analysis(data[feature_cols + ['future_returns']], target='future_returns', threshold=0.02)

print("Features correlated with future returns:")
print(corr_df.head(10))

In [None]:
# Heatmap over a hand-picked subset of columns, including the forward-return target.
selected_features = [
    'returns',
    'rsi_14',
    'momentum_20',
    'vol_ratio',
    'bb_width',
    'volume_ratio',
    'future_returns',
]
plot_correlation_matrix(data[selected_features])

## 5. Feature Importance Analysis

In [None]:
# Score each candidate feature against the next-row return.
# Rows without a target (NaN future_returns) are dropped from y, and X is
# restricted to the same index labels so the two stay aligned.
y = data['future_returns'].dropna()
X = data.loc[y.index, feature_cols]

importance = feature_importance(X, y, method='mutual_info')

# Show the first ten entries in the order the helper returned them.
print("\nTop 10 most important features:")
top_entries = list(importance.items())[:10]
for feature_name, feature_score in top_entries:
    print(f"{feature_name:20s}: {feature_score:.4f}")

In [None]:
# Bar chart of the importance scores computed above, limited to 15 features.
from research.utils.visualization import plot_feature_importance

plot_feature_importance(
    importance,
    top_n=15,
)

## 6. Generate Trading Signals

Based on the analysis above, create candidate trading signals.

In [None]:
# Each signal column holds one value per row. np.select takes the first
# matching condition, so conditions are listed in priority order.

# Signal 1 - RSI mean reversion: +1.0 when oversold (rsi_14 < 30),
# -1.0 when overbought (rsi_14 > 70), 0.0 otherwise.
data['signal_rsi'] = np.select(
    [data['rsi_14'] < 30, data['rsi_14'] > 70],
    [1.0, -1.0],
    default=0.0,
)

# Signal 2 - momentum gated by volatility: 1.0 on the buy condition, 0.0
# everywhere else. The sell condition is listed first so that it wins
# whenever both conditions hold.
buy_condition = (data['momentum_20'] > 0.05) & (data['vol_ratio'] < 1.5)
sell_condition = (data['momentum_20'] < -0.05) | (data['vol_ratio'] > 2.0)
data['signal_momentum'] = np.select(
    [sell_condition, buy_condition],
    [0.0, 1.0],
    default=0.0,
)

# Signal 3 - multi-factor combination: low RSI, Bollinger width above its own
# 20-row mean, and elevated volume ratio. Same priority rule as signal 2.
strong_buy = (data['rsi_14'] < 35) & (data['bb_width'] > data['bb_width'].rolling(20).mean()) & (data['volume_ratio'] > 1.2)
strong_sell = (data['rsi_14'] > 65) & (data['momentum_5'] < 0)
data['signal_combo'] = np.select(
    [strong_sell, strong_buy],
    [0.0, 1.0],
    default=0.0,
)

# Count of rows with a non-zero signal, per strategy.
print("Signals generated!")
print(f"Signal 1 (RSI) positions: {data['signal_rsi'].abs().sum():.0f}")
print(f"Signal 2 (Momentum) positions: {data['signal_momentum'].abs().sum():.0f}")
print(f"Signal 3 (Combo) positions: {data['signal_combo'].abs().sum():.0f}")

## 7. Visualize Signals

In [None]:
# Plot the combo signal on the price chart.

# Buy markers: rows where the combo signal is positive.
buy_signals = data['signal_combo'] > 0

# Sell markers: the combo signal as generated only ever holds 0.0 or 1.0, so
# testing it for negative values alone is always False and no sell marker
# would be drawn. Also mark the rows where the combo exit condition
# (`strong_sell`, defined in the signal-generation cell) fired.
sell_signals = (data['signal_combo'] < 0) | strong_sell

# Final 252 rows of the frame (the window the title calls "Last Year").
last_year = slice(-252, None)

plot_signals(
    data.iloc[last_year],
    buy_signals=buy_signals.iloc[last_year],
    sell_signals=sell_signals.iloc[last_year],
    show_features=['rsi_14', 'volume_ratio'],
    title='Trading Signals - Last Year'
)

## 8. Backtest Signals

In [None]:
# One backtester instance, reused for all three signals below.
bt = QuickBacktest(
    initial_capital=100000,
    commission=0.001,
    slippage=0.0005,
)

# Backtest signal 1 (RSI) and print its result under a separator line.
result_rsi = bt.run_signals(
    data,
    data['signal_rsi'],
    position_size=1.0,
    verbose=True,
)
print(f"\n{'=' * 60}")
print("RSI Signal Results:")
print(result_rsi)

In [None]:
# Backtest signal 2 (momentum) with the shared backtester and print its result.
result_momentum = bt.run_signals(
    data,
    data['signal_momentum'],
    position_size=1.0,
    verbose=True,
)
print(f"\n{'=' * 60}")
print("Momentum Signal Results:")
print(result_momentum)

In [None]:
# Backtest signal 3 (combo) with the shared backtester and print its result.
result_combo = bt.run_signals(
    data,
    data['signal_combo'],
    position_size=1.0,
    verbose=True,
)
print(f"\n{'=' * 60}")
print("Combo Signal Results:")
print(result_combo)

## 9. Compare Results

In [None]:
# Put the three equity curves side by side, one column per strategy, and plot them.
strategy_results = {
    'RSI': result_rsi,
    'Momentum': result_momentum,
    'Combo': result_combo,
}
equity_comparison = pd.DataFrame(
    {label: result.equity_curve for label, result in strategy_results.items()}
)

plot_equity_curve(equity_comparison, title='Strategy Comparison')

In [None]:
# Metrics table: built with one column per strategy, then transposed so each
# strategy becomes a row and each metric a column.
metrics_by_strategy = {
    'RSI': result_rsi.metrics,
    'Momentum': result_momentum.metrics,
    'Combo': result_combo.metrics,
}
comparison = pd.DataFrame(metrics_by_strategy).transpose()

# Only the headline metrics are printed.
key_metrics = [
    'total_return',
    'sharpe_ratio',
    'max_drawdown',
    'win_rate',
    'num_trades',
]
print("\nStrategy Comparison:")
print(comparison[key_metrics])

## 10. Detailed Analysis of Best Strategy

In [None]:
# Strategy to inspect in detail. This is chosen by hand (currently the combo
# signal): point it at whichever result has the best Sharpe ratio in the
# comparison table above.
best_result = result_combo

# Render the result object's own plot.
best_result.plot()

In [None]:
# Distribution of the selected strategy's returns.
from research.utils.visualization import plot_returns_distribution

plot_returns_distribution(
    best_result.returns,
)

## 11. Next Steps

Based on the analysis:

1. **If signals look promising:**
   - Refine parameters
   - Test on different time periods (walk-forward)
   - Test on different symbols
   - Move to `strategy_validation.ipynb` template

2. **If signals need work:**
   - Engineer new features
   - Try different combinations
   - Adjust thresholds
   - Consider regime detection

3. **Document findings:**
   - What worked?
   - What didn't work?
   - Key insights
   - Next experiments to try

In [None]:
# Summarise the chosen signal as a JSON-printable config for production.
import json

best_metrics = best_result.metrics

signal_config = {
    'name': 'combo_signal_v1',
    'features': ['rsi_14', 'bb_width', 'volume_ratio'],
    # These mirror the thresholds in the combo signal's buy rule
    # (rsi_14 < 35, volume_ratio > 1.2); keep them in sync by hand.
    'parameters': {
        'rsi_threshold': 35,
        'volume_min': 1.2,
        # Add other parameters
    },
    # Headline numbers copied from the selected backtest result.
    'performance': {
        'sharpe': best_metrics['sharpe_ratio'],
        'max_dd': best_metrics['max_drawdown'],
        'num_trades': best_metrics['num_trades'],
    },
}

print("Signal Configuration:")
print(json.dumps(signal_config, indent=2))