# Factor Research

Develop and validate alpha factors through IC analysis, IC decay testing, turnover-performance tradeoffs, and factor orthogonalization.

In [18]:
from datetime import timedelta
import numpy as np
from clyptq import CostModel, Constraints
from clyptq.analytics.factors import FactorAnalyzer
from clyptq.data.loaders.ccxt import load_crypto_data
from clyptq.trading.execution import BacktestExecutor
from clyptq.trading.factors.library.momentum import MomentumFactor
from clyptq.trading.factors.library.volatility import VolatilityFactor
from clyptq.trading.factors.library.volume import VolumeFactor
from clyptq.trading.factors.library.mean_reversion import BollingerFactor
from clyptq.trading.factors.library.liquidity import AmihudFactor
from clyptq.trading.factors.ops.factor_combination import orthogonalize_factors, pca_factors
from clyptq.trading.portfolio.constructors import ScoreWeightedConstructor
from clyptq.trading.strategy.base import SimpleStrategy

## 1. Load Data

In [19]:
# Research universe: ten USDT-quoted pairs.
# NOTE(review): the recorded run reports one symbol with no data — possibly
# "MATIC/USDT" (Binance migrated MATIC to POL); confirm and update the ticker.
symbols = [
    "BTC/USDT",
    "ETH/USDT",
    "BNB/USDT",
    "SOL/USDT",
    "XRP/USDT",
    "ADA/USDT",
    "AVAX/USDT",
    "DOGE/USDT",
    "DOT/USDT",
    "MATIC/USDT",
]

# Request 180 days of daily ("1d") data, then restrict the research window to
# the trailing 120 days (presumably leaving the earlier bars as factor warm-up).
store = load_crypto_data(
    symbols=symbols,
    exchange="binance",
    timeframe="1d",
    days=180,
)
date_range = store.get_date_range()
end = date_range.end
start = end - timedelta(days=120)

print(f"Research period: {start.date()} to {end.date()}")

No data returned for symbol
No data for symbol


Research period: 2025-09-06 to 2026-01-04


## 2. Define Factors

Five factors: Momentum, Mean Reversion (Bollinger), Volatility, Volume, and Liquidity (Amihud).

In [20]:
# Candidate factors under study, one per signal family.
factors = [
    MomentumFactor(lookback=30),
    BollingerFactor(lookback=20, num_std=2.0),
    VolatilityFactor(lookback=30),
    VolumeFactor(lookback=30),
    AmihudFactor(lookback=20),
]

# List each factor with its lookback; factors without a `lookback`
# attribute are shown as "N/A".
print(f"Testing {len(factors)} factors:")
for position, candidate in enumerate(factors, start=1):
    class_name = type(candidate).__name__
    window = getattr(candidate, "lookback", "N/A")
    print(f"  {position}. {class_name} (lookback={window})")

Testing 5 factors:
  1. MomentumFactor (lookback=30)
  2. BollingerFactor (lookback=20)
  3. VolatilityFactor (lookback=30)
  4. VolumeFactor (lookback=30)
  5. AmihudFactor (lookback=20)


## 3. Factor Correlation Matrix

Check whether the factors are independent or redundant by correlating their cross-sectional scores on a single recent date.

In [21]:
import pandas as pd

# Score every factor on the day before the last available timestamp.
latest_date = date_range.end - timedelta(days=1)
view = store.get_view(latest_date)

# One entry per factor class name, holding that factor's per-symbol scores.
factor_scores = {}
for factor in factors:
    factor_scores[type(factor).__name__] = factor.compute(view)

# Union of every symbol scored by at least one factor, in sorted order.
all_symbols = sorted(
    {symbol for scores in factor_scores.values() for symbol in scores.keys()}
)

# Align all factors on the same symbol order; a symbol a factor did not
# score becomes NaN.
score_matrix = {
    factor_name: [scores.get(symbol, np.nan) for symbol in all_symbols]
    for factor_name, scores in factor_scores.items()
}

# Pairwise correlation between factor columns across symbols.
df = pd.DataFrame(score_matrix)
corr_matrix = df.corr()

print("FACTOR CORRELATION MATRIX")
print("=" * 80)
print(corr_matrix.round(3))
print("\nInterpretation:")
print("  |corr| < 0.3: Independent (good for diversification)")
print("  |corr| > 0.7: Redundant (consider removing one)")

FACTOR CORRELATION MATRIX
                  MomentumFactor  BollingerFactor  VolatilityFactor  \
MomentumFactor             1.000           -0.759             0.153   
BollingerFactor           -0.759            1.000            -0.126   
VolatilityFactor           0.153           -0.126             1.000   
VolumeFactor               0.159           -0.188            -0.632   
AmihudFactor               0.163           -0.027             0.761   

                  VolumeFactor  AmihudFactor  
MomentumFactor           0.159         0.163  
BollingerFactor         -0.188        -0.027  
VolatilityFactor        -0.632         0.761  
VolumeFactor             1.000        -0.357  
AmihudFactor            -0.357         1.000  

Interpretation:
  |corr| < 0.3: Independent (good for diversification)
  |corr| > 0.7: Redundant (consider removing one)


## 4. IC Decay Analysis

**Question**: How long does a factor's signal last?

- Fast decay → rebalance frequently
- Slow decay → hold longer, lower turnover

In [22]:
analyzer = FactorAnalyzer()

max_horizon = 10       # longest horizon passed to ic_decay_analysis
min_initial_ic = 0.01  # first-horizon |mean IC| must exceed this to report a half-life

print("\nIC DECAY ANALYSIS (First 3 factors)")

for i, factor in enumerate(factors[:3], 1):
    print(f"\n{i}. {factor.__class__.__name__}")
    print("=" * 80)

    decay_df = analyzer.ic_decay_analysis(factor=factor, data=store, max_horizon=max_horizon)

    if decay_df.empty:
        print("Not enough data for decay analysis")
        continue

    print(f"{'Horizon':<10} {'Mean IC':<12} {'Abs IC':<12}")
    print("-" * 40)
    for _, row in decay_df.head(5).iterrows():
        print(f"Day {int(row['horizon']):<5} {row['mean_ic']:>10.4f}  {row['abs_mean_ic']:>10.4f}")

    # Half-life: first horizon at which |mean IC| falls below 50% of its
    # first-horizon value. The baseline and the decayed values must come from
    # the same series: the `abs_mean_ic` column is on a different scale in the
    # recorded output (e.g. 0.37 vs 0.03), so testing it against half of
    # |mean_ic| could never trigger.
    abs_ic = decay_df['mean_ic'].abs()
    initial_ic = abs_ic.iloc[0]

    # A NaN initial IC fails this comparison and falls through to the "n/a" branch.
    if initial_ic > min_initial_ic:
        below_half = (abs_ic < initial_ic * 0.5).to_numpy()
        if below_half.any():
            # argmax on the boolean array is the *position* of the first True,
            # which is safe for .iloc whatever index decay_df carries.
            first_pos = int(below_half.argmax())
            half_life_label = str(int(decay_df['horizon'].iloc[first_pos]))
        else:
            half_life_label = f">{max_horizon}"
        print(f"\nHalf-life: ~{half_life_label} days")
    else:
        print(f"\nHalf-life: n/a (initial |IC| is NaN or not above {min_initial_ic})")


IC DECAY ANALYSIS (First 3 factors)

1. MomentumFactor
Horizon    Mean IC      Abs IC      
----------------------------------------
Day 1         0.0293      0.3666
Day 2         0.0341      0.3691
Day 3         0.0573      0.3683
Day 4         0.0653      0.3626
Day 5         0.0617      0.3436

Half-life: ~>10 days

2. BollingerFactor


  c /= stddev[:, None]
  c /= stddev[None, :]


Horizon    Mean IC      Abs IC      
----------------------------------------
Day 1            nan         nan
Day 2            nan         nan
Day 3            nan         nan
Day 4            nan         nan
Day 5            nan         nan

3. VolatilityFactor
Horizon    Mean IC      Abs IC      
----------------------------------------
Day 1         0.1302      0.4505
Day 2         0.1442      0.4465
Day 3         0.1847      0.4452
Day 4         0.1944      0.4336
Day 5         0.2107      0.4325

Half-life: ~>10 days


## 5. Turnover-Performance Tradeoff

**Goal**: Find optimal rebalancing frequency

- Daily: High costs, captures fast signals
- Weekly: Balanced
- Monthly: Low costs, misses opportunities

In [None]:
from clyptq.trading.engine import BacktestEngine

class TestStrategy(SimpleStrategy):
    """Score-weighted strategy over the notebook-level `factors` list.

    Every setting is fixed except the rebalance schedule, so runs at
    different frequencies can be compared like for like.

    Args:
        schedule: Rebalance schedule string forwarded to ``SimpleStrategy``
            as ``schedule_str``; also used in the strategy name.
    """

    def __init__(self, schedule="weekly"):
        # use_long_short=False: presumably a long-only book — confirm against
        # ScoreWeightedConstructor.
        constructor = ScoreWeightedConstructor(use_long_short=False)

        # Position limits applied to every run.
        limits = Constraints(
            max_position_size=0.25,
            max_gross_exposure=0.95,
            min_position_size=0.10,
            max_num_positions=5,
        )

        super().__init__(
            factors_list=factors,
            constructor=constructor,
            constraints_obj=limits,
            schedule_str=schedule,
            warmup=35,  # larger than the longest factor lookback defined above (30)
            name=f"Test-{schedule}",
        )

cost_model = CostModel(maker_fee=0.001, taker_fee=0.001, slippage_bps=5.0)
executor = BacktestExecutor(cost_model)


def compute_turnover(result):
    """Return total traded notional divided by average portfolio equity.

    Args:
        result: Backtest result exposing ``trades`` (items with ``amount`` and
            ``price``) and ``snapshots`` (items with ``equity``).

    Returns:
        The turnover ratio, or 0.0 when there are no snapshots or the average
        equity is not positive. The previous 1.0 stand-in for average equity
        reported raw traded notional as if it were a ratio.
    """
    if not result.snapshots:
        return 0.0
    avg_equity = sum(s.equity for s in result.snapshots) / len(result.snapshots)
    if avg_equity <= 0:
        return 0.0
    total_traded = sum(abs(f.amount * f.price) for f in result.trades)
    return total_traded / avg_equity


# Test different rebalancing frequencies
frequencies = ["daily", "weekly", "monthly"]
results = {}

print("Testing rebalance frequencies...")
for freq in frequencies:
    print(f"  {freq}...")
    strategy = TestStrategy(schedule=freq)
    engine = BacktestEngine(strategy, store, executor, 100000.0)
    result = engine.run(start=start, end=end, verbose=False)

    turnover = compute_turnover(result)

    results[freq] = {
        "total_return": result.metrics.total_return,
        "sharpe_ratio": result.metrics.sharpe_ratio,
        "turnover": turnover,
        "num_trades": len(result.trades),
    }

print("\nTURNOVER-PERFORMANCE FRONTIER")
print("=" * 90)
print(f"{'Frequency':<15} {'Return':<12} {'Sharpe':<10} {'Turnover':<12} {'Trades':<10}")
print("=" * 90)

for freq, metrics in results.items():
    print(
        f"{freq:<15} "
        f"{metrics['total_return']:>10.2%}  "
        f"{metrics['sharpe_ratio']:>8.3f}  "
        f"{metrics['turnover']:>10.1%}  "
        f"{metrics['num_trades']:>8}"
    )

# Find optimal frequency (best Sharpe ratio).
# Only finite Sharpe ratios are ranked: NaN compares False against every
# value, so max() over a mix of NaN and numbers depends on iteration order.
# Assumes sharpe_ratio is numeric — TODO confirm against the metrics object.
finite_sharpes = {
    freq: metrics["sharpe_ratio"]
    for freq, metrics in results.items()
    if np.isfinite(metrics["sharpe_ratio"])
}

if finite_sharpes:
    # Ties resolve to the first frequency in insertion order, as before.
    optimal_freq = max(finite_sharpes, key=finite_sharpes.get)
    print(f"\nOptimal frequency: {optimal_freq}")
else:
    optimal_freq = None
    print("\nOptimal frequency: n/a (no run produced a finite Sharpe ratio)")

## 6. Factor Orthogonalization

**Problem**: Factors may be correlated (redundant)

**Solution**: Orthogonalize to remove overlap

**Benefits**:
- Independent factors
- Better diversification
- Reduced multicollinearity

In [None]:
# Score every factor on the day before the end of the research window.
view = store.get_view(end - timedelta(days=1))
raw_scores = {type(factor).__name__: factor.compute(view) for factor in factors}

print(f"Computing {len(raw_scores)} factor scores...")
# Universe size is read from the first factor's scores only.
first_factor_scores = next(iter(raw_scores.values()))
print(f"Universe size: {len(first_factor_scores)} symbols")

# Orthogonalize the raw scores.
orthogonal_scores = orthogonalize_factors(raw_scores)
print(f"\nOrthogonalized {len(orthogonal_scores)} factors")
print("Result: Independent factors with zero correlation")

# Reduce the factor set to a fixed number of principal components.
n_components = 3
pca_scores = pca_factors(raw_scores, n_components=n_components)
print(f"\nPCA: {len(raw_scores)} factors -> {n_components} principal components")
print(f"PC names: {list(pca_scores.keys())}")

## 7. Research Workflow Summary

**Step 1**: Correlation Analysis
- Check factor independence
- Remove highly correlated factors (|corr| > 0.7)

**Step 2**: IC Decay
- Fast decay → rebalance frequently
- Slow decay → hold longer

**Step 3**: Turnover Analysis
- Find sweet spot between signal capture and costs

**Step 4**: Orthogonalization
- Remove redundancy
- Use PCA for dimensionality reduction

## Next Steps

- **04_strategy_comparison.ipynb**: Build strategies with your factors
- **05_parameter_optimization.ipynb**: Optimize factor parameters
- Use only independent, strong factors