# Factor Neutralization

Factor neutralization techniques:
1. Size Neutralization (market cap/volume neutralization)
2. Beta / Volatility Neutralization (this notebook uses each asset's realized volatility as the risk factor rather than an estimated market beta)
3. Sector/Industry Neutralization
4. Multi-Factor Neutralization
5. Orthogonalization methods

In [None]:
%load_ext autoreload
%autoreload 2

from datetime import datetime
import pandas as pd
import numpy as np

from clyptq import operator
from clyptq.data.provider import DataProvider
from clyptq.data.spec import OHLCVSpec
from clyptq.universe import CryptoLiquid

print("Imports successful!")

In [None]:
# Load data: daily spot OHLCV bars from the "gateio" exchange
ohlcv_spec = OHLCVSpec(exchange="gateio", market_type="spot", timeframe="1d")
universe = CryptoLiquid(top_n=50, min_dollar_volume=100_000)

# Provider configuration, collected in one place before construction
provider_config = {
    "universe": universe,
    "specs": {"ohlcv": ohlcv_spec},
    "rebalance_freq": "1d",
    "mode": "research",
}
p = DataProvider(**provider_config)

# Research window used by every cell below
load_start, load_end = datetime(2025, 10, 15), datetime(2026, 1, 5)
p.load(start=load_start, end=load_end)

# Pull the individual price/volume fields out of the provider
close, volume, high, low = (
    p[field] for field in ("close", "volume", "high", "low")
)

print(f"Data shape: {close.shape}")

## 1. Basic Concept: Why Neutralization?

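If an alpha signal is correlated with a specific factor (e.g., size):
- You cannot tell whether returns come from the factor exposure or from pure alpha
- The portfolio is exposed to that factor's risk
- Neutralization removes the factor component, leaving the part of the signal that is independent of the factor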

In [None]:
# Raw alpha: ranked 20-period returns of the close price
returns_20d = operator.ts_returns(close, period=20)
alpha_raw = operator.rank(returns_20d)

# Size proxy: ranked 20-period mean of dollar volume (close * volume)
dollar_volume = operator.mul(close, volume)
avg_dollar_volume = operator.ts_mean(dollar_volume, 20)
size_factor = operator.rank(avg_dollar_volume)

# Rolling 20-period correlation between the alpha and the size proxy
corr = operator.ts_corr(alpha_raw, size_factor, window=20)

# Summarise the most recent row only
latest_corr = corr.iloc[-1]
print("Alpha-Size Correlation (last row):")
print(f"  Mean: {latest_corr.mean():.4f}")
print(f"  Range: [{latest_corr.min():.4f}, {latest_corr.max():.4f}]")

## 2. Cross-Sectional Demean (Simple Neutralization)

The simplest method: subtract the cross-sectional mean at each timestamp so that the signal sums to zero across assets (dollar-neutral).

In [None]:
# Demean the raw alpha with operator.demean
alpha_demeaned = operator.demean(alpha_raw)

# Compare the last-row totals before and after demeaning
raw_total = alpha_raw.iloc[-1].sum()
demeaned_total = alpha_demeaned.iloc[-1].sum()
print(f"Raw alpha sum: {raw_total:.4f}")
print(f"Demeaned alpha sum: {demeaned_total:.10f} (should be ~0)")

# Demeaning gives dollar-neutrality only; the size correlation is re-measured
# here to show that factor exposure is a separate question
corr_after = operator.ts_corr(alpha_demeaned, size_factor, window=20)
print("\nAlpha-Size Correlation after demean:")
print(f"  Mean: {corr_after.iloc[-1].mean():.4f}")

## 3. Regression-Based Neutralization

Remove factor exposure from alpha via regression:
```
alpha_neutral = alpha - beta * factor
where beta = cov(alpha, factor) / var(factor)
```

In [None]:
def neutralize_factor(alpha, factor, window=20):
    """Strip a factor's linear exposure out of an alpha signal.

    Both inputs are first passed through ``operator.demean``. A rolling beta
    is then estimated as ``ts_cov(alpha, factor) / (ts_std(factor)**2 + 1e-10)``
    and the fitted component ``beta * factor`` is subtracted from the
    demeaned alpha.

    Args:
        alpha: Alpha signal to neutralize.
        factor: Factor whose exposure should be removed.
        window: Rolling window handed to ``ts_cov`` and ``ts_std``.

    Returns:
        Demeaned alpha minus ``beta`` times the demeaned factor.
    """
    centered_alpha = operator.demean(alpha)
    centered_factor = operator.demean(factor)

    rolling_cov = operator.ts_cov(centered_alpha, centered_factor, window=window)

    # ts_std returns a standard deviation; square it to obtain the variance
    factor_std = operator.ts_std(centered_factor, window)
    factor_variance = operator.mul(factor_std, factor_std)

    # The 1e-10 offset guards against dividing by an exactly-zero variance
    beta = operator.div(rolling_cov, operator.add(factor_variance, 1e-10))

    fitted = operator.mul(beta, centered_factor)
    return operator.sub(centered_alpha, fitted)


# Neutralize the raw alpha against the size proxy
alpha_size_neutral = neutralize_factor(alpha_raw, size_factor)

# Re-measure the rolling correlation on the neutralized signal
corr_neutral = operator.ts_corr(alpha_size_neutral, size_factor, window=20)

# `corr` is the pre-neutralization correlation computed in the earlier cell
size_corr_before = corr.iloc[-1].mean()
size_corr_after = corr_neutral.iloc[-1].mean()

print("=== Size Neutralization ===")
print(f"Before - Alpha-Size Corr: {size_corr_before:.4f}")
print(f"After  - Alpha-Size Corr: {size_corr_after:.6f} (should be ~0)")

## 4. Volatility (Beta) Neutralization

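Remove exposure to a volatility factor. The factor used here is each asset's own 20-day realized volatility (the rolling standard deviation of 1-day returns), ranked. It serves as a risk proxy; it is not a regression beta against the market.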

In [None]:
# Volatility factor: ranked 20-period std of 1-period returns
returns_1d = operator.ts_returns(close, period=1)
volatility = operator.ts_std(returns_1d, 20)
vol_factor = operator.rank(volatility)

# Neutralize the raw alpha against that factor
alpha_vol_neutral = neutralize_factor(alpha_raw, vol_factor)

# Rolling correlation with the factor, before and after neutralization
corr_vol_before = operator.ts_corr(alpha_raw, vol_factor, window=20)
corr_vol_after = operator.ts_corr(alpha_vol_neutral, vol_factor, window=20)

vol_corr_pre = corr_vol_before.iloc[-1].mean()
vol_corr_post = corr_vol_after.iloc[-1].mean()

print("=== Volatility Neutralization ===")
print(f"Before - Alpha-Vol Corr: {vol_corr_pre:.4f}")
print(f"After  - Alpha-Vol Corr: {vol_corr_post:.6f}")

## 5. Multi-Factor Neutralization (Sequential)

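Neutralize multiple factors sequentially: the output of one neutralization step becomes the input of the next. When the factors are correlated with each other, removing a later factor can reintroduce some exposure to an earlier one, so check the residual correlation with every factor afterwards.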

In [None]:
def neutralize_multi_factors(alpha, factors, window=20):
    """Apply ``neutralize_factor`` once per factor, in list order.

    Each step receives the output of the previous one, so the order of
    ``factors`` determines the order of neutralization.

    Args:
        alpha: Original alpha signal.
        factors: Iterable of factors, e.g. ``[factor1, factor2, ...]``.
        window: Rolling window forwarded to every ``neutralize_factor`` call.

    Returns:
        The alpha after all factors have been processed; ``alpha`` itself
        when ``factors`` is empty.
    """
    neutralized = alpha
    for exposure in factors:
        neutralized = neutralize_factor(neutralized, exposure, window)
    return neutralized


# Neutralize against size first, then volatility
risk_factors = [size_factor, vol_factor]
alpha_multi_neutral = neutralize_multi_factors(alpha_raw, risk_factors)

# Residual rolling correlation with each factor
corr_size = operator.ts_corr(alpha_multi_neutral, size_factor, window=20)
corr_vol = operator.ts_corr(alpha_multi_neutral, vol_factor, window=20)

residual_size = corr_size.iloc[-1].mean()
residual_vol = corr_vol.iloc[-1].mean()

print("=== Multi-Factor Neutralization (Size + Vol) ===")
print(f"Alpha-Size Corr: {residual_size:.6f}")
print(f"Alpha-Vol Corr:  {residual_vol:.6f}")

## 6. Gram-Schmidt Orthogonalization

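Remove the alpha's projection onto each factor, one factor at a time, to extract the part of the alpha not explained by the factors. Note that the factors themselves are not orthogonalized against each other here, so the procedure behaves like sequential neutralization rather than a simultaneous multi-factor regression.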

In [None]:
def orthogonalize(alpha, factors, window=20):
    """Subtract the alpha's projection onto each factor, one after another.

    The alpha is demeaned once up front. For every factor, in list order,
    the projection coefficient is estimated as
    ``ts_cov(residual, factor) / (ts_std(factor)**2 + 1e-10)`` and
    ``coefficient * factor`` is subtracted from the running residual.
    The factors are not orthogonalized against each other.

    Args:
        alpha: Alpha signal to process.
        factors: Iterable of factors to project out.
        window: Rolling window handed to ``ts_cov`` and ``ts_std``.

    Returns:
        The residual left after every projection has been removed.
    """
    residual = operator.demean(alpha)

    for exposure in factors:
        centered = operator.demean(exposure)

        # Rolling cov / variance stands in for (alpha . factor) / (factor . factor)
        rolling_cov = operator.ts_cov(residual, centered, window=window)
        exposure_std = operator.ts_std(centered, window)
        exposure_variance = operator.mul(exposure_std, exposure_std)

        # The 1e-10 offset guards against dividing by an exactly-zero variance
        weight = operator.div(rolling_cov, operator.add(exposure_variance, 1e-10))

        residual = operator.sub(residual, operator.mul(weight, centered))

    return residual


# Project the size and volatility factors out of the raw alpha
alpha_ortho = orthogonalize(alpha_raw, [size_factor, vol_factor])

# Residual rolling correlation with each factor, last row averaged
ortho_size_corr = operator.ts_corr(alpha_ortho, size_factor, window=20).iloc[-1].mean()
ortho_vol_corr = operator.ts_corr(alpha_ortho, vol_factor, window=20).iloc[-1].mean()

print("=== Orthogonalization ===")
print(f"Alpha-Size Corr: {ortho_size_corr:.6f}")
print(f"Alpha-Vol Corr:  {ortho_vol_corr:.6f}")

## 7. Winsorize + Neutralize Pipeline

Recommended pipeline for production use

In [None]:
def create_neutral_alpha(close, volume, lookback=20, neutralize_factors=None):
    """Build a normalized momentum alpha, optionally factor-neutralized.

    Steps, in execution order:
    1. ``lookback``-period returns of ``close``
    2. Winsorize with ``std_mult=3``
    3. Rank
    4. Neutralize against ``neutralize_factors`` through
       ``neutralize_multi_factors`` (window = ``lookback``); when no factors
       are supplied (``None`` or empty) the ranked signal is only demeaned
    5. L1-normalize

    Args:
        close: Close prices.
        volume: Not read by this function; kept in the signature.
        lookback: Return period, also used as the neutralization window.
        neutralize_factors: Optional list of factors to neutralize against.

    Returns:
        The L1-normalized alpha.
    """
    momentum = operator.ts_returns(close, period=lookback)
    clipped = operator.winsorize(momentum, std_mult=3)
    ranked = operator.rank(clipped)

    if not neutralize_factors:
        # No factors requested: fall back to a plain demean
        centered = operator.demean(ranked)
    else:
        centered = neutralize_multi_factors(
            ranked, neutralize_factors, window=lookback
        )

    return operator.l1_norm(centered)


# Define factors
size_f = operator.rank(operator.ts_mean(operator.mul(close, volume), 20))
vol_f = operator.rank(operator.ts_std(operator.ts_returns(close, 1), 20))

# Baseline without factor neutralization (the pipeline only demeans).
# Bound to its own name: the earlier cells define `alpha_raw` as the ranked
# momentum signal, and rebinding it here would make those cells use a
# different (L1-normalized) input if they are re-run after this one.
alpha_base_n = create_neutral_alpha(close, volume, neutralize_factors=None)

# Size neutralization
alpha_size_n = create_neutral_alpha(close, volume, neutralize_factors=[size_f])

# Size + Vol neutralization
alpha_multi_n = create_neutral_alpha(close, volume, neutralize_factors=[size_f, vol_f])

# Sum of absolute values on the last row of each pipeline output
print("=== Pipeline Results ===")
print(f"Raw alpha abs sum:        {alpha_base_n.iloc[-1].abs().sum():.4f}")
print(f"Size-neutral abs sum:     {alpha_size_n.iloc[-1].abs().sum():.4f}")
print(f"Multi-neutral abs sum:    {alpha_multi_n.iloc[-1].abs().sum():.4f}")

## 8. Backtest Comparison

In [None]:
from clyptq.strategy import Strategy
from clyptq.trading.engine import Engine


class RawMomentumStrategy(Strategy):
    """Baseline strategy: ranked 20-period momentum, no neutralization."""

    name = "RawMomentum"
    data = {"ohlcv": OHLCVSpec(exchange="gateio", market_type="spot", timeframe="1d")}
    universe = CryptoLiquid(top_n=50, min_dollar_volume=100_000)
    rebalance_freq = "1d"

    def warmup_periods(self):
        """Return the warm-up length (30) declared for this strategy."""
        return 30

    def compute_signal(self):
        """Return the L1-normalized rank of 20-period close returns."""
        prices = self.provider["close"]
        momentum = operator.ts_returns(prices, period=20)
        return operator.l1_norm(operator.rank(momentum))


class SizeNeutralStrategy(Strategy):
    """Size-neutralized momentum.

    The signal is the ranked 20-period return with its rolling exposure to a
    dollar-volume size proxy regressed out, then re-ranked and L1-normalized.
    """

    name = "SizeNeutral"
    data = {"ohlcv": OHLCVSpec(exchange="gateio", market_type="spot", timeframe="1d")}
    universe = CryptoLiquid(top_n=50, min_dollar_volume=100_000)
    rebalance_freq = "1d"

    def warmup_periods(self):
        """Return the number of warm-up periods this signal needs."""
        # The signal chains a 20-period return with a 20-period rolling
        # covariance, so roughly 40 bars are consumed before the first
        # complete value; the previous value of 30 was shorter than that.
        # 50 keeps about the same margin RawMomentumStrategy has over its
        # own 20-period lookback.
        # NOTE(review): assumes warmup_periods is the count of history bars
        # available before the first signal -- confirm against clyptq.
        return 50

    def compute_signal(self):
        """Return the L1-normalized rank of the size-neutralized alpha."""
        close = self.provider["close"]
        volume = self.provider["volume"]

        # Alpha: ranked 20-period returns
        returns = operator.ts_returns(close, period=20)
        alpha = operator.rank(returns)

        # Size factor: ranked 20-period mean dollar volume
        dollar_vol = operator.mul(close, volume)
        size = operator.rank(operator.ts_mean(dollar_vol, 20))

        # Neutralize: demean both series, then subtract beta * size
        alpha_dm = operator.demean(alpha)
        size_dm = operator.demean(size)

        cov = operator.ts_cov(alpha_dm, size_dm, window=20)
        # ts_std returns a standard deviation; square it to get the variance
        size_std = operator.ts_std(size_dm, 20)
        size_var = operator.mul(size_std, size_std)
        # The 1e-10 offset guards against dividing by an exactly-zero variance
        beta = operator.div(cov, operator.add(size_var, 1e-10))

        neutral = operator.sub(alpha_dm, operator.mul(beta, size_dm))

        # Spot: long-only, so the residual is re-ranked before normalizing
        signal = operator.rank(neutral)
        return operator.l1_norm(signal)


# Backtest both strategies over the same window and settings
engine = Engine()
START = datetime(2025, 10, 15)
END = datetime(2026, 1, 5)

results = {}

# Label -> strategy instance, run in this order
strategies = {
    "Raw": RawMomentumStrategy(),
    "SizeNeutral": SizeNeutralStrategy(),
}

for name, strategy in strategies.items():
    result = engine.run(
        strategy=strategy,
        mode="backtest",
        start=START,
        end=END,
        initial_capital=10000.0,
        market_type="spot",
        verbose=False,
    )
    results[name] = result

    # Nothing to report when the run produced no metrics
    if not result.metrics:
        continue

    metrics = result.metrics
    print(f"{name}:")
    print(f"  Return: {metrics.total_return:.2%}")
    print(f"  Sharpe: {metrics.sharpe_ratio:.2f}")
    print(f"  MaxDD:  {metrics.max_drawdown:.2%}")
    print()

## Summary

### Factor Neutralization Methods

| Method | Description | Use Case |
|--------|-------------|----------|
| `demean` | Remove mean | Dollar-neutral |
| Regression | Remove beta * factor | Single factor neutral |
| Sequential | Sequential neutralization | Multi-factor |
| Orthogonalization | Projection removal, one factor at a time (as implemented here) | Multi-factor |

### Key Operators

```python
# Basic neutralization
alpha_neutral = operator.demean(alpha)

# Factor neutralization (regression-based)
alpha_dm = operator.demean(alpha)
factor_dm = operator.demean(factor)
cov = operator.ts_cov(alpha_dm, factor_dm, window=window)
std = operator.ts_std(factor_dm, window)
beta = operator.div(cov, operator.add(operator.mul(std, std), 1e-10))
alpha_neutral = operator.sub(alpha_dm, operator.mul(beta, factor_dm))
```

### Best Practices

1. **Demean first**: Always demean alpha and factor before regression
2. **Winsorize**: Remove outliers before neutralization
3. **Rolling window**: Use sufficient lookback (20+ days)
4. **Numerical stability**: Add small value (1e-10) to variance

In [None]:
# Completion message for the notebook
print("Factor Neutralization examples complete!")