# ClyptQ Operators & Transforms

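This notebook exercises the main features of the `operator` module:
1. Time-series operators (ts_*)
2. Cross-sectional operators (rank, demean, etc.)
3. Cross-alpha operators (ca_*)
4. Arithmetic operators
5. Comparison & logical operators
6. Missing data handling
7. Transform pipeline
8. Multi-timeframe example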

In [None]:
%load_ext autoreload
%autoreload 2

from datetime import datetime
import pandas as pd
import numpy as np

from clyptq import operator
from clyptq.data.provider import DataProvider
from clyptq.data.spec import OHLCVSpec
from clyptq.universe import CryptoLiquid

print("Imports successful!")

In [None]:
# Data setup: Gate.io spot OHLCV on a 1d timeframe for a CryptoLiquid universe (top_n=30).
ohlcv_spec = OHLCVSpec(exchange="gateio", market_type="spot", timeframe="1d")
universe = CryptoLiquid(top_n=30, min_dollar_volume=100_000)

p = DataProvider(universe=universe, specs={"ohlcv": ohlcv_spec}, rebalance_freq="1d", mode="research")
p.load(start=datetime(2025, 10, 15), end=datetime(2026, 1, 5))

# Pull each field once, in a fixed order; the cells below reuse these names.
close, volume, high, low = (p[field] for field in ("close", "volume", "high", "low"))

print(f"Data shape: {close.shape}")

## 1. Time-Series Operators (ts_*)

Operations along the time axis: rolling-window statistics plus lag-based operators (returns, delay, delta)

In [None]:
print("=== Time-Series Operators ===")

# Rolling statistics with a window of 20: price statistics on close, a running total on volume.
ts_mean_20, ts_std_20, ts_min_20, ts_max_20 = (
    rolling_stat(close, 20)
    for rolling_stat in (operator.ts_mean, operator.ts_std, operator.ts_min, operator.ts_max)
)
ts_sum_20 = operator.ts_sum(volume, 20)

print(
    "\n".join(
        f"{label}: {frame.shape}"
        for label, frame in (
            ("ts_mean(20)", ts_mean_20),
            ("ts_std(20)", ts_std_20),
            ("ts_min(20)", ts_min_20),
            ("ts_max(20)", ts_max_20),
            ("ts_sum(20)", ts_sum_20),
        )
    )
)

# Returns over 1, 5 and 20 periods (returns_20d is reused by later cells).
returns_1d, returns_5d, returns_20d = (
    operator.ts_returns(close, period=lookback) for lookback in (1, 5, 20)
)

print(
    "\n"
    + "\n".join(
        f"ts_returns({lookback}): {frame.shape}"
        for lookback, frame in ((1, returns_1d), (5, returns_5d), (20, returns_20d))
    )
)

# Lagged copies of close.
lagged_1, lagged_5 = (operator.ts_delay(close, lag) for lag in (1, 5))

print(f"\nts_delay(1): {lagged_1.shape}\nts_delay(5): {lagged_5.shape}")

# Differences of close against its own lag.
delta_1, delta_5 = (operator.ts_delta(close, lag) for lag in (1, 5))

print(f"\nts_delta(1): {delta_1.shape}\nts_delta(5): {delta_5.shape}")

# Rank of each value within its trailing window of 20; the printed range shows the scale used.
ts_rank_20 = operator.ts_rank(close, 20)
print(
    f"\nts_rank(20): {ts_rank_20.shape}, "
    f"range: [{ts_rank_20.min().min():.2f}, {ts_rank_20.max().max():.2f}]"
)

In [None]:
# Rolling co-movement of close and volume over a window of 20.
print("=== Rolling Correlation/Covariance ===")

ts_corr, ts_cov = (
    pairwise_stat(close, volume, window=20)
    for pairwise_stat in (operator.ts_corr, operator.ts_cov)
)

print(
    f"ts_corr(close, volume, 20): {ts_corr.shape}\n"
    f"ts_cov(close, volume, 20): {ts_cov.shape}"
)

# Peek at the most recent row, skipping symbols that have no value there.
print("\nCorrelation sample (last row, first 5 symbols):")
print(ts_corr.iloc[-1].dropna().head())

## 2. Cross-Sectional Operators

Operations across symbols at each timestamp (row-wise)

In [None]:
print("=== Cross-Sectional Operators ===")

# Rank: symbol ranking at each timestamp (0~1); the printed range confirms the scale.
ranked = operator.rank(returns_20d)
print(f"rank: shape={ranked.shape}, range=[{ranked.min().min():.2f}, {ranked.max().max():.2f}]")

# Demean: remove the mean at each timestamp, so the last row should average to ~0.
demeaned = operator.demean(ranked)
print(f"demean: row_mean={demeaned.iloc[-1].mean():.10f} (should be ~0)")

# Z-score normalization: the last row should have mean ~0 and std ~1.
# NOTE(review): pandas Series.std() defaults to ddof=1; if operator.zscore
# normalizes with the population std (ddof=0), row_std prints slightly above 1.
zscored = operator.zscore(returns_20d)
print(f"zscore: row_mean={zscored.iloc[-1].mean():.6f}, row_std={zscored.iloc[-1].std():.6f}")

# L1 norm: sum of |weights| = 1.
# Check the absolute sum (the actual L1 invariant), not the signed sum: the
# signed sum only equals 1 here because `ranked` is non-negative, and it would
# report ~0 for a demeaned (long/short) input.
l1_normed = operator.l1_norm(ranked)
print(f"l1_norm: abs_sum={l1_normed.iloc[-1].abs().sum():.6f} (should be 1)")

# L2 norm: sum of weights^2 = 1
l2_normed = operator.l2_norm(ranked)
print(f"l2_norm: sum_sq={(l2_normed.iloc[-1]**2).sum():.6f} (should be 1)")

In [None]:
# Winsorize caps extreme values at mean ± std_mult * std; compare the overall
# min/max of the 20-period returns before and after capping at 3 std.
print("\n=== Winsorize ===")

winsorized = operator.winsorize(returns_20d, std_mult=3)
print(
    "\n".join(
        f"{label}: [{frame.min().min():.4f}, {frame.max().max():.4f}]"
        for label, frame in (
            ("Original range", returns_20d),
            ("Winsorized range (3 std)", winsorized),
        )
    )
)

In [None]:
# Clip hard-limits values to a fixed range; shown on a demeaned rank signal
# bounded to [-0.1, 0.1].
print("\n=== Clip ===")

signal = operator.demean(operator.rank(returns_20d))
clipped = operator.clip(signal, lower=-0.1, upper=0.1)

print(
    "\n".join(
        f"{label} range: [{frame.min().min():.4f}, {frame.max().max():.4f}]"
        for label, frame in (("Original", signal), ("Clipped", clipped))
    )
)

## 3. Cross-Alpha Operators (ca_*)

Combining multiple alpha signals

In [None]:
print("=== Cross-Alpha Operators ===")

# Three ranked momentum alphas that differ only in the return lookback.
alpha1, alpha2, alpha3 = (
    operator.rank(operator.ts_returns(close, period=lookback)) for lookback in (5, 10, 20)
)

print(
    "\n".join(
        f"Alpha{index} ({lookback}d momentum): {frame.shape}"
        for index, lookback, frame in ((1, 5, alpha1), (2, 10, alpha2), (3, 20, alpha3))
    )
)

# Element-wise average of the alphas.
avg = operator.ca_reduce_avg(alpha1, alpha2, alpha3)
print(f"\nca_reduce_avg: {avg.shape}")

# Element-wise sum.
summed = operator.ca_reduce_sum(alpha1, alpha2, alpha3)
print(f"ca_reduce_sum: {summed.shape}")

# Element-wise extremes.
maxed = operator.ca_reduce_max(alpha1, alpha2, alpha3)
mined = operator.ca_reduce_min(alpha1, alpha2, alpha3)
print(f"ca_reduce_max: {maxed.shape}\nca_reduce_min: {mined.shape}")

# Weighted combination; these weights sum to 1, so it acts as a weighted average.
weights = [0.5, 0.3, 0.2]
weighted = operator.ca_weighted_sum(alpha1, alpha2, alpha3, weights=weights)
print(f"\nca_weighted_sum (w={weights}): {weighted.shape}")

# Dispersion across alphas (how much they disagree).
std = operator.ca_reduce_stddev(alpha1, alpha2, alpha3)
print(f"ca_reduce_stddev: {std.shape}")

# Median across alphas (robust to a single outlying alpha).
median = operator.ca_reduce_median(alpha1, alpha2, alpha3)
print(f"ca_reduce_median: {median.shape}")

## 4. Arithmetic Operators

In [None]:
print("=== Arithmetic Operators ===")

# Binary arithmetic: frame-with-scalar and frame-with-frame.
a = operator.add(close, 100)
b = operator.sub(close, high)
c = operator.mul(close, volume)  # Dollar volume
# The divisor is volume + 1e-8 (guards against zero volume); the label below
# abbreviates it as "volume".
d = operator.div(close, operator.add(volume, 1e-8))

print(
    "\n".join(
        f"{label}: {frame.shape}"
        for label, frame in (
            ("add(close, 100)", a),
            ("sub(close, high)", b),
            ("mul(close, volume)", c),
            ("div(close, volume)", d),
        )
    )
)

# Unary operations on the 20-period returns.
negated, absoluted, signed = (
    unary(returns_20d) for unary in (operator.neg, operator.abs, operator.sign)
)

print(f"\nneg: {negated.shape}\nabs: {absoluted.shape}")
print(f"sign: unique values = {signed.iloc[-1].dropna().unique()}")

# Math functions; inputs are shifted or made non-negative first to stay in-domain.
logged = operator.log(operator.add(close, 1))  # log(1+x)
sqrted = operator.sqrt(operator.abs(returns_20d))
powered = operator.pow(returns_20d, 2)

print(
    f"\nlog(1+close): {logged.shape}\n"
    f"sqrt(|returns|): {sqrted.shape}\n"
    f"pow(returns, 2): {powered.shape}"
)

## 5. Comparison & Logical Operators

In [None]:
print("=== Comparison Operators ===")

# Boolean masks: is close above / below its 20-window mean (ts_mean_20 from the cell above)?
gt_ma, lt_ma = (compare(close, ts_mean_20) for compare in (operator.gt, operator.lt))

print(
    "\n".join(
        f"{label}(close, MA20): {mask.sum().sum()} True values"
        for label, mask in (("gt", gt_ma), ("lt", lt_ma))
    )
)

# Combine masks: price above its mean AND volume above its own 20-window mean;
# price above OR below its mean.
both = operator.logical_and(gt_ma, operator.gt(volume, operator.ts_mean(volume, 20)))
either = operator.logical_or(gt_ma, lt_ma)

print(
    f"\nlogical_and: {both.sum().sum()} True values\n"
    f"logical_or: {either.sum().sum()} True values"
)

# Conditional select: keep positive returns, replace everything else with 0.
positive_only = operator.where(operator.gt(returns_20d, 0), returns_20d, 0)
print(f"\nwhere(returns > 0, returns, 0): min={positive_only.min().min():.4f}")

## 6. Missing Data Handling

In [None]:
print("=== Missing Data ===")

# Boolean masks flagging missing / present entries of the 20-period returns.
is_na, not_na = (probe(returns_20d) for probe in (operator.isna, operator.notna))

print(
    f"isna: {is_na.sum().sum()} NaN values\n"
    f"notna: {not_na.sum().sum()} non-NaN values"
)

# Replace every NaN with a constant.
filled_zero = operator.fillna(returns_20d, 0)
print(f"\nfillna(0): {filled_zero.isna().sum().sum()} NaN remaining")

# Forward fill; the printed count shows how many NaNs it could not fill.
ffilled = operator.ffill(returns_20d)
print(f"ffill: {ffilled.isna().sum().sum()} NaN remaining")

## 7. Transform Pipeline Example

In [None]:
print("=== Transform Pipeline ===")

# Momentum signal generation pipeline
def momentum_signal(close, period=20, *, std_mult=3):
    """Build a long/short momentum weight signal from close prices.

    Pipeline: ``ts_returns`` -> ``winsorize`` -> ``rank`` -> ``demean`` ->
    ``l1_norm``. The result is intended to be market-neutral (each row sums
    to ~0) with unit gross exposure (each row's absolute values sum to ~1).

    Args:
        close: Close-price frame, as returned by ``p["close"]``.
        period: Lookback passed to ``operator.ts_returns``.
        std_mult: Winsorization width in standard deviations, forwarded to
            ``operator.winsorize``. Keyword-only; defaults to 3.

    Returns:
        The L1-normalized, demeaned rank of the winsorized returns.
    """
    # 1. Momentum = return over `period`
    returns = operator.ts_returns(close, period=period)

    # 2. Winsorize: cap (not drop) outliers at mean ± std_mult * std
    returns = operator.winsorize(returns, std_mult=std_mult)

    # 3. Rank across symbols (0~1) so the signal depends on ordering, not magnitude
    ranked = operator.rank(returns)

    # 4. Demean so longs and shorts offset each other
    demeaned = operator.demean(ranked)

    # 5. L1-normalize so absolute weights sum to 1
    return operator.l1_norm(demeaned)

signal = momentum_signal(close, period=20)

# Report the shape and sanity-check the most recent row: signed weights should
# cancel out and absolute weights should add up to one.
print(
    "Pipeline output:\n"
    f"  - Shape: {signal.shape}\n"
    f"  - Row sum (should be 0): {signal.iloc[-1].sum():.10f}\n"
    f"  - Abs row sum (should be 1): {signal.iloc[-1].abs().sum():.6f}\n"
    f"  - Range: [{signal.min().min():.4f}, {signal.max().max():.4f}]"
)

## 8. Multi-Timeframe Example

In [None]:
print("=== Multi-Timeframe ===")

# Close prices at the base timeframe and at two coarser ones, requested via
# the (field, timeframe) key.
close_1d = p["close"]
close_3d, close_5d = (p["close", timeframe] for timeframe in ("3d", "5d"))

print(
    f"1d shape: {close_1d.shape}\n"
    f"3d shape: {close_3d.shape} (auto-aligned to 1d)\n"
    f"5d shape: {close_5d.shape} (auto-aligned to 1d)"
)

# One ranked momentum alpha per timeframe, each with its own lookback.
# NOTE(review): presumably the 3d/5d frames are forward-filled onto the 1d
# index, making `period` a count of 1d rows; confirm against DataProvider.
alpha_1d, alpha_3d, alpha_5d = (
    operator.rank(operator.ts_returns(prices, period=lookback))
    for prices, lookback in ((close_1d, 5), (close_3d, 3), (close_5d, 5))
)

# Blend the alphas, then turn the blend into long/short weights with unit gross exposure.
combined = operator.ca_weighted_sum(alpha_1d, alpha_3d, alpha_5d, weights=[0.5, 0.3, 0.2])
final = operator.l1_norm(operator.demean(combined))

print(
    "\nCombined signal:\n"
    f"  - Shape: {final.shape}\n"
    f"  - Abs sum: {final.iloc[-1].abs().sum():.6f}"
)

## Summary

### Operator Categories

| Category | Examples | Description |
|----------|----------|-------------|
| Time-series (ts_*) | ts_mean, ts_std, ts_returns, ts_delay | Time-axis rolling operations |
| Cross-sectional | rank, demean, zscore, l1_norm | Operations across symbols |
| Cross-alpha (ca_*) | ca_reduce_avg, ca_weighted_sum | Alpha combination |
| Arithmetic | add, sub, mul, div, neg, abs | Basic operations |
| Comparison & logical | gt, lt, eq, where, logical_and, logical_or | Comparison/conditional |
| Missing | fillna, ffill, isna | Missing data handling |

In [None]:
# Completion marker: when the notebook is run top to bottom, reaching this
# line means no earlier cell raised.
print("Operators & Transforms test complete!")