# Feature Analysis: 4-Factor Model Signals

Analyze external macro, risk premium, adoption, and institutional signals.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from data_acquisition import DataAcquisition
from feature_engineering import FeatureEngineer

# Apply the 'seaborn-v0_8-darkgrid' matplotlib style sheet to every figure
# created after this point.
plt.style.use('seaborn-v0_8-darkgrid')
# IPython magic (not plain Python): draw figures inline in the notebook output.
%matplotlib inline

# Pull the full raw dataset; the acquisition helper is configured from config.yaml.
data_acq = DataAcquisition('config.yaml')
dataset = data_acq.fetch_full_dataset()

# Derive the feature matrix and the target from the price table. The feature
# step additionally consumes the events table; both steps share one engineer
# configured from the same YAML file.
engineer = FeatureEngineer('config.yaml')
prices = dataset['prices']
features = engineer.engineer_all_features(prices, dataset['events'])
target = engineer.create_target_variable(prices)

# Quick look at what was produced.
print(f"Features shape: {features.shape}")
print(f"Feature names: {list(features.columns)}")

## 1. External Macro Signal

In [None]:
fig, ax = plt.subplots(figsize=(14, 6))

# Signal line plus a dashed zero reference.
ax.plot(features.index, features['external_macro'], color='darkblue', linewidth=2)
ax.axhline(0, linestyle='--', color='black', alpha=0.5)

# Shade calendar year 2022, labelled as the Fed tightening window.
tightening_start = pd.Timestamp('2022-01-01')
tightening_end = pd.Timestamp('2022-12-31')
ax.axvspan(tightening_start, tightening_end, color='orange', alpha=0.2, label='Fed Tightening')

# Titles, axis labels and cosmetics.
ax.set_title('External Macro Signal: Z-Score(Δ(Treasury) + Δ(VIX))', fontsize=14, fontweight='bold')
ax.set_ylabel('Z-Score', fontsize=12)
ax.set_xlabel('Date', fontsize=12)
ax.grid(True, alpha=0.3)
ax.legend(loc='best')

plt.tight_layout()
plt.show()

# Summary statistics over the full sample, then the mean restricted to 2022.
print("\nExternal Macro Signal Statistics:")
print(f"  Mean: {features['external_macro'].mean():.4f}")
print(f"  Std: {features['external_macro'].std():.4f}")
print(f"  Min: {features['external_macro'].min():.4f}")
print(f"  Max: {features['external_macro'].max():.4f}")
print(f"\n  2022 Mean: {features[features.index.year == 2022]['external_macro'].mean():.4f}")

## 2. Crypto Risk Premium Signal

In [None]:
fig, axes = plt.subplots(2, 1, figsize=(14, 10), sharex=True)

# Windows shaded on both panels: (start, end, fill colour, legend label).
stress_windows = [
    ('2020-03-01', '2020-04-01', 'red', 'COVID'),
    ('2022-11-01', '2022-12-01', 'purple', 'FTX'),
]

# One entry per panel, top to bottom: (column, line colour, y-label, title).
panels = [
    ('crypto_risk_premium_5d', 'darkgreen', 'Risk Premium (5d)', 'Crypto Risk Premium Signal (5-day)'),
    ('crypto_risk_premium_20d', 'teal', 'Risk Premium (20d)', 'Crypto Risk Premium Signal (20-day)'),
]

for row, (column, line_color, y_label, panel_title) in enumerate(panels):
    panel = axes[row]
    is_top = row == 0

    panel.plot(features.index, features[column], color=line_color, linewidth=2)
    panel.axhline(0, linestyle='--', color='black', alpha=0.5)

    for window_start, window_end, fill, window_name in stress_windows:
        # Only the top panel carries a legend, so only its spans get labels.
        label_kwargs = {'label': window_name} if is_top else {}
        panel.axvspan(pd.Timestamp(window_start), pd.Timestamp(window_end),
                      alpha=0.2, color=fill, **label_kwargs)

    panel.set_ylabel(y_label, fontsize=12)
    panel.set_title(panel_title, fontsize=13, fontweight='bold')
    if is_top:
        panel.legend(loc='best')
    else:
        # The x-axis is shared, so the date label goes on the bottom panel only.
        panel.set_xlabel('Date', fontsize=12)
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Full-sample mean of each horizon.
print("\nRisk Premium Statistics:")
print(f"  5d Mean: {features['crypto_risk_premium_5d'].mean():.4f}")
print(f"  20d Mean: {features['crypto_risk_premium_20d'].mean():.4f}")

## 3. Adoption Signal

In [None]:
fig, ax = plt.subplots(figsize=(14, 6))

# Adoption series with a dashed zero baseline.
ax.plot(features.index, features['adoption_signal'], color='purple', linewidth=2)
ax.axhline(0, linestyle='--', color='black', alpha=0.5)

# Shaded market regimes: (first day, last day, fill colour, legend label).
market_regimes = [
    ('2020-01-01', '2021-12-31', 'green', 'Bull Run'),
    ('2022-01-01', '2022-12-31', 'red', 'Bear Market'),
]
for regime_start, regime_end, fill, regime_name in market_regimes:
    ax.axvspan(pd.Timestamp(regime_start), pd.Timestamp(regime_end),
               alpha=0.2, color=fill, label=regime_name)

# Titles, axis labels and cosmetics.
ax.set_title('Adoption Signal: Δ(Log(Crypto MCap ex-Stablecoins))', fontsize=14, fontweight='bold')
ax.set_ylabel('Adoption Signal', fontsize=12)
ax.set_xlabel('Date', fontsize=12)
ax.grid(True, alpha=0.3)
ax.legend(loc='best')

plt.tight_layout()
plt.show()

# Overall mean, then the mean inside each shaded regime.
print("\nAdoption Signal Statistics:")
print(f"  Mean: {features['adoption_signal'].mean():.4f}")
print(f"  Bull Run (2020-2021) Mean: {features[(features.index >= '2020-01-01') & (features.index <= '2021-12-31')]['adoption_signal'].mean():.4f}")
print(f"  Bear Market (2022) Mean: {features[features.index.year == 2022]['adoption_signal'].mean():.4f}")

## 4. Institutional Signal

In [None]:
# Series plotted below; named once to keep the plot call short.
institutional_series = features['institutional_signal']

fig, ax = plt.subplots(figsize=(14, 6))

# Draw as a step function (each value held until the next observation),
# with a dashed zero baseline.
ax.plot(
    features.index,
    institutional_series,
    drawstyle='steps-post',
    color='darkred',
    linewidth=2,
)
ax.axhline(0, linestyle='--', color='black', alpha=0.5)

# Titles, axis labels and cosmetics.
ax.set_title('Institutional Validation Signal (±1 for 30 days post-event)', fontsize=14, fontweight='bold')
ax.set_ylabel('Institutional Signal', fontsize=12)
ax.set_xlabel('Date', fontsize=12)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Row counts where the signal is non-zero, positive, and negative respectively.
print("\nInstitutional Signal:")
print(f"  Active periods: {(features['institutional_signal'] != 0).sum()} days")
print(f"  Positive events: {(features['institutional_signal'] > 0).sum()} days")
print(f"  Negative events: {(features['institutional_signal'] < 0).sum()} days")

## 5. Feature Correlation with Target (Next-Day BTC Returns)

In [None]:
# Pearson correlation of every feature column with the target, computed on the
# dates where both the feature and the target are present and non-missing.
#
# Features whose correlation is undefined are left out instead of being stored
# as NaN: a NaN would make the ranking below arbitrary, because every
# comparison against NaN is False. Skipped features are reported after the loop.
correlations = {}
skipped_features = []
for col in features.columns:
    feature_values = features[col].dropna()
    target_values = target.reindex(feature_values.index).dropna()
    common_idx = feature_values.index.intersection(target_values.index)

    # A correlation needs at least two paired observations.
    if len(common_idx) < 2:
        skipped_features.append(col)
        continue

    # A constant series has zero variance, so corrcoef divides by zero and
    # returns NaN; silence that expected warning and filter the result below.
    with np.errstate(divide='ignore', invalid='ignore'):
        corr = np.corrcoef(
            feature_values.loc[common_idx],
            target_values.loc[common_idx]
        )[0, 1]

    if np.isfinite(corr):
        correlations[col] = corr
    else:
        skipped_features.append(col)

if skipped_features:
    print(f"Skipped (correlation undefined): {skipped_features}")

# Rank by absolute correlation, strongest first.
sorted_corr = sorted(correlations.items(), key=lambda item: abs(item[1]), reverse=True)

# Horizontal bar chart: green for positive, red for zero or negative values.
fig, ax = plt.subplots(figsize=(10, 8))
features_list = [name for name, _ in sorted_corr]
corr_values = [value for _, value in sorted_corr]

colors = ['green' if c > 0 else 'red' for c in corr_values]
ax.barh(features_list, corr_values, color=colors, alpha=0.7)
ax.axvline(0, color='black', linestyle='--', alpha=0.5)
ax.set_xlabel('Correlation', fontsize=12)
ax.set_title('Feature Correlation with Next-Day BTC Returns', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3, axis='x')
plt.tight_layout()
plt.show()

# The five strongest relationships (fewer if fewer features were ranked).
print("\nTop 5 Features by Correlation:")
for feature, corr in sorted_corr[:5]:
    print(f"  {feature}: {corr:.4f}")

## 6. Interaction Terms Analysis

In [None]:
# The compound column is optional; the whole cell is skipped when it is absent.
if 'macro_risk_compound' in features.columns:
    fig, ax = plt.subplots(figsize=(14, 6))

    # Compound series with a dashed zero baseline.
    ax.plot(features.index, features['macro_risk_compound'], color='darkorange', linewidth=2)
    ax.axhline(0, linestyle='--', color='black', alpha=0.5)

    # Shaded stress windows: (start, end, fill colour, legend label).
    for window_start, window_end, fill, window_name in (
        ('2020-03-01', '2020-04-01', 'red', 'COVID'),
        ('2022-11-01', '2022-12-01', 'purple', 'FTX'),
    ):
        ax.axvspan(pd.Timestamp(window_start), pd.Timestamp(window_end),
                   alpha=0.2, color=fill, label=window_name)

    # Titles, axis labels and cosmetics.
    ax.set_title('Macro-Risk Compound Signal (External Macro × Risk Premium)', fontsize=14, fontweight='bold')
    ax.set_ylabel('Compound Signal', fontsize=12)
    ax.set_xlabel('Date', fontsize=12)
    ax.grid(True, alpha=0.3)
    ax.legend(loc='best')

    plt.tight_layout()
    plt.show()

    print("\nCompound Signal during stress periods:")
    # Row masks for the two shaded windows; both bounds are inclusive.
    in_covid_window = (features.index >= '2020-03-01') & (features.index <= '2020-04-01')
    in_ftx_window = (features.index >= '2022-11-01') & (features.index <= '2022-12-01')
    covid_period = features[in_covid_window]
    ftx_period = features[in_ftx_window]

    print(f"  COVID (Mar 2020): {covid_period['macro_risk_compound'].mean():.4f}")
    print(f"  FTX (Nov 2022): {ftx_period['macro_risk_compound'].mean():.4f}")

## 7. Historical Validation: Do Signals Behave as Expected?

In [None]:
# Print the mean of selected signals inside three historical windows so they
# can be compared by eye with the expectation stated in each printed label.
print("Historical Event Validation:")
print("=" * 60)

# COVID crash window: 2020-03-01 through 2020-04-01, both bounds inclusive.
in_covid_window = (features.index >= '2020-03-01') & (features.index <= '2020-04-01')
covid_period = features[in_covid_window]
print("\nCOVID Crash (March 2020):")
print(f"  External Macro (should be elevated): {covid_period['external_macro'].mean():.4f}")
print(f"  Risk Premium 5d (should spike): {covid_period['crypto_risk_premium_5d'].mean():.4f}")

# Fed tightening: every row dated in calendar year 2022. The macro signal is
# expected to be positive/elevated here and the adoption signal negative.
in_2022 = features.index.year == 2022
fed_period = features[in_2022]
print("\nFed Tightening (2022):")
print(f"  External Macro (should be positive/elevated): {fed_period['external_macro'].mean():.4f}")
print(f"  Adoption Signal (should be negative): {fed_period['adoption_signal'].mean():.4f}")

# FTX collapse window: 2022-11-01 through 2022-12-01, both bounds inclusive.
in_ftx_window = (features.index >= '2022-11-01') & (features.index <= '2022-12-01')
ftx_period = features[in_ftx_window]
print("\nFTX Collapse (November 2022):")
print(f"  Risk Premium 5d (should spike): {ftx_period['crypto_risk_premium_5d'].mean():.4f}")
print(f"  Institutional Signal (should be -1): {ftx_period['institutional_signal'].mean():.4f}")