# EquiLend Factor Development Notebook
*Interactive development and testing of new factors*

Use this notebook for:
- 🧪 **Experimenting with new factor ideas**
- 📊 **Testing factor performance on historical data**
- 🔍 **Debugging and refining factor calculations**
- 📈 **Visualizing factor behavior and distributions**

In [None]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import date

# Import existing factor classes for comparison
sys.path.append('../models')
from core_factors import ShortInterestMomentum, BorrowCostShock

# Startup banner: confirms the imports above succeeded and shows today's date.
print(
    f"Factor Development Environment - {date.today()}",
    "Ready for interactive factor development!",
    sep="\n",
)

## 1. Load Test Data

Start with sample or historical data for testing new factors.

In [None]:
# Create sample data for testing.
# Seeding NumPy's global RNG keeps the synthetic dataset reproducible.
np.random.seed(42)
dates = pd.date_range('2024-01-01', periods=100, freq='D')
tickers = [f'STOCK_{i:02d}' for i in range(20)]

# Generate sample securities lending data: one row per (date, ticker) pair.
# A comprehension is used so the loop variables stay local to it; the previous
# `for date in dates` loop overwrote the `datetime.date` class imported at the
# top of the notebook, breaking `date.today()` on any later re-run.
# Values are drawn in the same order as before (dates outer, tickers inner,
# columns top to bottom), so the seeded data is unchanged.
data = [
    {
        'Date': trade_date,
        'ticker': ticker,
        'Fee All (BPS)': np.random.uniform(10, 500),
        'Active Utilization (%)': np.random.uniform(1, 95),
        'On Loan Quantity': np.random.uniform(100000, 10000000),
        'Short Interest': np.random.uniform(1000000, 50000000),
        'Average Daily Volume': np.random.uniform(500000, 5000000),
    }
    for trade_date in dates
    for ticker in tickers
]

df = pd.DataFrame(data)
print(f"Test dataset: {len(df)} rows, {df['Date'].nunique()} dates, {df['ticker'].nunique()} tickers")
display(df.head())

## 2. Develop New Factor Ideas

This section is for experimenting with new factor concepts.

In [None]:
# Example: Develop a new "Fee Volatility Spike" factor
class FeeVolatilitySpike:
    """Detect sudden spikes in fee volatility - experimental factor.

    For each ticker, the rolling standard deviation of row-over-row percentage
    fee changes is divided by that rolling volatility's own longer-run mean.
    Ratios above ``spike_threshold`` are flagged.

    Args:
        fee_col: Name of the column holding the fee level.
        window: Number of rows in the rolling volatility window; the baseline
            average uses ``window * 2`` rows.
        spike_threshold: Ratio above which ``Vol_Spike_Flag`` is set to 1.
    """

    def __init__(self, fee_col='Fee All (BPS)', window=20, spike_threshold=2.0):
        self.fee_col = fee_col
        self.window = window
        self.spike_threshold = spike_threshold

    def calculate(self, df):
        """Calculate fee volatility spike factor.

        Args:
            df: DataFrame with ``'ticker'`` and ``'Date'`` columns plus the
                column named by ``fee_col``.

        Returns:
            A new DataFrame (the input is not modified) holding the input rows
            grouped by ticker in order of first appearance and sorted by
            ``'Date'`` within each ticker, with a fresh 0..n-1 index and two
            added columns: ``'Fee_Vol_Spike'`` (the ratio, NaN until both
            rolling windows are full) and ``'Vol_Spike_Flag'`` (1 where the
            ratio exceeds ``spike_threshold``, else 0). An input with no rows
            yields an empty frame that still carries both columns.
        """
        per_ticker = []

        # groupby makes a single pass over the frame; sort=False keeps the
        # tickers in order of first appearance.
        for _, ticker_data in df.groupby('ticker', sort=False):
            ticker_data = ticker_data.sort_values('Date')

            # Calculate rolling volatility of percentage fee changes
            fee_returns = ticker_data[self.fee_col].pct_change()
            rolling_vol = fee_returns.rolling(self.window).std()

            # Detect spikes: current vol vs average vol
            avg_vol = rolling_vol.rolling(self.window * 2).mean()
            vol_spike = rolling_vol / avg_vol

            # assign() returns a new frame, so the caller's data is untouched.
            # NaN ratios compare False and therefore get a flag of 0.
            per_ticker.append(
                ticker_data.assign(
                    Fee_Vol_Spike=vol_spike,
                    Vol_Spike_Flag=(vol_spike > self.spike_threshold).astype(int),
                )
            )

        if not per_ticker:
            # pd.concat raises ValueError on an empty list; return an empty
            # result with the expected columns instead.
            return df.assign(
                Fee_Vol_Spike=pd.Series(dtype=float),
                Vol_Spike_Flag=pd.Series(dtype=int),
            ).reset_index(drop=True)

        return pd.concat(per_ticker, ignore_index=True)

# Test the new factor
fvs = FeeVolatilitySpike()
df_with_new_factor = fvs.calculate(df)

print("New Factor Statistics:")
print(df_with_new_factor[['Fee_Vol_Spike', 'Vol_Spike_Flag']].describe())

# Visualize the new factor
plt.figure(figsize=(12, 4))
df_with_new_factor['Fee_Vol_Spike'].hist(bins=30, alpha=0.7)
plt.title('Fee Volatility Spike Factor Distribution')
plt.xlabel('Volatility Spike Ratio')
plt.ylabel('Frequency')
# Draw the marker at the threshold the factor instance actually used, so the
# plot stays correct when FeeVolatilitySpike is built with another threshold.
plt.axvline(fvs.spike_threshold, color='red', linestyle='--', label='Spike Threshold')
plt.legend()
plt.show()

## 3. Compare with Existing Factors

Check how the new factor correlates with the existing ones, to gauge whether it adds independent information.

In [None]:
# Compare with existing factors
sim = ShortInterestMomentum()
bcs = BorrowCostShock()

df_comparison = df_with_new_factor.copy()

# Add existing factors for comparison.
# Each factor is scored in its own try block so that one failure no longer
# discards the whole comparison; columns that could not be computed are simply
# left out by the `available_cols` filter below. The broad `except` is a
# deliberate best-effort: experimental data may not satisfy what the existing
# factors expect.
for factor_label, factor_model in (('SIM', sim), ('BCS', bcs)):
    try:
        df_comparison[factor_label] = factor_model.score(df_comparison)
    except Exception as e:
        print(f"Note: Could not compute {factor_label} for comparison: {e}")
        print("This is normal when developing new factors.")

# Calculate correlations over whichever factor columns are present
factor_cols = ['Fee_Vol_Spike', 'SIM', 'BCS']
available_cols = [col for col in factor_cols if col in df_comparison.columns]

if len(available_cols) > 1:
    try:
        correlation_matrix = df_comparison[available_cols].corr()

        plt.figure(figsize=(8, 6))
        sns.heatmap(correlation_matrix, annot=True, cmap='RdBu_r', center=0, square=True)
        plt.title('Factor Correlation: New vs Existing')
        plt.show()

        print("\nFactor Correlations:")
        print(correlation_matrix)
    except Exception as e:
        print(f"Note: Could not compute all factor comparisons: {e}")
        print("This is normal when developing new factors.")

## 4. Performance Testing

Test factor performance with synthetic or historical returns.

In [None]:
# Attach synthetic 5-day forward returns so the performance test has a target.
# Draws are normal with mean 0 and standard deviation 0.02, one per row; the
# fixed seed makes the column reproducible.
np.random.seed(456)
df_with_new_factor['Forward_Return_5D'] = np.random.normal(
    loc=0,
    scale=0.02,
    size=len(df_with_new_factor),
)

# Test predictive power
def test_factor_performance(df, factor_col, return_col='Forward_Return_5D'):
    """Simple IC test for factor performance.

    Args:
        df: DataFrame containing ``factor_col`` and ``return_col``.
        factor_col: Name of the factor score column.
        return_col: Name of the forward-return column.

    Returns:
        dict with the keys ``'IC'``, ``'Hit_Rate'``, ``'Top_Quintile_Return'``,
        ``'Bottom_Quintile_Return'`` and ``'N'``.

        * ``IC`` is the correlation between factor and return over rows where
          both are present.
        * ``Hit_Rate`` is True when the mean return of the top factor quintile
          exceeds that of the bottom quintile.
        * ``N`` is the number of rows used.

        With fewer than 20 usable rows the neutral placeholders ``IC = 0`` and
        ``Hit_Rate = 0.5`` are returned and both quintile returns are NaN, so
        callers can always read every key.

    Raises:
        ValueError: propagated from ``pd.qcut`` when the factor values do not
            produce five distinct bin edges (e.g. heavily tied data).
    """
    valid_data = df[[factor_col, return_col]].dropna()
    n_obs = len(valid_data)

    if n_obs < 20:
        # Too few observations for quintiles. Keep the same key set as the
        # full result; the quintile keys used to be missing here, which made
        # callers that format them fail with KeyError.
        return {
            'IC': 0,
            'Hit_Rate': 0.5,
            'Top_Quintile_Return': float('nan'),
            'Bottom_Quintile_Return': float('nan'),
            'N': n_obs,
        }

    ic = valid_data[factor_col].corr(valid_data[return_col])

    # Quintile analysis. The labels are kept in a separate Series instead of
    # being written onto `valid_data`, which is derived from the caller's
    # frame and could trigger SettingWithCopyWarning.
    quintiles = pd.qcut(valid_data[factor_col], 5, labels=False)
    quintile_returns = valid_data[return_col].groupby(quintiles).mean()

    top_return = quintile_returns.iloc[-1]
    bottom_return = quintile_returns.iloc[0]

    return {
        'IC': ic,
        # Hit rate: top quintile outperforms bottom quintile
        'Hit_Rate': bool(top_return > bottom_return),
        'Top_Quintile_Return': top_return,
        'Bottom_Quintile_Return': bottom_return,
        'N': n_obs,
    }


# The `test_` prefix matches pytest's default discovery pattern; mark the
# helper as not-a-test so it is not collected (and failed for missing
# fixtures) if this code is exported to a module.
test_factor_performance.__test__ = False

# Run the IC / quintile check on the experimental factor
performance = test_factor_performance(df_with_new_factor, 'Fee_Vol_Spike')

# Headline metrics, one per line
print("New Factor Performance Test:")
print(f"Information Coefficient: {performance['IC']:.4f}")
print(f"Hit Rate (Top > Bottom): {performance['Hit_Rate']}")
print(f"Top Quintile Return: {performance['Top_Quintile_Return']:.4f}")
print(f"Bottom Quintile Return: {performance['Bottom_Quintile_Return']:.4f}")
print(f"Sample Size: {performance['N']}")

# Caveats about how far these numbers can be trusted
print(
    "\n📝 Development Notes:",
    "- This is synthetic data for demonstration",
    "- Real testing requires historical price data",
    "- Consider regime analysis and stability testing",
    "- Factor may need refinement based on results",
    sep="\n",
)

## 5. Export Successful Factors

Once a factor shows promise, export it to the models module.

In [None]:
# Example: Export new factor to models
def export_factor_to_module(factor_class, factor_name, module_path='../models/experimental_factors.py'):
    """Helper to export successful factors to a module file.

    Appends the source code of ``factor_class`` to ``module_path`` under a
    ``# {factor_name}`` comment. When the file does not exist yet, a header
    with the standard imports is written first. Calling this again for the
    same class appends another copy.

    Args:
        factor_class: The class whose source should be exported.
        factor_name: Human-readable label written as a comment above the class.
        module_path: Destination ``.py`` file.

    Raises:
        OSError, TypeError: propagated from ``inspect.getsource`` when the
            class source cannot be retrieved.
        OSError: if ``module_path`` cannot be opened for writing.
    """

    import inspect
    import os

    # Get the source code of the class first, so a failure here leaves the
    # target file untouched.
    source_code = inspect.getsource(factor_class)

    # Header written once, when the experimental factors module is created
    header = '''# Experimental Factors
# New factors under development and testing

import pandas as pd
import numpy as np
from typing import Optional

'''

    needs_header = not os.path.exists(module_path)

    # A single append-mode handle creates the file when needed. UTF-8 is set
    # explicitly so factor source containing non-ASCII characters is written
    # the same way regardless of the platform's default encoding.
    with open(module_path, 'a', encoding='utf-8') as f:
        if needs_header:
            f.write(header)
        f.write(f"\n\n# {factor_name}\n")
        f.write(source_code)
        f.write("\n")

    print(f"✅ Factor '{factor_name}' exported to {module_path}")

# Uncomment the call below to write the experimental factor to the models module
# export_factor_to_module(FeeVolatilitySpike, "Fee Volatility Spike Factor")

# Closing summary and follow-up checklist
print(
    "💡 Factor Development Complete!",
    "\nNext steps:",
    "1. Test with real historical data",
    "2. Validate on out-of-sample period",
    "3. Export to production models if successful",
    "4. Integrate into daily analysis workflow",
    sep="\n",
)