In [None]:
# Securities Lending-Based Stock Performance Prediction Models

This notebook outlines three model templates that leverage key securities finance factors to predict stock performance. These models range from straightforward factor ranking to a more sophisticated cross-asset strategy, all designed to identify potential underperformers and generate alpha.

---

## Model Template 1: Factor Ranking System

**Objective:** To systematically screen and rank a universe of securities to identify those with the highest probability of underperformance based on securities lending data.

This model can be implemented using single factors or a composite of multiple factors.

### A. Single-Factor Ranking

This is the most direct approach. The model would ingest daily securities finance data for a defined universe of stocks and rank them based on a single, highly predictive factor.

**Methodology:**
1.  **Select a universe of securities:** Examples include US Total Cap, FTSE Developed Europe.
2.  **Choose a primary ranking factor:** Research indicates the following are among the most potent single factors for predicting underperformance:
    * **Indicative Fee:** Identified as the top-performing metric in US Large Cap, US Small Cap, and European markets.
    * **Demand Supply Ratio (DSR):** Measures borrow demand relative to lendable inventory. It performs strongly, particularly in US Small Caps and the Developed Pacific region.
    * **Active Utilization:** A powerful predictor, especially in European markets, consistently stronger than simple short interest metrics.
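3.  **Daily Ranking:** Each day, rank all securities in the universe from highest to lowest based on the chosen factor's value, so that rank 1 is the security with the highest factor value. Securities nearer the top of the ranking (i.e., with a lower rank number) have a higher likelihood of future underperformance.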
4.  **Output:** A simple, ranked list of securities that can be used to generate trading ideas or as an input for risk management.

### *Implementation Considerations (Single-Factor):*
* Data ingestion and preprocessing for the selected universe.
* Calculation of the chosen single factor (Indicative Fee, DSR, or Active Utilization) for all securities.
* Daily ranking algorithm to sort securities based on the factor value.
* Storing and presenting the ranked list.

```python
import pandas as pd
import numpy as np

# --- Dummy Data Generation (replace with actual data loading) ---
# For demonstration purposes, build a small synthetic panel with one row per
# (Date, SecurityID) pair. In a real scenario, you would load your daily
# securities finance data here.
SEED = 42  # fixed seed so "Restart Kernel -> Run All" reproduces the same dummy values
rng = np.random.default_rng(SEED)

# pd.date_range already returns a DatetimeIndex, so no extra pd.to_datetime is needed.
dates = pd.date_range(start='2023-01-01', periods=10, freq='D')
security_ids = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'TSLA']

# One record per (date, security); values are drawn in the order the keys are listed.
data = [
    {
        'Date': date,
        'SecurityID': sec_id,
        'IndicativeFee': rng.uniform(0.01, 5.0),  # Example values
        'DemandSupplyRatio': rng.uniform(0.1, 10.0),
        'ActiveUtilization': rng.uniform(0.05, 0.95),
        'DailyReturn': rng.uniform(-0.02, 0.02),  # Added for portfolio examples
    }
    for date in dates
    for sec_id in security_ids
]
df_securities_data = pd.DataFrame(data)

def single_factor_ranking(df, date, factor_name):
    """
    Rank the securities observed on ``date`` by a single factor.

    Rank 1 is assigned to the largest factor value (highest underperformance
    likelihood). Tied values share the smallest rank of their group
    (``method='min'``) and keep their original relative order in the output.
    Rows whose factor value is NaN receive a NaN rank and are sorted last.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data containing a ``'Date'`` column and the ``factor_name`` column.
    date : scalar comparable with ``df['Date']``
        The date whose rows are ranked.
    factor_name : str
        Name of the column to rank on.

    Returns
    -------
    pandas.DataFrame
        A copy of the rows of ``df`` for ``date`` with an added ``'Rank'``
        column, sorted by ``'Rank'`` ascending. If no rows match ``date``, the
        result is empty but still carries every input column plus ``'Rank'``.
        ``df`` itself is never modified.

    Raises
    ------
    KeyError
        If ``factor_name`` is not a column of ``df`` (raised regardless of
        whether ``date`` has any rows).
    """
    # Fail fast so a misspelled factor is not masked on dates without data.
    if factor_name not in df.columns:
        raise KeyError(f"Factor column {factor_name!r} not found in input data.")

    daily_df = df.loc[df['Date'] == date].copy()

    if daily_df.empty:
        # Keep the schema so downstream column access works on "no data" days.
        daily_df['Rank'] = pd.Series(dtype='float64')
        return daily_df

    daily_df['Rank'] = daily_df[factor_name].rank(ascending=False, method='min')
    # A stable sort keeps tied securities in input order, making output deterministic.
    return daily_df.sort_values(by='Rank', kind='stable')

# Example usage: build the 'IndicativeFee' ranking for one specific date and show it
date_to_rank = pd.to_datetime('2023-01-05')
ranked_securities = single_factor_ranking(
    df=df_securities_data,
    date=date_to_rank,
    factor_name='IndicativeFee',
)
print(
    f"Single-Factor Ranking for {date_to_rank.strftime('%Y-%m-%d')} (Factor: Indicative Fee):\n",
    ranked_securities,
)

In [None]:
# --- Continue with df_securities_data from above ---

def composite_factor_ranking(df, date, factors, weights=None):
    """
    Rank the securities observed on ``date`` by a weighted composite of factors.

    Each factor is converted to a percentile rank on a 0-100 scale (larger
    factor value -> larger percentile). The composite score is the weighted sum
    of those percentile ranks, and rank 1 goes to the largest composite score.
    Tied scores share the smallest rank of their group (``method='min'``) and
    keep their original relative order. A NaN in any factor propagates to a NaN
    composite score and rank for that row; such rows are sorted last.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data containing a ``'Date'`` column and every column in ``factors``.
    date : scalar comparable with ``df['Date']``
        The date whose rows are ranked.
    factors : str or iterable of str
        Factor column name(s). A single string is treated as one factor.
    weights : sequence of float, optional
        One weight per factor, in the same order. Defaults to equal weights of
        1.0. Weights are used as given (they are not normalised).

    Returns
    -------
    pandas.DataFrame
        A copy of the rows of ``df`` for ``date`` with added ``'<factor>_Rank'``
        columns, ``'CompositeScore'`` and ``'CompositeRank'``, sorted by
        ``'CompositeRank'`` ascending. If no rows match ``date``, the result is
        empty but still carries all of those columns. ``df`` is never modified.

    Raises
    ------
    ValueError
        If the number of weights differs from the number of factors.
    KeyError
        If any factor is not a column of ``df``.
    """
    # A bare string would otherwise be iterated character by character.
    factors = [factors] if isinstance(factors, str) else list(factors)

    if weights is None:
        weights = [1.0] * len(factors)  # Equal weights by default
    else:
        weights = list(weights)
        if len(weights) != len(factors):
            raise ValueError("Number of weights must match number of factors.")

    # Validate up front so bad arguments are not masked on dates without data.
    missing = [factor for factor in factors if factor not in df.columns]
    if missing:
        raise KeyError(f"Factor column(s) not found in input data: {missing}")

    daily_df = df.loc[df['Date'] == date].copy()
    rank_columns = [f'{factor}_Rank' for factor in factors]

    if daily_df.empty:
        # Keep the schema so downstream column access works on "no data" days.
        for column in rank_columns + ['CompositeScore', 'CompositeRank']:
            daily_df[column] = pd.Series(dtype='float64')
        return daily_df

    # Percentile rank per factor: higher factor value -> higher percentile,
    # i.e. higher assumed underperformance probability.
    for factor, rank_column in zip(factors, rank_columns):
        daily_df[rank_column] = daily_df[factor].rank(pct=True, ascending=True) * 100

    # Weighted sum of percentile ranks; start from a float so the dtype is stable.
    composite_score = pd.Series(0.0, index=daily_df.index)
    for rank_column, weight in zip(rank_columns, weights):
        composite_score = composite_score + daily_df[rank_column] * weight
    daily_df['CompositeScore'] = composite_score

    # Final ranking: higher composite score = rank closer to 1.
    daily_df['CompositeRank'] = daily_df['CompositeScore'].rank(ascending=False, method='min')
    # A stable sort keeps tied securities in input order, making output deterministic.
    return daily_df.sort_values(by='CompositeRank', kind='stable')

# Example usage: composite ranking for one specific date.
# No weights are passed, so every factor is weighted equally; custom weights
# such as [0.6, 0.4] could be supplied through the `weights` argument instead.
composite_factors = ['IndicativeFee', 'ActiveUtilization']
date_to_rank = pd.to_datetime('2023-01-05')
ranked_securities_composite = composite_factor_ranking(
    df=df_securities_data,
    date=date_to_rank,
    factors=composite_factors,
)
print(
    f"\nComposite-Factor Ranking for {date_to_rank.strftime('%Y-%m-%d')}:\n",
    ranked_securities_composite,
)