In [3]:
import pandas as pd
import numpy as np

# --- 1. Create a Realistic Sample DataFrame ---
# In a real-world scenario, you would load your data here, e.g.,
# df = pd.read_csv('equilend_daily_data.csv', parse_dates=['date'])

# For this example, we'll create a sample dataset.
dates = pd.to_datetime(pd.date_range(start='2023-01-01', periods=180, freq='D'))
securities = ['Stock A', 'Stock B', 'Stock C', 'Stock D', 'Stock E', 'Stock F']
sectors = ['Tech', 'Tech', 'Healthcare', 'Healthcare', 'Industrials', 'Industrials']
security_sector_map = dict(zip(securities, sectors))

data = []
for date in dates:
    for security in securities:
        # Simulate some trends and randomness
        base_util = 40 + (securities.index(security) * 5) + np.random.uniform(-5, 5) + (date.dayofyear / 20)
        if security == 'Stock B': # Make one stock more volatile
            base_util += 20 * np.sin(date.dayofyear / 30)

        utilization = np.clip(base_util, 5, 98)
        borrow_fee = np.clip(0.25 + (utilization / 100)**2 * 10 + np.random.uniform(-0.2, 0.2), 0.1, 25)
        days_to_cover = np.clip(2 + (utilization / 100) * 15 + np.random.uniform(-1, 1), 1, 20)
        shares_on_loan = 1_000_000 * (utilization/50)

        data.append({
            'date': date,
            'security_id': security,
            'sector': security_sector_map[security],
            'utilization': utilization,
            'borrow_fee': borrow_fee,
            'days_to_cover': days_to_cover,
            'shares_on_loan': shares_on_loan
        })

df = pd.DataFrame(data)

# Sort data for time-series calculations
df = df.sort_values(by=['security_id', 'date']).reset_index(drop=True)


# --- 2. Dynamic Threshold Analysis (Utilization) ---
# Calculate a 60-day rolling average and standard deviation of utilization for each security.
# The window size (60 days) should be tested and optimized based on your specific needs.
df['util_rol_avg_60d'] = df.groupby('security_id')['utilization'].transform(lambda x: x.rolling(window=60, min_periods=10).mean())
df['util_rol_std_60d'] = df.groupby('security_id')['utilization'].transform(lambda x: x.rolling(window=60, min_periods=10).std())

# Define the dynamic threshold. Here, we use 1.5 standard deviations above the mean.
# This parameter (1.5) is a key candidate for backtesting and optimization.
df['dynamic_util_thresh'] = df['util_rol_avg_60d'] + (1.5 * df['util_rol_std_60d'])

# Generate the signal: 1 if utilization crosses above the dynamic threshold, 0 otherwise.
df['signal_dynamic_util'] = (df['utilization'] > df['dynamic_util_thresh']).astype(int)


# --- 3. Rate of Change Analysis (Shares on Loan) ---
# Calculate the percentage change in shares on loan over the last 30 days.
# This helps identify securities where short interest is accelerating rapidly.
df['sol_roc_30d_pct'] = df.groupby('security_id')['shares_on_loan'].transform(lambda x: x.pct_change(periods=30))

# Generate a signal if the rate of change is in the top 10% for that day.
# This identifies unusual acceleration compared to the rest of the market.
roc_threshold_90th_percentile = df.groupby('date')['sol_roc_30d_pct'].transform('quantile', 0.90)
df['signal_roc_sol'] = (df['sol_roc_30d_pct'] > roc_threshold_90th_percentile).astype(int)


# --- 4. Multi-Factor Gate Analysis (High Conviction Signal) ---
# This combines multiple conditions to create a more robust "gate".

# Gate 1: Static threshold for high utilization.
# We choose 80% as an example; this should be based on historical analysis.
df['gate1_high_util'] = (df['utilization'] > 80).astype(int)

# Gate 2: Static threshold for high Days to Cover.
# A high value can indicate higher risk for short sellers. We choose 10 days.
df['gate2_high_dtc'] = (df['days_to_cover'] > 10).astype(int)

# Gate 3: Sector-relative high borrow fee.
# A fee might be high for a utility but low for a tech stock.
# We'll flag fees in the top 20% (80th percentile) for their sector on any given day.
sector_fee_thresh_80th = df.groupby(['date', 'sector'])['borrow_fee'].transform('quantile', 0.80)
df['gate3_high_fee'] = (df['borrow_fee'] > sector_fee_thresh_80th).astype(int)


# The Final Multi-Factor Signal:
# This signal is triggered only if ALL gates are passed (are equal to 1).
df['MULTI_FACTOR_SIGNAL'] = (df['gate1_high_util'] & df['gate2_high_dtc'] & df['gate3_high_fee']).astype(int)


# --- 5. Final Output ---
# Display the results for a specific security to see the calculations in action.
final_df = df[df['security_id'] == 'Stock B'].tail(15)
output_columns = [
    'date', 'security_id', 'utilization', 'dynamic_util_thresh', 'signal_dynamic_util',
    'sol_roc_30d_pct', 'signal_roc_sol', 'borrow_fee', 'days_to_cover',
    'gate1_high_util', 'gate2_high_dtc', 'gate3_high_fee', 'MULTI_FACTOR_SIGNAL'
]
final_df = final_df[output_columns]
final_df.to_csv('threshold_analysis_output.csv', index=False)

OSError: [Errno 30] Read-only file system: 'threshold_analysis_output.csv'