# Hurst Exponent Trading Strategy: Momentum vs Mean Reversion

## Project Overview
This notebook implements a research-based trading strategy that combines:
- **Momentum Strategy** (SMA Crossover): Profits from trending markets
- **Mean Reversion Strategy** (Bollinger Bands): Profits from oscillating markets
- **Hurst Exponent**: Mathematical tool to classify time series as trending (H>0.5) or mean-reverting (H<0.5)

## Key Hypothesis
- Use momentum strategy on trending stocks (H > 0.5)
- Use mean reversion strategy on mean-reverting stocks (H < 0.5)
- This should optimize returns by matching strategy to market behavior

## Data & Methodology
- **100 pre-selected stocks** from diverse sectors and market caps
- **2024 data split**: Jan-June (Hurst calculation) | July-December (Strategy implementation)
- **Risk-adjustable**: Interactive section to customize risk tolerance

*Based on research by Chang, Lizardi & Shah (Cornell Data Science, 2022)*
"""


LINK TO REPORT: https://drive.google.com/file/d/1eps_RHUWOq_C9Mvixg2HFxcrvtvknC60/view?usp=sharing

# Pre-Selected 100 Stocks for Analysis

The stocks were selected based on:
1. Market cap > $2B (following Cornell paper methodology)
2. Sector diversification across 11 major sectors
3. Expected mix of trending and mean-reverting behavior
4. Data availability for 2024

In [None]:
!pip install yfinance hurst ta --quiet

import os, pandas as pd
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from hurst import compute_Hc

# Local cache file holding the stock universe used by every later cell.
CSV_PATH = 'selected_100_stocks.csv'

if os.path.exists(CSV_PATH):
    # A cached copy already exists: read it straight from disk.
    stocks_df = pd.read_csv(CSV_PATH)
    print('CSV already present, loaded into DataFrame.')
else:
    # No cached copy yet: fetch the list from the project repository,
    # then write it next to the notebook so later runs skip the download.
    RAW_URL = "https://raw.githubusercontent.com/Aaditya-P-Sahay/Optimizing-Returns-Using-the-Hurst-Exponent-on-Momentum-and-Mean-Reversion-Strategies/refs/heads/main/selected_100_stocks.csv"
    stocks_df = pd.read_csv(RAW_URL)
    stocks_df.to_csv(CSV_PATH, index=False)
    print('CSV downloaded and saved locally!')


In [None]:
# Static description of the experiment design, emitted as one multi-line message.
print("\n".join([
    "🎯 Analysis Plan:",
    "• Period: January 1, 2024 - December 31, 2024",
    "• First Half (Jan-Jun): Calculate Hurst exponents",
    "• Second Half (Jul-Dec): Apply trading strategies",
    "• Data: Daily OHLC from Yahoo Finance",
]))

# Number of stocks per sector, most frequent sector first (value_counts ordering).
sector_counts = stocks_df['Sector'].value_counts()
print(f"\n📈 Sector Diversification ({len(stocks_df)} stocks):")
for sector, count in sector_counts.items():
    print("  " + f"{sector}: {count} stocks")


#  Hurst Exponent Calculation for Stock Classification

In [None]:


import seaborn as sns

# Download first half 2024 data for Hurst calculation
def calculate_hurst_exponents(stock_list):
    """Estimate the Hurst exponent of each ticker from first-half-2024 closes.

    For every symbol, daily prices between 2024-01-01 and 2024-06-30 are
    downloaded with ``yf.download`` and the non-null closing prices are passed
    to ``compute_Hc(..., kind='price', simplified=True)``.

    Args:
        stock_list: Iterable of ticker symbols.

    Returns:
        pandas.DataFrame with columns ``Symbol``, ``Hurst_Exponent``,
        ``Classification`` ('Trending' when H > 0.5, otherwise
        'Mean Reverting') and ``Data_Points`` (number of closes used).
        Symbols whose download or estimation raises are reported on stdout and
        omitted. The columns are present even when no symbol succeeds.
    """
    result_columns = ['Symbol', 'Hurst_Exponent', 'Classification', 'Data_Points']
    hurst_results = []

    for symbol in stock_list:
        try:
            # Download first half of 2024
            data = yf.download(symbol, start='2024-01-01', end='2024-06-30', progress=False)

            # yfinance can return (field, ticker) MultiIndex columns; flatten them
            # so data['Close'] is a 1-D Series (same handling as in
            # bollinger_bands_strategy).
            if isinstance(data.columns, pd.MultiIndex):
                data.columns = [col[0] if isinstance(col, tuple) else col for col in data.columns]

            # Calculate Hurst exponent using closing prices
            prices = data['Close'].dropna()
            H, _, _ = compute_Hc(prices, kind='price', simplified=True)

            hurst_results.append({
                'Symbol': symbol,
                'Hurst_Exponent': H,
                'Classification': 'Trending' if H > 0.5 else 'Mean Reverting',
                'Data_Points': len(prices)
            })

        except Exception as e:
            # Best effort: one bad ticker must not abort the whole batch.
            print(f"Error processing {symbol}: {e}")

    # Explicit columns keep downstream column lookups valid even when every
    # symbol failed and the result list is empty.
    return pd.DataFrame(hurst_results, columns=result_columns)

# Run the Hurst estimation over the whole stock universe.
hurst_df = calculate_hurst_exponents(stocks_df['Symbol'].tolist())

# Show the per-stock table (capped at the first 100 rows).
print("📋 Hurst Exponent Results:")
display(hurst_df.head(100))

# Headline counts on either side of H = 0.5, plus the average exponent.
print("\n📊 Classification Summary:")
print(f"Trending stocks (H > 0.5): {(hurst_df['Hurst_Exponent'] > 0.5).sum()}")
print(f"Mean Reverting stocks (H < 0.5): {(hurst_df['Hurst_Exponent'] < 0.5).sum()}")
print(f"Average Hurst Exponent: {hurst_df['Hurst_Exponent'].mean():.3f}")


#  Simple Moving Average (SMA) Crossover Strategy - Momentum Trading

"""

    Implements SMA crossover strategy on all the 100 stocks with PROPER crossover detection:
    - Short-term SMA: 5 days
    - Long-term SMA: 10 days
    - Buy signal: Short SMA crosses above Long SMA (Golden Cross)
    - Sell signal: Short SMA crosses below Long SMA (Death Cross)

"""

In [None]:


def sma_crossover_strategy(symbol, start_date='2024-07-01', end_date='2024-12-31'):
    """Backtest a long-only 5/10-day SMA crossover on a single ticker.

    A buy signal (+1) is raised on the day the 5-day SMA moves above the
    10-day SMA after being at or below it the previous day; a sell signal (-1)
    is raised on the opposite cross. The position is long (1) from a buy
    signal until the next sell signal and flat (0) otherwise. Each day's
    return is earned only when the position was long at the previous close.

    Args:
        symbol: Ticker passed to ``yf.download``.
        start_date: Start date passed to ``yf.download``.
        end_date: End date passed to ``yf.download``.

    Returns:
        ``None`` when fewer than 15 rows are downloaded. Otherwise a dict with
        ``Symbol``, ``Total_Return`` (compounded strategy return),
        ``Daily_Returns`` (list of daily strategy returns),
        ``Signals_Generated`` (number of crossover days) and ``Success=True``.
        If anything raises, the error is printed and
        ``{'Symbol', 'Success': False, 'Error'}`` is returned.
    """
    try:
        # Download second half 2024 data
        data = yf.download(symbol, start=start_date, end=end_date, progress=False)

        # yfinance can return (field, ticker) MultiIndex columns; flatten them
        # so data['Close'] is a 1-D Series (same handling as in
        # bollinger_bands_strategy).
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = [col[0] if isinstance(col, tuple) else col for col in data.columns]

        if len(data) < 15:  # Need at least 15 days for proper calculation
            return None

        close = data['Close']
        sma_short = close.rolling(window=5).mean()
        sma_long = close.rolling(window=10).mean()
        prev_short = sma_short.shift(1)
        prev_long = sma_long.shift(1)

        # A cross needs both today's and yesterday's averages. Comparisons
        # against NaN evaluate to False, so warm-up rows never raise a signal.
        golden_cross = (sma_short > sma_long) & (prev_short <= prev_long)
        death_cross = (sma_short < sma_long) & (prev_short >= prev_long)

        signal = pd.Series(0, index=data.index)
        signal[golden_cross] = 1
        signal[death_cross] = -1

        # Hold until the opposite signal: carry the last non-zero signal
        # forward, then map "sell" (-1) to flat (0) because this is long-only.
        position = signal.where(signal != 0).ffill().fillna(0).clip(lower=0)

        # Yesterday's position earns today's return (no look-ahead).
        daily_return = close.pct_change()
        strategy_returns = (daily_return * position.shift(1)).dropna()
        cumulative_return = (1 + strategy_returns).prod() - 1

        return {
            'Symbol': symbol,
            'Total_Return': cumulative_return,
            'Daily_Returns': strategy_returns.tolist(),
            'Signals_Generated': int((signal != 0).sum()),
            'Success': True
        }

    except Exception as e:
        # Best effort: report the failure and let the caller skip this ticker.
        print(f"Error processing {symbol}: {str(e)}")
        return {'Symbol': symbol, 'Success': False, 'Error': str(e)}

# Apply SMA strategy to all stocks.
# The progress messages report the real universe size instead of assuming 100.
print(f"🔄 Applying SMA Crossover Strategy to all {len(stocks_df)} stocks...")
print("This may take a few minutes to download data and process...")

sma_results = []        # result dicts of tickers that were backtested successfully
successful_stocks = []  # their symbols
failed_stocks = []      # symbols that returned None or Success=False

for i, symbol in enumerate(stocks_df['Symbol']):
    if i % 20 == 0:
        print(f"Processing stock {i+1}/{len(stocks_df)}...")

    result = sma_crossover_strategy(symbol)

    # None (too little data) and Success=False (exception) both count as failures.
    if result and result.get('Success', False):
        sma_results.append(result)
        successful_stocks.append(symbol)
    else:
        failed_stocks.append(symbol)

print(f"\n✅ Successfully processed: {len(successful_stocks)} stocks")
print(f"❌ Failed to process: {len(failed_stocks)} stocks")

if failed_stocks:
    # Show at most ten failures to keep the output short.
    print(f"Failed stocks: {failed_stocks[:10]}{'...' if len(failed_stocks) > 10 else ''}")

# Extract returns for analysis
sma_returns = [r['Total_Return'] for r in sma_results if r['Total_Return'] is not None]
signal_counts = [r['Signals_Generated'] for r in sma_results if r['Signals_Generated'] is not None]

# Calculate performance statistics across stocks and visualise them.
# Every figure below is cross-sectional: one total return per stock.
if sma_returns:
    sma_stats = {
        'Mean Return': np.mean(sma_returns),
        'Median Return': np.median(sma_returns),
        'Standard Deviation': np.std(sma_returns),
        'Min Return': np.min(sma_returns),
        'Max Return': np.max(sma_returns),
        'Number of Stocks': len(sma_returns),
        'Average Signals per Stock': np.mean(signal_counts)
    }

    print("\n" + "="*50)
    print("📊 SMA CROSSOVER STRATEGY PERFORMANCE RESULTS")
    print("="*50)

    for metric, value in sma_stats.items():
        if metric in ['Mean Return', 'Median Return', 'Standard Deviation', 'Min Return', 'Max Return']:
            print(f"{metric:20s}: {value:8.4f} ({value*100:6.2f}%)")
        elif metric == 'Average Signals per Stock':
            print(f"{metric:20s}: {value:8.1f}")
        else:
            print(f"{metric:20s}: {value:8.0f}")

    # Sharpe ratio (assuming 2% annual risk-free rate as mentioned in paper).
    # Total_Return is compounded over the whole backtest window, so the
    # risk-free hurdle has to cover the same horizon. The daily rate is scaled
    # by the typical number of daily returns per stock; subtracting a
    # single-day rate from a multi-month return would overstate the ratio.
    daily_risk_free = 0.02 / 252
    holding_days = int(np.median([len(r['Daily_Returns']) for r in sma_results]))
    risk_free_rate = daily_risk_free * holding_days
    excess_return = sma_stats['Mean Return'] - risk_free_rate
    sharpe_ratio = excess_return / sma_stats['Standard Deviation'] if sma_stats['Standard Deviation'] > 0 else 0

    print(f"{'Sharpe Ratio':20s}: {sharpe_ratio:8.4f}")
    print("="*50)

    # Visualization
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

    # 1. Histogram of Returns
    ax1.hist(sma_returns, bins=30, alpha=0.7, color='blue', edgecolor='black')
    ax1.axvline(sma_stats['Mean Return'], color='red', linestyle='--', linewidth=2,
                label=f'Mean: {sma_stats["Mean Return"]:.4f}')
    ax1.axvline(sma_stats['Median Return'], color='orange', linestyle='--', linewidth=2,
                label=f'Median: {sma_stats["Median Return"]:.4f}')
    ax1.set_title('Distribution of SMA Crossover Strategy Returns', fontsize=14, fontweight='bold')
    ax1.set_xlabel('Total Return')
    ax1.set_ylabel('Frequency')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # 2. Box Plot
    ax2.boxplot(sma_returns, patch_artist=True,
                boxprops=dict(facecolor='lightblue', alpha=0.7))
    ax2.set_title('SMA Strategy Returns - Box Plot', fontsize=14, fontweight='bold')
    ax2.set_ylabel('Total Return')
    ax2.grid(True, alpha=0.3)

    # 3. Q-Q Plot (to check normality)
    from scipy import stats
    stats.probplot(sma_returns, dist="norm", plot=ax3)
    ax3.set_title('Q-Q Plot: Returns vs Normal Distribution', fontsize=14, fontweight='bold')
    ax3.grid(True, alpha=0.3)

    # 4. Signal Generation Distribution
    ax4.hist(signal_counts, bins=20, alpha=0.7, color='green', edgecolor='black')
    ax4.axvline(np.mean(signal_counts), color='red', linestyle='--', linewidth=2,
                label=f'Mean: {np.mean(signal_counts):.1f}')
    ax4.set_title('Distribution of Trading Signals Generated', fontsize=14, fontweight='bold')
    ax4.set_xlabel('Number of Signals per Stock')
    ax4.set_ylabel('Frequency')
    ax4.legend()
    ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Additional Analysis: Top and Bottom Performers (sorted best first)
    returns_with_symbols = [(r['Symbol'], r['Total_Return'])
                            for r in sma_results if r['Total_Return'] is not None]
    returns_with_symbols.sort(key=lambda x: x[1], reverse=True)

    print("\n📈 TOP 10 PERFORMING STOCKS:")
    for i, (symbol, return_val) in enumerate(returns_with_symbols[:10]):
        print(f"{i+1:2d}. {symbol:6s}: {return_val:8.4f} ({return_val*100:6.2f}%)")

    print("\n📉 BOTTOM 10 PERFORMING STOCKS:")
    for i, (symbol, return_val) in enumerate(returns_with_symbols[-10:]):
        print(f"{i+1:2d}. {symbol:6s}: {return_val:8.4f} ({return_val*100:6.2f}%)")

    # Summary Statistics Table
    summary_df = pd.DataFrame({
        'Metric': ['Mean Return', 'Median Return', 'Standard Deviation', 'Min Return', 'Max Return', 'Sharpe Ratio'],
        'Value': [sma_stats['Mean Return'], sma_stats['Median Return'], sma_stats['Standard Deviation'],
                 sma_stats['Min Return'], sma_stats['Max Return'], sharpe_ratio],
        'Percentage': [f"{sma_stats['Mean Return']*100:.2f}%", f"{sma_stats['Median Return']*100:.2f}%",
                      f"{sma_stats['Standard Deviation']*100:.2f}%", f"{sma_stats['Min Return']*100:.2f}%",
                      f"{sma_stats['Max Return']*100:.2f}%", f"{sharpe_ratio:.4f}"]
    })

    print("\n📋 SUMMARY STATISTICS TABLE:")
    print(summary_df.to_string(index=False))

else:
    print("❌ No successful results to analyze. Please check your data and implementation.")


# The output graph "Distribution of SMA Crossover Strategy Returns" confirms the typical pattern of returns observed in momentum trading strategies:


*   a higher frequency of low or negative returns
*   losses that are relatively small in value (minimum -0.2%), whereas profits go as high as 0.6%

#  Bollinger Bands Strategy - Mean Reversion Trading

 """

    Implements Bollinger Bands strategy on all the 100 stocks:
    - 20-day moving average as center line
    - Upper/Lower bands at 2 standard deviations
    - Buy signal: Price breaks below lower band (oversold)
    - Sell signal: Price breaks above upper band (overbought)

"""

In [None]:


import pandas as pd
import yfinance as yf
import numpy as np
import matplotlib.pyplot as plt

def bollinger_bands_strategy(symbol, start_date='2024-07-01', end_date='2024-12-31'):
    """Backtest a long-only Bollinger Bands mean-reversion rule on one ticker.

    Bands are a 20-day rolling mean of the close plus/minus two rolling
    standard deviations (pandas' default sample standard deviation). A buy
    signal (+1) is raised when the close is at or below the lower band, a sell
    signal (-1) when it is at or above the upper band. The position is long
    (1) from a buy signal until the next sell signal and flat (0) otherwise.
    Each day's return is earned only when the position was long at the
    previous close.

    Args:
        symbol: Ticker passed to ``yf.download``.
        start_date: Start date passed to ``yf.download``.
        end_date: End date passed to ``yf.download``.

    Returns:
        ``None`` when fewer than 25 rows are downloaded. Otherwise a dict with
        ``Symbol``, ``Total_Return``, ``Daily_Returns``, ``Signals_Generated``
        and ``Success=True``. If anything raises,
        ``{'Symbol', 'Success': False, 'Error'}`` is returned.
    """
    try:
        # Download data and handle MultiIndex columns
        data = yf.download(symbol, start=start_date, end=end_date, progress=False)

        # yfinance can return (field, ticker) MultiIndex columns; keep only the
        # field level so data['Close'] is a 1-D Series.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = [col[0] if isinstance(col, tuple) else col for col in data.columns]

        if len(data) < 25:
            return None

        # Manual Bollinger Bands calculation (Cornell paper method)
        window = 20
        close = data['Close']
        bb_middle = close.rolling(window=window).mean()
        rolling_std = close.rolling(window=window).std()
        bb_upper = bb_middle + (rolling_std * 2)
        bb_lower = bb_middle - (rolling_std * 2)

        # Signals are evaluated on every row where the bands exist, including
        # the first one (row index window-1). The previous loop started at
        # row `window` and silently ignored that day. Comparisons against NaN
        # are False, so warm-up rows never raise a signal.
        oversold = close <= bb_lower
        overbought = close >= bb_upper

        signal = pd.Series(0, index=data.index)
        signal[overbought] = -1
        # Assigned last so a buy wins when both conditions hold (zero-width
        # bands), matching the original if/elif precedence.
        signal[oversold] = 1

        # Hold until the opposite signal: carry the last non-zero signal
        # forward, then map "sell" (-1) to flat (0) because this is long-only.
        position = signal.where(signal != 0).ffill().fillna(0).clip(lower=0)

        # Yesterday's position earns today's return (no look-ahead).
        daily_return = close.pct_change()
        clean_returns = (daily_return * position.shift(1)).dropna()
        total_return = (1 + clean_returns).prod() - 1

        return {
            'Symbol': symbol,
            'Total_Return': total_return,
            'Daily_Returns': clean_returns.tolist(),
            'Signals_Generated': int((signal != 0).sum()),
            'Success': True
        }

    except Exception as e:
        # Best effort: hand the error text back so the caller can report it.
        return {'Symbol': symbol, 'Success': False, 'Error': str(e)}

# Apply strategy to all stocks.
# The progress messages report the real universe size instead of assuming 100.
print(f"🔄 Applying Bollinger Bands Strategy to all {len(stocks_df)} stocks...")
print("Using manual calculation (NO ta library dependencies)")

bb_results = []         # result dicts of tickers that were backtested successfully
successful_stocks = []  # their symbols
failed_stocks = []      # symbols that returned None or Success=False

for i, symbol in enumerate(stocks_df['Symbol']):
    if i % 20 == 0:
        print(f"Processing stock {i+1}/{len(stocks_df)}...")

    result = bollinger_bands_strategy(symbol)

    if result and result.get('Success', False):
        bb_results.append(result)
        successful_stocks.append(symbol)
    else:
        failed_stocks.append(symbol)
        # The strategy function does not print its own errors, so report them here.
        if result and 'Error' in result:
            print(f"Error with {symbol}: {result['Error']}")

print(f"\n✅ Successfully processed: {len(successful_stocks)} stocks")
print(f"❌ Failed to process: {len(failed_stocks)} stocks")

# Summarise the Bollinger Bands results and compare them with the SMA results.
# Requires sma_returns and sma_results from the SMA section above.
if len(bb_results) > 0:
    # Extract returns
    bb_returns = [r['Total_Return'] for r in bb_results]
    bb_signals = [r['Signals_Generated'] for r in bb_results]

    # Calculate statistics (cross-sectional: one total return per stock)
    bb_stats = {
        'Mean Return': np.mean(bb_returns),
        'Median Return': np.median(bb_returns),
        'Standard Deviation': np.std(bb_returns),
        'Min Return': np.min(bb_returns),
        'Max Return': np.max(bb_returns),
        'Number of Stocks': len(bb_returns),
        'Average Signals': np.mean(bb_signals)
    }

    print("\n" + "="*50)
    print("📊 BOLLINGER BANDS STRATEGY RESULTS")
    print("="*50)

    for metric, value in bb_stats.items():
        # Standard Deviation is a return-scale quantity too, so it gets the
        # same percentage formatting as the return metrics.
        if 'Return' in metric or metric == 'Standard Deviation':
            print(f"{metric:20s}: {value:8.4f} ({value*100:6.2f}%)")
        elif metric == 'Average Signals':
            print(f"{metric:20s}: {value:8.1f}")
        else:
            print(f"{metric:20s}: {value}")

    # Sharpe ratio with a 2% annual risk-free rate.
    # Total_Return is compounded over the whole backtest window, so the daily
    # rate is scaled to the typical number of daily returns per stock instead
    # of subtracting a single-day rate from a multi-month return.
    daily_risk_free = 0.02 / 252
    holding_days = int(np.median([len(r['Daily_Returns']) for r in bb_results]))
    risk_free_rate = daily_risk_free * holding_days
    excess_return = bb_stats['Mean Return'] - risk_free_rate
    bb_sharpe_ratio = excess_return / bb_stats['Standard Deviation'] if bb_stats['Standard Deviation'] > 0 else 0
    print(f"{'Sharpe Ratio':20s}: {bb_sharpe_ratio:8.4f}")
    print("="*50)

    # SMA Sharpe ratio computed with the same hurdle and zero-spread guard so
    # the two rows of the comparison table are directly comparable.
    sma_std = np.std(sma_returns)
    sma_sharpe_for_comparison = (np.mean(sma_returns) - risk_free_rate) / sma_std if sma_std > 0 else 0

    # Visualizations comparing SMA and Bollinger Bands
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

    # 1. SMA Strategy
    ax1.hist(sma_returns, bins=25, alpha=0.6, color='blue', label='SMA Strategy')
    ax1.axvline(np.mean(sma_returns), color='blue', linestyle='--', linewidth=2)
    ax1.set_title('SMA Crossover Strategy Returns')
    ax1.set_xlabel('Return')
    ax1.set_ylabel('Frequency')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # 2. Bollinger Bands Strategy
    ax2.hist(bb_returns, bins=25, alpha=0.6, color='orange', label='Bollinger Bands')
    ax2.axvline(bb_stats['Mean Return'], color='orange', linestyle='--', linewidth=2)
    ax2.set_title('Bollinger Bands Strategy Returns')
    ax2.set_xlabel('Return')
    ax2.set_ylabel('Frequency')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # 3. Overlay comparison
    ax3.hist(sma_returns, bins=20, alpha=0.6, color='blue', label=f'SMA (μ={np.mean(sma_returns):.3f})')
    ax3.hist(bb_returns, bins=20, alpha=0.6, color='orange', label=f'BB (μ={bb_stats["Mean Return"]:.3f})')
    ax3.set_title('Strategy Comparison: SMA vs Bollinger Bands')
    ax3.set_xlabel('Return')
    ax3.set_ylabel('Frequency')
    ax3.legend()
    ax3.grid(True, alpha=0.3)

    # 4. Box plot comparison
    # Tick labels are set after plotting: the `labels=` keyword of boxplot is
    # deprecated in recent Matplotlib, while set_xticklabels works everywhere.
    ax4.boxplot([sma_returns, bb_returns], patch_artist=True)
    ax4.set_xticklabels(['SMA\nCrossover', 'Bollinger\nBands'])
    ax4.set_title('Risk-Return Box Plot Comparison')
    ax4.set_ylabel('Return')
    ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Performance comparison table
    comparison_df = pd.DataFrame({
        'Strategy': ['SMA Crossover', 'Bollinger Bands'],
        'Mean Return': [np.mean(sma_returns), bb_stats['Mean Return']],
        'Median Return': [np.median(sma_returns), bb_stats['Median Return']],
        'Std Deviation': [sma_std, bb_stats['Standard Deviation']],
        'Sharpe Ratio': [sma_sharpe_for_comparison, bb_sharpe_ratio],
        'Avg Signals': [np.mean([r['Signals_Generated'] for r in sma_results]), bb_stats['Average Signals']]
    })

    print("\n📈 STRATEGY PERFORMANCE COMPARISON:")
    print(comparison_df.round(4).to_string(index=False))

    # Top performers (sorted best first)
    bb_performance = [(r['Symbol'], r['Total_Return']) for r in bb_results]
    bb_performance.sort(key=lambda x: x[1], reverse=True)

    print("\n🏆 TOP 10 BOLLINGER BANDS PERFORMERS:")
    for i, (symbol, ret) in enumerate(bb_performance[:10]):
        print(f"{i+1:2d}. {symbol}: {ret:7.4f} ({ret*100:6.2f}%)")

    print("\n📉 BOTTOM 10 PERFORMERS:")
    for i, (symbol, ret) in enumerate(bb_performance[-10:]):
        print(f"{i+1:2d}. {symbol}: {ret:7.4f} ({ret*100:6.2f}%)")

    # Cornell paper insights
    print("\n" + "="*50)
    print("📋 CORNELL PAPER ALIGNMENT:")
    print("="*50)
    print("• Paper's Bollinger Bands: 20-day MA ± 2σ ✓")
    print("• Mean reversion signals: Oversold/Overbought ✓")
    print("• Manual calculation (no libraries): ✓")
    print(f"• Our BB Average: {bb_stats['Mean Return']*100:.2f}% vs Paper's 1.92%")
    print(f"• Our BB Std Dev: {bb_stats['Standard Deviation']*100:.2f}% vs Paper's 6.48%")
    print("="*50)

else:
    print("❌ No successful results. Check your implementation and data.")


# The comparison of returns by both strategies confirms their typical tendencies:


*   Mean reversion strategies tend to win frequently in small amounts, with occasional huge losses (close to -0.4% here) when prices break out to a new mean; they tend to generate better returns on assets that are mean reverting.
*   Momentum strategies tend to lose frequently in small amounts, with occasional huge payouts during price spikes.



#  Balanced Hurst Strategy - Combining Both Approaches

"""

    Apply strategy based on Hurst exponent classification:
    - If Hurst > boundary: Use SMA (momentum) strategy
    - If Hurst < boundary: Use Bollinger Bands (mean reversion) strategy
    - here the 'boundary' is Hurst Exponent = 0.5
"""

In [None]:


def balanced_hurst_strategy(hurst_df, sma_results, bb_results, boundary=0.5):
    """Pick, per stock, the SMA or Bollinger result according to its Hurst exponent.

    Args:
        hurst_df: DataFrame with ``Symbol`` and ``Hurst_Exponent`` columns.
        sma_results: List of dicts with ``Symbol`` and ``Total_Return``.
        bb_results: List of dicts with ``Symbol`` and ``Total_Return``.
        boundary: Stocks with ``Hurst_Exponent > boundary`` use the SMA
            result; all others (including values equal to the boundary) use
            the Bollinger Bands result.

    Returns:
        ``(balanced_returns, strategy_choices)``: two parallel lists holding
        the selected total return and the label ``'SMA'`` or ``'BB'`` for each
        stock, in ``hurst_df`` row order. A stock with no result for its
        selected strategy is skipped.
    """
    def _index_by_symbol(results):
        # One-pass symbol lookup. setdefault keeps the FIRST entry per symbol,
        # matching a front-to-back linear search.
        lookup = {}
        for entry in results:
            lookup.setdefault(entry['Symbol'], entry)
        return lookup

    sma_by_symbol = _index_by_symbol(sma_results)
    bb_by_symbol = _index_by_symbol(bb_results)

    balanced_returns = []
    strategy_choices = []

    for _, row in hurst_df.iterrows():
        symbol = row['Symbol']

        if row['Hurst_Exponent'] > boundary:
            # Trending stock: momentum (SMA) result
            chosen, label = sma_by_symbol.get(symbol), 'SMA'
        else:
            # Mean-reverting stock: Bollinger Bands result
            chosen, label = bb_by_symbol.get(symbol), 'BB'

        if chosen:
            balanced_returns.append(chosen['Total_Return'])
            strategy_choices.append(label)

    return balanced_returns, strategy_choices

# Apply balanced strategy at the default boundary (H = 0.5).
# Requires sma_returns and bb_returns from the strategy sections above.
balanced_returns, strategy_choices = balanced_hurst_strategy(hurst_df, sma_results, bb_results)

# Performance Statistics (cross-sectional: one total return per stock)
balanced_stats = {
    'Mean Return': np.mean(balanced_returns),
    'Median Return': np.median(balanced_returns),
    'Standard Deviation': np.std(balanced_returns),
    'Number of Stocks': len(balanced_returns)
}

print("📊 Balanced Hurst Strategy Performance (Boundary = 0.5):")
for metric, value in balanced_stats.items():
    if metric in ['Mean Return', 'Median Return', 'Standard Deviation']:
        print(f"{metric}: {value:.4f} ({value*100:.2f}%)")
    else:
        print(f"{metric}: {value}")

print("\nStrategy Usage:")
print(f"SMA (Trending): {strategy_choices.count('SMA')} stocks")
print(f"Bollinger Bands (Mean Reverting): {strategy_choices.count('BB')} stocks")

# Comprehensive comparison visualization
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# Individual strategy distributions (red dashed line marks the mean)
ax1.hist(sma_returns, bins=20, alpha=0.7, color='blue', label='SMA')
ax1.set_title('SMA Strategy Returns')
ax1.set_xlabel('Return')
ax1.axvline(np.mean(sma_returns), color='red', linestyle='--')

ax2.hist(bb_returns, bins=20, alpha=0.7, color='orange', label='Bollinger Bands')
ax2.set_title('Bollinger Bands Strategy Returns')
ax2.set_xlabel('Return')
ax2.axvline(np.mean(bb_returns), color='red', linestyle='--')

ax3.hist(balanced_returns, bins=20, alpha=0.7, color='green', label='Balanced')
ax3.set_title('Balanced Hurst Strategy Returns')
ax3.set_xlabel('Return')
ax3.axvline(np.mean(balanced_returns), color='red', linestyle='--')

# Comparative box plot.
# Tick labels are set after plotting: the `labels=` keyword of boxplot is
# deprecated in recent Matplotlib, while set_xticklabels works everywhere.
ax4.boxplot([sma_returns, bb_returns, balanced_returns])
ax4.set_xticklabels(['SMA', 'Bollinger Bands', 'Balanced'])
ax4.set_title('Strategy Comparison - Return Distributions')
ax4.set_ylabel('Return')

plt.tight_layout()
plt.show()

# Summary comparison table.
# The ratio here is mean / std without a risk-free adjustment. It is reported
# as 0 when the spread is zero, the same guard analyze_boundary_effect uses,
# instead of dividing by zero.
comparison_df = pd.DataFrame({
    'Strategy': ['SMA Crossover', 'Bollinger Bands', 'Balanced Hurst'],
    'Mean Return': [np.mean(sma_returns), np.mean(bb_returns), np.mean(balanced_returns)],
    'Median Return': [np.median(sma_returns), np.median(bb_returns), np.median(balanced_returns)],
    'Std Deviation': [np.std(sma_returns), np.std(bb_returns), np.std(balanced_returns)],
    'Sharpe Ratio': [np.mean(r) / np.std(r) if np.std(r) > 0 else 0
                     for r in (sma_returns, bb_returns, balanced_returns)]
})

print("\n📈 Strategy Performance Comparison:")
display(comparison_df.round(4))


# Understanding the Results: Risk vs Return Trade-off

## Key Observations from Our Analysis:

### 1. **The Balanced Strategy Paradox**
Our balanced Hurst strategy likely positioned itself **between** the two pure strategies rather than outperforming both. This mirrors the Cornell paper's findings and reveals a crucial insight:

**The Hurst exponent strategy is not primarily about maximizing returns—it's about choosing your preferred risk-return profile.**

### 2. **Why This Happens:**
- **SMA Strategy**: Frequent small losses with occasional large gains (trending markets)
- **Bollinger Bands**: Frequent small wins with occasional large losses (mean reversion breakdown)
- **Balanced Strategy**: Interpolates between these two characteristics

### 3. **The Risk Factor We Haven't Considered:**
Our analysis focused on returns but **ignored risk adjustment**. The paper showed that:
- Lower Hurst boundaries → Lower risk, Lower returns (more momentum trading)
- Higher Hurst boundaries → Higher risk, Higher returns (more mean reversion trading)

### 4. **The Real Value Proposition:**
Instead of seeking the "best" strategy, we can **dial in our preferred risk tolerance** by adjusting the Hurst boundary threshold!

---

## Next Step: Interactive Risk Preference Selection
In the next cell, you'll be able to choose a Hurst boundary with a slider, and we'll show the resulting risk-return profile and strategy allocation for that choice.


# 🎛️ Interactive Risk-Return Optimization

"""

  Analyze how different Hurst boundaries affect risk-return profile
  
"""

In [None]:


import ipywidgets as widgets
from IPython.display import display, clear_output

def analyze_boundary_effect(hurst_df, sma_results, bb_results, boundary_range):
    """Tabulate the blended strategy's statistics for a sweep of Hurst boundaries.

    For each boundary, ``balanced_hurst_strategy`` is evaluated and one row is
    recorded with the mean and standard deviation of the selected returns,
    their ratio (0 when the standard deviation is not positive) and how many
    stocks were routed to each strategy. Boundaries that produce no returns
    are left out.

    Returns:
        pandas.DataFrame with columns ``Boundary``, ``Mean_Return``,
        ``Std_Deviation``, ``Sharpe_Ratio``, ``SMA_Count`` and ``BB_Count``.
    """
    rows = []

    for boundary in boundary_range:
        returns, choices = balanced_hurst_strategy(hurst_df, sma_results, bb_results, boundary)

        # Nothing selected at this boundary: no statistics to report.
        if not returns:
            continue

        mean_return = np.mean(returns)
        spread = np.std(returns)
        rows.append({
            'Boundary': boundary,
            'Mean_Return': mean_return,
            'Std_Deviation': spread,
            'Sharpe_Ratio': mean_return / spread if spread > 0 else 0,
            'SMA_Count': choices.count('SMA'),
            'BB_Count': choices.count('BB')
        })

    return pd.DataFrame(rows)

# Analyze boundary effects over the full slider range, 0.10 to 0.90 in 0.05 steps.
# linspace includes the 0.90 endpoint that the slider can select (a half-open
# float-step arange stops at 0.85). Rounding gives clean two-decimal values.
boundary_range = np.round(np.linspace(0.1, 0.9, 17), 2)
boundary_df = analyze_boundary_effect(hurst_df, sma_results, bb_results, boundary_range)

# Interactive widget callback for the user's boundary choice
def interactive_analysis(user_boundary):
    """Show portfolio statistics and charts for one Hurst boundary.

    Re-runs ``balanced_hurst_strategy`` with ``user_boundary``, prints the
    resulting statistics and strategy allocation, and draws four panels:
    the risk-return curve from ``boundary_df`` with the chosen point, mean
    return and risk versus boundary, stock counts per strategy versus
    boundary, and a histogram of the selected returns.

    Args:
        user_boundary: Hurst threshold; stocks above it use the SMA result,
            the rest use the Bollinger Bands result.
    """
    clear_output(wait=True)

    # Calculate results for user's choice
    user_returns, user_choices = balanced_hurst_strategy(hurst_df, sma_results, bb_results, user_boundary)

    # Without any returns the statistics would be NaN and the plots
    # meaningless, so report it and stop.
    if not user_returns:
        print(f"🎯 Your Selected Hurst Boundary: {user_boundary:.2f}")
        print("❌ No strategy results are available for this boundary.")
        return

    mean_return = np.mean(user_returns)
    spread = np.std(user_returns)
    user_stats = {
        'Mean Return': mean_return,
        'Median Return': np.median(user_returns),
        'Standard Deviation': spread,
        'Sharpe Ratio': mean_return / spread if spread > 0 else 0
    }

    print(f"🎯 Your Selected Hurst Boundary: {user_boundary:.2f}")
    print("📊 Your Portfolio Performance:")
    for metric, value in user_stats.items():
        if 'Ratio' in metric:
            print(f"{metric}: {value:.3f}")
        else:
            print(f"{metric}: {value:.4f} ({value*100:.2f}%)")

    print("\n📈 Strategy Allocation:")
    print(f"SMA (Trending): {user_choices.count('SMA')} stocks")
    print(f"Bollinger Bands (Mean Reverting): {user_choices.count('BB')} stocks")

    # Visualization
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

    # Risk-Return frontier
    ax1.plot(boundary_df['Std_Deviation'], boundary_df['Mean_Return'], 'b-', alpha=0.7)
    ax1.scatter(user_stats['Standard Deviation'], user_stats['Mean Return'],
               color='red', s=100, zorder=5, label=f'Your Choice ({user_boundary:.2f})')
    ax1.set_xlabel('Risk (Standard Deviation)')
    ax1.set_ylabel('Expected Return')
    ax1.set_title('Risk-Return Frontier')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Boundary effects (two y-axes: return on the left, risk on the right)
    return_line, = ax2.plot(boundary_df['Boundary'], boundary_df['Mean_Return'], 'g-', label='Mean Return')
    ax2_twin = ax2.twinx()
    risk_line, = ax2_twin.plot(boundary_df['Boundary'], boundary_df['Std_Deviation'], 'r-', label='Risk')
    ax2.axvline(user_boundary, color='black', linestyle='--', alpha=0.7)
    ax2.set_xlabel('Hurst Boundary')
    ax2.set_ylabel('Mean Return', color='g')
    ax2_twin.set_ylabel('Standard Deviation', color='r')
    ax2.set_title('Boundary Effect on Risk-Return')
    # The lines live on different axes, so build one shared legend explicitly;
    # otherwise their labels are never displayed.
    ax2.legend(handles=[return_line, risk_line], loc='best')

    # Strategy allocation
    ax3.plot(boundary_df['Boundary'], boundary_df['SMA_Count'], 'b-', label='SMA Count')
    ax3.plot(boundary_df['Boundary'], boundary_df['BB_Count'], 'o-', label='BB Count')
    ax3.axvline(user_boundary, color='black', linestyle='--', alpha=0.7)
    ax3.set_xlabel('Hurst Boundary')
    ax3.set_ylabel('Number of Stocks')
    ax3.set_title('Strategy Allocation by Boundary')
    ax3.legend()
    ax3.grid(True, alpha=0.3)

    # Your portfolio distribution
    ax4.hist(user_returns, bins=20, alpha=0.7, color='purple', edgecolor='black')
    ax4.axvline(user_stats['Mean Return'], color='red', linestyle='--',
               label=f'Mean: {user_stats["Mean Return"]:.3f}')
    ax4.set_xlabel('Return')
    ax4.set_ylabel('Frequency')
    ax4.set_title('Your Portfolio Return Distribution')
    ax4.legend()
    ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

# Risk preference widget: the slider value is passed to interactive_analysis
# as the Hurst boundary.
risk_widget = widgets.FloatSlider(
    value=0.5,
    min=0.1,
    max=0.9,
    step=0.05,
    description='Risk Level:',
    style={'description_width': 'initial'},
    readout_format='.2f'
)

# Guidance text. A LOWER boundary puts more stocks above it, so more of them
# are traded with the SMA (momentum) strategy; a HIGHER boundary routes more
# stocks to Bollinger Bands (mean reversion). The wording follows that.
print("🎛️ Adjust your risk preference using the slider below:")
print("• Lower values (0.1-0.4): Conservative (more momentum)")
print("• Middle values (0.4-0.6): Balanced")
print("• Higher values (0.6-0.9): Aggressive (more mean reversion)")

widgets.interact(interactive_analysis, user_boundary=risk_widget)


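Run this cell to strip the saved widget state (`metadata.widgets`) from an uploaded copy of the notebook and download the cleaned file.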

In [None]:
import json
from google.colab import files
from pathlib import Path

# Step 1: Upload the broken notebook
uploaded = files.upload()

# Fail with a clear message when nothing was uploaded, instead of the bare
# StopIteration that next(iter(...)) raises on an empty result.
if not uploaded:
    raise RuntimeError("No notebook was uploaded; re-run this cell and choose an .ipynb file.")

# Only the first uploaded file is processed.
notebook_filename = next(iter(uploaded))

# Step 2: Clean the notebook's metadata
def clean_metadata(notebook_path):
    """Write a copy of a notebook with its top-level ``metadata.widgets`` removed.

    Args:
        notebook_path: Path of the notebook JSON file to read.

    Returns:
        pathlib.Path of the written copy, located next to the input and named
        ``<stem>_CLEAN.ipynb``. The input file itself is not modified.
    """
    source = Path(notebook_path)
    notebook = json.loads(source.read_text(encoding="utf-8"))

    # Drop the stored widget state if the notebook carries any.
    metadata = notebook.get("metadata", {})
    if "widgets" in metadata:
        print("🧹 Removing metadata.widgets...")
        del metadata["widgets"]

    cleaned_path = source.with_name(f"{source.stem}_CLEAN.ipynb")
    cleaned_path.write_text(json.dumps(notebook, indent=1), encoding="utf-8")

    print(f"✅ Cleaned notebook saved as: {cleaned_path}")
    return cleaned_path

# Produce the cleaned copy of the uploaded notebook.
cleaned_file = clean_metadata(notebook_filename)

# Step 3: Send the cleaned notebook back to the browser as a download.
files.download(f"{cleaned_file}")
