# Finding the Best Timeframe for Option Pricing

This notebook evaluates different historical timeframes to determine which provides the most accurate option price predictions.


## 1. Setup and Imports


In [1]:
import sys
sys.path.append('/Users/jedi_pitchayut/Documents/programming/history-pricing-model/functions')

import pandas as pd
import numpy as np
from fetch_price import fetch_option_price
from historical_option_price import batch_historical_option_price
import warnings
warnings.filterwarnings('ignore')

print("✓ Imports successful")


✓ Imports successful


## 2. Define Helper Functions


In [2]:
def load_sp500_symbols(csv_path: str, limit: int = None) -> list[str]:
    """Load S&P 500 symbols from CSV file"""
    df = pd.read_csv(csv_path)
    symbols = df['Symbol'].tolist()
    if limit:
        symbols = symbols[:limit]
    return symbols

print("✓ load_sp500_symbols defined")


✓ load_sp500_symbols defined


In [3]:
def calculate_metrics(actual_prices: pd.DataFrame, predicted_prices: pd.DataFrame) -> dict:
    """
    Calculate comparison metrics between actual and predicted option prices
    
    Metrics:
    - MAE (Mean Absolute Error): Average absolute difference
    - RMSE (Root Mean Square Error): Square root of average squared differences
    - MAPE (Mean Absolute Percentage Error): Average percentage error
    - R-squared: Proportion of variance explained
    - Correlation: Pearson correlation coefficient
    """
    # Flatten both dataframes and align them
    actual_flat = actual_prices.values.flatten()
    predicted_flat = predicted_prices.values.flatten()
    
    # Remove NaN values - convert to float first to avoid dtype issues
    actual_flat = actual_flat.astype(float)
    predicted_flat = predicted_flat.astype(float)
    
    mask = ~(np.isnan(actual_flat) | np.isnan(predicted_flat))
    actual_clean = actual_flat[mask]
    predicted_clean = predicted_flat[mask]
    
    if len(actual_clean) == 0:
        return {
            'mae': np.nan,
            'rmse': np.nan,
            'mape': np.nan,
            'r_squared': np.nan,
            'correlation': np.nan,
            'n_samples': 0
        }
    
    # Calculate metrics
    mae = np.mean(np.abs(actual_clean - predicted_clean))
    rmse = np.sqrt(np.mean((actual_clean - predicted_clean) ** 2))
    
    # MAPE - avoid division by zero
    mape = np.mean(np.abs((actual_clean - predicted_clean) / (actual_clean + 1e-10))) * 100
    
    # R-squared
    ss_res = np.sum((actual_clean - predicted_clean) ** 2)
    ss_tot = np.sum((actual_clean - np.mean(actual_clean)) ** 2)
    r_squared = 1 - (ss_res / (ss_tot + 1e-10))
    
    # Correlation
    correlation = np.corrcoef(actual_clean, predicted_clean)[0, 1]
    
    return {
        'mae': mae,
        'rmse': rmse,
        'mape': mape,
        'r_squared': r_squared,
        'correlation': correlation,
        'n_samples': len(actual_clean)
    }

print("✓ calculate_metrics defined")


✓ calculate_metrics defined


In [4]:
def align_option_prices(actual_call_df: pd.DataFrame, actual_put_df: pd.DataFrame,
                       predicted_call_df: pd.DataFrame, predicted_put_df: pd.DataFrame,
                       strike_prices: list, time_to_maturity_list: list) -> tuple:
    """
    Align actual and predicted option prices to have the same strikes and maturities
    """
    # Get common strikes and maturities
    actual_strikes = set(actual_call_df.columns) | set(actual_put_df.columns)
    predicted_strikes = set(predicted_call_df.columns) | set(predicted_put_df.columns)
    common_strikes = sorted(list(actual_strikes & predicted_strikes))
    
    actual_maturities = set(actual_call_df.index) | set(actual_put_df.index)
    predicted_maturities = set(predicted_call_df.index) | set(predicted_put_df.index)
    common_maturities = sorted(list(actual_maturities & predicted_maturities))
    
    if len(common_strikes) == 0 or len(common_maturities) == 0:
        return None, None, None, None
    
    # Reindex to common strikes and maturities
    actual_call_aligned = actual_call_df.reindex(index=common_maturities, columns=common_strikes)
    actual_put_aligned = actual_put_df.reindex(index=common_maturities, columns=common_strikes)
    predicted_call_aligned = predicted_call_df.reindex(index=common_maturities, columns=common_strikes)
    predicted_put_aligned = predicted_put_df.reindex(index=common_maturities, columns=common_strikes)
    
    return actual_call_aligned, actual_put_aligned, predicted_call_aligned, predicted_put_aligned

print("✓ align_option_prices defined")


✓ align_option_prices defined


In [5]:
def evaluate_timeframe_for_symbol(symbol: str, timeframe: str) -> dict:
    """
    Evaluate a specific timeframe for a given symbol
    Returns metrics comparing actual vs predicted option prices
    """
    print(f"\n{'='*80}")
    print(f"Evaluating {symbol} with timeframe {timeframe}")
    print(f"{'='*80}")
    
    try:
        # Fetch actual option prices from market
        print(f"\n[1/3] Fetching actual option prices for {symbol}...")
        actual_call_df, actual_put_df, strike_prices, time_to_maturity_list = fetch_option_price(symbol)
        
        if actual_call_df.empty and actual_put_df.empty:
            print(f"No option data available for {symbol}")
            return None
        
        # Filter to reasonable maturities (up to 365 days) and strikes
        max_maturity = 365
        time_to_maturity_list = [t for t in time_to_maturity_list if 1 <= t <= max_maturity]
        
        if len(time_to_maturity_list) == 0:
            print(f"No valid maturities for {symbol}")
            return None
        
        # Select a reasonable subset of strikes (e.g., around ATM)
        if not actual_call_df.empty:
            current_price = actual_call_df.columns[len(actual_call_df.columns) // 2]
        elif not actual_put_df.empty:
            current_price = actual_put_df.columns[len(actual_put_df.columns) // 2]
        else:
            return None
        
        # Select strikes within 20% of current price
        strike_prices = [s for s in strike_prices if 0.8 * current_price <= s <= 1.2 * current_price]
        strike_prices = sorted(strike_prices)[:20]  # Limit to 20 strikes
        time_to_maturity_list = sorted(time_to_maturity_list)[:10]  # Limit to 10 maturities
        
        if len(strike_prices) == 0:
            print(f"No valid strikes for {symbol}")
            return None
        
        print(f"Using {len(strike_prices)} strikes and {len(time_to_maturity_list)} maturities")
        
        # Calculate predicted option prices using historical model
        print(f"\n[2/3] Calculating predicted option prices with timeframe {timeframe}...")
        predicted_call_df, predicted_put_df = batch_historical_option_price(
            symbol=symbol,
            strike_prices=strike_prices,
            time_to_maturity_days=time_to_maturity_list,
            timeframe=timeframe
        )
        
        # Align actual and predicted prices
        print("\n[3/3] Calculating metrics...")
        actual_call_aligned, actual_put_aligned, predicted_call_aligned, predicted_put_aligned = \
            align_option_prices(actual_call_df, actual_put_df, predicted_call_df, predicted_put_df,
                              strike_prices, time_to_maturity_list)
        
        if actual_call_aligned is None:
            print(f"Could not align option prices for {symbol}")
            return None
        
        # Calculate metrics for calls and puts
        call_metrics = calculate_metrics(actual_call_aligned, predicted_call_aligned)
        put_metrics = calculate_metrics(actual_put_aligned, predicted_put_aligned)
        
        # Combined metrics (average of calls and puts)
        combined_metrics = {
            'mae': np.nanmean([call_metrics['mae'], put_metrics['mae']]),
            'rmse': np.nanmean([call_metrics['rmse'], put_metrics['rmse']]),
            'mape': np.nanmean([call_metrics['mape'], put_metrics['mape']]),
            'r_squared': np.nanmean([call_metrics['r_squared'], put_metrics['r_squared']]),
            'correlation': np.nanmean([call_metrics['correlation'], put_metrics['correlation']]),
            'n_samples': call_metrics['n_samples'] + put_metrics['n_samples']
        }
        
        print(f"\nResults for {symbol} (timeframe: {timeframe}):")
        print(f"  Call Options - MAE: ${call_metrics['mae']:.3f}, RMSE: ${call_metrics['rmse']:.3f}, R²: {call_metrics['r_squared']:.3f}")
        print(f"  Put Options  - MAE: ${put_metrics['mae']:.3f}, RMSE: ${put_metrics['rmse']:.3f}, R²: {put_metrics['r_squared']:.3f}")
        print(f"  Combined     - MAE: ${combined_metrics['mae']:.3f}, RMSE: ${combined_metrics['rmse']:.3f}, R²: {combined_metrics['r_squared']:.3f}")
        
        return {
            'symbol': symbol,
            'timeframe': timeframe,
            'call_metrics': call_metrics,
            'put_metrics': put_metrics,
            'combined_metrics': combined_metrics
        }
        
    except Exception as e:
        print(f"Error evaluating {symbol} with timeframe {timeframe}: {str(e)}")
        import traceback
        traceback.print_exc()
        return None

print("✓ evaluate_timeframe_for_symbol defined")


✓ evaluate_timeframe_for_symbol defined


## 3. Load S&P 500 Symbols


In [6]:
csv_path = '/Users/jedi_pitchayut/Documents/programming/history-pricing-model/data/sp500_companies.csv'

# Start with a small sample for testing (first 3 symbols)
# You can increase this or remove the limit for full analysis
symbols = load_sp500_symbols(csv_path, limit=3)
print(f"Loaded {len(symbols)} symbols: {symbols}")


Loaded 3 symbols: ['AAPL', 'NVDA', 'MSFT']


## 4. Define Timeframes to Test


In [7]:
# Test all available timeframes
timeframes = ['1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max']
print(f"Timeframes to evaluate: {timeframes}")
print(f"\nTotal evaluations: {len(symbols)} symbols × {len(timeframes)} timeframes = {len(symbols) * len(timeframes)} evaluations")


Timeframes to evaluate: ['1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max']

Total evaluations: 3 symbols × 9 timeframes = 27 evaluations


## 5. Test with Single Symbol First

Let's test with just one symbol and one timeframe to make sure everything works.


In [8]:
# Test with AAPL and 1 year timeframe
test_result = evaluate_timeframe_for_symbol('AAPL', '1y')
if test_result:
    print("\n✓ Test successful! Ready to run full analysis.")
else:
    print("\n❌ Test failed. Please check the errors above.")



Evaluating AAPL with timeframe 1y

[1/3] Fetching actual option prices for AAPL...
START FETCHING OPTION PRICES FOR AAPL
Found 20 expiration dates
Fetched 69 call strikes and 57 put strikes for 2025-11-14 (6 days)
Fetched 76 call strikes and 68 put strikes for 2025-11-21 (13 days)
Fetched 44 call strikes and 38 put strikes for 2025-11-28 (20 days)
Fetched 40 call strikes and 34 put strikes for 2025-12-05 (27 days)
Fetched 27 call strikes and 19 put strikes for 2025-12-12 (34 days)
Fetched 78 call strikes and 70 put strikes for 2025-12-19 (41 days)
Fetched 79 call strikes and 70 put strikes for 2026-01-16 (69 days)
Fetched 56 call strikes and 46 put strikes for 2026-02-20 (104 days)
Fetched 48 call strikes and 40 put strikes for 2026-03-20 (132 days)
Fetched 53 call strikes and 43 put strikes for 2026-04-17 (160 days)
Fetched 53 call strikes and 50 put strikes for 2026-05-15 (188 days)
Fetched 74 call strikes and 68 put strikes for 2026-06-18 (222 days)
Fetched 40 call strikes and 36 p

## 6. Run Full Analysis

Now let's evaluate all symbols with all timeframes. This will take some time.


In [9]:
all_results = []
total_combinations = len(symbols) * len(timeframes)
current = 0

for symbol in symbols:
    print(f"\n{'#'*80}")
    print(f"Processing symbol: {symbol} ({symbols.index(symbol) + 1}/{len(symbols)})")
    print(f"{'#'*80}")
    
    for timeframe in timeframes:
        current += 1
        print(f"\nProgress: {current}/{total_combinations} ({current/total_combinations*100:.1f}%)")
        
        result = evaluate_timeframe_for_symbol(symbol, timeframe)
        if result:
            all_results.append(result)

print(f"\n{'='*80}")
print(f"Completed! Got {len(all_results)} valid results out of {total_combinations} evaluations")
print(f"{'='*80}")



################################################################################
Processing symbol: AAPL (1/3)
################################################################################

Progress: 1/27 (3.7%)

Evaluating AAPL with timeframe 1mo

[1/3] Fetching actual option prices for AAPL...
START FETCHING OPTION PRICES FOR AAPL
Found 20 expiration dates
Fetched 69 call strikes and 57 put strikes for 2025-11-14 (6 days)
Fetched 76 call strikes and 68 put strikes for 2025-11-21 (13 days)
Fetched 44 call strikes and 38 put strikes for 2025-11-28 (20 days)
Fetched 40 call strikes and 34 put strikes for 2025-12-05 (27 days)
Fetched 27 call strikes and 19 put strikes for 2025-12-12 (34 days)
Fetched 78 call strikes and 70 put strikes for 2025-12-19 (41 days)
Fetched 79 call strikes and 70 put strikes for 2026-01-16 (69 days)
Fetched 56 call strikes and 46 put strikes for 2026-02-20 (104 days)
Fetched 48 call strikes and 40 put strikes for 2026-03-20 (132 days)
Fetched 53 call strike

Traceback (most recent call last):
  File "/var/folders/fz/9vvx_h395pj79pvm5fhsr2rh0000gn/T/ipykernel_36229/137205805.py", line 48, in evaluate_timeframe_for_symbol
    predicted_call_df, predicted_put_df = batch_historical_option_price(
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/functions/historical_option_price.py", line 76, in batch_historical_option_price
    call_prices.columns.name = "Strike Price"
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/functions/risk_free_rate.py", line 16, in apply_risk_free_rate
    risk_free_rate = fetch_risk_free_rate()
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/functions/risk_free_rate.py", line 7, in fetch_risk_free_rate
    data = ticker.history(period="1d")
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/.venv/lib/python3.10/site-packages/yfinance/utils.py", line 92, in wrapper
    result = func(*args, **kwargs)
  File "/Users/jedi_p

Error evaluating MSFT with timeframe 3mo: Too Many Requests. Rate limited. Try after a while.

Progress: 21/27 (77.8%)

Evaluating MSFT with timeframe 6mo

[1/3] Fetching actual option prices for MSFT...
START FETCHING OPTION PRICES FOR MSFT
Error evaluating MSFT with timeframe 6mo: Too Many Requests. Rate limited. Try after a while.

Progress: 22/27 (81.5%)

Evaluating MSFT with timeframe 1y

[1/3] Fetching actual option prices for MSFT...
START FETCHING OPTION PRICES FOR MSFT


Traceback (most recent call last):
  File "/var/folders/fz/9vvx_h395pj79pvm5fhsr2rh0000gn/T/ipykernel_36229/137205805.py", line 13, in evaluate_timeframe_for_symbol
    actual_call_df, actual_put_df, strike_prices, time_to_maturity_list = fetch_option_price(symbol)
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/functions/fetch_price.py", line 32, in fetch_option_price
    expirations = ticker.options
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/.venv/lib/python3.10/site-packages/yfinance/ticker.py", line 311, in options
    self._download_options()
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/.venv/lib/python3.10/site-packages/yfinance/ticker.py", line 52, in _download_options
    r = self._data.get(url=url).json()
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/.venv/lib/python3.10/site-packages/yfinance/utils.py", line 92, in wrapper
    result = func(*args, **kwargs)
  File

Error evaluating MSFT with timeframe 1y: Too Many Requests. Rate limited. Try after a while.

Progress: 23/27 (85.2%)

Evaluating MSFT with timeframe 2y

[1/3] Fetching actual option prices for MSFT...
START FETCHING OPTION PRICES FOR MSFT
Error evaluating MSFT with timeframe 2y: Too Many Requests. Rate limited. Try after a while.

Progress: 24/27 (88.9%)

Evaluating MSFT with timeframe 5y

[1/3] Fetching actual option prices for MSFT...
START FETCHING OPTION PRICES FOR MSFT


Traceback (most recent call last):
  File "/var/folders/fz/9vvx_h395pj79pvm5fhsr2rh0000gn/T/ipykernel_36229/137205805.py", line 13, in evaluate_timeframe_for_symbol
    actual_call_df, actual_put_df, strike_prices, time_to_maturity_list = fetch_option_price(symbol)
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/functions/fetch_price.py", line 32, in fetch_option_price
    expirations = ticker.options
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/.venv/lib/python3.10/site-packages/yfinance/ticker.py", line 311, in options
    self._download_options()
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/.venv/lib/python3.10/site-packages/yfinance/ticker.py", line 52, in _download_options
    r = self._data.get(url=url).json()
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/.venv/lib/python3.10/site-packages/yfinance/utils.py", line 92, in wrapper
    result = func(*args, **kwargs)
  File

Error evaluating MSFT with timeframe 5y: Too Many Requests. Rate limited. Try after a while.

Progress: 25/27 (92.6%)

Evaluating MSFT with timeframe 10y

[1/3] Fetching actual option prices for MSFT...
START FETCHING OPTION PRICES FOR MSFT
Error evaluating MSFT with timeframe 10y: Too Many Requests. Rate limited. Try after a while.

Progress: 26/27 (96.3%)

Evaluating MSFT with timeframe ytd

[1/3] Fetching actual option prices for MSFT...
START FETCHING OPTION PRICES FOR MSFT


Traceback (most recent call last):
  File "/var/folders/fz/9vvx_h395pj79pvm5fhsr2rh0000gn/T/ipykernel_36229/137205805.py", line 13, in evaluate_timeframe_for_symbol
    actual_call_df, actual_put_df, strike_prices, time_to_maturity_list = fetch_option_price(symbol)
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/functions/fetch_price.py", line 32, in fetch_option_price
    expirations = ticker.options
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/.venv/lib/python3.10/site-packages/yfinance/ticker.py", line 311, in options
    self._download_options()
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/.venv/lib/python3.10/site-packages/yfinance/ticker.py", line 52, in _download_options
    r = self._data.get(url=url).json()
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/.venv/lib/python3.10/site-packages/yfinance/utils.py", line 92, in wrapper
    result = func(*args, **kwargs)
  File

Error evaluating MSFT with timeframe ytd: Too Many Requests. Rate limited. Try after a while.

Progress: 27/27 (100.0%)

Evaluating MSFT with timeframe max

[1/3] Fetching actual option prices for MSFT...
START FETCHING OPTION PRICES FOR MSFT
Error evaluating MSFT with timeframe max: Too Many Requests. Rate limited. Try after a while.

Completed! Got 18 valid results out of 27 evaluations


Traceback (most recent call last):
  File "/var/folders/fz/9vvx_h395pj79pvm5fhsr2rh0000gn/T/ipykernel_36229/137205805.py", line 13, in evaluate_timeframe_for_symbol
    actual_call_df, actual_put_df, strike_prices, time_to_maturity_list = fetch_option_price(symbol)
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/functions/fetch_price.py", line 32, in fetch_option_price
    expirations = ticker.options
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/.venv/lib/python3.10/site-packages/yfinance/ticker.py", line 311, in options
    self._download_options()
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/.venv/lib/python3.10/site-packages/yfinance/ticker.py", line 52, in _download_options
    r = self._data.get(url=url).json()
  File "/Users/jedi_pitchayut/Documents/programming/history-pricing-model/.venv/lib/python3.10/site-packages/yfinance/utils.py", line 92, in wrapper
    result = func(*args, **kwargs)
  File

## 7. Create Summary DataFrames


In [10]:
if len(all_results) > 0:
    # Create summary DataFrame
    summary_data = []
    for result in all_results:
        summary_data.append({
            'Symbol': result['symbol'],
            'Timeframe': result['timeframe'],
            'MAE': result['combined_metrics']['mae'],
            'RMSE': result['combined_metrics']['rmse'],
            'MAPE': result['combined_metrics']['mape'],
            'R²': result['combined_metrics']['r_squared'],
            'Correlation': result['combined_metrics']['correlation'],
            'N_Samples': result['combined_metrics']['n_samples']
        })
    
    summary_df = pd.DataFrame(summary_data)
    print("✓ Summary DataFrame created")
    print(f"Shape: {summary_df.shape}")
else:
    print("❌ No valid results to create summary")
    summary_df = pd.DataFrame()


✓ Summary DataFrame created
Shape: (18, 8)


## 8. View Detailed Results


In [11]:
if not summary_df.empty:
    print("DETAILED RESULTS BY SYMBOL AND TIMEFRAME:")
    print("="*80)
    display(summary_df)
else:
    print("No results to display")


DETAILED RESULTS BY SYMBOL AND TIMEFRAME:


Unnamed: 0,Symbol,Timeframe,MAE,RMSE,MAPE,R²,Correlation,N_Samples
0,AAPL,1mo,41.029831,58.783707,22684.275667,-345.908361,-0.411638,254
1,AAPL,3mo,69.294716,84.578604,34065.440341,-948.834434,-0.606644,254
2,AAPL,6mo,86.123198,98.398798,38337.427842,-1397.775346,-0.662586,254
3,AAPL,1y,109.765212,114.888668,41828.10481,-2043.249192,-0.584061,254
4,AAPL,2y,119.219342,122.438961,43000.920758,-2379.917696,-0.44018,254
5,AAPL,5y,126.199944,128.584129,43812.151534,-2673.074678,-0.206512,254
6,AAPL,10y,125.485066,128.006865,43822.360591,-2647.704498,0.075532,254
7,AAPL,ytd,104.408332,111.313204,41385.530072,-1896.430354,-0.624894,254
8,AAPL,max,126.822886,129.059423,44114.045153,-2742.146083,0.086809,254
9,NVDA,1mo,24.387063,34.349582,766.939684,-85.220711,-0.633507,128


## 9. Calculate Average Metrics by Timeframe


In [12]:
if not summary_df.empty:
    timeframe_summary = summary_df.groupby('Timeframe').agg({
        'MAE': 'mean',
        'RMSE': 'mean',
        'MAPE': 'mean',
        'R²': 'mean',
        'Correlation': 'mean',
        'N_Samples': 'sum'
    }).round(4)
    
    print("AVERAGE METRICS BY TIMEFRAME:")
    print("="*80)
    display(timeframe_summary)
else:
    print("No results to summarize")
    timeframe_summary = pd.DataFrame()


AVERAGE METRICS BY TIMEFRAME:


Unnamed: 0_level_0,MAE,RMSE,MAPE,R²,Correlation,N_Samples
Timeframe,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10y,101.924,103.9414,22847.3408,-1677.1742,-0.0271,382
1mo,32.7084,46.5666,11725.6077,-215.5645,-0.5226,382
1y,89.5634,93.8444,21778.0725,-1303.811,-0.4242,382
2y,95.0875,97.8394,22366.9289,-1480.8019,-0.2825,382
3mo,55.0835,67.5738,17654.1681,-590.6487,-0.7046,382
5y,101.4722,103.4978,22823.5811,-1678.5948,-0.1664,382
6mo,70.1234,79.2899,19911.145,-878.0557,-0.7756,382
max,105.1675,106.6662,23033.2523,-1773.9396,0.0101,382
ytd,84.0973,89.984,21514.2352,-1192.1073,-0.4457,382


## 10. Find Best Timeframe by Each Metric


In [13]:
if not timeframe_summary.empty:
    print("BEST TIMEFRAMES BY METRIC:")
    print("="*80)
    
    best_mae = timeframe_summary['MAE'].idxmin()
    best_rmse = timeframe_summary['RMSE'].idxmin()
    best_mape = timeframe_summary['MAPE'].idxmin()
    best_r2 = timeframe_summary['R²'].idxmax()
    best_corr = timeframe_summary['Correlation'].idxmax()
    
    print(f"Best by MAE (Mean Absolute Error): {best_mae} (${timeframe_summary.loc[best_mae, 'MAE']:.4f})")
    print(f"Best by RMSE (Root Mean Squared Error): {best_rmse} (${timeframe_summary.loc[best_rmse, 'RMSE']:.4f})")
    print(f"Best by MAPE (Mean Absolute Percentage Error): {best_mape} ({timeframe_summary.loc[best_mape, 'MAPE']:.2f}%)")
    print(f"Best by R² (Coefficient of Determination): {best_r2} ({timeframe_summary.loc[best_r2, 'R²']:.4f})")
    print(f"Best by Correlation: {best_corr} ({timeframe_summary.loc[best_corr, 'Correlation']:.4f})")
else:
    print("No results to analyze")


BEST TIMEFRAMES BY METRIC:
Best by MAE (Mean Absolute Error): 1mo ($32.7084)
Best by RMSE (Root Mean Squared Error): 1mo ($46.5666)
Best by MAPE (Mean Absolute Percentage Error): 1mo (11725.61%)
Best by R² (Coefficient of Determination): 1mo (-215.5645)
Best by Correlation: max (0.0101)


## 11. Calculate Overall Best Timeframe

Using a weighted combination of metrics (lower MAE/RMSE are better, higher R²/Correlation are better)


In [14]:
if not timeframe_summary.empty:
    # Overall recommendation (based on weighted combination of metrics)
    # Lower MAE and RMSE are better, higher R² and Correlation are better
    timeframe_summary['Score'] = (
        -timeframe_summary['MAE'] / timeframe_summary['MAE'].max() +  # Normalize and invert
        -timeframe_summary['RMSE'] / timeframe_summary['RMSE'].max() +  # Normalize and invert
        timeframe_summary['R²'] +  # Already normalized (0-1)
        timeframe_summary['Correlation']  # Already normalized (-1 to 1)
    )
    
    best_overall = timeframe_summary['Score'].idxmax()
    
    print("="*80)
    print(f"OVERALL BEST TIMEFRAME: {best_overall}")
    print(f"Score: {timeframe_summary.loc[best_overall, 'Score']:.4f}")
    print("="*80)
    print("\nTimeframe Rankings by Score:")
    display(timeframe_summary[['MAE', 'RMSE', 'R²', 'Correlation', 'Score']].sort_values('Score', ascending=False))
else:
    print("No results to score")


OVERALL BEST TIMEFRAME: 1mo
Score: -216.8347

Timeframe Rankings by Score:


Unnamed: 0_level_0,MAE,RMSE,R²,Correlation,Score
Timeframe,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1mo,32.7084,46.5666,-215.5645,-0.5226,-216.834676
3mo,55.0835,67.5738,-590.6487,-0.7046,-592.510576
6mo,70.1234,79.2899,-878.0557,-0.7756,-880.241424
ytd,84.0973,89.984,-1192.1073,-0.4457,-1194.196255
1y,89.5634,93.8444,-1303.811,-0.4242,-1305.966621
2y,95.0875,97.8394,-1480.8019,-0.2825,-1482.905801
10y,101.924,103.9414,-1677.1742,-0.0271,-1679.144914
5y,101.4722,103.4978,-1678.5948,-0.1664,-1680.696359
max,105.1675,106.6662,-1773.9396,0.0101,-1775.9295


## 12. Save Results to CSV


In [15]:
if not summary_df.empty:
    output_path = '/Users/jedi_pitchayut/Documents/programming/history-pricing-model/script/timeframe_analysis_results.csv'
    summary_df.to_csv(output_path, index=False)
    print(f"✓ Detailed results saved to: {output_path}")
    
    timeframe_output_path = '/Users/jedi_pitchayut/Documents/programming/history-pricing-model/script/timeframe_summary.csv'
    timeframe_summary.to_csv(timeframe_output_path)
    print(f"✓ Timeframe summary saved to: {timeframe_output_path}")
else:
    print("No results to save")


✓ Detailed results saved to: /Users/jedi_pitchayut/Documents/programming/history-pricing-model/script/timeframe_analysis_results.csv
✓ Timeframe summary saved to: /Users/jedi_pitchayut/Documents/programming/history-pricing-model/script/timeframe_summary.csv
