In [160]:
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import numpy as np
import random

In [182]:
# LOOKING AT THE DATA
spy = yf.download("SPY", start="1993-01-01", end="2025-01-01", auto_adjust=False, progress=False)
spy = spy.reset_index()
spy.columns = [col[0] if isinstance(col, tuple) else col for col in spy.columns]
print(spy.head())
print(spy.dtypes)


        Date  Adj Close     Close      High       Low      Open   Volume
0 1993-01-29  24.380442  43.93750  43.96875  43.75000  43.96875  1003200
1 1993-02-01  24.553846  44.25000  44.25000  43.96875  43.96875   480500
2 1993-02-02  24.605869  44.34375  44.37500  44.12500  44.21875   201300
3 1993-02-03  24.865978  44.81250  44.84375  44.37500  44.40625   529400
4 1993-02-04  24.970003  45.00000  45.09375  44.46875  44.96875   531500
Date         datetime64[ns]
Adj Close           float64
Close               float64
High                float64
Low                 float64
Open                float64
Volume                int64
dtype: object


In [183]:
# RANDOM 100 SIMULATION ON INVESTMENT ENTRY FOR 5 YEARS (ONLY DATA FROM 2006- BECAUSE OF SSO)
spy = yf.download("SPY", start="1993-01-01", end="2024-12-31", auto_adjust=False, progress=False) #EARLIEST DATA AVAILABLE
sso = yf.download("SSO", start="1993-01-01", end="2024-12-31", auto_adjust=False, progress=False)

# Reset index so Date becomes a column
spy = spy.reset_index()
sso = sso.reset_index()

# Common dates
common_dates = pd.Series(np.intersect1d(spy['Date'].dt.date, sso['Date'].dt.date))
common_dates = common_dates.sort_values().reset_index(drop=True)

# Function to compute returns for a 5-year period
def compute_return(start_date, end_date):
    open_spy = spy.loc[spy['Date'].dt.date == start_date, 'Open'].values[0]
    close_spy = spy.loc[spy['Date'].dt.date == end_date, 'Close'].values[0]
    spy_return = close_spy / open_spy

    open_sso = sso.loc[sso['Date'].dt.date == start_date, 'Open'].values[0]
    close_sso = sso.loc[sso['Date'].dt.date == end_date, 'Close'].values[0]
    sso_return = close_sso / open_sso

    return start_date, end_date, spy_return, sso_return

# Simulate 100 random 5-year periods
results = []
for _ in range(100):
    while True:
        idx = random.randint(0, len(common_dates) - 1 - 1260)  # ~252 trading days/year * 5 years
        start_date = common_dates[idx]
        end_date = common_dates[idx + 1260]

        try:
            result = compute_return(start_date, end_date)
            results.append(result)
            break
        except IndexError:
            continue

# Print results
df = pd.DataFrame(results, columns=["Start Date", "End Date", "SPY Return", "SSO Return"])
print(df)


    Start Date    End Date            SPY Return            SSO Return
0   2018-02-21  2023-02-23  [1.4735565011253569]  [1.7156592012751473]
1   2018-02-07  2023-02-09  [1.5161638597314362]   [1.816857864505676]
2   2015-06-24  2020-06-25  [1.4516814865456429]   [1.819727356784486]
3   2016-08-23  2021-08-25  [2.0474800623129275]   [3.608611845019472]
4   2017-02-02  2022-02-03  [1.9620420687277005]   [3.283893860816635]
..         ...         ...                   ...                   ...
95  2018-06-29  2023-07-05  [1.6284360312882564]  [2.0752122702748013]
96  2011-04-26  2016-04-28   [1.547556823763324]  [2.3872034895187353]
97  2009-01-09  2014-01-13  [1.9930890208698582]  [3.6756856876034356]
98  2019-08-20  2024-08-22  [1.9063645736267325]  [2.7508488858061555]
99  2007-08-30  2012-08-29  [0.9729116361356248]  [0.6777508411184987]

[100 rows x 4 columns]


In [184]:
#PRINT TOTAL RETURNS FOR ALL 100 INVESTMENTS
df['spy_return'] = df['SPY Return'] * 1000
df['sso_return'] = df['SSO Return'] * 1000
print(sum(df['spy_return']))
print(sum(df['sso_return']))


[161470.0173543]
[243343.18297197]


In [185]:
#SYNTHETIC_SSO TO GET DATA FROM 1993 W 100 SIM, 5 YEAR
############################################################################################################
# Download data
spy = yf.download("SPY", start="1993-01-01", end="2024-12-31", auto_adjust=False, progress=False)
spy = spy.reset_index()

# Make sure your synthetic_sso DataFrame exists and has columns: ['Date', 'Synthetic_SSO']
# Convert 'Date' columns to datetime
spy['Date'] = pd.to_datetime(spy['Date'])
synthetic_sso['Date'] = pd.to_datetime(synthetic_sso['Date'])

# Same trading days
common_dates = pd.Series(np.intersect1d(spy['Date'].dt.date, synthetic_sso['Date'].dt.date))
common_dates = common_dates.sort_values().reset_index(drop=True)

# Function to compute returns for a 5-year period
def compute_return(start_date, end_date):
    # SPY return: Open to Close
    open_spy = spy.loc[spy['Date'].dt.date == start_date, 'Open'].values[0]
    close_spy = spy.loc[spy['Date'].dt.date == end_date, 'Close'].values[0]
    spy_return = close_spy / open_spy

    # Synthetic SSO return: value at start vs value at end
    start_sso = synthetic_sso.loc[synthetic_sso['Date'].dt.date == start_date, 'Synthetic_SSO'].values[0]
    end_sso = synthetic_sso.loc[synthetic_sso['Date'].dt.date == end_date, 'Synthetic_SSO'].values[0]
    sso_return = end_sso / start_sso

    return start_date, end_date, spy_return, sso_return

# Simulate 100 random 5-year periods
results = []
for _ in range(100):
    while True:
        idx = random.randint(0, len(common_dates) - 1 - 1260)
        start_date = common_dates[idx]
        end_date = common_dates[idx + 1260]

        try:
            result = compute_return(start_date, end_date)
            results.append(result)
            break
        except IndexError:
            continue

# DF of results w print
df = pd.DataFrame(results, columns=["Start Date", "End Date", "SPY Return", "Synthetic SSO Return"])
print(df.head())


   Start Date    End Date            SPY Return  Synthetic SSO Return
0  2001-08-09  2006-08-16  [1.0926706005291342]              1.216825
1  2008-07-21  2013-07-23    [1.33696938213243]              1.609712
2  2001-10-11  2006-10-12  [1.2508490371416723]              1.587185
3  1997-09-02  2002-09-06  [0.9924190213645762]              0.817073
4  2009-12-10  2014-12-12   [1.814724525092576]              3.558358


In [186]:
#PRINT TOTAL RETURNS FOR ALL 100 INVESTMENTS
df['spy_return'] = df['SPY Return'] * 1000
df['sso_return'] = df['Synthetic SSO Return'] * 1000
print(sum(df['spy_return']))
print(sum(df['sso_return']))


[154475.67021055]
272393.9099809268


In [187]:
#SYNTHETIC_SSO TO GET DATA FROM 1993 INVESTMENT EVERY 2 MONTHS HOLD FOR 5 YEARS
############################################################################################################
# Download SPY data
spy = yf.download("SPY", start="1993-01-01", end="2024-03-31", auto_adjust=False, progress=False)
spy = spy.reset_index()

# Convert date columns to datetime
spy['Date'] = pd.to_datetime(spy['Date'])
synthetic_sso['Date'] = pd.to_datetime(synthetic_sso['Date'])

# Same Trading Days
common_dates = pd.Series(np.intersect1d(spy['Date'].dt.date, synthetic_sso['Date'].dt.date))
common_dates = common_dates.sort_values().reset_index(drop=True)

# Function to compute returns for a 5-year period
def compute_return(start_date, end_date):
    # SPY return: Open to Close
    open_spy = spy.loc[spy['Date'].dt.date == start_date, 'Open'].values[0]
    close_spy = spy.loc[spy['Date'].dt.date == end_date, 'Close'].values[0]
    spy_return = close_spy / open_spy

    # Synthetic SSO return: synthetic value change
    start_sso = synthetic_sso.loc[synthetic_sso['Date'].dt.date == start_date, 'Synthetic_SSO'].values[0]
    end_sso = synthetic_sso.loc[synthetic_sso['Date'].dt.date == end_date, 'Synthetic_SSO'].values[0]
    sso_return = end_sso / start_sso

    return start_date, end_date, spy_return, sso_return

# Simulation using a new start every ~2 months (roughly every 40 trading days)
step = 40  # about 2 months of trading days
window = 1260  # ~5 years
results = []

for idx in range(0, len(common_dates) - window, step):
    start_date = common_dates[idx]
    end_date = common_dates[idx + window]
    
    try:
        result = compute_return(start_date, end_date)
        results.append(result)
    except IndexError:
        continue

# DF + results display
df = pd.DataFrame(results, columns=["Start Date", "End Date", "SPY Return", "Synthetic SSO Return"])
df['spy_return'] = df['SPY Return'] * 1000
df['sso_return'] = df['Synthetic SSO Return'] * 1000
print(df.head())
print(f"Total SPY final value across all simulations: ${sum(df['spy_return'])}")
print(f"Total SSO final value across all simulations: ${sum(df['sso_return'])}")


   Start Date    End Date            SPY Return  Synthetic SSO Return  \
0  1993-02-01  1998-01-27  [2.2025586353944564]              5.455590   
1  1993-03-30  1998-03-25  [2.4394463667820068]              6.735035   
2  1993-05-26  1998-05-21   [2.473356401384083]              6.804820   
3  1993-07-23  1998-07-20  [2.6587245970567626]              7.885394   
4  1993-09-20  1998-09-15   [2.268392370572207]              5.696712   

             spy_return   sso_return  
0  [2202.5586353944564]  5455.589654  
1   [2439.446366782007]  6735.035496  
2  [2473.3564013840833]  6804.819529  
3  [2658.7245970567624]  7885.393933  
4  [2268.3923705722073]  5696.711621  
Total SPY final value across all simulations: $[252739.33801824]
Total SSO final value across all simulations: $452779.0175602362


In [188]:
#SYNTHETIC_SSO TO GET DATA FROM 1993 RECURRENT INVESTMENT EVERY WEEK FOR 5 YEARS
############################################################################################################
# 1. Download SPY data
spy = yf.download("SPY", start="1993-01-01", end="2025-01-31", auto_adjust=False, progress=False)
spy.columns = spy.columns.get_level_values(0)  # flatten in case of multiindex
spy = spy.reset_index()
spy['Date'] = pd.to_datetime(spy['Date'])

# 2. Ensure synthetic_sso exists and is clean
synthetic_sso['Date'] = pd.to_datetime(synthetic_sso['Date'])
synthetic_sso = synthetic_sso[['Date', 'Synthetic_SSO']].dropna()

# 3. Merge SPY and SSO on Date
merged = pd.merge(spy[['Date', 'Close']], synthetic_sso, on='Date', how='inner')
merged = merged.sort_values('Date').reset_index(drop=True)
print(merged.head())

# 4. Find January start dates at least 5 years before end of data
# Get the first trading day in January for each year
jan_starts = (
    merged[merged['Date'].dt.month == 1]
    .groupby(merged['Date'].dt.year)['Date']
    .min()
    .reset_index(drop=True)
)
jan_starts = jan_starts[jan_starts <= merged['Date'].max() - pd.DateOffset(years=5)]
# 5. Simulate monthly $100 investments over 5 years
results = []
for start_date in jan_starts:
    end_date = start_date + pd.DateOffset(years=5)
        # Get trading dates only
    all_trading_dates = merged['Date']
    
    # Build investment dates: first trading day of each month
    monthly_dates = (
        all_trading_dates[(all_trading_dates >= start_date) & (all_trading_dates <= end_date)]
        .groupby(all_trading_dates.dt.to_period("M"))
        .first()
        .values
    )


    invest_subset = merged[merged['Date'].isin(monthly_dates)].copy()
    if len(invest_subset) < 55:
        continue

    spy_units = (100 / invest_subset['Close']).sum()
    sso_units = (100 / invest_subset['Synthetic_SSO']).sum()

    final_date = invest_subset['Date'].iloc[-1]
    final_row = merged[merged['Date'] == final_date]
    if final_row.empty:
        continue

    spy_final = spy_units * final_row['Close'].values[0]
    sso_final = sso_units * final_row['Synthetic_SSO'].values[0]

    results.append({
        'Start Date': start_date,
        'End Date': final_date,
        'SPY Final Value': round(spy_final, 2),
        'SSO Final Value': round(sso_final, 2)
    })
# 6. DF + Display Results
df = pd.DataFrame(results)
print(df.head())
print(f"Total SPY final value across all simulations: ${sum(df['SPY Final Value'])}")
print(f"Total SSO final value across all simulations: ${sum(df['SSO Final Value'])}")

        Date     Close  Synthetic_SSO
0 1993-02-01  44.25000     101.422411
1 1993-02-02  44.34375     101.852216
2 1993-02-03  44.81250     104.005466
3 1993-02-04  45.00000     104.875696
4 1993-02-05  44.96875     104.730088
  Start Date   End Date  SPY Final Value  SSO Final Value
0 1994-01-03 1998-12-01         10804.45         21543.35
1 1995-01-03 2000-01-03         10997.13         21717.43
2 1996-01-02 2001-01-02          7912.99         10519.01
3 1997-01-02 2002-01-02          6188.93          6113.90
4 1998-01-02 2003-01-02          4718.33          3495.19
Total SPY final value across all simulations: $196618.5
Total SSO final value across all simulations: $268828.18


In [189]:
#DIVIDEND + SYNTHETIC_SSO TO GET DATA FROM 1993 RECURRENT INVESTMENT EVERY WEEK FOR 5 YEARS 
############################################################################################################
# 1. Download SPY data
spy = yf.download("SPY", start="1993-01-01", end="2025-01-31", auto_adjust=False, progress=False)
spy.columns = spy.columns.get_level_values(0)  # flatten in case of multiindex
spy = spy.reset_index()
spy['Date'] = pd.to_datetime(spy['Date'])

# 2. Ensure synthetic_sso exists and is clean
synthetic_sso['Date'] = pd.to_datetime(synthetic_sso['Date'])
synthetic_sso = synthetic_sso[['Date', 'Synthetic_SSO']].dropna()

# 3. Merge SPY and SSO on Date
merged = pd.merge(spy[['Date', 'Close', 'Adj Close']], synthetic_sso, on='Date', how='inner')
merged = merged.sort_values('Date').reset_index(drop=True)
print(merged.head())

# 4. Find January start dates at least 5 years before end of data
# Get the first trading day in January for each year
jan_starts = (
    merged[merged['Date'].dt.month == 1]
    .groupby(merged['Date'].dt.year)['Date']
    .min()
    .reset_index(drop=True)
)
jan_starts = jan_starts[jan_starts <= merged['Date'].max() - pd.DateOffset(years=5)]

# 5. Simulate monthly $100 investments over 5 years
results = []
for start_date in jan_starts:
    end_date = start_date + pd.DateOffset(years=5)
        # Get trading dates only
    all_trading_dates = merged['Date']
    
    # Build investment dates: first trading day of each month
    monthly_dates = (
        all_trading_dates[(all_trading_dates >= start_date) & (all_trading_dates <= end_date)]
        .groupby(all_trading_dates.dt.to_period("M"))
        .first()
        .values
    )


    invest_subset = merged[merged['Date'].isin(monthly_dates)].copy()
    if len(invest_subset) < 55:
        continue

    spy_units = (100 / invest_subset['Close']).sum()
    spy_adj_units = (100 / invest_subset['Adj Close']).sum()
    sso_units = (100 / invest_subset['Synthetic_SSO']).sum()

    final_date = invest_subset['Date'].iloc[-1]
    final_row = merged[merged['Date'] == final_date]
    if final_row.empty:
        continue

    spy_final = spy_units * final_row['Close'].values[0]
    spy__adj_final = spy_adj_units * final_row['Adj Close'].values[0]
    sso_final = sso_units * final_row['Synthetic_SSO'].values[0]

    results.append({
        'Start Date': start_date,
        'End Date': final_date,
        'SPY Final Value': round(spy_final, 2),
        'SPY ADJ Final Value': round(spy__adj_final, 2),
        'SSO Final Value': round(sso_final, 2)
    })
# 6. DF + Display Results
df = pd.DataFrame(results)
print(df.head())
print(f"Total SPY (adjusted) final value across all simulations: ${sum(df['SPY ADJ Final Value'])}")
print(f"Total SSO final value across all simulations: ${sum(df['SSO Final Value'])}")

        Date     Close  Adj Close  Synthetic_SSO
0 1993-02-01  44.25000  24.553841     101.422411
1 1993-02-02  44.34375  24.605858     101.852216
2 1993-02-03  44.81250  24.865973     104.005466
3 1993-02-04  45.00000  24.970013     104.875696
4 1993-02-05  44.96875  24.952682     104.730088
  Start Date   End Date  SPY Final Value  SPY ADJ Final Value  SSO Final Value
0 1994-01-03 1998-12-01         10804.45             11358.98         21543.35
1 1995-01-03 2000-01-03         10997.13             11497.02         21717.43
2 1996-01-02 2001-01-02          7912.99              8213.60         10519.01
3 1997-01-02 2002-01-02          6188.93              6399.67          6113.90
4 1998-01-02 2003-01-02          4718.33              4878.69          3495.19
Total SPY (adjusted) final value across all simulations: $206553.53
Total SSO final value across all simulations: $268828.18
