In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [35]:
sp500_data = pd.read_csv('SP500.csv')
sp500_data['DATE'] = pd.to_datetime(sp500_data['DATE'])
sp500_data['SP500'] = pd.to_numeric(sp500_data['SP500'], errors='coerce')
sp500_data = sp500_data.dropna(subset=['SP500'])
sp500_data['Daily_Return'] = sp500_data['SP500'].pct_change()
sp500_data = sp500_data.dropna()

num_simulations = 500000  
forecast_days = (pd.Timestamp("2024-12-31") - sp500_data['DATE'].max()).days  
last_sp500_value = sp500_data['SP500'].iloc[-1]  

mean_daily_return = sp500_data['Daily_Return'].mean()
std_dev_daily_return = sp500_data['Daily_Return'].std()

np.random.seed(42)  
simulations = np.zeros(num_simulations)

for i in range(num_simulations):
    simulated_daily_returns = np.random.normal(mean_daily_return, std_dev_daily_return, forecast_days)
    simulations[i] = last_sp500_value * np.exp(np.sum(simulated_daily_returns))

forecast_buckets = [
    (0, 4500),
    (4500, 4775),
    (4775, 5050),
    (5050, 5325),
    (5325, 5600),
    (5600, 5875),
    (5875, 6150),
    (6150, 6425),
    (6425, 6700),
    (6700, np.inf)
]

bucket_counts = {f"{low} - {high}": np.sum((simulations >= low) & (simulations < high)) for low, high in forecast_buckets}
bucket_percentages = {k: (v / num_simulations) * 100 for k, v in bucket_counts.items()}

print("Monte Carlo Simulation Results for S&P 500 Forecast:")
for bucket, percentage in bucket_percentages.items():
    print(f"{percentage}%")

#plt.hist(simulations, bins=50, color='skyblue', edgecolor='black')
#plt.title("Distribution of Simulated S&P 500 Closing Values on 31-Dec-2024")
#plt.xlabel("S&P 500 Closing Value")
#plt.ylabel("Frequency")
#plt.show()

Monte Carlo Simulation Results for S&P 500 Forecast:
0.09%
0.5452%
2.3334%
6.579599999999999%
12.8312%
18.215400000000002%
19.794%
16.7058%
11.3672%
11.5382%


In [36]:
monte_carlo_percentages = [0,1,2,6,13,18,20,17,11,12]  
crowd_forecast_percentages = [0,0,1,6,12,29,34,15,2,1]
weight_monte_carlo = 0.75
weight_crowd = 0.25
weighted_percentages = [
    weight_monte_carlo * monte_carlo + weight_crowd * crowd
    for monte_carlo, crowd in zip(monte_carlo_percentages, crowd_forecast_percentages)
]
weighted_percentages = [round(value) for value in weighted_percentages]
forecast_buckets = [
    "0 - 4500", "4500 - 4775", "4775 - 5050", "5050 - 5325",
    "5325 - 5600", "5600 - 5875", "5875 - 6150", "6150 - 6425",
    "6425 - 6700", "6700 or more"
]

results_df = pd.DataFrame({
    "Bucket": forecast_buckets,
    "Monte Carlo %": monte_carlo_percentages,
    "Crowd Forecast %": crowd_forecast_percentages,
    "Weighted %": weighted_percentages
})
print("Combined Weighted Forecast:")
print(results_df)

Combined Weighted Forecast:
         Bucket  Monte Carlo %  Crowd Forecast %  Weighted %
0      0 - 4500              0                 0           0
1   4500 - 4775              1                 0           1
2   4775 - 5050              2                 1           2
3   5050 - 5325              6                 6           6
4   5325 - 5600             13                12          13
5   5600 - 5875             18                29          21
6   5875 - 6150             20                34          24
7   6150 - 6425             17                15          16
8   6425 - 6700             11                 2           9
9  6700 or more             12                 1           9
