In [None]:
## GBM_CLEANED

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from math import exp
from scipy.stats import ks_2samp
import warnings
warnings.filterwarnings('ignore')
def generate_single_gbm_path(mu_price, sigma, X0, N, dt, seed=300):
    """Simulate one geometric Brownian motion (GBM) path.

    The path follows the exact log-normal update
    ``X[t] = X[t-1] * exp((mu_price - sigma**2 / 2) * dt + sigma * dW_t)``
    with independent Brownian increments ``dW_t ~ N(0, dt)``.

    Parameters
    ----------
    mu_price : float
        Drift of the price process per unit of time.
    sigma : float
        Volatility per square-root unit of time.
    X0 : float
        Starting value, stored as ``X[0]``.
    N : int
        Number of steps to simulate; the result has ``N + 1`` entries.
    dt : float
        Length of one time step.
    seed : int, optional
        Seed passed to ``np.random.seed``. Note that this reseeds NumPy's
        global legacy generator on every call.

    Returns
    -------
    numpy.ndarray
        Array of length ``N + 1`` holding the simulated path.
    """
    np.random.seed(seed)
    # The increment already carries the sqrt(dt) scaling (std = sqrt(dt)), so
    # it must NOT be multiplied by sqrt(dt) a second time in the exponent.
    # Doing both scales the diffusion by dt instead of sqrt(dt), which is only
    # harmless when dt == 1.
    dW = np.random.normal(0, np.sqrt(dt), size=N)
    log_increments = (mu_price - 0.5 * sigma**2) * dt + sigma * dW

    X = np.empty(N + 1)
    X[0] = X0
    # Cumulative sum of log-increments == running product of growth factors.
    X[1:] = X0 * np.exp(np.cumsum(log_increments))
    return X
# --- Experiment configuration ------------------------------------------------
seed = 300                          # base RNG seed; scenario i is run with seed + i
dt = 1                              # simulation time step (one step per data row)
num_scenarios = 10000               # Monte Carlo paths per calibration window
prediction_horizons = [100, 50, 30]  # forecast lengths, in steps
fixed_end_day = 1480                # 1-based row used as "Day 0" of every forecast
window_range = range(100, 310, 10)  # candidate calibration window lengths
all_horizon_summaries = {}          # horizon -> summary metrics, filled in later

# --- Historical price data ---------------------------------------------------
# The CSV must provide at least a 'Date' column; 'Date' is converted to
# datetime right after loading.
filename = 'Historical_Prices_SBU_Dec_2024.csv'
df = pd.read_csv(filename).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)
def _mean_and_interval(values):
    """Return (mean, 2.5th percentile, 97.5th percentile) of *values*."""
    return (np.mean(values), np.percentile(values, 2.5), np.percentile(values, 97.5))


# For every forecast horizon:
#   1. grid-search the calibration window length by mean Monte Carlo RMSE,
#   2. re-simulate with the winning window and collect error metrics,
#   3. plot the simulated percentile bands against the actual prices,
#   4. store a summary in all_horizon_summaries[horizon].
# NOTE(review): the window is chosen by RMSE against the realised future
# prices, so the selection is made in hindsight -- confirm this is intended.
for horizon in prediction_horizons:
    print(f"\n" + "="*100)
    print(f"FULL DATA REPORT FOR {horizon}-DAY HORIZON")
    print("="*100)

    # Index fixed_end_day - 1 is "Day 0"; the next `horizon` rows are the
    # realised prices the simulations are scored against.
    actual_prices = df['Close'].values[fixed_end_day - 1 : fixed_end_day + horizon]
    prediction_dates = df['Date'].values[fixed_end_day - 1 : fixed_end_day + horizon]
    if len(actual_prices) != horizon + 1:
        raise ValueError(
            f"Not enough data for a {horizon}-step horizon starting at row "
            f"{fixed_end_day}: need {horizon + 1} prices, found {len(actual_prices)}."
        )
    initial_date = pd.to_datetime(prediction_dates[0])
    print(f"INITIAL DATE (Day 0): {initial_date.strftime('%Y-%m-%d')}")

    # ---- 1. Grid search over calibration window lengths ----
    grid_data = []
    for w in window_range:
        start_calib = (fixed_end_day - 1) - w
        if start_calib < 0:
            # A negative start would silently wrap around to the end of the
            # array instead of failing, producing a meaningless calibration.
            raise ValueError(
                f"Calibration window of {w} rows starts before the first data row."
            )
        # The w prices immediately preceding Day 0 (Day 0 itself is excluded).
        calib_slice = df['Close'].values[start_calib : fixed_end_day - 1]
        l_ret = np.log(calib_slice[1:] / calib_slice[:-1])
        s_w = np.std(l_ret, ddof=1)
        # GBM drift recovered from the mean log-return: mu = E[log-ret] + sigma^2 / 2.
        m_p = np.mean(l_ret) + 0.5 * (s_w**2)

        rmse_list = []
        for i in range(num_scenarios):
            p = generate_single_gbm_path(m_p, s_w, actual_prices[0], horizon, dt, seed=(seed + i))
            rmse_list.append(np.sqrt(np.mean((p[1:] - actual_prices[1:])**2)))
        # Record EVERY window. This append must live inside the window loop,
        # otherwise only the last window is ever a candidate.
        grid_data.append({'Window': w, 'RMSE': np.mean(rmse_list), 'mu': m_p, 'sig': s_w})

    grid_frame = pd.DataFrame(grid_data)
    best = grid_frame.loc[grid_frame['RMSE'].idxmin()]
    opt_w = int(best['Window'])

    # ---- 2. Final simulation with the selected window's parameters ----
    all_paths = []
    f_res = {'rmse': [], 'mae': [], 'mape': [], 'ks': []}
    # The 1e-9 offsets guard the log / division against zero prices.
    actual_log_returns = np.diff(np.log(actual_prices + 1e-9))
    for i in range(num_scenarios):
        path = generate_single_gbm_path(best['mu'], best['sig'], actual_prices[0], horizon, dt, seed=(seed + i))
        all_paths.append(path)
        err = path[1:] - actual_prices[1:]
        f_res['rmse'].append(np.sqrt(np.mean(err**2)))
        f_res['mae'].append(np.mean(np.abs(err)))
        f_res['mape'].append(np.mean(np.abs(err / (actual_prices[1:] + 1e-9))) * 100)
        # Two-sample KS test: simulated vs. realised log-return distribution.
        _, p_val = ks_2samp(actual_log_returns, np.diff(np.log(path + 1e-9)))
        f_res['ks'].append(p_val)
    all_paths = np.array(all_paths)
    # The single scenario that ended up closest (lowest RMSE) to the actuals.
    best_path_vals = all_paths[np.argmin(f_res['rmse'])]

    print(f"OPTIMAL WINDOW FOUND: {opt_w} Days")
    print(f"\n[ACTUAL PRICES (Day 0 to {horizon})]:")
    print(np.round(actual_prices, 2).tolist())
    print(f"\n[OPTIMAL PREDICTED PATH (Day 0 to {horizon})]:")
    print(np.round(best_path_vals, 2).tolist())

    # ---- 3. Fan chart ----
    plt.figure(figsize=(12, 6))
    # A central 90% band runs from the 5th to the 95th percentile
    # (10th-90th would only cover 80%); 25th-75th is the central 50% band.
    p05, p25, p50, p75, p95 = [np.percentile(all_paths, q, axis=0) for q in [5, 25, 50, 75, 95]]
    plt.fill_between(prediction_dates, p05, p95, color = 'olive', alpha = 0.4, label = '90% CI')
    plt.fill_between(prediction_dates, p25, p75, color = 'olive', alpha = 0.9, label = '50% CI')
    plt.plot(prediction_dates, p50, color='navy', ls='--', label='Median Expectation')
    plt.plot(prediction_dates, actual_prices, color='red', lw=2.5, label='Actual Prices')
    plt.xlabel('Date', fontsize=14)
    plt.ylabel('Closing Prices (Ug Shs)', fontsize=14)
    ax = plt.gca()
    ax.tick_params(axis='x', labelsize=15)
    ax.tick_params(axis='y', labelsize=15)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%Y'))
    plt.gcf().autofmt_xdate()
    plt.legend(loc='upper left')
    plt.grid(True, alpha=0.4)
    plt.show()

    # ---- 4. Summary for the final table ----
    all_horizon_summaries[horizon] = {
        'Opt_Window': opt_w,
        'RMSE': _mean_and_interval(f_res['rmse']),
        'MAE': _mean_and_interval(f_res['mae']),
        'MAPE': _mean_and_interval(f_res['mape']),
        # Share of scenarios (in %) whose KS p-value exceeds 0.05.
        'KS_Pass': np.mean(np.array(f_res['ks']) > 0.05) * 100,
        'mu': best['mu'], 'sig': best['sig']
    }
# Final summary table: one row per horizon, read from all_horizon_summaries.
# The header uses the same column widths as the data rows so the '|'
# separators line up, and the header is followed directly by its rule.
rule = "=" * 165
print("\n" + rule)
print(f"{'Horizon':<8}|{'Win':<4}|{'mu':<18}|{'sigma':<18}|{'RMSE [95% CI]':<25}|{'MAE [95% CI]':<25}|{'MAPE [95% CI]':<25}|{'KS Pass'}")
print(rule)
for h in prediction_horizons:
    res = all_horizon_summaries[h]
    mu = f"{res['mu']:.4f}"
    sigma = f"{res['sig']:.4f}"
    # Each metric is stored as (mean, 2.5th percentile, 97.5th percentile).
    r_s = f"{res['RMSE'][0]:.2f}[{res['RMSE'][1]:.1f}-{res['RMSE'][2]:.1f}]"
    m_s = f"{res['MAE'][0]:.2f}[{res['MAE'][1]:.1f}-{res['MAE'][2]:.1f}]"
    p_s = f"{res['MAPE'][0]:.2f}[{res['MAPE'][1]:.1f}-{res['MAPE'][2]:.1f}]"
    print(f"{h:<8}|{res['Opt_Window']:<4}|{mu:<18}|{sigma:<18}|{r_s:<25}|{m_s:<25}|{p_s:<25}|{res['KS_Pass']:.1f}%")
print(rule)