In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def gbm_simulation_with_fixed_columns(file_path, date_column_name, price_column_name, num_scenarios=100):
    #Read File
    try:
        data_df = pd.read_excel(file_path)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return None
    except Exception as e:
        print(f"Error reading CSV file: {e}")
        return None

    if data_df.empty:
        print("Error: CSV file is empty.")
        return None

    original_row_count = len(data_df)
    print(f"Successfully loaded '{file_path}' with {original_row_count} rows.")

    # Date Column Processing  
    print(f"Using specified date column: '{date_column_name}'")

    # Robust date conversion
    temp_date_col = data_df[date_column_name]
    converted_dates = pd.to_datetime(temp_date_col, errors='coerce', infer_datetime_format=True)
    if converted_dates.isna().sum() > len(temp_date_col.dropna()) * 0.5: # If more than 50% are NaT
            converted_dates_dayfirst = pd.to_datetime(temp_date_col, errors='coerce', dayfirst=True, infer_datetime_format=False)
            if converted_dates_dayfirst.isna().sum() < converted_dates.isna().sum():
                converted_dates = converted_dates_dayfirst
                print(f"Used dayfirst=True for date conversion on column '{date_column_name}'.")
    
    data_df[date_column_name] = converted_dates
    rows_before_dropna = len(data_df)
    data_df.dropna(subset=[date_column_name], inplace=True) # Remove rows where date conversion failed
    rows_after_dropna = len(data_df)
    if rows_before_dropna > rows_after_dropna:
        print(f"Removed {rows_before_dropna - rows_after_dropna} rows due to failed date conversion in '{date_column_name}'.")

    if data_df.empty:
        print(f"Error: No valid dates found in column '{date_column_name}' after conversion.")
        return None
        
    data_df.sort_values(by=date_column_name, ascending=True, inplace=True)
    print(f"Data sorted by date column: '{date_column_name}' in ascending order.")
    print(f"Date range after sorting: {data_df[date_column_name].min()} to {data_df[date_column_name].max()}")

    # --- Price Column Processing ---
    print(f"Using specified price column: '{price_column_name}'")
    price_series_str = data_df[price_column_name].astype(str)
    price_series_cleaned = price_series_str.str.replace(',', '', regex=False) # Remove commas
    data_df['cleaned_price'] = pd.to_numeric(price_series_cleaned, errors='coerce')
    
    rows_before_price_dropna = len(data_df)
    data_df.dropna(subset=['cleaned_price'], inplace=True) # Remove rows where price became NaN
    rows_after_price_dropna = len(data_df)
    if rows_before_price_dropna > rows_after_price_dropna:
        print(f"Removed {rows_before_price_dropna - rows_after_price_dropna} rows due to failed price conversion/cleaning in '{price_column_name}'.")

    if data_df.empty or len(data_df['cleaned_price']) < 2:
        print(f"Error: Not enough valid numeric data in price column '{price_column_name}' after cleaning (need at least 2 data points). Found: {len(data_df['cleaned_price'])}")
        return None

    prices = data_df['cleaned_price']
    
    # --- Log Returns Calculation ---
    log_returns = np.log(prices.iloc[1:].values / prices.iloc[:-1].values)
    log_returns = pd.Series(log_returns).dropna()

    if len(log_returns) < 1:
        print(f"Error: Not enough log returns to calculate mu and sigma (found {len(log_returns)}).")
        return None

    # --- GBM Parameters ---
    mu = log_returns.mean()
    sigma = log_returns.std()

    if pd.isna(mu) or pd.isna(sigma):
        print(f"Error: Calculated mu ({mu}) or sigma ({sigma}) is NaN.")
        return None
    
    if sigma == 0:
        print("Warning: Calculated daily volatility (σ) is 0. Predictions will be deterministic.")

    s_t = prices.iloc[-1] # Last price from sorted and cleaned data
    dt = 1 
    
    # --- Simulation ---
    num_projection_days = 30 # Number of days to project forward

    all_scenario_paths = [] # This will store lists of prices, one list per scenario path

    for scenario_index in range(num_scenarios):
        path_for_this_scenario = []
        current_price_in_path = s_t # Start each scenario's path from the last actual historical price

        for day_step in range(num_projection_days):
            # Generate a new random shock for each day in the path
            z = np.random.normal(0, 1) 
            
            # GBM formula: S_next = S_current * exp( (mu - 0.5*sigma^2)*dt + sigma*sqrt(dt)*Z )
            drift = (mu - 0.5 * sigma**2) * dt
            shock = sigma * np.sqrt(dt) * z
            next_price_in_path = current_price_in_path * np.exp(drift + shock)
            
            path_for_this_scenario.append(next_price_in_path)
            # Update the current price for the next step in this scenario's path
            current_price_in_path = next_price_in_path 
        
        all_scenario_paths.append(path_for_this_scenario)

    # Construct the DataFrame for the results
    # Create column names for each projected day, e.g., 'Day_1_pred_price', 'Day_2_pred_price', ...
    day_columns = [f'Day_{d+1}_pred_price' for d in range(num_projection_days)]
    
    # Create the DataFrame from the collected paths
    results_df = pd.DataFrame(all_scenario_paths, columns=day_columns)
    
    # Add 'Scenario No.' column at the beginning of the DataFrame
    results_df.insert(0, 'Scenario No.', range(1, num_scenarios + 1))

# --- How to use the function ---
file_path_for_simulation = "SET 50 Index Spot.xlsx" # Ensure this file exists

date_col = 'Date'
price_col = 'SET50'

simulation_results_data = gbm_simulation_with_fixed_columns(
    file_path_for_simulation,
    date_column_name=date_col,
    price_column_name=price_col,
    num_scenarios=100 # You can change the number of scenarios here
)

if simulation_results_data is not None:
    print("\n--- Simulated Next Day Spot Prices (First 10 scenarios) ---")
    print(simulation_results_data.head(10).to_string()) 
    
    print(f"\n--- Summary Statistics for {len(simulation_results_data)} Scenarios ---")
    print(simulation_results_data['pred_spot'].describe().to_string())

else:
    print("\nSimulation could not be completed. Please review the error messages above for details.")
print("\nGBM Simulation Script Finished.")
    
    # Save to Excel
# with pd.ExcelWriter('SET50_scenerio.xlsx', engine='openpyxl', mode='w') as writer:
#     simulation_results_data.to_excel(writer, sheet_name='Combined', index=False)  

Successfully loaded 'SET 50 Index Spot.xlsx' with 980 rows.
Using specified date column: 'Date'
Data sorted by date column: 'Date' in ascending order.
Date range after sorting: 2021-01-04 00:00:04 to 2025-01-16 00:00:04
Using specified price column: 'SET50'

Simulation could not be completed. Please review the error messages above for details.

GBM Simulation Script Finished.


  converted_dates = pd.to_datetime(temp_date_col, errors='coerce', infer_datetime_format=True)


IndexError: At least one sheet must be visible