In [16]:
import pandas as pd
import pandas_market_calendars as mcal
import re
import glob
import os

folder_path = '/Users/fulinq/Documents/SET/MarginModel/SOMC_History'
os.chdir(folder_path)

In [8]:
file_pattern = '*_merged_SOMC_data.xlsx'
date_column_name = 'Effective Date' # Your specific date column
output_filename = 'combined_merged_SOMC_data.xlsx' # <<< ADJUSTED for Excel output

# Sheet name to read from each Excel file
# IMPORTANT: This should be a string for a single sheet by name
sheet_to_read = 'SOMC Data' # <<< CORRECTED from {'Margin Data'}

# --- Processing Steps ---
list_of_dataframes = [] # Initialize here, before the try block

# Construct the full file search pattern
full_pattern = os.path.join(folder_path, file_pattern)

# Find all files matching the pattern
excel_files = glob.glob(full_pattern)

if not excel_files:
    print(f"Warning: No files found matching the pattern '{file_pattern}'")
else:
    # Read each Excel file and append its DataFrame to the list
    for file in excel_files:
        if os.path.basename(file).startswith('~'): # Skip temporary files
            continue
        
        try:
            df_single = pd.read_excel(file, sheet_name=sheet_to_read)
            list_of_dataframes.append(df_single)
            print(f"  Successfully read: {os.path.basename(file)}")
        except ValueError as e:
            # Handle error if the sheet name is not found in a file
            print(f"  Warning: Could not read sheet '{sheet_to_read}' from file '{os.path.basename(file)}'. Skipping. Error: {e}")

# Check if any data was actually loaded before proceeding
if not list_of_dataframes:
    print("\nError: No data was loaded. Creating an empty DataFrame.")
    combined_df = pd.DataFrame()
else:
    # Combine all DataFrames into one
    print("\nCombining all DataFrames...")
    combined_df = pd.concat(list_of_dataframes, ignore_index=True)
    print(f"Combined DataFrame has {len(combined_df)} rows.")

    if date_column_name in combined_df.columns:
        combined_df[date_column_name] = pd.to_datetime(combined_df[date_column_name], errors='coerce')
        combined_df.dropna(subset=[date_column_name], inplace=True)
        combined_df.sort_values(by=date_column_name, inplace=True)
    else:
        print(f"Error: Date column '{date_column_name}' not found in the combined data.")

combined_df.tail(5)

  Successfully read: 2019_merged_SOMC_data.xlsx
  Successfully read: 2023_merged_SOMC_data.xlsx
  Successfully read: 2021_merged_SOMC_data.xlsx
  Successfully read: 2025_merged_SOMC_data.xlsx
  Successfully read: 2022_merged_SOMC_data.xlsx
  Successfully read: 2020_merged_SOMC_data.xlsx
  Successfully read: 2024_merged_SOMC_data.xlsx

Combining all DataFrames...
Combined DataFrame has 189 rows.


Unnamed: 0,Product,Market,Type,SOMC,Effective Date
94,SET50 Options,Index Market,Option,100,2025-04-22
91,SET50 Options,Index Market,Option,100,2025-04-28
84,SET50 Options,Index Market,Option,100,2025-04-30
90,SET50 Options,Index Market,Option,100,2025-05-08
88,SET50 Options,Index Market,Option,120,2025-05-16


In [12]:
df_p = combined_df.copy()
df_p.head()

Unnamed: 0,Product,Market,Type,SOMC,Effective Date
12,SET50,Index Market,Option,120,2019-04-11
14,SET50,Index Market,Option,120,2019-04-25
13,SET50,Index Market,Option,120,2019-04-26
1,SET50,Index Market,Option,120,2019-05-03
10,SET50,Index Market,Option,340,2019-05-17


In [13]:
df_p = df_p[['Product', 'Effective Date', 'SOMC']]
df_p

Unnamed: 0,Product,Effective Date,SOMC
12,SET50,2019-04-11,120
14,SET50,2019-04-25,120
13,SET50,2019-04-26,120
1,SET50,2019-05-03,120
10,SET50,2019-05-17,340
...,...,...,...
94,SET50 Options,2025-04-22,100
91,SET50 Options,2025-04-28,100
84,SET50 Options,2025-04-30,100
90,SET50 Options,2025-05-08,100


In [28]:
df_p['Effective Date'] = pd.to_datetime(df_p['Effective Date'], errors='coerce')
df_p['SOMC'] = pd.to_numeric(df_p['SOMC'], errors='coerce')
df_p.head()

Unnamed: 0,Product,Effective Date,SOMC
12,SET50,2019-04-11,120
14,SET50,2019-04-25,120
13,SET50,2019-04-26,120
1,SET50,2019-05-03,120
10,SET50,2019-05-17,340


In [None]:
bkk = mcal.get_calendar('XBKK')
date_mcal = bkk.schedule(start_date=df_p['Effective Date'].min(), end_date=df_p['Effective Date'].max())
date_mcal = mcal.date_range(date_mcal, frequency='1D')
date_mcal

DatetimeIndex(['2019-04-11 09:30:00+00:00', '2019-04-12 09:30:00+00:00',
               '2019-04-17 09:30:00+00:00', '2019-04-18 09:30:00+00:00',
               '2019-04-19 09:30:00+00:00', '2019-04-22 09:30:00+00:00',
               '2019-04-23 09:30:00+00:00', '2019-04-24 09:30:00+00:00',
               '2019-04-25 09:30:00+00:00', '2019-04-26 09:30:00+00:00',
               ...
               '2025-04-30 09:30:00+00:00', '2025-05-02 09:30:00+00:00',
               '2025-05-06 09:30:00+00:00', '2025-05-07 09:30:00+00:00',
               '2025-05-08 09:30:00+00:00', '2025-05-09 09:30:00+00:00',
               '2025-05-13 09:30:00+00:00', '2025-05-14 09:30:00+00:00',
               '2025-05-15 09:30:00+00:00', '2025-05-16 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', length=1475, freq=None)

In [41]:
df = pd.DataFrame(date_mcal, columns=['Date'])
df['Date'] = df['Date'].dt.tz_localize(None).dt.normalize()
df = df.merge(df_p, left_on='Date', right_on='Effective Date', how='left')
df.fillna(method='ffill', inplace=True)
df = df[['Date', 'SOMC']]
df = df.set_index('Date')
df

  df.fillna(method='ffill', inplace=True)


Unnamed: 0_level_0,SOMC
Date,Unnamed: 1_level_1
2019-04-11,120.0
2019-04-12,120.0
2019-04-17,120.0
2019-04-18,120.0
2019-04-19,120.0
...,...
2025-05-09,100.0
2025-05-13,100.0
2025-05-14,100.0
2025-05-15,100.0


In [44]:
with pd.ExcelWriter(output_filename) as writer:
    df.to_excel(writer, sheet_name='SOMC Data', index=True)