In [3]:
import pandas as pd
import re

In [4]:
series = pd.read_excel('SOMC Data SET Clear (1).xlsx', sheet_name='Option Series')
series['END_DATE'] = pd.to_datetime(series['END_DATE'], format='%Y%m%d')
series = series[['INSTRUMENT_ID', 'NAME', 'END_DATE']]
series

Unnamed: 0,INSTRUMENT_ID,NAME,END_DATE
0,8301,S50K14C1000,2014-05-30
1,8302,S50K14C825,2014-05-30
2,8303,S50K14C850,2014-05-30
3,8304,S50K14C875,2014-05-30
4,8305,S50K14C900,2014-05-30
...,...,...,...
3179,96302,S50N25P675,2025-07-31
3180,96307,S50U25P875,2025-09-30
3181,96304,S50U25C875,2025-09-30
3182,96305,S50N25C875,2025-07-31


In [5]:
settlement = pd.read_excel('EquityIndexOptionsHistTrading-2021-2024.xlsx')
settlement = settlement[['Series', 'Trade Date', 'Settlement']]
settlement

Unnamed: 0,Series,Trade Date,Settlement
0,S50F21C1000,2021-01-04,4.9
1,S50F21C1025,2021-01-04,2.5
2,S50F21C1050,2021-01-04,1.1
3,S50F21C650,2021-01-04,272.6
4,S50F21C675,2021-01-04,247.6
...,...,...,...
90327,S50M25P875,2024-12-30,19.5
90328,S50M25P900,2024-12-30,30.0
90329,S50M25P925,2024-12-30,45.1
90330,S50M25P950,2024-12-30,62.1


In [6]:
merged_df = pd.merge(series, settlement, left_on='NAME', right_on='Series', how='inner')
merged_df = merged_df[['INSTRUMENT_ID', 'NAME', 'END_DATE', 'Trade Date', 'Settlement']]
merged_df['Trade Date'] = pd.to_datetime(merged_df['Trade Date'], format='%Y-5m-%d')
merged_df

Unnamed: 0,INSTRUMENT_ID,NAME,END_DATE,Trade Date,Settlement
0,57501,S50H21C775,2021-03-31,2021-01-04,149.5
1,57501,S50H21C775,2021-03-31,2021-01-05,177.8
2,57501,S50H21C775,2021-03-31,2021-01-06,165.3
3,57501,S50H21C775,2021-03-31,2021-01-07,178.9
4,57501,S50H21C775,2021-03-31,2021-01-08,200.0
...,...,...,...,...,...
101577,95969,S50M25C900,2025-06-30,2024-12-30,33.6
101578,95977,S50M25C1000,2025-06-30,2024-12-27,5.2
101579,95977,S50M25C1000,2025-06-30,2024-12-30,5.1
101580,95979,S50M25C975,2025-06-30,2024-12-27,10.4


In [7]:
def create_settlement_price_movement(df, series_col='NAME', trade_date_col='Trade Date', settlement_col='Settlement'):
    """
    Creates a DataFrame showing the settlement price movement of options for each END_DATE.

    Args:
        df (pd.DataFrame): Input DataFrame containing option data.
        series_col (str, optional): Name of the column containing option series. Defaults to 'Series'.
        trade_date_col (str, optional): Name of the column containing trade dates. Defaults to 'Trade Date'.
        settlement_col (str, optional): Name of the column containing settlement prices. Defaults to 'Settlement'.

    Returns:
        pd.DataFrame: A DataFrame showing settlement prices for each option series, grouped by END_DATE.
    """

    # Convert 'END_DATE' and 'Trade Date' to datetime objects
    df['END_DATE'] = pd.to_datetime(df['END_DATE'], errors='coerce')  # Handle potential conversion errors
    df[trade_date_col] = pd.to_datetime(df[trade_date_col], errors='coerce')

    # Group by 'END_DATE' and create a nested structure
    result_df = df.groupby('END_DATE').apply(
        lambda x: x.pivot_table(index=series_col, columns=trade_date_col, values=settlement_col, aggfunc='first')
    ).reset_index()

    return result_df

In [8]:
result_df = create_settlement_price_movement(merged_df)
result_df

  result_df = df.groupby('END_DATE').apply(


Trade Date,END_DATE,NAME,2021-01-04 00:00:00,2021-01-05 00:00:00,2021-01-06 00:00:00,2021-01-07 00:00:00,2021-01-08 00:00:00,2021-01-11 00:00:00,2021-01-12 00:00:00,2021-01-13 00:00:00,...,2024-12-17 00:00:00,2024-12-18 00:00:00,2024-12-19 00:00:00,2024-12-20 00:00:00,2024-12-23 00:00:00,2024-12-24 00:00:00,2024-12-25 00:00:00,2024-12-26 00:00:00,2024-12-27 00:00:00,2024-12-30 00:00:00
0,2021-01-29,S50F21C1000,4.9,11.6,7.6,10.5,17.4,13.9,12.9,13.6,...,,,,,,,,,,
1,2021-01-29,S50F21C1025,2.5,6.6,4.0,5.7,10.1,7.4,6.6,6.8,...,,,,,,,,,,
2,2021-01-29,S50F21C1050,1.1,3.5,2.0,2.9,5.5,3.6,3.0,3.0,...,,,,,,,,,,
3,2021-01-29,S50F21C1075,,,,,,1.6,1.3,1.2,...,,,,,,,,,,
4,2021-01-29,S50F21C650,272.6,302.4,289.0,304.0,326.2,323.8,324.5,329.8,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1409,2025-06-30,S50M25P875,,,,,,,,,...,,,,,,,,,21.5,19.5
1410,2025-06-30,S50M25P900,,,,,,,,,...,,,,,,,,,30.7,30.0
1411,2025-06-30,S50M25P925,,,,,,,,,...,,,,,,,,,44.5,45.1
1412,2025-06-30,S50M25P950,,,,,,,,,...,,,,,,,,,61.0,62.1


In [9]:
series = pd.read_excel('SOMC Data SET Clear (1).xlsx', sheet_name='Option Series')
series['END_DATE'] = pd.to_datetime(series['END_DATE'], format='%Y%m%d')
series = series[['INSTRUMENT_ID', 'NAME', 'END_DATE']]
series

Unnamed: 0,INSTRUMENT_ID,NAME,END_DATE
0,8301,S50K14C1000,2014-05-30
1,8302,S50K14C825,2014-05-30
2,8303,S50K14C850,2014-05-30
3,8304,S50K14C875,2014-05-30
4,8305,S50K14C900,2014-05-30
...,...,...,...
3179,96302,S50N25P675,2025-07-31
3180,96307,S50U25P875,2025-09-30
3181,96304,S50U25C875,2025-09-30
3182,96305,S50N25C875,2025-07-31


In [12]:
delta = pd.read_excel('SOMC Data SET Clear (1).xlsx', sheet_name='Risk Array')
delta['CALC_DATE'] = pd.to_datetime(delta['CALC_DATE'], format='%Y%m%d')
delta = delta[['INSTRUMENT_ID', 'CALC_DATE', 'COM_DELTA']]
delta

Unnamed: 0,INSTRUMENT_ID,CALC_DATE,COM_DELTA
0,46153,2020-01-02,0.6849
1,46154,2020-01-02,0.5191
2,46155,2020-01-02,0.3530
3,46156,2020-01-02,0.2136
4,46157,2020-01-02,0.1147
...,...,...,...
116729,95963,2024-12-27,-0.2334
116730,95969,2024-12-27,0.5304
116731,95977,2024-12-27,0.1454
116732,95979,2024-12-27,0.2168


In [13]:
merged_df = pd.merge(series, delta, left_on='INSTRUMENT_ID', right_on='INSTRUMENT_ID', how='inner')
merged_df = merged_df[['INSTRUMENT_ID', 'NAME', 'END_DATE', 'CALC_DATE', 'COM_DELTA']]
merged_df

Unnamed: 0,INSTRUMENT_ID,NAME,END_DATE,CALC_DATE,COM_DELTA
0,46156,S50H20C1125,2020-03-31,2020-01-02,0.2136
1,46156,S50H20C1125,2020-03-31,2020-01-03,0.1966
2,46156,S50H20C1125,2020-03-31,2020-01-06,0.1529
3,46156,S50H20C1125,2020-03-31,2020-01-07,0.2073
4,46156,S50H20C1125,2020-03-31,2020-01-08,0.1623
...,...,...,...,...,...
127951,95944,S50M25P1000,2025-06-30,2024-12-27,-0.8397
127952,95963,S50M25P850,2025-06-30,2024-12-27,-0.2334
127953,95969,S50M25C900,2025-06-30,2024-12-27,0.5304
127954,95977,S50M25C1000,2025-06-30,2024-12-27,0.1454


In [None]:
def create_delta_movement(df, series_col='NAME', calc_date_col='CALC_DATE', delta_col='COM_DELTA'):
    """
    Pivots a DataFrame to show changes in a specified delta column over a calculation date,
    for each series, grouped by an end date.

    Args:
        df (pd.DataFrame): The input DataFrame.
                           Expected columns: 'END_DATE', the specified series_col,
                           calc_date_col, and delta_col.
        series_col (str): The name of the column to use as the index of the pivot table
                          (e.g., 'NAME' for instrument name).
        calc_date_col (str): The name of the column to use for the columns of the pivot table
                             (e.g., 'CALC_DATE').
        delta_col (str): The name of the column whose values will populate the pivot table
                         (e.g., 'COM_DELTA').

    Returns:
        pd.DataFrame: A DataFrame pivoted to show delta movements.
                      The index will be 'END_DATE' and the column specified by series_col.
                      Columns will be dates from calc_date_col.
                      Values will be from delta_col.
                      If there are multiple entries for the same series_col and calc_date_col
                      within an END_DATE group, the first one encountered will be used.
    """
    # Ensure DataFrame is not modified in place by creating a copy
    df_copy = df.copy()

    # Convert 'END_DATE' and the calculation date column to datetime objects
    # errors='coerce' will turn unparseable dates into NaT (Not a Time)
    df_copy['END_DATE'] = pd.to_datetime(df_copy['END_DATE'], errors='coerce')
    df_copy[calc_date_col] = pd.to_datetime(df_copy[calc_date_col], errors='coerce')


    # Group by 'END_DATE' and then apply the pivot_table operation
    # The lambda function x refers to each group (DataFrame subset for each 'END_DATE')
    result_df = df_copy.groupby('END_DATE').apply(
        lambda x: x.pivot_table(
            index=series_col,         # Rows of the pivot table
            columns=calc_date_col,    # Columns of the pivot table
            values=delta_col,         # Values to fill the table
            aggfunc='first'           # How to aggregate if multiple values exist for the same index/column pair
                                      # 'first' takes the first encountered value.
                                      # Consider other options like 'mean', 'sum', or a custom function
                                      # if 'first' isn't appropriate for your data.
        )
    )
    result_df = result_df.reset_index()

    return result_df



In [21]:
delta_df = create_delta_movement(merged_df)
delta_df.reset_index(inplace=True)
delta_df

  result_df = df_copy.groupby('END_DATE').apply(


CALC_DATE,index,END_DATE,NAME,2020-01-02 00:00:00,2020-01-03 00:00:00,2020-01-06 00:00:00,2020-01-07 00:00:00,2020-01-08 00:00:00,2020-01-09 00:00:00,2020-01-10 00:00:00,...,2024-12-16 00:00:00,2024-12-17 00:00:00,2024-12-18 00:00:00,2024-12-19 00:00:00,2024-12-20 00:00:00,2024-12-23 00:00:00,2024-12-24 00:00:00,2024-12-25 00:00:00,2024-12-26 00:00:00,2024-12-27 00:00:00
0,0,2020-01-31,S50F20C1000,0.9914,0.9927,0.9730,0.9869,0.9444,0.9736,0.9794,...,,,,,,,,,,
1,1,2020-01-31,S50F20C1025,0.9519,0.9551,0.8767,0.9306,0.8110,0.8933,0.9053,...,,,,,,,,,,
2,2,2020-01-31,S50F20C1050,0.8165,0.8156,0.6529,0.7655,0.5722,0.7113,0.7209,...,,,,,,,,,,
3,3,2020-01-31,S50F20C1075,0.5596,0.5439,0.3607,0.4924,0.3076,0.4537,0.4495,...,,,,,,,,,,
4,4,2020-01-31,S50F20C1100,0.2785,0.2539,0.1377,0.2276,0.1205,0.2190,0.2050,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1803,1803,2025-06-30,S50M25P875,,,,,,,,...,,,,,,,,,,-0.3388
1804,1804,2025-06-30,S50M25P900,,,,,,,,...,,,,,,,,,,-0.4548
1805,1805,2025-06-30,S50M25P925,,,,,,,,...,,,,,,,,,,-0.5710
1806,1806,2025-06-30,S50M25P950,,,,,,,,...,,,,,,,,,,-0.6778


In [24]:
with pd.ExcelWriter('SET_DELTA_Groupby.xlsx', engine='openpyxl') as writer:
    result_df.to_excel(writer, sheet_name='Settlement Price Movement', index=False)
    delta_df.to_excel(writer, sheet_name='Delta Movement', index=False)