In [31]:
# --- AGGRESSIVE DEBUGGING FOR PDH/PDL ---
# Step 1 of the walkthrough: load both inputs and show what the daily frame looks like.

print("--- Step 1: Loading Raw Data ---")

# Daily bars. The CSV carries a two-row header, so it is read as a two-level
# column index and then flattened to the second level, lower-cased.
daily_data_path = Path("../data/raw/SPY_1d.csv")
df_daily_debug = pd.read_csv(daily_data_path, header=[0, 1], index_col=0)
# A plain list is assigned on purpose: it yields simple, unnamed column labels.
df_daily_debug.columns = [
    label.lower() for label in df_daily_debug.columns.get_level_values(1)
]
# Parse the index as UTC-aware timestamps and label it 'Datetime' in one step.
df_daily_debug.index = pd.to_datetime(df_daily_debug.index, utc=True).rename('Datetime')

# Intraday feature table (presumably 15-minute bars, going by the file name);
# its 'Datetime' column becomes the parsed index.
df_15m_debug = pd.read_csv(
    "../data/processed/master_m15_features.csv",
    parse_dates=True,
    index_col='Datetime',
)

# Quick visual sanity check of the daily frame.
print("Daily DF head:", df_daily_debug.head(), sep="\n")
print("\nDaily DF Info:")
df_daily_debug.info()


--- Step 1: Loading Raw Data ---
Daily DF head:
                                 open        high         low       close  \
Datetime                                                                    
2023-10-23 00:00:00+00:00  409.038626  413.756717  407.274229  409.867218   
2023-10-24 00:00:00+00:00  412.002015  414.117359  410.140131  412.957336   
2023-10-25 00:00:00+00:00  411.261174  411.290417  406.513841  407.030487   
2023-10-26 00:00:00+00:00  405.958264  406.816069  401.230446  402.156494   
2023-10-27 00:00:00+00:00  403.755169  404.154843  398.900621  400.333588   

                              volume  
Datetime                              
2023-10-23 00:00:00+00:00   92035100  
2023-10-24 00:00:00+00:00   78564200  
2023-10-25 00:00:00+00:00   94223200  
2023-10-26 00:00:00+00:00  115156800  
2023-10-27 00:00:00+00:00  107367700  

Daily DF Info:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 501 entries, 2023-10-23 00:00:00+00:00 to 2025-10-21 00:00:00+00:00
Da

In [32]:
print("\n--- Step 2: Creating the Lookup Maps ---")

# Previous-day high / low: shift(1) moves every value down one row, so each
# daily row holds the prior row's high/low and the first row becomes NaN.
prior_day_bars = df_daily_debug[['high', 'low']].shift(1)
df_daily_debug['PDH'] = prior_day_bars['high']
df_daily_debug['PDL'] = prior_day_bars['low']

# Lookup tables keyed by the calendar date of each daily index entry
# (datetime.date objects), holding the raw shifted values.
daily_dates = df_daily_debug.index.date
pdh_map_debug = pd.Series(df_daily_debug['PDH'].values, index=daily_dates)
pdl_map_debug = pd.Series(df_daily_debug['PDL'].values, index=daily_dates)

print("\nPDH Map head:", pdh_map_debug.head(), sep="\n")

# Spot-check one specific date to confirm the map can be queried by date key.
known_date = pd.Timestamp("2025-10-15").date()
if known_date in pdh_map_debug.index:
    lookup_report = f"\nValue for {known_date} in PDH map: {pdh_map_debug[known_date]}"
else:
    lookup_report = f"\nERROR: Date {known_date} not found in PDH map index!"
print(lookup_report)



--- Step 2: Creating the Lookup Maps ---

PDH Map head:
2023-10-23           NaN
2023-10-24    413.756717
2023-10-25    414.117359
2023-10-26    411.290417
2023-10-27    406.816069
dtype: float64

Value for 2025-10-15 in PDH map: 665.8300170898438


In [33]:
print("\n--- Step 3: Applying the Map ---")

# Work on a copy so the loaded intraday frame stays untouched.
df_final_debug = df_15m_debug.copy()

# Each intraday row gets the calendar date of its timestamp as a join key.
df_final_debug['date_key'] = df_final_debug.index.date

# Translate the date key into that day's previous-day high / low via the maps;
# dates missing from a map come back as NaN.
date_keys = df_final_debug['date_key']
df_final_debug['test_pdh'] = date_keys.map(pdh_map_debug)
df_final_debug['test_pdl'] = date_keys.map(pdl_map_debug)

# Isolate a single day's rows to inspect the mapped values.
target_day = pd.Timestamp("2025-10-15").date()
on_target_day = df_final_debug.index.date == target_day
validation_slice_debug = df_final_debug.loc[on_target_day]

print("\nSlice of the final dataframe for 2025-10-15:")
print(validation_slice_debug[['test_pdh', 'test_pdl']].head())

# A fully mapped day should report zero nulls.
missing_pdh_count = validation_slice_debug['test_pdh'].isna().sum()
slice_row_count = validation_slice_debug.shape[0]
print("\nNumber of null values in test_pdh:", missing_pdh_count)
print("Total rows in slice:", slice_row_count)



--- Step 3: Applying the Map ---

Slice of the final dataframe for 2025-10-15:
                             test_pdh    test_pdl
Datetime                                         
2025-10-15 13:30:00+00:00  665.830017  653.169983
2025-10-15 13:45:00+00:00  665.830017  653.169983
2025-10-15 14:00:00+00:00  665.830017  653.169983
2025-10-15 14:15:00+00:00  665.830017  653.169983
2025-10-15 14:30:00+00:00  665.830017  653.169983

Number of null values in test_pdh: 0
Total rows in slice: 26
