# Forward-fill and interpolate the price columns


In [1]:
import pandas as pd
import numpy as np

# Read the processed GDEA and HBEA datasets from their parquet files
df_gdea, df_hbea = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        "../../../02_Data_Processed/01_Carbon_Markets/01_Regional/GDEA_processed.parquet",
        "../../../02_Data_Processed/01_Carbon_Markets/01_Regional/HBEA_processed.parquet",
    )
)

## Forward fill price columns

In [2]:
# Forward fill

# Carry the last observed value forward in the 'close' and 'vwap' columns
# of each dataset; all other columns are left untouched.
df_gdea[['close', 'vwap']] = df_gdea[['close', 'vwap']].ffill()
df_hbea[['close', 'vwap']] = df_hbea[['close', 'vwap']].ffill()

# Write the forward-filled datasets back to disk as parquet
df_gdea.to_parquet(
    "../../../02_Data_Processed/01_Carbon_Markets/01_Regional/GDEA_forward_filled.parquet"
)
df_hbea.to_parquet(
    "../../../02_Data_Processed/01_Carbon_Markets/01_Regional/HBEA_forward_filled.parquet"
)

## Interpolate price columns

In [3]:
# Start again from the processed (not forward-filled) parquet files
df_gdea, df_hbea = [
    pd.read_parquet(source_file)
    for source_file in (
        "../../../02_Data_Processed/01_Carbon_Markets/01_Regional/GDEA_processed.parquet",
        "../../../02_Data_Processed/01_Carbon_Markets/01_Regional/HBEA_processed.parquet",
    )
]

In [None]:
# Interpolate the price columns before saving. Without this step the
# "*_interpolated" files would be plain copies of the processed input.
# Linear interpolation fills each missing value on a straight line between
# its neighbouring observations, treating rows as equally spaced.
# NOTE(review): this assumes the rows are evenly spaced in time (e.g. one row
# per calendar day) — confirm against the processed data.
df_gdea[['close', 'vwap']] = df_gdea[['close', 'vwap']].interpolate(method='linear')
df_hbea[['close', 'vwap']] = df_hbea[['close', 'vwap']].interpolate(method='linear')

# Save the interpolated data
df_gdea.to_parquet("../../../02_Data_Processed/01_Carbon_Markets/01_Regional/GDEA_interpolated.parquet")
df_hbea.to_parquet("../../../02_Data_Processed/01_Carbon_Markets/01_Regional/HBEA_interpolated.parquet")

## Add gap features and create trading-only versions

In [None]:
# Gap analysis works on the forward-filled files, so read those back in
df_gdea, df_hbea = (
    pd.read_parquet(filled_path)
    for filled_path in (
        "../../../02_Data_Processed/01_Carbon_Markets/01_Regional/GDEA_forward_filled.parquet",
        "../../../02_Data_Processed/01_Carbon_Markets/01_Regional/HBEA_forward_filled.parquet",
    )
)

def add_gap_features(df):
    """
    Add features describing the calendar gap before each trading day.

    Rows where ``is_open`` is True are treated as trading days. For every
    trading day after the first, ``gap_days`` is the whole number of days
    since the previous trading row. All other rows (non-trading days and the
    first trading day) keep ``gap_days == 0``. Computing this before the
    non-trading rows are dropped preserves the weekend/holiday information.

    Args:
        df: DataFrame with an ``is_open`` column and a datetime-like index
            (anything ``pd.to_datetime`` accepts). Rows are used in their
            existing order; the frame is not sorted here.

    Returns:
        A copy of ``df`` with three added columns: ``gap_days`` (int64),
        ``post_weekend`` (``gap_days == 3``) and ``post_holiday``
        (``gap_days > 3``). The input frame is not modified.
    """
    df = df.copy()

    # Boolean mask of trading rows. A missing flag counts as "not open",
    # matching how boolean-mask row selection treats missing values.
    is_trading = (df['is_open'] == True).fillna(False).to_numpy(dtype=bool)
    trading_positions = np.flatnonzero(is_trading)
    trading_dates = pd.to_datetime(df.index[is_trading])

    # Whole-day difference between each trading date and the one before it,
    # computed in one vectorized step instead of a per-row Python loop.
    gaps = (trading_dates[1:] - trading_dates[:-1]).days

    # Assign by row position rather than by index label, so repeated index
    # labels cannot cause one gap to be written onto several rows.
    gap_days = np.zeros(len(df), dtype='int64')
    gap_days[trading_positions[1:]] = gaps.to_numpy()
    df['gap_days'] = gap_days

    # Binary indicators for common patterns
    df['post_weekend'] = df['gap_days'] == 3  # e.g. Friday to Monday
    df['post_holiday'] = df['gap_days'] > 3   # Longer gaps indicate holidays

    return df

# Compute the gap columns for both datasets
print("Adding gap features...")
df_gdea, df_hbea = add_gap_features(df_gdea), add_gap_features(df_hbea)

# Sanity check: most frequent gap lengths and the number of post-weekend rows
print(
    f"GDEA gap_days distribution:\n{df_gdea['gap_days'].value_counts().head()}",
    f"\nHBEA gap_days distribution:\n{df_hbea['gap_days'].value_counts().head()}",
    f"\nWeekend patterns detected:",
    f"GDEA post_weekend days: {df_gdea['post_weekend'].sum()}",
    f"HBEA post_weekend days: {df_hbea['post_weekend'].sum()}",
    sep="\n",
)

## Create and save trading-only versions

In [None]:
# Keep only the rows flagged as open (trading days); copy so the filtered
# frames are independent of the full ones
df_gdea_trading = df_gdea.loc[df_gdea['is_open'] == True].copy()
df_hbea_trading = df_hbea.loc[df_hbea['is_open'] == True].copy()

# Report how many rows the filter removed from each dataset
print(f"Original GDEA shape: {df_gdea.shape}")
print(f"Trading-only GDEA shape: {df_gdea_trading.shape}")
print(f"Removed {len(df_gdea) - len(df_gdea_trading)} non-trading days ({(len(df_gdea) - len(df_gdea_trading))/len(df_gdea)*100:.1f}%)")

print(f"\nOriginal HBEA shape: {df_hbea.shape}")
print(f"Trading-only HBEA shape: {df_hbea_trading.shape}")
print(f"Removed {len(df_hbea) - len(df_hbea_trading)} non-trading days ({(len(df_hbea) - len(df_hbea_trading))/len(df_hbea)*100:.1f}%)")

# Write both versions: the full frames (now carrying the gap columns)
# overwrite the forward-filled files, and the filtered frames go to new
# trading-only files
df_gdea.to_parquet(
    "../../../02_Data_Processed/01_Carbon_Markets/01_Regional/GDEA_forward_filled.parquet"
)
df_gdea_trading.to_parquet(
    "../../../02_Data_Processed/01_Carbon_Markets/01_Regional/GDEA_trading_only.parquet"
)

df_hbea.to_parquet(
    "../../../02_Data_Processed/01_Carbon_Markets/01_Regional/HBEA_forward_filled.parquet"
)
df_hbea_trading.to_parquet(
    "../../../02_Data_Processed/01_Carbon_Markets/01_Regional/HBEA_trading_only.parquet"
)

print(
    "\nSaved both versions:",
    "  - Original (with gap features): *_forward_filled.parquet",
    "  - Trading-only: *_trading_only.parquet",
    sep="\n",
)