In [1]:
import pandas as pd
import pyarrow.dataset as ds
from pathlib import Path

# -------------------------------------------------
# Contract selection: edit the values in this section only.
# -------------------------------------------------
# Root directory; the check looks for year=YYYY/month=MM sub-directories below it.
PARQUET_ROOT = "../../Options_Parquet/SENSEX"

# Trading session to inspect and the contract's expiry, both as YYYY-MM-DD.
TRADE_DATE = "2023-08-28"
EXPIRY_DATE = "2023-09-01"

# Strike price and option side ("CE" or "PE") of the contract.
STRIKE = 65000
OPTION_TYPE = "PE"
# -------------------------------------------------


def _with_datetime_index(df):
    """Index *df* by a ``DateTime`` column built from ``date`` + ``time`` and sort it.

    The frame is modified in place and also returned. Integer ``time`` values
    are treated as milliseconds since midnight; anything else is rendered to
    text and parsed together with the date.
    """
    dates_as_text = df["date"].astype(str)
    time_col = df["time"]

    # Check the column dtype rather than one element: pandas hands back numpy
    # integer scalars, which are NOT instances of the built-in ``int``.
    # The isinstance fallback keeps object columns of plain ints working.
    is_integer_time = pd.api.types.is_integer_dtype(time_col) or (
        len(time_col) > 0 and isinstance(time_col.iloc[0], int)
    )

    if is_integer_time:
        stamps = pd.to_datetime(dates_as_text) + pd.to_timedelta(time_col, unit="ms")
    else:
        stamps = pd.to_datetime(dates_as_text + " " + time_col.astype(str))

    df["DateTime"] = stamps
    df.set_index("DateTime", inplace=True)
    df.sort_index(inplace=True)
    return df


def check_option_data(parquet_root=None, trade_date=None, expiry_date=None,
                      strike=None, option_type=None):
    """Print the candles stored for a single option contract on one trade date.

    Every argument is optional; an argument left as ``None`` falls back to the
    corresponding module-level setting (``PARQUET_ROOT``, ``TRADE_DATE``,
    ``EXPIRY_DATE``, ``STRIKE``, ``OPTION_TYPE``), so a bare
    ``check_option_data()`` call uses the user-input section of this file.

    Args:
        parquet_root: Directory containing ``year=YYYY/month=MM`` folders.
        trade_date: Session to inspect; anything ``pd.Timestamp`` accepts.
        expiry_date: Contract expiry; anything ``pd.Timestamp`` accepts.
        strike: Strike price compared against the ``StrikePrice`` column.
        option_type: Value compared against the ``Type`` column ("CE"/"PE").

    Returns:
        None. All results are reported on standard output.
    """
    parquet_root = PARQUET_ROOT if parquet_root is None else parquet_root
    strike = STRIKE if strike is None else strike
    option_type = OPTION_TYPE if option_type is None else option_type
    trade_day = pd.Timestamp(TRADE_DATE if trade_date is None else trade_date).date()
    expiry_day = pd.Timestamp(EXPIRY_DATE if expiry_date is None else expiry_date).date()

    # NOTE(review): the partition folder is derived from the EXPIRY date.
    # If the dataset is actually partitioned by trade date, a contract traded
    # in the month before its expiry will be looked up in the wrong folder.
    # Confirm against the writer of these Parquet files.
    dataset_path = (
        Path(parquet_root)
        / f"year={expiry_day.year}"
        / f"month={expiry_day.month:02d}"
    )

    print("\n================ OPTION DATA CHECK ================")
    print(f"Trade Date : {trade_day}")
    print(f"Expiry     : {expiry_day}")
    print(f"Strike     : {strike}")
    print(f"Type       : {option_type}")
    print(f"Path       : {dataset_path}")
    print("===================================================\n")

    if not dataset_path.exists():
        print("❌ Parquet path does NOT exist")
        return

    dataset = ds.dataset(dataset_path, format="parquet")

    # Filter inside Arrow so only the matching rows are materialised.
    # Dates are compared as datetime.date objects.
    contract_filter = (
        (ds.field("date") == trade_day)
        & (ds.field("ExpiryDate") == expiry_day)
        & (ds.field("StrikePrice") == strike)
        & (ds.field("Type") == option_type)
    )
    table = dataset.to_table(filter=contract_filter)

    if table.num_rows == 0:
        print("❌ NO DATA FOUND for this contract")
        print("\nPossible reasons:")
        print("- Strike never traded")
        print("- Contract illiquid or missing")
        print("- Data vendor gap")
        print("- Wrong expiry/strike combination")
        return

    df = _with_datetime_index(table.to_pandas())

    print("✅ DATA FOUND\n")
    print(f"Total candles: {len(df)}")
    print(f"Time range  : {df.index.min()} → {df.index.max()}\n")

    print("----- FULL DAY DATA -----")
    # to_string() prints every row; a plain print() of a long frame elides the
    # middle rows, which defeats the purpose of a "full day" dump.
    print(df[["Open", "High", "Low", "Close", "Volume"]].to_string())

    print("\n================ END =================\n")


# Run the check with the settings from the user-input section when this
# file (or notebook cell) is executed directly rather than imported.
if __name__ == "__main__":
    check_option_data()



Trade Date : 2023-08-28
Expiry     : 2023-09-01
Strike     : 65000
Type       : PE
Path       : ..\..\Options_Parquet\SENSEX\year=2023\month=09

❌ NO DATA FOUND for this contract

Possible reasons:
- Strike never traded
- Contract illiquid or missing
- Data vendor gap
- Wrong expiry/strike combination
