In [None]:
import boto3
import pandas as pd
from io import BytesIO
    
def download_futures_s3(year: int, month: int, day: int, symbol: str, expiry: int, BUCKET = 'live-market-data'):
    s3_key = (
        f"year={year}/month={month:02d}/day={day:02d}/"
        f"Futures/{symbol}/{symbol}{list_option_folder(year,month,day,symbol)[expiry].split(f"{symbol}/",1)[1].strip("/")}FUT.parquet"
    )

    print(f"Fetching from s3://{BUCKET}/{s3_key}")

    s3 = boto3.client("s3")
    obj = s3.get_object(Bucket=BUCKET, Key=s3_key)

    # Load parquet
    data = obj["Body"].read()
    df = pd.read_parquet(BytesIO(data))

    print(f"Downloaded {len(df):,} rows.")
    return df

def download_options_s3(year: int, month: int, day: int, symbol: str, expiry: str, option_type: str, strike: int, BUCKET = live-market-data):
    s3_key = (
        f"year={year}/month={month:02d}/day={day:02d}/"
        f"Options/{symbol}/{expiry}/{option_type}/{strike:05d}/"
        f"{symbol}{expiry}{strike}{option_type}.parquet"
    )
    
    print(f"Fetching from s3://{BUCKET}/{s3_key}")

    s3 = boto3.client("s3")
    obj = s3.get_object(Bucket=BUCKET, Key=s3_key)
    data = obj["Body"].read()
    df = pd.read_parquet(BytesIO(data))
    print(f"Downloaded {len(df):,} rows.")
    return df

import datetime

month_map = {
    "JAN": 1, "FEB": 2, "MAR": 3, "APR": 4, "MAY": 5, "JUN": 6,
    "JUL": 7, "AUG": 8, "SEP": 9, "OCT": 10, "NOV": 11, "DEC": 12
}



def parse_expiry_from_folder(folder_path: str, ref_date: datetime.date) -> datetime.date:
    token = folder_path.rstrip("/").split("/")[-1]  # e.g. 25NOV, 25N04, 25D02
    year = 2000 + int(token[:2])
    code = token[2:]

    if len(code) == 3 and code in month_map:
        month = month_map[code]
        if month == ref_date.month:
            return datetime.date(year, month, 28)
        return datetime.date(year, month, 28)

    if code.startswith("N") and code[1:].isdigit():
        day = int(code[1:])
        month = ref_date.month
        return datetime.date(year, month, day)

    if code.startswith("D") and code[1:].isdigit():
        day = int(code[1:])
        month = ref_date.month + 1
        if month > 12:
            month -= 12
            year += 1
        return datetime.date(year, month, day)

    return datetime.date(2100, 1, 1)



def sort_expiry_folders(folders, ref_date: datetime.date):
    return sorted(folders, key=lambda f: parse_expiry_from_folder(f, ref_date))


def list_option_folder(year: int, month: int, day: int, symbol: str, BUCKET = "live-market-data"):
    prefix = f"year={year}/month={month:02d}/day={day:02d}/Options/{symbol}/"
    s3 = boto3.client("s3")

    paginator = s3.get_paginator("list_objects_v2")
    pages = paginator.paginate(Bucket=BUCKET, Prefix=prefix, Delimiter="/")

    items = []

    for page in pages:
        for cp in page.get("CommonPrefixes", []):
            items.append(cp["Prefix"])
        for obj in page.get("Contents", []):
            items.append(obj["Key"])

    return items



In [72]:
DAY = 20
MONTH = 11
YEAR = 2025
SYMBOL = "NIFTY"
BUCKET = "live-market-data"  

In [84]:
items = list_option_folder(YEAR,MONTH,DAY,SYMBOL)
items

['year=2025/month=11/day=20/Options/NIFTY/25D02/',
 'year=2025/month=11/day=20/Options/NIFTY/25D09/',
 'year=2025/month=11/day=20/Options/NIFTY/25D16/',
 'year=2025/month=11/day=20/Options/NIFTY/25D23/',
 'year=2025/month=11/day=20/Options/NIFTY/25DEC/',
 'year=2025/month=11/day=20/Options/NIFTY/25NOV/']

In [83]:
list_option_folder(YEAR,MONTH,DAY,SYMBOL)[-1].split(f"{SYMBOL}/",1)[1].strip("/")

'25NOV'

In [None]:
df_fut = download_futures_s3(
        year=YEAR,
        month= MONTH,
        day=DAY,
        symbol= SYMBOL,
        expiry = 0
    )
df_fut = df_fut.rename(columns={
    "Trading_Symbol": "Ticker",
    "BestBid": "BuyPrice",
    "BidSize": "BuyQty",
    "BestAsk": "SellPrice",
    "AskSize": "SellQty"
})
df_fut.head()



Fetching from s3://live-market-data/year=2025/month=11/day=20/Futures/NIFTY/NIFTY25NOVFUT.parquet
Downloaded 34,706 rows.


Unnamed: 0,Date,Time,Ticker,Instrument_Token,LTP,LTQ,Volume,Open_Interest,BuyPrice,SellPrice,BuyQty,SellQty
0,20/11/2025,08:46:53.482,NIFTY25NOVFUT,9485826,26071.0,75,0,16795500,0.0,0.0,0,0
1,20/11/2025,09:10:01.668,NIFTY25NOVFUT,9485826,26071.0,75,0,16795500,0.0,0.0,0,0
2,20/11/2025,09:15:01.183,NIFTY25NOVFUT,9485826,26134.5,75,3675,16795500,26130.0,26134.3,150,375
3,20/11/2025,09:15:01.457,NIFTY25NOVFUT,9485826,26134.5,75,3675,16795500,26130.0,26134.3,150,375
4,20/11/2025,09:15:01.905,NIFTY25NOVFUT,9485826,26130.9,150,3675,16795500,26130.0,26134.3,150,375


In [None]:
list_option_folder(YEAR,MONTH,DAY,SYMBOL)[-1].split(f"{SYMBOL}/",1)[1].strip("/")

'25NOV'

In [88]:
items = list_option_folder(YEAR,MONTH,DAY,SYMBOL)
sort_expiry_folders(items, ref_date=(20, 11, 2025))

AttributeError: 'tuple' object has no attribute 'month'

In [None]:



df_opt = download_options_s3(
    year=YEAR,
    month=MONTH,
    day=DAY,
    symbol=SYMBOL,
    expiry="25NOV",
    option_type="CE",
    strike=26300
)
df_opt = df_opt.rename(columns={
    "Trading_Symbol": "Ticker",
    "BestBid": "BuyPrice",
    "BidSize": "BuyQty",
    "BestAsk": "SellPrice",
    "AskSize": "SellQty"
})

ref_date = datetime.date(YEAR, MONTH, DAY)
items = list_option_folder(YEAR,MONTH,DAY,SYMBOL)
sorted_folders = sort_expiry_folders(items, ref_date)

Fetching from s3://live-market-data/year=2025/month=11/day=20/Futures/NIFTY/NIFTY25NOVFUT.parquet
Downloaded 34,706 rows.
Fetching from s3://live-market-data/year=2025/month=11/day=20/Options/NIFTY/25NOV/CE/26300/NIFTY25NOV26300CE.parquet
Downloaded 44,280 rows.


In [85]:
#All expiries avaliable that month

result = sorted_folders[0].split(f"{SYMBOL}/",1)[1].strip("/")
print(result)

25NOV


In [89]:
import datetime
from io import BytesIO

import boto3
import pandas as pd

BUCKET = "live-market-data"  # change if needed

month_map = {
    "JAN": 1, "FEB": 2, "MAR": 3, "APR": 4, "MAY": 5, "JUN": 6,
    "JUL": 7, "AUG": 8, "SEP": 9, "OCT": 10, "NOV": 11, "DEC": 12
}

def list_nifty_expiry_prefixes(year: int, month: int, day: int, symbol: str = "NIFTY"):
    s3 = boto3.client("s3")
    base_prefix = f"year={year}/month={month:02d}/day={day:02d}/Options/{symbol}/"
    resp = s3.list_objects_v2(
        Bucket=BUCKET,
        Prefix=base_prefix,
        Delimiter="/"
    )
    prefixes = [cp["Prefix"] for cp in resp.get("CommonPrefixes", [])]
    return prefixes

def parse_expiry_token(token: str, ref_date: datetime.date) -> datetime.date:
    year = 2000 + int(token[:2])
    code = token[2:]

    if code.startswith("N") and code[1:].isdigit():
        day = int(code[1:])
        return datetime.date(year, ref_date.month, day)

    if code.startswith("D") and code[1:].isdigit():
        day = int(code[1:])
        month = ref_date.month + 1
        if month > 12:
            month -= 12
            year += 1
        return datetime.date(year, month, day)

    if code in month_map:
        month = month_map[code]
        if month == ref_date.month:
            return datetime.date(year, month, 28)
        else:
            return datetime.date(year, month, 28)

    return datetime.date(2100, 1, 1)

def sort_expiry_prefixes(prefixes, ref_date: datetime.date):
    def key_func(p):
        token = p.rstrip("/").split("/")[-1]
        return parse_expiry_token(token, ref_date)
    return sorted(prefixes, key=key_func)

def download_futures_s3(year: int, month: int, day: int, symbol: str = "NIFTY"):
    s3 = boto3.client("s3")
    month_code_map = {
        1: "JAN", 2: "FEB", 3: "MAR", 4: "APR", 5: "MAY", 6: "JUN",
        7: "JUL", 8: "AUG", 9: "SEP", 10: "OCT", 11: "NOV", 12: "DEC"
    }
    month_code = month_code_map[month]
    yy = year % 100
    fut_name = f"{symbol}{yy:02d}{month_code}FUT.parquet"
    s3_key = (
        f"year={year}/month={month:02d}/day={day:02d}/"
        f"Futures/{symbol}/{fut_name}"
    )
    print(f"Downloading futures from s3://{BUCKET}/{s3_key}")
    obj = s3.get_object(Bucket=BUCKET, Key=s3_key)
    data = obj["Body"].read()
    df = pd.read_parquet(BytesIO(data))
    return df

def download_option_strike_s3(
    year: int,
    month: int,
    day: int,
    symbol: str,
    expiry_token: str,
    strike: int
):
    s3 = boto3.client("s3")
    base_prefix = f"year={year}/month={month:02d}/day={day:02d}/Options/{symbol}/{expiry_token}/"

    ce_key = (
        f"{base_prefix}CE/{strike}/"
        f"{symbol}{expiry_token}{strike}CE.parquet"
    )
    pe_key = (
        f"{base_prefix}PE/{strike}/"
        f"{symbol}{expiry_token}{strike}PE.parquet"
    )

    print(f"Downloading CE from s3://{BUCKET}/{ce_key}")
    ce_obj = s3.get_object(Bucket=BUCKET, Key=ce_key)
    ce_df = pd.read_parquet(BytesIO(ce_obj["Body"].read()))

    print(f"Downloading PE from s3://{BUCKET}/{pe_key}")
    pe_obj = s3.get_object(Bucket=BUCKET, Key=pe_key)
    pe_df = pd.read_parquet(BytesIO(pe_obj["Body"].read()))

    return ce_df, pe_df

def pipeline_for_day(date_str: str, symbol: str = "NIFTY", strike: int = 26000):
    day, month, year = map(int, date_str.split("/"))
    ref_date = datetime.date(year, month, day)

    expiry_prefixes = list_nifty_expiry_prefixes(year, month, day, symbol)
    if not expiry_prefixes:
        raise ValueError("No expiry folders found for this day")

    sorted_prefixes = sort_expiry_prefixes(expiry_prefixes, ref_date)
    nearest_prefix = sorted_prefixes[0]
    expiry_token = nearest_prefix.rstrip("/").split("/")[-1]

    print("All expiries (sorted nearest → farthest):")
    for p in sorted_prefixes:
        print("  ", p)

    print(f"\nNearest expiry token: {expiry_token}")

    fut_df = download_futures_s3(year, month, day, symbol)
    print(f"Futures rows: {len(fut_df):,}")

    ce_df, pe_df = download_option_strike_s3(
        year=year,
        month=month,
        day=day,
        symbol=symbol,
        expiry_token=expiry_token,
        strike=strike
    )
    print(f"CE rows: {len(ce_df):,}, PE rows: {len(pe_df):,}")

    return {
        "expiry_prefixes_sorted": sorted_prefixes,
        "nearest_expiry_token": expiry_token,
        "futures": fut_df,
        "ce": ce_df,
        "pe": pe_df,
    }

result = pipeline_for_day("20/11/2025", symbol="NIFTY", strike=26000)


All expiries (sorted nearest → farthest):
   year=2025/month=11/day=20/Options/NIFTY/25NOV/
   year=2025/month=11/day=20/Options/NIFTY/25D02/
   year=2025/month=11/day=20/Options/NIFTY/25D09/
   year=2025/month=11/day=20/Options/NIFTY/25D16/
   year=2025/month=11/day=20/Options/NIFTY/25D23/
   year=2025/month=11/day=20/Options/NIFTY/25DEC/

Nearest expiry token: 25NOV
Downloading futures from s3://live-market-data/year=2025/month=11/day=20/Futures/NIFTY/NIFTY25NOVFUT.parquet
Futures rows: 34,706
Downloading CE from s3://live-market-data/year=2025/month=11/day=20/Options/NIFTY/25NOV/CE/26000/NIFTY25NOV26000CE.parquet
Downloading PE from s3://live-market-data/year=2025/month=11/day=20/Options/NIFTY/25NOV/PE/26000/NIFTY25NOV26000PE.parquet
CE rows: 46,392, PE rows: 46,643
