In [11]:
# Step 17 — Segment Trees + Intraday Range Queries (min/max/VWAP)
# Hardened for: (n,1) shapes, timezone mismatches, scalar casting, and Windows file paths.

import numpy as np
import pandas as pd
import yfinance as yf
from pathlib import Path

# ---------------- Segment Tree ----------------
class SegmentTree:
    """
    Iterative, array-backed segment tree over a fixed-length float sequence.

    The tree is stored in ``seg`` (length ``2 * size``): leaves occupy
    ``seg[size : size + n]`` and node ``i`` combines children ``2i`` and
    ``2i + 1``. Unused leaves hold the identity element.

    - op:  associative callable(a, b) -> scalar (e.g. min, max, addition)
    - ide: identity element for ``op`` (+inf for min, -inf for max, 0 for sum)
    """

    def __init__(self, data, op, ide):
        """Flatten ``data`` to 1-D floats and build the tree bottom-up.

        Raises:
            ValueError: if ``data`` contains no elements.
        """
        leaves = np.asarray(data, dtype=float).reshape(-1)  # (n,1) inputs become (n,)
        self.n = len(leaves)
        if self.n == 0:
            raise ValueError("SegmentTree received empty data.")
        self.op = op
        self.ide = float(ide)
        # Smallest power of two that is >= n.
        self.size = 1 << (self.n - 1).bit_length()

        tree = np.full(2 * self.size, self.ide, dtype=float)
        tree[self.size:self.size + self.n] = leaves
        # Parents are filled from the deepest internal node up to the root (index 1).
        for node in reversed(range(1, self.size)):
            tree[node] = op(tree[2 * node], tree[2 * node + 1])
        self.seg = tree

    def update(self, idx, value):
        """Overwrite element ``idx`` with ``value`` and refresh its ancestors.

        Raises:
            IndexError: if ``idx`` is outside ``[0, n)``.
        """
        if not (0 <= idx < self.n):
            raise IndexError("SegmentTree.update index out of range.")
        node = self.size + idx
        self.seg[node] = float(value)
        node >>= 1
        while node > 0:
            self.seg[node] = self.op(self.seg[2 * node], self.seg[2 * node + 1])
            node >>= 1

    def query(self, l, r):
        """Fold ``op`` over the half-open range [l, r).

        Bounds are clamped to ``[0, n]``; an empty range yields the identity.
        """
        lo = max(l, 0)
        hi = min(r, self.n)
        if lo >= hi:
            return float(self.ide)

        left_acc = self.ide
        right_acc = self.ide
        lo += self.size
        hi += self.size
        while lo < hi:
            if lo & 1:
                # lo is a right child: take it and step past it.
                left_acc = self.op(left_acc, self.seg[lo])
                lo += 1
            if hi & 1:
                # hi is exclusive: step back onto the node and take it.
                hi -= 1
                right_acc = self.op(self.seg[hi], right_acc)
            lo >>= 1
            hi >>= 1
        return self.op(left_acc, right_acc)

# ---------------- Data pull (1m with fallbacks) ----------------
def load_intraday(ticker="AAPL"):
    """
    Download intraday OHLCV bars for ``ticker``, trying finer intervals first.

    Attempts, in order: 1m over 7d, 5m over 30d, 15m over 60d. The first
    non-empty download wins.

    Returns:
        (df, period, interval): ``df`` holds the Open/High/Low/Close/Volume
        columns with NaN rows dropped and its index converted to
        America/New_York (a tz-naive index is treated as UTC first).

    Raises:
        SystemExit: if every attempt returns an empty frame.
    """
    target_tz = "America/New_York"
    wanted_cols = ["Open","High","Low","Close","Volume"]
    attempts = (("7d","1m"), ("30d","5m"), ("60d","15m"))

    for period, interval in attempts:
        raw = yf.download(ticker, period=period, interval=interval, auto_adjust=False, progress=False)
        if raw.empty:
            continue
        bars = raw.loc[:, wanted_cols].dropna()
        stamps = pd.to_datetime(bars.index)
        if stamps.tz is None:
            # Naive timestamps are interpreted as UTC before conversion.
            stamps = stamps.tz_localize("UTC")
        bars.index = stamps.tz_convert(target_tz)
        return bars, period, interval

    raise SystemExit("No intraday data available (or network issue).")

# Pull the bars once; PV (close * volume) is the per-bar numerator used for VWAP.
df, period_used, interval_used = load_intraday("AAPL")
df["PV"] = df["Close"] * df["Volume"]

# ------------- Build segment trees -------------
def _add(a, b):
    """Sum combiner for the additive trees."""
    return a + b

def _tree_over(column, op, ide):
    """Build a SegmentTree over one column of df, viewed as floats."""
    return SegmentTree(df[column].to_numpy(dtype=float), op=op, ide=ide)

maxHigh = _tree_over("High", max, -np.inf)    # range maximum of High
minLow  = _tree_over("Low", min, np.inf)      # range minimum of Low
sumPV   = _tree_over("PV", _add, 0.0)         # range sum of close * volume
sumV    = _tree_over("Volume", _add, 0.0)     # range sum of volume

# ------------- Helpers: time -> index mapping (tz-safe) -------------
def _to_same_tz(ts, tz):
    """Parse ``ts`` with pandas and express it in ``tz``.

    A tz-naive value is localized to ``tz`` (wall-clock time kept); a
    tz-aware value is converted to ``tz`` (instant kept).
    """
    stamp = pd.to_datetime(ts)
    return stamp.tz_localize(tz) if stamp.tz is None else stamp.tz_convert(tz)

def idx_range(start_ts, end_ts):
    """
    Translate a pair of timestamps into a half-open positional range [l, r)
    over the module-level ``df``.

    Both timestamps are brought into the index's timezone, put in ascending
    order, clipped to the first/last bar, and then snapped to the nearest bar.
    The bar nearest ``end_ts`` is included in the range.
    """
    bars = df.index
    zone = bars.tz
    first_ts = _to_same_tz(start_ts, zone)
    last_ts = _to_same_tz(end_ts, zone)
    if first_ts > last_ts:
        first_ts, last_ts = last_ts, first_ts

    # Clip to the data's time span.
    first_ts = max(bars[0], first_ts)
    last_ts = min(bars[-1], last_ts)

    lo = bars.get_indexer([first_ts], method="nearest")[0]
    hi = bars.get_indexer([last_ts], method="nearest")[0]
    if hi < lo:
        lo, hi = hi, lo
    # Convert the inclusive end position to an exclusive bound.
    stop = min(hi + 1, len(bars))
    return int(lo), int(stop)

def range_stats(start_ts, end_ts):
    """Summarize the bars between two timestamps using the segment trees.

    Returns a dict with the bar count, range high, range low, VWAP
    (sum of PV / sum of volume, NaN when the volume sum is not positive),
    and the half-open index bounds ``l`` and ``r`` that were queried.
    """
    l, r = idx_range(start_ts, end_ts)
    notional = float(sumPV.query(l, r))
    volume = float(sumV.query(l, r))
    if volume > 0:
        vwap = notional / volume
    else:
        vwap = np.nan
    return {
        "bars": int(r - l),
        "high": float(maxHigh.query(l, r)),
        "low": float(minLow.query(l, r)),
        "vwap": vwap,
        "l": l,
        "r": r,
    }

# ------------- Example queries -------------
tz = df.index.tz
last_day = df.index[-1].astimezone(tz).date()

def _on_last_day(hhmm):
    """Timestamp for wall-clock time hhmm on last_day, localized to tz."""
    return pd.Timestamp.combine(last_day, pd.Timestamp(hhmm).time()).tz_localize(tz)

session_start = _on_last_day("09:30")
session_end   = _on_last_day("16:00")

# Stats for the most recent session, then for the trailing window of up to 1000 bars.
stats_last_day = range_stats(session_start, session_end)
N = min(1000, len(df))
stats_last_window = range_stats(df.index[-N], df.index[-1])

print(f"Data interval used: period={period_used}, interval={interval_used}, bars={len(df)}")
for _banner, _stats in (
    ("\n=== Last day session stats (US/Eastern) ===", stats_last_day),
    ("\n=== Last window stats (tail) ===", stats_last_window),
):
    print(_banner)
    # Numeric values are rounded to 4 decimals for display only.
    print({k: (round(v,4) if isinstance(v,(int,float,np.floating)) else v) for k,v in _stats.items()})

# ------------- Demonstrate point update (correction) -------------
def correct_last_bar_high(df, factor=1.01):
    """Set the last bar's High to ``Close * factor`` and report the change.

    Args:
        df: Frame with "Close" and "High" columns; each may be a Series or a
            1-column DataFrame. Mutated in place.
        factor: Multiplier applied to the last Close to get the new High.

    Returns:
        (new_high, old_high) as floats. ``df`` is only written when the two differ.
    """
    # Flatten before indexing so Series and 1-column DataFrames both yield a scalar.
    close_last = float(np.asarray(df["Close"]).reshape(-1)[-1])
    old_high = float(np.asarray(df["High"]).reshape(-1)[-1])

    new_high = close_last * factor
    if new_high != old_high:
        # Write by position: a label-based df.loc[df.index[-1], "High"] would
        # overwrite every row that shares the last timestamp, not just the last bar.
        df.iloc[-1, df.columns.get_loc("High")] = new_high
    return new_high, old_high

# Patch the last bar in df, then mirror the new value into the max-High tree
# so range queries reflect the correction.
last_idx = len(df) - 1
corrected_high, old_high = correct_last_bar_high(df, 1.01)
maxHigh.update(last_idx, corrected_high)  # sync segtree

if corrected_high != old_high:
    # Re-query the trailing window of up to 50 bars. The size is clamped
    # because df.index[-50] raises IndexError when fewer than 50 bars were loaded.
    tail_bars = min(50, len(df))
    stats_after_fix = range_stats(df.index[-tail_bars], df.index[-1])
    print("\nApplied correction to last bar High.")
    print({k: (round(v,4) if isinstance(v,(int,float,np.floating)) else v) for k,v in stats_after_fix.items()})

# ------------- Export summary CSV (Windows-safe path) -------------
# One row per exported metric: CSV label -> key in stats_last_day.
_metric_labels = ["bars","session_high","session_low","session_vwap"]
_stat_keys = ["bars", "high", "low", "vwap"]
out = pd.DataFrame(
    {
        "metric": _metric_labels,
        "value": [stats_last_day[key] for key in _stat_keys],
    }
)

# Destination file; missing parent folders are created before writing.
out_path = Path(r"C:\Users\adity\Downloads\Learn_DSA_Quant\Level-17\intraday_range_summary.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
out.to_csv(out_path, index=False)
print(f"\nSaved {out_path}")


Data interval used: period=7d, interval=1m, bars=2729

=== Last day session stats (US/Eastern) ===
{'bars': 390, 'high': 251.82, 'low': 247.48, 'vwap': 249.7487, 'l': 2339, 'r': 2729}

=== Last window stats (tail) ===
{'bars': 1000, 'high': 251.82, 'low': 244.7, 'vwap': 248.4381, 'l': 1729, 'r': 2729}

Applied correction to last bar High.
{'bars': 50, 'high': 251.9243, 'low': 248.17, 'vwap': 249.2557, 'l': 2679, 'r': 2729}

Saved C:\Users\adity\Downloads\Learn_DSA_Quant\Level-17\intraday_range_summary.csv
