In [1]:
# price_volume_analysis.py
from __future__ import annotations
from datetime import datetime
import numpy as np
import pandas as pd
import yfinance as yf

# =====================
# ====== CONFIG =======
# =====================
SYMBOL   = "CRCL"        # <-- change to your ticker
INTERVAL = "1m"          # '1m','2m','5m','15m','30m','1h','1d'
PERIOD   = "5d"          # '1d','5d','1mo','3mo','6mo','1y'
TZ       = "America/New_York"
INCLUDE_EXTENDED = False # pre/post-hours
N_PRICE_BINS = 40        # volume profile bins (more = finer)
RVOL_LOOKBACK_DAYS = 20  # for RVOL on daily data (optional)

# =====================
# ===== HELPERS =======
# =====================
def fetch(symbol: str, interval: str, period: str, tz: str, prepost: bool) -> pd.DataFrame:
    df = yf.download(symbol, interval=interval, period=period,
                     auto_adjust=True, prepost=prepost, progress=False)
    if df.empty:
        raise RuntimeError(f"No data returned for {symbol} [{period}/{interval}]")
    
    # Make the index tz-aware and convert to local tz
    df.index = pd.to_datetime(df.index, utc=True).tz_convert(tz)
    df.columns = [' '.join(col).strip() for col in df.columns.values]
    df.rename(columns={"Close CRCL": "Close", "High CRCL": "High", "Open CRCL": "Open", "Low CRCL": "Low", "Volume CRCL": "Volume"}, inplace=True)
    return df

df = fetch(SYMBOL, INTERVAL, PERIOD, TZ, INCLUDE_EXTENDED)
df


Unnamed: 0_level_0,Close,High,Low,Open,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-08-11 09:30:00-04:00,160.000000,161.080002,159.199997,161.005005,553982
2025-08-11 09:31:00-04:00,159.300003,159.800003,158.050003,159.800003,65399
2025-08-11 09:32:00-04:00,159.729996,159.889999,159.242996,159.309998,8818
2025-08-11 09:33:00-04:00,156.550003,156.800003,156.500000,156.770004,162041
2025-08-11 09:34:00-04:00,156.309998,156.720001,155.526794,156.550003,74034
...,...,...,...,...,...
2025-08-15 15:55:00-04:00,149.395004,149.649994,149.199997,149.649994,74494
2025-08-15 15:56:00-04:00,149.365005,149.539993,149.320007,149.419998,149615
2025-08-15 15:57:00-04:00,149.335007,149.449997,149.300003,149.365005,77402
2025-08-15 15:58:00-04:00,149.425003,149.600006,149.279999,149.315002,108962


In [2]:
# Maybe consider to put weight on the price
def typical_price(df: pd.DataFrame) -> pd.Series:
    """(H+L+C)/3 — a better proxy for price*volume than close alone."""
    return (df["High"] + df["Low"] + df["Close"]) / 3.0

tp = typical_price(df)
print(tp)

Datetime
2025-08-11 09:30:00-04:00    160.093333
2025-08-11 09:31:00-04:00    159.050003
2025-08-11 09:32:00-04:00    159.620997
2025-08-11 09:33:00-04:00    156.616669
2025-08-11 09:34:00-04:00    156.185598
                                ...    
2025-08-15 15:55:00-04:00    149.414998
2025-08-15 15:56:00-04:00    149.408335
2025-08-15 15:57:00-04:00    149.361669
2025-08-15 15:58:00-04:00    149.435003
2025-08-15 15:59:00-04:00    149.486664
Length: 1949, dtype: float64


In [3]:
def vwap_by_day(df: pd.DataFrame) -> pd.Series:
    """Per-session VWAP computed from intraday bars (cumulative within each date)."""
    tp = typical_price(df)
    # stable date key (naive date in UTC so DST/local doesn’t split a session)
    day_key = df.index.tz_convert("UTC").tz_localize(None).date
    num = (tp * df["Volume"]).groupby(day_key).cumsum()
    den = df["Volume"].groupby(day_key).cumsum().replace(0, np.nan)
    vwap = num / den
    vwap.name = "VWAP"
    return vwap

vwap_by_day(df)

Datetime
2025-08-11 09:30:00-04:00    160.093333
2025-08-11 09:31:00-04:00    159.983170
2025-08-11 09:32:00-04:00    159.978086
2025-08-11 09:33:00-04:00    159.288818
2025-08-11 09:34:00-04:00    159.022995
                                ...    
2025-08-15 15:55:00-04:00    145.108658
2025-08-15 15:56:00-04:00    145.138809
2025-08-15 15:57:00-04:00    145.154074
2025-08-15 15:58:00-04:00    145.175747
2025-08-15 15:59:00-04:00    145.208604
Name: VWAP, Length: 1949, dtype: float64

In [4]:
def volume_profile(df: pd.DataFrame, bins: int = 40) -> pd.DataFrame:
    """
    Approximate a volume profile by assigning each bar's V to a price bin of its typical price.
    Without tick-level data, this is the standard proxy.
    	volume_profile() bins price into N_PRICE_BINS and sums volume in each price bin based on the typical price of each bar.
	•	This tells you which prices saw the most trading—useful to spot acceptance levels vs rejection zones.
    """
    tp = typical_price(df)
    v  = df["Volume"].astype(float)
    lo, hi = float(tp.min()), float(tp.max())
    if not np.isfinite(lo) or not np.isfinite(hi) or lo == hi:
        return pd.DataFrame({"PriceBin": [lo if np.isfinite(lo) else 0.0],
                             "Volume": [float(v.sum())]})
    edges = np.linspace(lo, hi, bins + 1)
    idx = np.digitize(tp.values, edges) - 1
    idx = np.clip(idx, 0, bins - 1)
    vol_by_bin = np.bincount(idx, weights=v.values, minlength=bins)
    mids = (edges[:-1] + edges[1:]) / 2.0
    prof = pd.DataFrame({"PriceBin": mids, "Volume": vol_by_bin})
    return prof.sort_values("PriceBin", ascending=True, ignore_index=True)

volume_profile(df, N_PRICE_BINS)

Unnamed: 0,PriceBin,Volume
0,137.1801,1379415.0
1,138.446968,1210553.0
2,139.713835,1314879.0
3,140.980703,6147107.0
4,142.24757,3861126.0
5,143.514437,4959106.0
6,144.781305,3993858.0
7,146.048172,2255033.0
8,147.31504,2639262.0
9,148.581907,1885088.0


In [5]:
def classify_range_fraction(df: pd.DataFrame) -> pd.DataFrame:
    """
    For each bar, compute where its typical price sits within that session’s H–L:
    RangeFrac in [0,1] and a coarse bucket: lower/middle/upper.
    """
    out = df.copy()
    tp = typical_price(out)
    day_key = out.index.tz_convert("UTC").tz_localize(None).date
    grp = out.groupby(day_key, group_keys=False)

    # Compute daily low/high and range and avoid collapsing so each interval is assigned the daily low/high
    day_low  = grp["Low"].transform("min")
    day_high = grp["High"].transform("max")
    rng = (day_high - day_low).replace(0, np.nan)

    frac = (tp - day_low) / rng
    out["TP"] = tp
    out["RangeFrac"] = frac.clip(0, 1)

    # Vectorized bucketization
    buck = np.full(len(out), "middle", dtype=object)
    buck[out["RangeFrac"] < 1/3] = "lower"
    buck[out["RangeFrac"] > 2/3] = "upper"
    out["RangeBucket"] = buck
    return out

classify_range_fraction(df)

Unnamed: 0_level_0,Close,High,Low,Open,Volume,TP,RangeFrac,RangeBucket
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-08-11 09:30:00-04:00,160.000000,161.080002,159.199997,161.005005,553982,160.093333,0.319938,lower
2025-08-11 09:31:00-04:00,159.300003,159.800003,158.050003,159.800003,65399,159.050003,0.246841,lower
2025-08-11 09:32:00-04:00,159.729996,159.889999,159.242996,159.309998,8818,159.620997,0.286845,lower
2025-08-11 09:33:00-04:00,156.550003,156.800003,156.500000,156.770004,162041,156.616669,0.076358,lower
2025-08-11 09:34:00-04:00,156.309998,156.720001,155.526794,156.550003,74034,156.185598,0.046157,lower
...,...,...,...,...,...,...,...,...
2025-08-15 15:55:00-04:00,149.395004,149.649994,149.199997,149.649994,74494,149.414998,0.831833,upper
2025-08-15 15:56:00-04:00,149.365005,149.539993,149.320007,149.419998,149615,149.408335,0.831288,upper
2025-08-15 15:57:00-04:00,149.335007,149.449997,149.300003,149.365005,77402,149.361669,0.827469,upper
2025-08-15 15:58:00-04:00,149.425003,149.600006,149.279999,149.315002,108962,149.435003,0.833470,upper


In [6]:
def up_down_volume(df: pd.DataFrame) -> pd.DataFrame:
    """
    Split bar volume into UpVol (Close>=Open) and DownVol (Close<Open).
    Proxy for buying/selling pressure without aggressor flags.
    """
    out = df.copy()
    up_mask = (out["Close"] >= out["Open"])
    out["UpVol"] = out["Volume"].where(up_mask, 0)
    out["DownVol"] = out["Volume"].where(~up_mask, 0)
    return out

up_down_volume(df)

Unnamed: 0_level_0,Close,High,Low,Open,Volume,UpVol,DownVol
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-08-11 09:30:00-04:00,160.000000,161.080002,159.199997,161.005005,553982,0,553982
2025-08-11 09:31:00-04:00,159.300003,159.800003,158.050003,159.800003,65399,0,65399
2025-08-11 09:32:00-04:00,159.729996,159.889999,159.242996,159.309998,8818,8818,0
2025-08-11 09:33:00-04:00,156.550003,156.800003,156.500000,156.770004,162041,0,162041
2025-08-11 09:34:00-04:00,156.309998,156.720001,155.526794,156.550003,74034,0,74034
...,...,...,...,...,...,...,...
2025-08-15 15:55:00-04:00,149.395004,149.649994,149.199997,149.649994,74494,0,74494
2025-08-15 15:56:00-04:00,149.365005,149.539993,149.320007,149.419998,149615,0,149615
2025-08-15 15:57:00-04:00,149.335007,149.449997,149.300003,149.365005,77402,0,77402
2025-08-15 15:58:00-04:00,149.425003,149.600006,149.279999,149.315002,108962,108962,0


In [7]:

def rvol_daily(symbol: str, tz: str, lookback: int = 20) -> pd.DataFrame:
    """
    Calculate Relative Volume (RVOL) for a stock based on daily bars.

    RVOL is a measure of how today's trading volume compares to its historical average.
    - Formula: RVOL = Today's Volume / Average Volume over 'lookback' days.
    - This can help detect unusual trading activity.

    Parameters
    ----------
    symbol : str
        Stock ticker symbol (e.g., "AAPL").
    tz : str
        Timezone string for converting timestamps (e.g., "America/New_York").
    lookback : int, default=20
        Number of days to average when calculating historical volume.

    Returns
    -------
    pd.DataFrame
        DataFrame containing:
            - Close: Closing price
            - Volume: Daily trading volume
            - AvgVol: Rolling average volume over 'lookback' days
            - RVOL: Relative Volume
    """
    
    # Download at least 3x the lookback period (or 60 days minimum) to ensure enough history
    period_days = max(lookback * 3, 60)
    dfd = fetch(symbol, "1d", f"{period_days}d", tz, False)
    
    # If no data is returned, stop early
    if dfd.empty:
        return pd.DataFrame()

    # Flatten column names in case Yahoo returns a MultiIndex
    if isinstance(dfd.columns, pd.MultiIndex):
        dfd.columns = ['_'.join([str(c) for c in col if c]).strip()
                       for col in dfd.columns]
    else:
        dfd.columns = [str(c) for c in dfd.columns]

    # Normalize column names to Title case (e.g., "Open", "High", "Low", "Close", "Volume")
    dfd.columns = [c.title() for c in dfd.columns]

    # Ensure index is timezone-aware and convert to desired timezone
    dfd.index = pd.to_datetime(dfd.index, utc=True).tz_convert(tz)

    # Calculate rolling average volume
    # min_periods allows calculation to start earlier (half of lookback days)
    dfd["AvgVol"] = dfd["Volume"].rolling(lookback, min_periods=lookback//2).mean()

    # Calculate Relative Volume
    dfd["Rvol"] = dfd["Volume"] / dfd["AvgVol"]

    # Keep only relevant columns
    return dfd[["Close", "Volume", "AvgVol", "Rvol"]]

rvol_daily("CRCL", "America/New_York")

Unnamed: 0_level_0,Close,Volume,AvgVol,Rvol
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-06-04 20:00:00-04:00,83.230003,47192000,,
2025-06-05 20:00:00-04:00,107.699997,60706300,,
2025-06-08 20:00:00-04:00,115.25,45521100,,
2025-06-09 20:00:00-04:00,105.910004,25517800,,
2025-06-10 20:00:00-04:00,117.199997,26481400,,
2025-06-11 20:00:00-04:00,106.540001,13416500,,
2025-06-12 20:00:00-04:00,133.559998,34013600,,
2025-06-15 20:00:00-04:00,151.059998,43781500,,
2025-06-16 20:00:00-04:00,149.149994,31234200,,
2025-06-17 20:00:00-04:00,199.589996,63566800,39143120.0,1.623958


In [8]:
# ---------- Main analysis ----------
def main():
    # Expect these globals to be defined elsewhere in your script:
    # SYMBOL, INTERVAL, PERIOD, TZ, INCLUDE_EXTENDED, N_PRICE_BINS, RVOL_LOOKBACK_DAYS
    df = fetch(SYMBOL, INTERVAL, PERIOD, TZ, INCLUDE_EXTENDED)

    # Intraday gets per-session VWAP; daily leaves VWAP NaN (bar aggregates)
    if INTERVAL.endswith(("m", "h")):
        df["VWAP"] = vwap_by_day(df)
    else:
        df["VWAP"] = np.nan

    # Up/Down volume & range position
    df2 = up_down_volume(df)
    df3 = classify_range_fraction(df2)

    # Today mask (last session)
    day_key = df3.index.tz_convert("UTC").tz_localize(None).date
    last_day = day_key[-1]
    today = df3[day_key == last_day]

    # Volume profiles
    prof_all  = volume_profile(df3, bins=N_PRICE_BINS)
    prof_last = volume_profile(today, bins=max(10, N_PRICE_BINS // 2)) if not today.empty else pd.DataFrame()

    # Per-session bucketed volume
    bucket_vol = df3.groupby([day_key, "RangeBucket"])["Volume"].sum().unstack(fill_value=0)
    bucket_vol["Total"] = bucket_vol.sum(axis=1)
    for k in ("upper","middle","lower"):
        if k not in bucket_vol.columns:
            bucket_vol[k] = 0
    bucket_vol["UpperPct"]  = bucket_vol["upper"]  / bucket_vol["Total"].replace(0, np.nan)
    bucket_vol["MiddlePct"] = bucket_vol["middle"] / bucket_vol["Total"].replace(0, np.nan)
    bucket_vol["LowerPct"]  = bucket_vol["lower"]  / bucket_vol["Total"].replace(0, np.nan)

    # Up/Down volume by day
    ud_day = df3.groupby(day_key)[["UpVol","DownVol","Volume"]].sum()
    ud_day["UpPct"] = ud_day["UpVol"] / ud_day["Volume"].replace(0, np.nan)
    ud_day["DownPct"] = ud_day["DownVol"] / ud_day["Volume"].replace(0, np.nan)

    # Daily RVOL context
    dfd = rvol_daily(SYMBOL, TZ, RVOL_LOOKBACK_DAYS)

    # ----- Prints -----
    pd.set_option("display.float_format", lambda x: f"{x:,.4f}")
    print(f"\n=== DATA SUMMARY: {SYMBOL} [{PERIOD}/{INTERVAL}] tz={TZ} prepost={INCLUDE_EXTENDED} ===")
    print(df.tail(3)[["Open","High","Low","Close","Volume","VWAP"]])

    if not today.empty:
        t_lo, t_hi = float(today["Low"].min()), float(today["High"].max())
        print("\n--- Today’s Range & Concentration ---")
        print(f"Day Low/High: {t_lo:.2f} / {t_hi:.2f}")
        print("Volume by bucket (upper/middle/lower) for recent sessions:")
        print(bucket_vol.tail(3)[["UpperPct","MiddlePct","LowerPct"]])
        upct = bucket_vol.iloc[-1]["UpperPct"]
        if np.isfinite(upct):
            print(f"Today: {upct:.1%} of volume in the UPPER third of range "
                  f"({'bullish' if upct > 0.5 else 'neutral/bearish'})")

    print("\n--- Up/Down Volume by Day (last 5 sessions) ---")
    print(ud_day.tail(5)[["UpVol","DownVol","Volume","UpPct","DownPct"]])

    print("\n--- Volume Profile (All, top 10 bins) ---")
    print(prof_all.sort_values("Volume", ascending=False).head(10))

    if not prof_last.empty:
        print("\n--- Volume Profile (Last Session, top 5 bins) ---")
        print(prof_last.sort_values("Volume", ascending=False).head(5))

    if not dfd.empty:
        last = dfd.iloc[-1]
        print("\n--- Daily RVOL Context ---")
        print(dfd.tail(5))
        print(f"Latest RVOL ({RVOL_LOOKBACK_DAYS}d): {last['RVOL']:.2f} "
              f"(Vol {last['Volume']:,.0f} vs Avg {last['AvgVol']:,.0f})")

    # ----- Save CSVs -----
    from datetime import datetime as _dt
    ts = _dt.now().strftime("%Y%m%d_%H%M%S")
    df3.to_csv(f"{SYMBOL}_{INTERVAL}_{PERIOD}_bars_with_features_{ts}.csv")
    prof_all.to_csv(f"{SYMBOL}_{INTERVAL}_{PERIOD}_volume_profile_all_{ts}.csv", index=False)
    if not prof_last.empty:
        prof_last.to_csv(f"{SYMBOL}_{INTERVAL}_{PERIOD}_volume_profile_last_{ts}.csv", index=False)
    bucket_vol.to_csv(f"{SYMBOL}_{INTERVAL}_{PERIOD}_bucket_volume_by_day_{ts}.csv")
    ud_day.to_csv(f"{SYMBOL}_{INTERVAL}_{PERIOD}_updown_volume_by_day_{ts}.csv")
    if not dfd.empty:
        dfd.to_csv(f"{SYMBOL}_daily_rvol_{ts}.csv")

    print("\nCSV files saved to current directory.")
    
    
## run it
main()


=== DATA SUMMARY: CRCL [5d/1m] tz=America/New_York prepost=False ===
                              Open     High      Low    Close  Volume     VWAP
Datetime                                                                      
2025-08-15 15:57:00-04:00 149.3650 149.4500 149.3000 149.3350   77402 145.1541
2025-08-15 15:58:00-04:00 149.3150 149.6000 149.2800 149.4250  108962 145.1757
2025-08-15 15:59:00-04:00 149.4050 149.8100 149.2500 149.4000  165297 145.2086

--- Today’s Range & Concentration ---
Day Low/High: 139.25 / 151.47
Volume by bucket (upper/middle/lower) for recent sessions:
RangeBucket  UpperPct  MiddlePct  LowerPct
2025-08-13     0.0820     0.4775    0.4406
2025-08-14     0.1582     0.4467    0.3951
2025-08-15     0.3461     0.2670    0.3869
Today: 34.6% of volume in the UPPER third of range (neutral/bearish)

--- Up/Down Volume by Day (last 5 sessions) ---
               UpVol   DownVol    Volume  UpPct  DownPct
2025-08-11   5709338   5726151  11435489 0.4993   0.5007
202

KeyError: 'RVOL'