# In [11]:  (Jupyter notebook cell prompt, kept as a comment)
# core_data_pipeline.py
import math
import numpy as np
import pandas as pd
import yfinance as yf

# -----------------------------
# Helpers: EMA and Wilder EMA
# -----------------------------
def ema(series: pd.Series, span: int) -> pd.Series:
    """Return the exponential moving average of *series*.

    The smoothing factor is ``2 / (span + 1)`` and the recursive
    (``adjust=False``) form of the exponentially weighted mean is used.
    """
    smoothing = 2 / (span + 1)
    weighted = series.ewm(alpha=smoothing, adjust=False)
    return weighted.mean()

def wilder_ema(series: pd.Series, period: int) -> pd.Series:
    """Return Wilder's smoothing of *series*.

    Each value is ``previous + (1 / period) * (current - previous)``,
    i.e. a recursive exponentially weighted mean with ``alpha = 1 / period``.
    """
    alpha = 1 / period
    return series.ewm(alpha=alpha, adjust=False).mean()

# -----------------------------
# RSI (Wilder, default 14)
# -----------------------------
def rsi_wilder(close: pd.Series, period: int = 14) -> pd.Series:
    """Return the Relative Strength Index of *close* using Wilder smoothing.

    Gains and losses are the positive and negative parts of the
    bar-to-bar change in *close*; each is smoothed with ``wilder_ema`` and
    ``RSI = 100 - 100 / (1 + avg_gain / avg_loss)``.

    Special cases:
      * no losses but some gains (``avg_loss == 0`` and ``avg_gain > 0``):
        RSI is 100;
      * still-undefined values (e.g. the first bar, which has no previous
        close, or a completely flat series) are returned as 0.
    """
    delta = close.diff()
    up = delta.clip(lower=0)
    down = -delta.clip(upper=0)
    avg_up = wilder_ema(up, period)
    avg_down = wilder_ema(down, period)
    # Replace a zero average loss by NaN so the division never yields inf.
    rs = avg_up / avg_down.replace(0, np.nan)
    rsi = 100 - (100 / (1 + rs))
    # A zero average loss with positive average gain means "only gains":
    # the ratio is unbounded and RSI saturates at 100. Without this the
    # NaN produced above would be turned into 0 (maximally oversold) below.
    only_gains = (avg_down == 0) & (avg_up > 0)
    rsi = rsi.mask(only_gains, 100.0)
    return rsi.fillna(0)

# -----------------------------
# MACD (12, 26, 9)
# -----------------------------
def macd(close: pd.Series, fast=12, slow=26, signal=9):
    """Return ``(macd_line, signal_line, histogram)`` for *close*.

    The MACD line is the fast EMA minus the slow EMA, the signal line is
    the EMA of the MACD line over *signal* bars, and the histogram is the
    MACD line minus the signal line.
    """
    macd_line = ema(close, fast) - ema(close, slow)
    signal_line = ema(macd_line, signal)
    return macd_line, signal_line, macd_line - signal_line

# -----------------------------
# Bollinger Bands (20, 2)
# -----------------------------
def bollinger_bands(close: pd.Series, period=20, mult=2.0):
    """Return ``(middle, upper, lower)`` Bollinger Bands for *close*.

    The middle band is the rolling mean over *period* bars; the upper and
    lower bands sit *mult* rolling standard deviations above and below it.
    The standard deviation is whatever ``Series.rolling(...).std()``
    computes with its default arguments. Bars without a full window are NaN.
    """
    window = close.rolling(period)
    middle = window.mean()
    half_width = mult * window.std()
    return middle, middle + half_width, middle - half_width

# -----------------------------
# ATR (Wilder, default 14)
# -----------------------------
def true_range(high: pd.Series, low: pd.Series, close: pd.Series) -> pd.Series:
    """Return the true range of each bar.

    The true range is the largest of: high - low, |high - previous close|
    and |low - previous close|. The row-wise maximum skips NaN, so the
    first bar (which has no previous close) falls back to high - low.
    """
    prior_close = close.shift(1)
    candidates = [
        high - low,
        (high - prior_close).abs(),
        (low - prior_close).abs(),
    ]
    stacked = pd.concat(candidates, axis=1)
    return stacked.max(axis=1)

def atr_wilder(high: pd.Series, low: pd.Series, close: pd.Series, period: int = 14) -> pd.Series:
    """Return the Average True Range: the true range smoothed with Wilder's EMA."""
    return wilder_ema(true_range(high, low, close), period)

# -----------------------------
# Pivot Points (Classic) from previous day
# -----------------------------
def daily_pivot_levels(df: pd.DataFrame) -> pd.DataFrame:
    """Return classic pivot levels computed from the previous row's H/L/C.

    Expects daily bars with 'High', 'Low' and 'Close' columns. The result
    shares ``df``'s index and has columns P, R1, S1, R2, S2; the first row
    is NaN because it has no previous bar.
    """
    prev_high = df['High'].shift(1)
    prev_low = df['Low'].shift(1)
    prev_close = df['Close'].shift(1)

    pivot = (prev_high + prev_low + prev_close) / 3.0
    prev_range = prev_high - prev_low

    return pd.DataFrame(
        {
            'P': pivot,
            'R1': 2*pivot - prev_low,
            'S1': 2*pivot - prev_high,
            'R2': pivot + prev_range,
            'S2': pivot - prev_range,
        },
        index=df.index,
    )

# -----------------------------
# Fractal pivots (swing highs/lows)
# -----------------------------
def fractal_pivots(df: pd.DataFrame, left=2, right=2):
    """Flag swing highs and lows (fractal pivots) in *df*.

    Bar ``i`` is a pivot high when its 'High' equals the maximum 'High' of
    the window ``[i - left, i + right]``, and a pivot low when its 'Low'
    equals the minimum 'Low' of that window. The first *left* and last
    *right* bars are never flagged because their window is incomplete.

    Returns a DataFrame on ``df``'s index with boolean columns
    'pivot_high' / 'pivot_low' and price columns 'ph_price' / 'pl_price'
    (the pivot's High / Low, NaN where the bar is not a pivot).
    """
    high_values = df['High'].values
    low_values = df['Low'].values
    count = len(df)

    is_high = np.zeros(count, dtype=bool)
    is_low = np.zeros(count, dtype=bool)

    for center in range(left, count - right):
        window = slice(center - left, center + right + 1)
        # Equality (not strict inequality) means ties inside a window are
        # all flagged as pivots.
        is_high[center] = high_values[center] == max(high_values[window])
        is_low[center] = low_values[center] == min(low_values[window])

    result = pd.DataFrame({'pivot_high': is_high, 'pivot_low': is_low}, index=df.index)
    result['ph_price'] = np.where(result['pivot_high'], df['High'], np.nan)
    result['pl_price'] = np.where(result['pivot_low'], df['Low'], np.nan)
    return result

# -----------------------------
# Clustered Support/Resistance from pivots
# -----------------------------
def cluster_levels(prices: pd.Series, tolerance_frac=0.002, min_touches=3):
    """Group nearby prices into levels and keep the well-supported ones.

    NaNs are dropped and the remaining prices are walked in ascending
    order. A price joins the current cluster when it lies within
    ``tolerance_frac`` (a fraction of price, e.g. 0.002 = 0.2%) of the
    cluster's most recently added price; otherwise it starts a new cluster.

    Returns a list of ``{'level': mean price, 'touches': member count}``
    dicts, in ascending price order, for clusters with at least
    *min_touches* members. Returns ``[]`` when there are no prices.
    """
    ordered = prices.dropna().sort_values().values
    if len(ordered) == 0:
        return []

    groups = []
    current = [ordered[0]]
    for price in ordered[1:]:
        anchor = current[-1]
        if abs(price - anchor) <= tolerance_frac * anchor:
            current.append(price)
        else:
            groups.append(current)
            current = [price]
    groups.append(current)

    return [
        {'level': float(np.mean(group)), 'touches': int(len(group))}
        for group in groups
        if len(group) >= min_touches
    ]

def support_resistance_from_fractals(df: pd.DataFrame, piv: pd.DataFrame, atr_col='ATR', atr_mult=0.0, min_touches=3, tolerance_frac=0.002):
    """Cluster fractal pivot prices into support and resistance levels.

    Parameters
    ----------
    df, atr_col, atr_mult:
        Reserved for an ATR-based clustering tolerance. That mode is not
        implemented yet, so these arguments currently have no effect on
        the result; clustering is always percentage based.
    piv:
        Output of ``fractal_pivots``; its 'ph_price' and 'pl_price'
        columns supply the pivot-high and pivot-low prices.
    min_touches:
        Minimum number of pivots a cluster needs to count as a level.
    tolerance_frac:
        Clustering tolerance as a fraction of price (0.002 = 0.2%),
        forwarded to ``cluster_levels``.

    Returns
    -------
    dict
        ``{'resistance': [...], 'support': [...]}`` where each list holds
        the ``{'level', 'touches'}`` dicts produced by ``cluster_levels``.
    """
    # NOTE: the previous version computed ``atr_mult * df[atr_col]`` here and
    # then discarded it; the dead computation is removed until a per-bar
    # ATR tolerance is actually implemented.
    return {
        'resistance': cluster_levels(piv['ph_price'], tolerance_frac=tolerance_frac, min_touches=min_touches),
        'support': cluster_levels(piv['pl_price'], tolerance_frac=tolerance_frac, min_touches=min_touches),
    }

# -----------------------------
# Fibonacci retracements
# -----------------------------
def fib_levels_from_swings(swing_high: float, swing_low: float):
    """Return Fibonacci retracement levels between two swing prices.

    The larger of the two prices is the 0% level and the smaller is the
    100% level, so the arguments may be passed in either order; every
    intermediate level lies between them at ``top - ratio * (top - bottom)``.

    Returns a dict keyed by percentage label. Labels are built with
    ``int(ratio * 100)`` and are therefore truncated: '23%', '38%', '50%',
    '61%', '78%' (for 0.236, 0.382, 0.5, 0.618, 0.786), followed by '0%'
    and '100%'.
    """
    ratios = [0.236, 0.382, 0.5, 0.618, 0.786]
    if swing_high >= swing_low:
        top, bottom = swing_high, swing_low
    else:
        # Arguments are inverted: still retrace from the upper price down to
        # the lower one. (Previously this branch added ratio * diff to the
        # upper price, placing every level outside the 0%-100% range.)
        top, bottom = swing_low, swing_high
    diff = top - bottom
    levels = {f'{int(r*100)}%': top - r*diff for r in ratios}
    levels['0%'] = top
    levels['100%'] = bottom
    return levels

def last_swing_from_fractals(df: pd.DataFrame, piv: pd.DataFrame, lookback=200):
    """Return ``(swing_high, swing_low)`` prices of the most recent swing.

    Only the last *lookback* rows of *df* are considered. The most recent
    pivot (high or low, by index label) is paired with the nearest earlier
    pivot of the opposite kind; the returned prices are that pivot high's
    'High' and that pivot low's 'Low'.

    Parameters
    ----------
    df:
        Price frame with 'High' and 'Low' columns.
    piv:
        Frame on the same index with boolean 'pivot_high' / 'pivot_low'
        columns (as produced by ``fractal_pivots``).
    lookback:
        Number of trailing rows to search.

    Returns
    -------
    tuple
        ``(float, float)``, or ``(None, None)`` when the window is empty or
        does not contain a usable high/low pair.
    """
    sub_piv = piv.loc[df.tail(lookback).index]
    high_idx = sub_piv.loc[sub_piv['pivot_high']].index
    low_idx = sub_piv.loc[sub_piv['pivot_low']].index

    # A swing needs one pivot of each kind; with either missing there is
    # nothing to pair the most recent pivot with.
    if high_idx.empty or low_idx.empty:
        return None, None

    recent_ph = high_idx.max()
    recent_pl = low_idx.max()

    if recent_ph > recent_pl:
        # Last pivot is a high. Every pivot low is at or before recent_pl,
        # which precedes recent_ph, so recent_pl is the preceding low.
        return float(df.loc[recent_ph, 'High']), float(df.loc[recent_pl, 'Low'])

    # Last pivot is a low (or the same bar is both a high and a low): pair
    # it with the latest pivot high strictly before it.
    earlier_highs = high_idx[high_idx < recent_pl]
    if earlier_highs.empty:
        return None, None
    prev_ph = earlier_highs.max()
    return float(df.loc[prev_ph, 'High']), float(df.loc[recent_pl, 'Low'])

# -----------------------------
# Fetch and assemble feature set
# -----------------------------
def fetch_ohlcv(symbol: str, period="5y", interval="1d") -> pd.DataFrame:
    """Download price history for *symbol* via yfinance and return OHLCV bars.

    History is requested with ``auto_adjust=False``. Column names are
    title-cased, only Open/High/Low/Close/Volume are kept, and rows with a
    missing value in any of those columns are dropped.
    """
    wanted = ['Open', 'High', 'Low', 'Close', 'Volume']
    history = yf.Ticker(symbol).history(period=period, interval=interval, auto_adjust=False)
    titled = history.rename(columns=str.title)
    return titled[wanted].dropna()

def build_features(df: pd.DataFrame) -> tuple:
    """Compute the indicator feature set for an OHLCV frame.

    Parameters
    ----------
    df:
        Bars with 'High', 'Low' and 'Close' columns (daily bars are
        expected by the pivot-level calculation). The input is not modified.

    Returns
    -------
    tuple
        ``(features, sr, fib)`` where

        * ``features`` is a copy of *df* with added columns RSI14, MACD,
          MACD_SIGNAL, MACD_HIST, BB_MID_20, BB_UPPER_20_2, BB_LOWER_20_2,
          ATR14, the classic pivot levels (P, R1, S1, R2, S2) and the
          fractal pivot columns (pivot_high, pivot_low, ph_price, pl_price);
        * ``sr`` is the ``{'resistance': [...], 'support': [...]}`` dict
          from ``support_resistance_from_fractals``;
        * ``fib`` is the dict of Fibonacci levels for the last swing, or
          ``{}`` when no swing could be identified.
    """
    out = df.copy()
    out['RSI14'] = rsi_wilder(out['Close'], 14)
    macd_line, signal_line, hist = macd(out['Close'], 12, 26, 9)
    out['MACD'] = macd_line
    out['MACD_SIGNAL'] = signal_line
    out['MACD_HIST'] = hist
    sma20, bb_u, bb_l = bollinger_bands(out['Close'], 20, 2.0)
    out['BB_MID_20'] = sma20
    out['BB_UPPER_20_2'] = bb_u
    out['BB_LOWER_20_2'] = bb_l
    out['ATR14'] = atr_wilder(out['High'], out['Low'], out['Close'], 14)
    piv = daily_pivot_levels(out)
    out = out.join(piv)
    # NOTE: a fractal pivot at bar i is decided using the 2 bars after i
    # (right=2), so these columns are only known 2 bars later.
    pivots = fractal_pivots(out, left=2, right=2)
    out = out.join(pivots)
    # Basic S/R summary (not columns): returned as metadata. atr_mult=0.0
    # keeps the percentage-based clustering tolerance.
    sr = support_resistance_from_fractals(out, pivots, atr_col='ATR14', atr_mult=0.0, min_touches=3)
    # Fibonacci levels from the last swing, if one exists.
    sh, sl = last_swing_from_fractals(out, pivots, lookback=200)
    fib = fib_levels_from_swings(sh, sl) if (sh is not None and sl is not None) else {}
    return out, sr, fib

import os

# Script entry point: for each symbol, download history, build the feature
# set and write four CSV files (features, support levels, resistance
# levels, Fibonacci levels) into data_dir.
if __name__ == "__main__":
    symbols = ["HDFCBANK.NS"]

    # ensure data folder exists (path is relative to the current working directory)
    data_dir = "Projects/Major_Project/Data"
    os.makedirs(data_dir, exist_ok=True)

    for s in symbols:
        df = fetch_ohlcv(s, period="5y", interval="1d")
        # build_features returns (feature frame, S/R dict, Fibonacci dict)
        feats, sr, fib = build_features(df)

        # save features CSV (index column is kept)
        feats_path = os.path.join(data_dir, f"{s}_features.csv")
        feats.to_csv(feats_path, index=True)

        # save support levels CSV (one row per level; no rows if none were found)
        support_path = os.path.join(data_dir, f"{s}_support_levels.csv")
        pd.DataFrame(sr.get('support', [])).to_csv(support_path, index=False)

        # save resistance levels CSV (one row per level; no rows if none were found)
        resistance_path = os.path.join(data_dir, f"{s}_resistance_levels.csv")
        pd.DataFrame(sr.get('resistance', [])).to_csv(resistance_path, index=False)

        # save Fibonacci levels CSV (level label as index, price as value)
        fib_path = os.path.join(data_dir, f"{s}_fibonacci_levels.csv")
        pd.Series(fib).to_csv(fib_path, index=True)

        print(f"Saved data for {s} in {data_dir}/")


# Captured notebook output: Saved data for HDFCBANK.NS in ./Data/


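# Captured notebook warning output (appears to be the source line echoed by a
# pandas UserWarning about a boolean Series key being reindexed):
#   prev_pl = sub_piv[sub_piv.index < recent_ph][sub_piv['pivot_low']].index.max()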
