<a href="https://colab.research.google.com/github/Swagat1342/Enhanced-Stock-Prediction-System-with-Risk-Management/blob/main/finance_forecast.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install numpy pandas yfinance scikit-learn tensorflow plotly



In [None]:
pip install alpha_vantage finnhub-python polygon-api-client

Collecting alpha_vantage
  Downloading alpha_vantage-3.0.0-py3-none-any.whl.metadata (12 kB)
Collecting finnhub-python
  Downloading finnhub_python-2.4.25-py3-none-any.whl.metadata (9.2 kB)
Collecting polygon-api-client
  Downloading polygon_api_client-1.15.4-py3-none-any.whl.metadata (1.0 kB)
Collecting websockets<15.0,>=10.3 (from polygon-api-client)
  Downloading websockets-14.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Downloading alpha_vantage-3.0.0-py3-none-any.whl (35 kB)
Downloading finnhub_python-2.4.25-py3-none-any.whl (11 kB)
Downloading polygon_api_client-1.15.4-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.6/54.6 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading websockets-14.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (170 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32

In [None]:
pip install --upgrade yfinance




In [None]:
pip install yfinance ta pandas numpy plotly


Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ta
  Building wheel for ta (setup.py) ... [?25l[?25hdone
  Created wheel for ta: filename=ta-0.11.0-py3-none-any.whl size=29412 sha256=40aa2ab68a0a2548c04a231b69f7ae1b4ed916b584d7717394063affd81bbabf
  Stored in directory: /root/.cache/pip/wheels/5c/a1/5f/c6b85a7d9452057be4ce68a8e45d77ba34234a6d46581777c6
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.11.0


In [None]:
# ====== Standard Python & Data Libraries ======
import os                       # Read API credentials from environment variables
import requests                 # For HTTP requests, useful for APIs or custom data fetching
from requests.adapters import HTTPAdapter       # To configure retry strategy for HTTP requests
from urllib3.util.retry import Retry             # Retry logic for stable HTTP connections

import pandas as pd             # Data manipulation and analysis
import numpy as np              # Numerical computing with arrays

from datetime import datetime, timedelta    # Date/time utilities

# ====== Financial Data and ML Libraries ======
import yfinance as yf           # Yahoo Finance API for stock data

from sklearn.preprocessing import StandardScaler, RobustScaler  # Feature scaling for ML
from sklearn.feature_selection import mutual_info_regression, SelectKBest  # Feature selection

# ====== Visualization Libraries ======
import plotly.graph_objects as go    # For interactive plotting
from plotly.subplots import make_subplots  # For creating subplot layouts

# ====== Technical Analysis ======
import ta                        # Technical indicators library

# ====== Other Settings ======
import warnings                  # Handle warnings gracefully
warnings.filterwarnings('ignore')  # Suppress warnings for cleaner output


In [None]:
# ======================================
# 🔑 API Keys Configuration
# ======================================

# Credentials are read from environment variables so real keys never have to be
# written into (and committed with) the notebook. When a variable is not set the
# original placeholder string is used, so the mapping always has the same keys.
API_KEYS = {
    'ALPHA_VANTAGE': os.environ.get('ALPHA_VANTAGE_API_KEY', "YOUR_ALPHA_VANTAGE_API_KEY"),  # Alpha Vantage
    'IEX_CLOUD': os.environ.get('IEX_CLOUD_API_KEY', "YOUR_IEX_CLOUD_API_KEY"),              # IEX Cloud
    'FINNHUB': os.environ.get('FINNHUB_API_KEY', "YOUR_FINNHUB_API_KEY"),                    # Finnhub
    'POLYGON': os.environ.get('POLYGON_API_KEY', "YOUR_POLYGON_API_KEY"),                    # Polygon.io
}



In [None]:
# ======================================
# 📊 Stock Data Fetcher Class
# ======================================
class StockDataFetcher:
    """Fetch OHLCV data for one ticker from several providers, with fallback.

    Each ``fetch_*`` method prints its progress, returns a ``pandas.DataFrame``
    with lower-case ``open``/``high``/``low``/``close``/``volume`` columns on
    success and ``None`` on any failure (errors are reported, never raised).
    ``last_source`` holds the display name of the provider that last succeeded.
    """

    def __init__(self, api_keys=None):
        # Use the supplied keys, or fall back to the notebook-level API_KEYS mapping
        self.api_keys = api_keys or API_KEYS
        # Session with retry logic so transient HTTP failures are retried automatically
        self.session = self._make_session()
        # Display name of the last provider that returned data
        self.last_source = None

    def _make_session(self):
        """Build a ``requests.Session`` that retries HTTPS calls on transient errors."""
        session = requests.Session()
        retries = Retry(total=3,                    # Retry up to 3 times
                        backoff_factor=1,           # Wait time multiplier between retries
                        status_forcelist=[429, 500, 502, 503, 504])  # Retry on these HTTP errors
        session.mount('https://', HTTPAdapter(max_retries=retries))
        return session

    def fetch_alpha_vantage(self, symbol, interval='5min', outputsize='compact'):
        """Fetch stock data from Alpha Vantage API with time series intraday."""
        print(f"\n🔹 Trying Alpha Vantage for {symbol}...")
        try:
            # Let requests build the query string so symbol and key are URL-encoded
            params = {
                "function": "TIME_SERIES_INTRADAY",
                "symbol": symbol,
                "interval": interval,
                "apikey": self.api_keys['ALPHA_VANTAGE'],
                "outputsize": outputsize,
            }
            r = self.session.get("https://www.alphavantage.co/query", params=params, timeout=10)
            data = r.json()

            ts_key = f"Time Series ({interval})"  # Key that contains time series data

            # Without the series key the payload is a message from the service;
            # surface whichever message field is present instead of "Unknown".
            if ts_key not in data:
                reason = (data.get('Note') or data.get('Information')
                          or data.get('Error Message') or 'Unknown')
                print(f"❌ Alpha Vantage Error: {reason}")
                return None

            # Convert time series dictionary to DataFrame and set datetime index
            df = pd.DataFrame.from_dict(data[ts_key], orient="index").astype(float)
            df.index = pd.to_datetime(df.index)

            # Rename columns to standard format
            df = df.rename(columns={
                "1. open": "open", "2. high": "high",
                "3. low": "low", "4. close": "close",
                "5. volume": "volume"
            })

            print(f"✅ Alpha Vantage Success — {len(df)} records")
            self.last_source = "Alpha Vantage"
            return df.sort_index()  # Make sure data is in chronological order
        except Exception as e:
            print(f"❌ Alpha Vantage failed: {e}")
            return None

    def fetch_polygon(self, symbol, start_date=None, end_date=None, timespan='minute', multiplier=5):
        """Fetch stock data from Polygon.io aggregates endpoint."""
        print(f"\n🔹 Trying Polygon.io for {symbol}...")
        try:
            # Default to last 5 days if dates not provided
            if not start_date:
                start_date = (datetime.now() - timedelta(days=5)).strftime('%Y-%m-%d')
            if not end_date:
                end_date = datetime.now().strftime('%Y-%m-%d')

            # The range is part of the path; the key goes through `params` so it is encoded
            url = (
                f"https://api.polygon.io/v2/aggs/ticker/{symbol}/range/"
                f"{multiplier}/{timespan}/{start_date}/{end_date}"
            )
            r = self.session.get(url, params={"apiKey": self.api_keys['POLYGON']}, timeout=10)
            data = r.json()

            # A missing OR empty 'results' list both mean there is nothing to build a frame from
            results = data.get("results")
            if not results:
                print(f"❌ Polygon.io Error: {data.get('error', 'No results')}")
                return None

            df = pd.DataFrame(results)

            # Convert timestamp from milliseconds to datetime
            df["t"] = pd.to_datetime(df["t"], unit="ms")
            df = df.rename(columns={
                "t": "timestamp", "o": "open", "h": "high",
                "l": "low", "c": "close", "v": "volume"
            })

            # Set timestamp as index for time series processing
            df = df.set_index("timestamp").sort_index()
            print(f"✅ Polygon.io Success — {len(df)} records")
            self.last_source = "Polygon.io"
            return df
        except Exception as e:
            print(f"❌ Polygon.io failed: {e}")
            return None

    def fetch_finnhub(self, symbol):
        """Fetch current stock quote from Finnhub API (a single-row snapshot, no history)."""
        print(f"\n🔹 Trying Finnhub for {symbol}...")
        try:
            r = self.session.get(
                "https://finnhub.io/api/v1/quote",
                params={"symbol": symbol, "token": self.api_keys['FINNHUB']},
                timeout=10,
            )
            data = r.json()

            # A missing, null or zero current price ('c') is treated as "no data"
            if not data.get("c"):
                print(f"❌ Finnhub Error: No data available")
                return None

            # Single-row frame stamped with the local fetch time
            df = pd.DataFrame([{
                "open": data.get("o"),
                "high": data.get("h"),
                "low": data.get("l"),
                "close": data.get("c"),
                "volume": float("nan")  # Not provided by this endpoint; NaN keeps the column numeric
            }], index=[datetime.now()])

            print(f"✅ Finnhub Success — Current ${data.get('c')}")
            self.last_source = "Finnhub"
            return df
        except Exception as e:
            print(f"❌ Finnhub failed: {e}")
            return None

    def fetch_yfinance(self, symbol, period="5d", interval="5m"):
        """Fallback fetch stock data from Yahoo Finance."""
        print(f"\n🔹 Trying Yahoo Finance for {symbol}...")
        try:
            df = yf.download(symbol, period=period, interval=interval, progress=False)

            if df.empty:
                print("❌ Yahoo Finance returned no data")
                return None

            # For multi-level columns (e.g., multi-ticker), drop the extra level
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.droplevel(1)

            # Lowercase all column names for consistency with the other providers
            df.columns = [col.lower() if isinstance(col, str) else col[0].lower() for col in df.columns]

            print(f"✅ Yahoo Finance Success — {len(df)} records")
            self.last_source = "Yahoo Finance"
            return df
        except Exception as e:
            print(f"❌ Yahoo Finance failed: {e}")
            return None

    def get_stock_data(self, symbol, prefer_source=None):
        """
        Fetch stock data using preferred source or fallback through all APIs.

        Args:
            symbol (str): Stock ticker symbol.
            prefer_source (str): Preferred data source ('alpha_vantage', 'polygon', 'finnhub', or 'yfinance').

        Returns:
            pd.DataFrame or None: Stock data DataFrame or None if all fail.
        """
        # Fallback order: providers that return a price history come first. Finnhub's
        # quote endpoint yields a single row, so it is only used as the last resort
        # (it can still be requested explicitly through `prefer_source`).
        fetchers = {
            'alpha_vantage': self.fetch_alpha_vantage,
            'polygon': self.fetch_polygon,
            'yfinance': self.fetch_yfinance,
            'finnhub': self.fetch_finnhub
        }

        if prefer_source and prefer_source not in fetchers:
            print(f"⚠️ Unknown prefer_source '{prefer_source}' — trying all sources.")

        # Preferred source first (when valid), then every remaining source in order
        order = [prefer_source] if prefer_source in fetchers else []
        order += [name for name in fetchers if name != prefer_source]

        for name in order:
            df = fetchers[name](symbol)
            if df is not None and not df.empty:
                print(f"\n✅ Data fetched successfully from: {self.last_source}")
                return df

        # All sources failed, prompt user to check credentials and connectivity
        print("❌ All APIs failed — please check keys and connection.")
        return None


In [None]:
# NOTE: `add_technical_indicators` is defined in a later cell of this notebook, so that
# cell must be run before this one. `plot_technical_analysis` is expected to be defined
# elsewhere as well — TODO confirm it exists before running.

# Instantiate the StockDataFetcher with your API keys dictionary
fetcher = StockDataFetcher(api_keys=API_KEYS)

# Fetch data for SBI stock symbol, optionally specify preferred data source
symbol = 'SBIN.NS'  # NSE ticker for State Bank of India
df_stock = fetcher.get_stock_data(symbol, prefer_source='alpha_vantage')

# Check if data was fetched successfully
if df_stock is not None:
    # The fetchers return lower-case OHLCV columns, while add_technical_indicators
    # documents Title-case ones ('Close', ...). Renaming into a NEW frame fixes the
    # `KeyError: 'Close'` and leaves the raw download in `df_stock` untouched.
    df_ohlcv = df_stock.rename(columns={
        'open': 'Open', 'high': 'High', 'low': 'Low',
        'close': 'Close', 'volume': 'Volume'
    })

    # Add technical indicators to fetched stock data
    df_enriched = add_technical_indicators(df_ohlcv)

    if df_enriched.empty:
        # Long rolling windows (up to 200 bars) drop every row of a short history
        print("Not enough history to compute indicators for symbol:", symbol)
    else:
        # Print a preview of the enriched data
        print(df_enriched.head())

        # Generate and show the comprehensive technical analysis Plotly chart
        fig = plot_technical_analysis(df_enriched, ticker_name=symbol)
        fig.show()

else:
    print("Failed to fetch stock data for symbol:", symbol)



🔹 Trying Alpha Vantage for SBIN.NS...
❌ Alpha Vantage Error: Unknown

🔹 Trying Polygon.io for SBIN.NS...
❌ Polygon.io Error: Unknown API Key

🔹 Trying Finnhub for SBIN.NS...
❌ Finnhub Error: No data available

🔹 Trying Yahoo Finance for SBIN.NS...
✅ Yahoo Finance Success — 300 records

✅ Data fetched successfully from: Yahoo Finance


KeyError: 'Close'

In [None]:
def add_technical_indicators(df):
    """
    Add a comprehensive set of technical indicators to a stock OHLCV DataFrame.

    The OHLCV columns are matched case-insensitively (``close`` and ``Close`` are
    both accepted) and are named 'Open', 'High', 'Low', 'Close', 'Volume' in the
    result. The input frame is not modified.

    Parameters:
    df (pd.DataFrame): DataFrame containing open, high, low, close and volume columns.

    Returns:
    pd.DataFrame: New DataFrame enriched with the indicator columns. Rows containing
        NaN in any column are dropped, so an input shorter than the longest window
        used here (200 periods) produces an empty result.

    Raises:
    KeyError: If any of the five OHLCV columns is missing.
    """
    required = ('Open', 'High', 'Low', 'Close', 'Volume')

    # Map existing columns onto the canonical Title-case names, ignoring case.
    # A column is only renamed when the canonical name is not already taken.
    canonical = {name.lower(): name for name in required}
    renames = {}
    for col in df.columns:
        target = canonical.get(col.lower()) if isinstance(col, str) else None
        if (target is not None and target != col
                and target not in df.columns and target not in renames.values()):
            renames[col] = target

    missing = [name for name in required
               if name not in df.columns and name not in renames.values()]
    if missing:
        raise KeyError(
            f"add_technical_indicators: missing required column(s) {missing}; "
            f"got {list(df.columns)}"
        )

    # rename() returns a new frame, so every assignment below leaves the caller's data alone
    df = df.rename(columns=renames)
    open_high_low_close = df['High'], df['Low'], df['Close']
    high, low, close = open_high_low_close
    volume = df['Volume']

    # ========== TREND INDICATORS ==========

    # Simple Moving Averages (SMA) smooth the price over the given window
    for window in (20, 50, 100, 200):
        df[f'SMA_{window}'] = ta.trend.sma_indicator(close, window=window)

    # Exponential Moving Averages (EMA) - react faster to recent price changes
    for window in (12, 20, 26, 50):
        df[f'EMA_{window}'] = ta.trend.ema_indicator(close, window=window)

    # MACD (Moving Average Convergence Divergence): line, signal and histogram
    df['MACD'] = ta.trend.macd(close)
    df['MACD_Signal'] = ta.trend.macd_signal(close)
    df['MACD_Hist'] = ta.trend.macd_diff(close)

    # Average Directional Index (ADX) and its positive/negative directional components
    df['ADX'] = ta.trend.adx(high, low, close, window=14)
    df['ADX_Pos'] = ta.trend.adx_pos(high, low, close, window=14)
    df['ADX_Neg'] = ta.trend.adx_neg(high, low, close, window=14)

    # The trend-line proxy is the 20-period SMA; reuse it instead of recomputing
    df['Trend_Line'] = df['SMA_20']

    # ========== MOMENTUM INDICATORS ==========

    # Relative Strength Index (RSI) over 14 and 7 periods
    df['RSI_14'] = ta.momentum.rsi(close, window=14)
    df['RSI_7'] = ta.momentum.rsi(close, window=7)

    # Stochastic Oscillator components (%K and %D)
    df['Stoch_K'] = ta.momentum.stoch(high, low, close, window=14)
    df['Stoch_D'] = ta.momentum.stoch_signal(high, low, close, window=14)

    # ========== DeMarker Indicator: Custom Implementation ==========

    def demarker(high, low, period=14):
        # Keep only upward moves of the high and downward moves of the low (as positives)
        demax = high.diff().clip(lower=0)
        demin = (-low.diff()).clip(lower=0)
        demax_ma = demax.rolling(window=period).mean()
        demin_ma = demin.rolling(window=period).mean()
        return demax_ma / (demax_ma + demin_ma)

    df['DeMarker'] = demarker(high, low, period=14)

    # ========== VOLATILITY INDICATORS ==========

    df['ATR_14'] = ta.volatility.average_true_range(high, low, close, window=14)
    df['ATR_7'] = ta.volatility.average_true_range(high, low, close, window=7)

    bb = ta.volatility.BollingerBands(close, window=20, window_dev=2)
    df['BB_High'] = bb.bollinger_hband()
    df['BB_Mid'] = bb.bollinger_mavg()
    df['BB_Low'] = bb.bollinger_lband()
    df['BB_Width'] = bb.bollinger_wband()
    df['BB_Pct'] = bb.bollinger_pband()

    df['Std_Dev_20'] = close.rolling(window=20).std()
    df['Std_Dev_50'] = close.rolling(window=50).std()

    # ========== VOLUME INDICATORS ==========

    df['OBV'] = ta.volume.on_balance_volume(close, volume)
    # 'ADI' and 'AD_Line' are the same series under two names; compute it once
    acc_dist = ta.volume.acc_dist_index(high, low, close, volume)
    df['ADI'] = acc_dist
    df['AD_Line'] = acc_dist

    # ========== SUPPORT/RESISTANCE & PIVOT POINTS ==========

    pivot = (high + low + close) / 3
    bar_range = high - low
    df['Pivot'] = pivot
    df['R1'] = 2 * pivot - low
    df['S1'] = 2 * pivot - high
    df['R2'] = pivot + bar_range
    df['S2'] = pivot - bar_range
    df['R3'] = high + 2 * (pivot - low)
    df['S3'] = low - 2 * (high - pivot)

    df['Resistance_20'] = high.rolling(window=20).max()
    df['Support_20'] = low.rolling(window=20).min()
    df['Resistance_50'] = high.rolling(window=50).max()
    df['Support_50'] = low.rolling(window=50).min()

    # ========== MARKET PROFILE (Volume Profile Approximation) ==========

    # Cumulative VWAP over the whole frame and a 20-period volume-weighted moving average
    df['VWAP'] = (volume * (high + low + close) / 3).cumsum() / volume.cumsum()
    df['VWMA_20'] = (close * volume).rolling(window=20).sum() / volume.rolling(window=20).sum()

    # Remove NaNs caused by rolling calculations
    return df.dropna()


In [None]:
import numpy as np
import pandas as pd

class AdvancedFeatureEngineer:
    """Full-featured technical indicator generator for trading analysis.

    Works on a DataFrame with lower-case ``close``, ``high``, ``low`` and
    ``volume`` columns. Every ``_add_*`` helper adds its columns to the frame it
    receives and returns that same frame; ``add_all_features`` runs them all on a
    copy so the caller's data is never modified.
    """

    # Columns read by the indicator helpers; checked once in add_all_features
    _REQUIRED_COLUMNS = ('close', 'high', 'low', 'volume')

    def __init__(self):
        self.feature_names = []  # Names of the columns added by the last add_all_features call

    # =============================================================
    # 📌 MASTER FEATURE FUNCTION: Adds all indicators to DataFrame
    # =============================================================
    def add_all_features(self, df):
        """Return a copy of ``df`` with every indicator added and NaN rows removed.

        Args:
            df (pd.DataFrame): Price data with 'close', 'high', 'low', 'volume' columns.

        Returns:
            pd.DataFrame: New frame with the indicator columns. Rows that contain NaN
            in any column (the warm-up of the rolling windows, up to 200 periods)
            are dropped.

        Raises:
            KeyError: If a required column is missing.
        """
        missing = [col for col in self._REQUIRED_COLUMNS if col not in df.columns]
        if missing:
            raise KeyError(f"AdvancedFeatureEngineer: missing required column(s) {missing}")

        out = df.copy()  # Work on a copy to avoid modifying original DataFrame

        steps = (
            self._add_sma_ema,                    # Moving averages
            self._add_macd,                       # MACD momentum
            self._add_rsi,                        # RSI momentum
            self._add_stochastic,                 # Stochastic oscillator
            self._add_adx,                        # Average Directional Index (trend strength)
            self._add_demarker,                   # DeMarker indicator (trend/momentum)
            self._add_atr,                        # Average True Range (volatility)
            self._add_bollinger,                  # Bollinger Bands (volatility)
            self._add_std_features,               # Rolling standard deviations
            self._add_obv,                        # On-Balance Volume (volume momentum)
            self._add_accumulation_distribution,  # Accumulation/Distribution line (volume flow)
            self._add_pivot_points,               # Support/resistance pivot levels
            self._add_trendlines,                 # Trendline slopes (linear regression)
            self._add_market_profile,             # Market profile volume clusters
            self._add_support_resistance,         # Dynamic support/resistance levels
        )
        for step in steps:
            out = step(out)

        # Record which columns were generated (everything that was not in the input)
        self.feature_names = [col for col in out.columns if col not in df.columns]

        # Drop rows with NaN values caused by rolling calculations
        return out.dropna()

    # =============================================================
    # 📊 SIMPLE MOVING AVERAGES (SMA) / EXPONENTIAL MOVING AVERAGES (EMA)
    # =============================================================
    def _add_sma_ema(self, df):
        """Add ``sma_<n>`` and ``ema_<n>`` of the close for n in 5, 10, 20, 50, 100, 200."""
        for period in [5, 10, 20, 50, 100, 200]:
            df[f'sma_{period}'] = df['close'].rolling(period).mean()
            df[f'ema_{period}'] = df['close'].ewm(span=period, adjust=False).mean()
        return df

    # =============================================================
    # 💹 MACD (Moving Average Convergence Divergence)
    # =============================================================
    def _add_macd(self, df):
        """Add the MACD line (EMA12 - EMA26), its 9-period signal line and the histogram."""
        exp1 = df['close'].ewm(span=12, adjust=False).mean()  # Fast EMA
        exp2 = df['close'].ewm(span=26, adjust=False).mean()  # Slow EMA
        df['macd'] = exp1 - exp2                              # MACD line
        df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()  # Signal line
        df['macd_hist'] = df['macd'] - df['macd_signal']     # Histogram
        return df

    # =============================================================
    # 💪 RSI (Relative Strength Index)
    # =============================================================
    def _add_rsi(self, df, period=14):
        """Add ``rsi_<period>`` computed from simple rolling means of gains and losses."""
        delta = df['close'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(period).mean()  # Average gains
        loss = (-delta.where(delta < 0, 0)).rolling(period).mean()  # Average losses
        rs = gain / (loss + 1e-10)                                 # Epsilon avoids division by zero
        df[f'rsi_{period}'] = 100 - (100 / (1 + rs))              # RSI formula
        return df

    # =============================================================
    # 🎯 STOCHASTIC OSCILLATOR
    # =============================================================
    def _add_stochastic(self, df, period=14):
        """Add %K (``stoch_k_<period>``) and its 3-period mean %D (``stoch_d_<period>``)."""
        low_min = df['low'].rolling(period).min()
        high_max = df['high'].rolling(period).max()
        df[f'stoch_k_{period}'] = 100 * (df['close'] - low_min) / (high_max - low_min + 1e-10)  # %K line
        df[f'stoch_d_{period}'] = df[f'stoch_k_{period}'].rolling(3).mean()                      # %D line (smoothed)
        return df

    # =============================================================
    # 📏 ADX (Average Directional Index) — Trend Strength Indicator
    # =============================================================
    def _add_adx(self, df, period=14):
        """Add ``adx``: a rolling-mean approximation of the Average Directional Index."""
        # Directional movement: keep only rises of the high and falls of the low
        plus_dm = df['high'].diff().clip(lower=0)
        minus_dm = (-df['low'].diff()).clip(lower=0)
        tr = self._calculate_atr(df, period)               # Average true range for normalization
        plus_di = 100 * (plus_dm.rolling(period).mean() / tr)  # +DI percentage
        minus_di = 100 * (minus_dm.rolling(period).mean() / tr)  # -DI percentage
        dx = 100 * abs(plus_di - minus_di) / (plus_di + minus_di + 1e-10)  # Directional index
        df['adx'] = dx.rolling(period).mean()  # Smoothed ADX
        return df

    # Helper function: ATR calculation
    def _calculate_atr(self, df, period=14):
        """Return the rolling mean of the true range over ``period`` rows."""
        prev_close = df['close'].shift()
        tr1 = df['high'] - df['low']
        tr2 = abs(df['high'] - prev_close)
        tr3 = abs(df['low'] - prev_close)
        tr = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
        return tr.rolling(period).mean()

    # =============================================================
    # 🧭 DEMARKER INDICATOR — Buying/Selling Pressure
    # =============================================================
    def _add_demarker(self, df, period=14):
        """Add ``demarker`` = mean(DeMax) / (mean(DeMax) + mean(DeMin)) over ``period`` rows."""
        high_diff = df['high'].diff()
        low_diff = df['low'].diff()
        # Stay in pandas so the result keeps df's index. Building fresh Series from
        # NumPy arrays would give them a 0..n-1 index, and assigning those to a
        # datetime-indexed frame aligns to nothing (an all-NaN column).
        demax = high_diff.where(high_diff > 0, 0)        # Only positive moves
        demin = (-low_diff).where(low_diff < 0, 0)       # Only negative moves as positive values
        demax_sma = demax.rolling(period).mean()
        demin_sma = demin.rolling(period).mean()
        df['demarker'] = demax_sma / (demax_sma + demin_sma + 1e-10)
        return df

    # =============================================================
    # 📉 ATR (Average True Range) — Market Volatility
    # =============================================================
    def _add_atr(self, df, period=14):
        """Add ``atr`` (see ``_calculate_atr``)."""
        df['atr'] = self._calculate_atr(df, period)
        return df

    # =============================================================
    # 🎢 BOLLINGER BANDS — Volatility Channel
    # =============================================================
    def _add_bollinger(self, df, period=20):
        """Add upper/lower bands at ±2 rolling standard deviations and the relative band width."""
        sma = df['close'].rolling(period).mean()
        std = df['close'].rolling(period).std()
        df['bb_upper'] = sma + (2 * std)
        df['bb_lower'] = sma - (2 * std)
        df['bb_width'] = (df['bb_upper'] - df['bb_lower']) / sma
        return df

    # =============================================================
    # 🧮 STANDARD DEVIATION FEATURES — Supplement Volatility
    # =============================================================
    def _add_std_features(self, df):
        """Add rolling standard deviations of the close (``std_10``, ``std_20``, ``std_50``)."""
        for period in [10, 20, 50]:
            df[f'std_{period}'] = df['close'].rolling(period).std()
        return df

    # =============================================================
    # 💰 ON-BALANCE VOLUME (OBV) — Volume Momentum Indicator
    # =============================================================
    def _add_obv(self, df):
        """Add ``obv``: running total of volume, added on up-closes and subtracted on down-closes."""
        # +1 when the close rose, -1 when it fell, 0 when unchanged (and for the first row).
        # Vectorized, so it does not depend on positional `series[i]` lookups.
        direction = np.sign(df['close'].diff()).fillna(0)
        # Rows with no price change contribute exactly 0, whatever their volume is
        signed_volume = (direction * df['volume']).where(direction != 0, 0)
        df['obv'] = signed_volume.cumsum()
        return df

    # =============================================================
    # 🧩 ACCUMULATION/DISTRIBUTION LINE — Volume Flow Indicator
    # =============================================================
    def _add_accumulation_distribution(self, df):
        """Add ``ad_line``: cumulative sum of money-flow multiplier times volume."""
        mfm = ((df['close'] - df['low']) - (df['high'] - df['close'])) / (df['high'] - df['low'] + 1e-10)  # Money Flow Multiplier
        df['ad_line'] = (mfm * df['volume']).cumsum()  # Cumulative volume flow
        return df

    # =============================================================
    # 🪜 PIVOT POINTS — Support/Resistance Midpoints
    # =============================================================
    def _add_pivot_points(self, df):
        """Add per-row pivot level and the first two resistance/support levels."""
        df['pivot'] = (df['high'] + df['low'] + df['close']) / 3
        df['r1'] = 2 * df['pivot'] - df['low']  # Resistance 1
        df['s1'] = 2 * df['pivot'] - df['high']  # Support 1
        df['r2'] = df['pivot'] + (df['high'] - df['low'])  # Resistance 2
        df['s2'] = df['pivot'] - (df['high'] - df['low'])  # Support 2
        return df

    # =============================================================
    # 📈 TRENDLINES (Slope of Linear Regression over Rolling Window)
    # =============================================================
    def _add_trendlines(self, df, period=20):
        """Add ``trend_slope``: slope of a straight-line fit to the last ``period`` closes."""
        x_axis = np.arange(period)  # Same x values for every full window; build them once
        df['trend_slope'] = df['close'].rolling(period).apply(
            lambda window: np.polyfit(x_axis, window, 1)[0], raw=True)
        return df

    # =============================================================
    # 🏗️ MARKET PROFILE (Volume Concentrations by Price Bin)
    # =============================================================
    def _add_market_profile(self, df, bins=20):
        """Add ``price_bin`` (quantile bin of the close) and the total volume traded in that bin.

        The bins and totals are computed over the whole frame, so each row's value
        also reflects later rows.
        """
        # Bin closing prices into quantile bins
        df['price_bin'] = pd.qcut(df['close'], bins, duplicates='drop')
        # Broadcast each bin's total volume back onto its rows. `transform` yields a
        # plain numeric column; mapping through the categorical bins may not.
        df['market_profile_strength'] = (
            df.groupby('price_bin', observed=True)['volume'].transform('sum')
        )
        return df

    # =============================================================
    # 🧭 SUPPORT/RESISTANCE LEVELS (Rolling Local Min/Max & Price Position)
    # =============================================================
    def _add_support_resistance(self, df, window=10):
        """Add rolling ``support``/``resistance`` and the close's position between them."""
        # Rolling minimum low as support level
        df['support'] = df['low'].rolling(window).min()
        # Rolling maximum high as resistance level
        df['resistance'] = df['high'].rolling(window).max()
        # Relative close price position between support and resistance (0 - support, 1 - resistance)
        df['price_position'] = (df['close'] - df['support']) / (df['resistance'] - df['support'] + 1e-10)
        return df


In [None]:

class AdvancedFeatureEngineer:
    """Generator of technical-indicator features for trading analysis."""

    def __init__(self):
        # Placeholder for the names of generated features; nothing in this
        # class body fills or reads it yet.
        self.feature_names = list()


In [None]:
    # =============================================================
    # 📌 MASTER FEATURE FUNCTION: Adds all indicators to a DataFrame
    # =============================================================
    def add_all_features(self, df):
        """Return a copy of ``df`` enriched with every technical indicator.

        The indicator steps run in a fixed order on a copy of the input, then
        every row that still contains a NaN in any column is dropped.

        Args:
            df: DataFrame with ``close``, ``high``, ``low`` and ``volume`` columns.
                It is not modified.

        Returns:
            A new DataFrame holding the input columns plus the columns added by
            each ``_add_*`` step, without NaN rows.

        Raises:
            KeyError: If any of the four required columns is missing.
        """
        required = ('close', 'high', 'low', 'volume')
        missing = [column for column in required if column not in df.columns]
        if missing:
            # Fail before any work is done, naming exactly what is absent.
            raise KeyError(f"add_all_features requires columns {missing}")

        df = df.copy()  # Work on a copy so the caller's frame is untouched

        # Order is kept exactly as the steps were originally chained.
        pipeline = (
            self._add_sma_ema,
            self._add_macd,
            self._add_rsi,
            self._add_stochastic,
            self._add_adx,
            self._add_demarker,
            self._add_atr,
            self._add_bollinger,
            self._add_std_features,
            self._add_obv,
            self._add_accumulation_distribution,
            self._add_pivot_points,
            self._add_trendlines,
            self._add_market_profile,
            self._add_support_resistance,
        )
        for step in pipeline:
            df = step(df)

        # Rolling/window calculations leave NaN rows behind; drop them.
        return df.dropna()


    # =============================================================
    # 📊 SIMPLE MOVING AVERAGES (SMA) and EXPONENTIAL MOVING AVERAGES (EMA)
    # =============================================================
    def _add_sma_ema(self, df):
        for period in [5, 10, 20, 50, 100, 200]:
            df[f'sma_{period}'] = df['close'].rolling(window=period).mean()
            df[f'ema_{period}'] = df['close'].ewm(span=period, adjust=False).mean()
        return df


In [None]:
    # =============================================================
    # 💹 MACD (Moving Average Convergence Divergence)
    # =============================================================
    def _add_macd(self, df):
        exp1 = df['close'].ewm(span=12, adjust=False).mean()
        exp2 = df['close'].ewm(span=26, adjust=False).mean()
        df['macd'] = exp1 - exp2
        df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()
        df['macd_hist'] = df['macd'] - df['macd_signal']
        return df


    # =============================================================
    # 💪 RSI (Relative Strength Index)
    # =============================================================
    def _add_rsi(self, df, period=14):
        delta = df['close'].diff()
        gain = delta.where(delta > 0, 0).rolling(window=period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
        rs = gain / (loss + 1e-10)
        df[f'rsi_{period}'] = 100 - (100 / (1 + rs))
        return df


In [None]:
    # =============================================================
    # 🎯 STOCHASTIC OSCILLATOR
    # =============================================================
    def _add_stochastic(self, df, period=14):
        low_min = df['low'].rolling(window=period).min()
        high_max = df['high'].rolling(window=period).max()
        df[f'stoch_k_{period}'] = 100 * (df['close'] - low_min) / (high_max - low_min + 1e-10)
        df[f'stoch_d_{period}'] = df[f'stoch_k_{period}'].rolling(window=3).mean()
        return df


    # =============================================================
    # 📏 ADX (Average Directional Index)
    # =============================================================
    def _add_adx(self, df, period=14):
        high, low, close = df['high'], df['low'], df['close']
        plus_dm = high.diff()
        minus_dm = -low.diff()
        plus_dm[plus_dm < 0] = 0
        minus_dm[minus_dm < 0] = 0
        tr = self._calculate_atr(df, period)
        plus_di = 100 * (plus_dm.rolling(window=period).mean() / tr)
        minus_di = 100 * (minus_dm.rolling(window=period).mean() / tr)
        dx = 100 * abs(plus_di - minus_di) / (plus_di + minus_di + 1e-10)
        df['adx'] = dx.rolling(window=period).mean()
        return df


    # Helper function to calculate ATR used by ADX
    def _calculate_atr(self, df, period=14):
        tr1 = df['high'] - df['low']
        tr2 = abs(df['high'] - df['close'].shift())
        tr3 = abs(df['low'] - df['close'].shift())
        tr = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
        return tr.rolling(window=period).mean()


In [None]:
    # =============================================================
    # 🧭 DEMARKER INDICATOR
    # =============================================================
    def _add_demarker(self, df, period=14):
        demax = df['high'].diff()
        demin = df['low'].diff()
        demax = np.where(demax > 0, demax, 0)
        demin = np.where(demin < 0, abs(demin), 0)
        demax_sma = pd.Series(demax).rolling(window=period).mean()
        demin_sma = pd.Series(demin).rolling(window=period).mean()
        df['demarker'] = demax_sma / (demax_sma + demin_sma + 1e-10)
        return df


    # =============================================================
    # 📉 ATR (Average True Range)
    # =============================================================
    def _add_atr(self, df, period=14):
        df['atr'] = self._calculate_atr(df, period)
        return df


    # =============================================================
    # 🎢 BOLLINGER BANDS
    # =============================================================
    def _add_bollinger(self, df, period=20):
        sma = df['close'].rolling(window=period).mean()
        std = df['close'].rolling(window=period).std()
        df['bb_upper'] = sma + (2 * std)
        df['bb_lower'] = sma - (2 * std)
        df['bb_width'] = (df['bb_upper'] - df['bb_lower']) / sma
        return df


In [None]:
    # =============================================================
    # 🧮 STANDARD DEVIATION FEATURES
    # =============================================================
    def _add_std_features(self, df):
        for period in [10, 20, 50]:
            df[f'std_{period}'] = df['close'].rolling(window=period).std()
        return df


    # =============================================================
    # 💰 ON-BALANCE VOLUME (OBV)
    # =============================================================
    def _add_obv(self, df):
        obv = [0]
        for i in range(1, len(df)):
            if df['close'][i] > df['close'][i-1]:
                obv.append(obv[-1] + df['volume'][i])
            elif df['close'][i] < df['close'][i-1]:
                obv.append(obv[-1] - df['volume'][i])
            else:
                obv.append(obv[-1])
        df['obv'] = obv
        return df


    # =============================================================
    # 🧩 ACCUMULATION/DISTRIBUTION LINE
    # =============================================================
    def _add_accumulation_distribution(self, df):
        mfm = ((df['close'] - df['low']) - (df['high'] - df['close'])) / (df['high'] - df['low'] + 1e-10)
        df['ad_line'] = (mfm * df['volume']).cumsum()
        return df


In [None]:
    # =============================================================
    # 🪜 PIVOT POINTS
    # =============================================================
    def _add_pivot_points(self, df):
        df['pivot'] = (df['high'] + df['low'] + df['close']) / 3
        df['r1'] = 2 * df['pivot'] - df['low']
        df['s1'] = 2 * df['pivot'] - df['high']
        df['r2'] = df['pivot'] + (df['high'] - df['low'])
        df['s2'] = df['pivot'] - (df['high'] - df['low'])
        return df


    # =============================================================
    # 📈 TRENDLINES (linear regression slope)
    # =============================================================
    def _add_trendlines(self, df, period=20):
        df['trend_slope'] = df['close'].rolling(window=period).apply(
            lambda x: np.polyfit(range(len(x)), x, 1)[0], raw=True)
        return df


    # =============================================================
    # 🏗️ MARKET PROFILE (volume by price bins)
    # =============================================================
    def _add_market_profile(self, df, bins=20):
        df['price_bin'] = pd.qcut(df['close'], q=bins, duplicates='drop')
        volume_profile = df.groupby('price_bin')['volume'].sum()
        df['market_profile_strength'] = df['price_bin'].map(volume_profile)
        return df


    # =============================================================
    # 🧭 SUPPORT / RESISTANCE LEVELS (local lows/highs)
    # =============================================================
    def _add_support_resistance(self, df, window=10):
        df['support'] = df['low'].rolling(window=window).min()
        df['resistance'] = df['high'].rolling(window=window).max()
        df['price_position'] = (df['close'] - df['support']) / (df['resistance'] - df['support'] + 1e-10)
        return df


In [None]:
class AdvancedFeatureEngineer:
    """Minimal stand-in whose only method prints a message and returns its input.

    NOTE(review): this rebinds the name ``AdvancedFeatureEngineer``. Any cell
    executed after this one that instantiates ``AdvancedFeatureEngineer`` gets
    this stand-in, so ``add_all_features`` adds no indicator columns there.
    """

    def add_all_features(self, df):
        """Print a fixed confirmation message and return ``df`` unchanged."""
        print("add_all_features is defined!")
        return df

# Smoke test: instantiate the class and call the method on an empty frame
engineer = AdvancedFeatureEngineer()
engineer.add_all_features(pd.DataFrame())  # Should print the message


add_all_features is defined!


In [None]:
# Build the feature frame from df_stock and preview its first rows.
# df_stock must already exist in the kernel when this cell runs.
engineer = AdvancedFeatureEngineer()
df_features = engineer.add_all_features(df_stock)  # Use your actual DataFrame variable here
print(df_features.head())


add_all_features is defined!
                                close        high         low        open  \
Datetime                                                                    
2025-10-01 03:45:00+00:00  872.849976  874.700012  871.200012  874.700012   
2025-10-01 03:50:00+00:00  874.849976  876.349976  872.099976  872.849976   
2025-10-01 03:55:00+00:00  871.000000  875.450012  870.400024  874.849976   
2025-10-01 04:00:00+00:00  871.150024  871.849976  870.650024  871.099976   
2025-10-01 04:05:00+00:00  873.000000  873.299988  870.849976  871.099976   

                           volume  
Datetime                           
2025-10-01 03:45:00+00:00       0  
2025-10-01 03:50:00+00:00  154136  
2025-10-01 03:55:00+00:00  131612  
2025-10-01 04:00:00+00:00   69056  
2025-10-01 04:05:00+00:00   85664  


In [None]:
# Requires AdvancedFeatureEngineer and df_stock to be defined by cells run earlier

engineer = AdvancedFeatureEngineer()

# Run the feature pipeline on the stock frame
df_features = engineer.add_all_features(df_stock)

# Show the first rows of the returned frame
print(df_features.head())


add_all_features is defined!
                                close        high         low        open  \
Datetime                                                                    
2025-10-01 03:45:00+00:00  872.849976  874.700012  871.200012  874.700012   
2025-10-01 03:50:00+00:00  874.849976  876.349976  872.099976  872.849976   
2025-10-01 03:55:00+00:00  871.000000  875.450012  870.400024  874.849976   
2025-10-01 04:00:00+00:00  871.150024  871.849976  870.650024  871.099976   
2025-10-01 04:05:00+00:00  873.000000  873.299988  870.849976  871.099976   

                           volume  
Datetime                           
2025-10-01 03:45:00+00:00       0  
2025-10-01 03:50:00+00:00  154136  
2025-10-01 03:55:00+00:00  131612  
2025-10-01 04:00:00+00:00   69056  
2025-10-01 04:05:00+00:00   85664  


In [None]:
import yfinance as yf

# Download one year of daily bars for SBI (SBIN.NS)
df_stock = yf.download('SBIN.NS', period='1y', interval='1d', progress=False)

if not df_stock.empty:
    # A MultiIndex header is collapsed to its first level
    if isinstance(df_stock.columns, pd.MultiIndex):
        df_stock.columns = df_stock.columns.get_level_values(0)

    # Lower-case every column label for consistency
    df_stock.columns = [label.lower() for label in df_stock.columns]

    # Preview the first rows to verify the download
    print(df_stock.head())
else:
    # Nothing came back from the download
    print("Failed to fetch SBI data.")


                 close        high         low        open    volume
Date                                                                
2024-10-07  755.480286  788.173789  750.333628  784.252526  24267990
2024-10-08  766.067688  768.959631  757.048772  757.048772   8560960
2024-10-09  781.703735  788.516881  766.655854  771.214346  18758697
2024-10-10  781.409607  788.614963  779.350968  783.272231  10241015
2024-10-11  784.007446  786.899389  777.390316  781.507653  14322720


In [None]:
# Fetch history for SBIN.NS through the Ticker interface and print the whole frame
ticker = yf.Ticker('SBIN.NS')
data = ticker.history(period='1mo')  # Fetches 1 month of historical data
print(data)


                                 Open        High         Low       Close  \
Date                                                                        
2025-09-08 00:00:00+05:30  808.000000  813.849976  806.799988  808.799988   
2025-09-09 00:00:00+05:30  812.000000  812.349976  805.599976  808.849976   
2025-09-10 00:00:00+05:30  812.000000  824.599976  810.400024  818.200012   
2025-09-11 00:00:00+05:30  819.099976  825.700012  819.000000  823.650024   
2025-09-12 00:00:00+05:30  824.099976  825.799988  819.799988  823.549988   
2025-09-15 00:00:00+05:30  823.549988  827.799988  821.099976  824.750000   
2025-09-16 00:00:00+05:30  825.099976  833.000000  821.650024  831.549988   
2025-09-17 00:00:00+05:30  834.299988  858.150024  831.000000  857.150024   
2025-09-18 00:00:00+05:30  858.650024  860.799988  851.099976  854.349976   
2025-09-19 00:00:00+05:30  852.000000  864.450012  849.299988  862.349976   
2025-09-22 00:00:00+05:30  862.000000  868.200012  854.000000  855.250000   

In [None]:
# Same symbol and period, fetched with yf.download instead of Ticker.history
data2 = yf.download('SBIN.NS', period='1mo')  # Also fetches 1 month historical data
print(data2)


[*********************100%***********************]  1 of 1 completed

Price            Close        High         Low        Open    Volume
Ticker         SBIN.NS     SBIN.NS     SBIN.NS     SBIN.NS   SBIN.NS
Date                                                                
2025-09-08  808.799988  813.849976  806.799988  808.000000   4614227
2025-09-09  808.849976  812.349976  805.599976  812.000000   4567272
2025-09-10  818.200012  824.599976  810.400024  812.000000   7285853
2025-09-11  823.650024  825.700012  819.000000  819.099976   7425055
2025-09-12  823.549988  825.799988  819.799988  824.099976   5078018
2025-09-15  824.750000  827.799988  821.099976  823.549988   3852494
2025-09-16  831.549988  833.000000  821.650024  825.099976   9467531
2025-09-17  857.150024  858.150024  831.000000  834.299988  17243938
2025-09-18  854.349976  860.799988  851.099976  858.650024  10400847
2025-09-19  862.349976  864.450012  849.299988  852.000000  15206426
2025-09-22  855.250000  868.200012  854.000000  862.000000   6594034
2025-09-23  870.599976  874.250000




In [None]:
# Rebinds `ticker` and `data` with a fresh Ticker.history call for SBIN.NS
ticker = yf.Ticker('SBIN.NS')
data = ticker.history(period='1mo')  # Fetches 1 month of historical data
print(data)


                                 Open        High         Low       Close  \
Date                                                                        
2025-09-08 00:00:00+05:30  808.000000  813.849976  806.799988  808.799988   
2025-09-09 00:00:00+05:30  812.000000  812.349976  805.599976  808.849976   
2025-09-10 00:00:00+05:30  812.000000  824.599976  810.400024  818.200012   
2025-09-11 00:00:00+05:30  819.099976  825.700012  819.000000  823.650024   
2025-09-12 00:00:00+05:30  824.099976  825.799988  819.799988  823.549988   
2025-09-15 00:00:00+05:30  823.549988  827.799988  821.099976  824.750000   
2025-09-16 00:00:00+05:30  825.099976  833.000000  821.650024  831.549988   
2025-09-17 00:00:00+05:30  834.299988  858.150024  831.000000  857.150024   
2025-09-18 00:00:00+05:30  858.650024  860.799988  851.099976  854.349976   
2025-09-19 00:00:00+05:30  852.000000  864.450012  849.299988  862.349976   
2025-09-22 00:00:00+05:30  862.000000  868.200012  854.000000  855.250000   

In [None]:
def add_technical_indicators(df):
    """Add a fixed set of ``ta`` indicators to ``df`` and drop rows containing NaN.

    Columns added, in order: ``SMA_20``, ``EMA_20``, ``MACD``, ``MACD_Signal``,
    ``MACD_Hist``, ``RSI_14``, ``ATR_14`` and ``OBV``.

    Args:
        df: DataFrame with ``Close``, ``High``, ``Low`` and ``Volume`` columns.
            It is modified in place, including the row removal.

    Returns:
        The same ``df`` object.
    """
    close = df['Close']
    indicator_columns = {
        'SMA_20': ta.trend.sma_indicator(close, window=20),
        'EMA_20': ta.trend.ema_indicator(close, window=20),
        'MACD': ta.trend.macd(close),
        'MACD_Signal': ta.trend.macd_signal(close),
        'MACD_Hist': ta.trend.macd_diff(close),
        'RSI_14': ta.momentum.rsi(close, window=14),
        'ATR_14': ta.volatility.average_true_range(df['High'], df['Low'], close, window=14),
        'OBV': ta.volume.on_balance_volume(close, df['Volume']),
    }
    # Dict order is insertion order, so the columns land in the order listed.
    for column, values in indicator_columns.items():
        df[column] = values
    df.dropna(inplace=True)
    return df


In [None]:
def fetch_stock_data(ticker_symbol='SBIN.NS', period='1y'):
    """Fetch price history for one symbol via ``yf.Ticker(...).history``.

    Args:
        ticker_symbol: Symbol passed to ``yf.Ticker``.
        period: Period string passed to ``history``.

    Returns:
        The history DataFrame (a MultiIndex header is reduced to its first
        level), or ``None`` when the frame is empty or any exception is raised.
        In both ``None`` cases a message is printed.
    """
    try:
        df = yf.Ticker(ticker_symbol).history(period=period)
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)
        if not df.empty:
            return df
        print(f"No data fetched for symbol '{ticker_symbol}'!")
        return None
    except Exception as err:
        # Best effort: report the failure and signal it with None.
        print(f"Error fetching data for symbol '{ticker_symbol}': {err}")
        return None


In [None]:
def prepare_stock_analysis(ticker='SBIN.NS', period='1y'):
    """Fetch data for ``ticker`` and add technical indicators to it.

    Args:
        ticker: Symbol forwarded to ``fetch_stock_data``.
        period: Period string forwarded to ``fetch_stock_data``.

    Returns:
        The frame returned by ``add_technical_indicators``, or ``None`` when
        ``fetch_stock_data`` returned ``None``.
    """
    raw = fetch_stock_data(ticker, period)
    if raw is None:
        return None
    enriched = add_technical_indicators(raw)
    print(f"Data ready with indicators for {ticker}")
    return enriched


In [None]:
def plot_price_ma(df, ticker):
    """Show a candlestick chart of ``df`` with its ``SMA_20`` and ``EMA_20`` lines.

    Args:
        df: DataFrame with ``Open``, ``High``, ``Low``, ``Close``, ``SMA_20`` and
            ``EMA_20`` columns; its index is used as the x axis.
        ticker: Label placed in the figure title.
    """
    candles = go.Candlestick(x=df.index, open=df['Open'], high=df['High'],
                             low=df['Low'], close=df['Close'], name='Price')
    fig = go.Figure()
    fig.add_trace(candles)
    for column, label in (('SMA_20', 'SMA 20'), ('EMA_20', 'EMA 20')):
        fig.add_trace(go.Scatter(x=df.index, y=df[column], mode='lines', name=label))
    fig.update_layout(title=f"{ticker} Price with SMA & EMA", xaxis_title='Date', yaxis_title='Price')
    fig.show()

def plot_macd(df, ticker):
    """Show the MACD line, signal line and a colored histogram.

    Args:
        df: DataFrame with ``MACD``, ``MACD_Signal`` and ``MACD_Hist`` columns;
            its index is used as the x axis.
        ticker: Label placed in the figure title.
    """
    fig = go.Figure()
    for column, label in (('MACD', 'MACD'), ('MACD_Signal', 'Signal')):
        fig.add_trace(go.Scatter(x=df.index, y=df[column], mode='lines', name=label))

    # Bars at or above zero are green, every other bar is red.
    bar_colors = []
    for value in df['MACD_Hist']:
        bar_colors.append('green' if value >= 0 else 'red')

    fig.add_trace(go.Bar(x=df.index, y=df['MACD_Hist'], name='Histogram', marker_color=bar_colors))
    fig.update_layout(title=f"{ticker} MACD", xaxis_title='Date', yaxis_title='Value')
    fig.show()

def plot_rsi(df, ticker):
    """Show the ``RSI_14`` line with dashed guide lines at 70 and 30.

    Args:
        df: DataFrame with an ``RSI_14`` column; its index is used as the x axis.
        ticker: Label placed in the figure title.
    """
    fig = go.Figure()
    rsi_line = go.Scatter(x=df.index, y=df['RSI_14'], mode='lines', name='RSI 14')
    fig.add_trace(rsi_line)
    # Red guide at 70, green guide at 30.
    for level, color in ((70, 'red'), (30, 'green')):
        fig.add_hline(y=level, line_dash='dash', line_color=color)
    fig.update_layout(title=f"{ticker} RSI (14)", xaxis_title='Date', yaxis_title='RSI')
    fig.show()

def plot_volume_obv(df, ticker):
    """Show volume bars and the OBV line in two stacked panels sharing the x axis.

    Args:
        df: DataFrame with ``Volume`` and ``OBV`` columns; its index is used as
            the x axis.
        ticker: Label placed in the figure title.
    """
    panel_titles = ['Volume', 'On-Balance Volume (OBV)']
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                        subplot_titles=panel_titles)
    volume_bars = go.Bar(x=df.index, y=df['Volume'], name='Volume')
    obv_line = go.Scatter(x=df.index, y=df['OBV'], mode='lines', name='OBV')
    fig.add_trace(volume_bars, row=1, col=1)
    fig.add_trace(obv_line, row=2, col=1)
    fig.update_layout(title=f"{ticker} Volume and OBV", height=600)
    fig.show()


In [None]:
# Ask for the symbol (upper-cased) and the period, both stripped of whitespace
ticker = input("Enter stock ticker symbol (e.g., 'AAPL', 'SBIN.NS'): ").strip().upper()
period = input("Enter data period (e.g., '1mo', '3mo', '6mo', '1y', '5y', 'max'): ").strip()

stock_data = prepare_stock_analysis(ticker, period)

if stock_data is None:
    # prepare_stock_analysis signals failure with None
    print("Failed to load data for analysis. Check ticker and period, then retry.")
else:
    # Preview the data, then draw the four charts in order
    print(stock_data.head())
    print(stock_data.columns)
    plot_price_ma(stock_data, ticker)
    plot_macd(stock_data, ticker)
    plot_rsi(stock_data, ticker)
    plot_volume_obv(stock_data, ticker)


Enter stock ticker symbol (e.g., 'AAPL', 'SBIN.NS'): SBIN.NS
Enter data period (e.g., '1mo', '3mo', '6mo', '1y', '5y', 'max'): 1y
Data ready with indicators for SBIN.NS
                                 Open        High         Low       Close  \
Date                                                                        
2024-11-25 00:00:00+05:30  810.721055  832.876165  809.397653  827.827576   
2024-11-26 00:00:00+05:30  827.386442  830.033307  819.102763  822.877014   
2024-11-27 00:00:00+05:30  821.994687  825.915950  815.720643  817.681274   
2024-11-28 00:00:00+05:30  817.681269  829.150974  814.152157  822.337769   
2024-11-29 00:00:00+05:30  824.543495  827.435438  815.181492  822.435852   

                             Volume  Dividends  Stock Splits      SMA_20  \
Date                                                                       
2024-11-25 00:00:00+05:30  23434605        0.0           0.0  806.405237   
2024-11-26 00:00:00+05:30   8893744        0.0           0.0  8