<a href="https://colab.research.google.com/github/Swagat1342/Enhanced-Stock-Prediction-System-with-Risk-Management/blob/main/finance_forecast.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install numpy pandas yfinance scikit-learn tensorflow plotly



In [None]:
pip install alpha_vantage finnhub-python polygon-api-client


In [1]:
import requests
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from datetime import datetime, timedelta
import yfinance as yf
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.feature_selection import mutual_info_regression, SelectKBest
import warnings
warnings.filterwarnings('ignore')


In [2]:
# ======================================
# 🔑 API Keys Configuration
# ======================================
# Provider name -> API key string. The values below are placeholders;
# StockDataFetcher falls back to this mapping when no `api_keys` argument
# is passed to its constructor.
API_KEYS = dict(
    ALPHA_VANTAGE="YOUR_ALPHA_VANTAGE_API_KEY",
    IEX_CLOUD="YOUR_IEX_CLOUD_API_KEY",
    FINNHUB="YOUR_FINNHUB_API_KEY",
    POLYGON="YOUR_POLYGON_API_KEY",
)


In [3]:
# ======================================
# 📊 Stock Data Fetcher Class
# ======================================
class StockDataFetcher:
    """Fetch price data from several providers with automatic fallback.

    Every ``fetch_*`` method is best-effort: it prints its progress, returns
    a DataFrame on success and ``None`` on any failure (network error,
    missing key, provider error payload), and never raises. On success it
    also records a human-readable provider name in ``self.last_source``.
    """

    def __init__(self, api_keys=None):
        """Create the fetcher.

        Args:
            api_keys: Mapping with the keys 'ALPHA_VANTAGE', 'POLYGON' and
                'FINNHUB' (read by the fetch methods). Falls back to the
                module-level ``API_KEYS`` when omitted or empty.
        """
        self.api_keys = api_keys or API_KEYS
        self.session = self._make_session()
        # Name of the provider that produced the most recent successful fetch.
        self.last_source = None

    def _make_session(self):
        """Return a requests session that retries transient HTTPS failures."""
        session = requests.Session()
        # Up to 3 retries with back-off on rate limiting (429) and 5xx errors.
        retries = Retry(total=3, backoff_factor=1,
                        status_forcelist=[429, 500, 502, 503, 504])
        session.mount('https://', HTTPAdapter(max_retries=retries))
        return session

    def fetch_alpha_vantage(self, symbol, interval='5min', outputsize='compact'):
        """Fetch intraday bars from the Alpha Vantage TIME_SERIES_INTRADAY API.

        Args:
            symbol: Ticker symbol.
            interval: Bar size as understood by the API (e.g. '5min').
            outputsize: 'compact' or 'full', passed through to the API.

        Returns:
            DataFrame indexed by timestamp (ascending) with float columns
            open/high/low/close/volume, or None on failure.
        """
        print(f"\n🔹 Trying Alpha Vantage for {symbol}...")
        try:
            # Let requests build the query string so every value is
            # URL-encoded instead of being pasted into the URL verbatim.
            query = {
                "function": "TIME_SERIES_INTRADAY",
                "symbol": symbol,
                "interval": interval,
                "apikey": self.api_keys['ALPHA_VANTAGE'],
                "outputsize": outputsize,
            }
            r = self.session.get("https://www.alphavantage.co/query",
                                 params=query, timeout=10)
            data = r.json()

            ts_key = f"Time Series ({interval})"
            if ts_key not in data:
                # The provider reports problems under one of several keys.
                reason = (data.get('Note')
                          or data.get('Information')
                          or data.get('Error Message')
                          or 'Unknown')
                print(f"❌ Alpha Vantage Error: {reason}")
                return None

            df = pd.DataFrame.from_dict(data[ts_key], orient="index").astype(float)
            df.index = pd.to_datetime(df.index)
            df.rename(columns={
                "1. open": "open", "2. high": "high",
                "3. low": "low", "4. close": "close",
                "5. volume": "volume"
            }, inplace=True)
            print(f"✅ Alpha Vantage Success — {len(df)} records")
            self.last_source = "Alpha Vantage"
            return df.sort_index()
        except Exception as e:  # deliberate: any failure means "try next source"
            print(f"❌ Alpha Vantage failed: {e}")
            return None

    def fetch_polygon(self, symbol, start_date=None, end_date=None, timespan='minute', multiplier=5):
        """Fetch aggregate bars from the Polygon.io v2 aggregates endpoint.

        Args:
            symbol: Ticker symbol.
            start_date: 'YYYY-MM-DD' start; defaults to five days ago.
            end_date: 'YYYY-MM-DD' end; defaults to today.
            timespan: Aggregate unit (e.g. 'minute').
            multiplier: Number of ``timespan`` units per bar.

        Returns:
            DataFrame indexed by 'timestamp' (ascending) with at least
            open/high/low/close/volume columns, or None on failure.
        """
        from urllib.parse import quote

        print(f"\n🔹 Trying Polygon.io for {symbol}...")
        try:
            if not start_date:
                start_date = (datetime.now() - timedelta(days=5)).strftime('%Y-%m-%d')
            if not end_date:
                end_date = datetime.now().strftime('%Y-%m-%d')

            # The ticker is part of the URL path, so escape it explicitly;
            # the key travels as an encoded query parameter.
            url = (
                f"https://api.polygon.io/v2/aggs/ticker/{quote(str(symbol), safe=':')}/range/"
                f"{multiplier}/{timespan}/{start_date}/{end_date}"
            )
            r = self.session.get(url, params={"apiKey": self.api_keys['POLYGON']},
                                 timeout=10)
            data = r.json()

            # A missing OR empty "results" list both mean "nothing usable".
            results = data.get("results")
            if not results:
                reason = data.get('error') or data.get('message') or 'No results'
                print(f"❌ Polygon.io Error: {reason}")
                return None

            df = pd.DataFrame(results)
            df["t"] = pd.to_datetime(df["t"], unit="ms")
            df.rename(columns={
                "t": "timestamp", "o": "open", "h": "high",
                "l": "low", "c": "close", "v": "volume"
            }, inplace=True)
            df.set_index("timestamp", inplace=True)
            print(f"✅ Polygon.io Success — {len(df)} records")
            self.last_source = "Polygon.io"
            # Chronological order, matching what fetch_alpha_vantage returns.
            return df.sort_index()
        except Exception as e:  # deliberate: any failure means "try next source"
            print(f"❌ Polygon.io failed: {e}")
            return None

    def fetch_finnhub(self, symbol):
        """Fetch the current quote from the Finnhub quote endpoint.

        Returns:
            Single-row DataFrame (index: local time of the call) with
            open/high/low/close and a ``volume`` column set to None, or
            None on failure. The row is a quote snapshot, not a bar history.
        """
        print(f"\n🔹 Trying Finnhub for {symbol}...")
        try:
            r = self.session.get(
                "https://finnhub.io/api/v1/quote",
                params={"symbol": symbol, "token": self.api_keys['FINNHUB']},
                timeout=10,
            )
            data = r.json()

            # A missing, null or zero current price is treated as "no data".
            if not data.get("c"):
                print("❌ Finnhub Error: No data available")
                return None

            df = pd.DataFrame([{
                "open": data.get("o"),
                "high": data.get("h"),
                "low": data.get("l"),
                "close": data.get("c"),
                "volume": None
            }], index=[datetime.now()])
            print(f"✅ Finnhub Success — Current ${data.get('c')}")
            self.last_source = "Finnhub"
            return df
        except Exception as e:  # deliberate: any failure means "try next source"
            print(f"❌ Finnhub failed: {e}")
            return None

    def fetch_yfinance(self, symbol, period="5d", interval="5m"):
        """Fetch bars through yfinance (the keyless fallback source).

        Args:
            symbol: Ticker symbol.
            period: Look-back period passed to ``yf.download``.
            interval: Bar size passed to ``yf.download``.

        Returns:
            DataFrame as returned by ``yf.download`` with flattened,
            lower-cased column names, or None on failure.
        """
        print(f"\n🔹 Trying Yahoo Finance for {symbol}...")
        try:
            df = yf.download(symbol, period=period, interval=interval, progress=False)
            if df.empty:
                print("❌ Yahoo Finance returned no data")
                return None

            # Multi-level columns: keep only the first (field name) level.
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.droplevel(1)

            # Lower-case the names; tolerate leftover tuple-style columns.
            df.columns = [col.lower() if isinstance(col, str) else col[0].lower()
                          for col in df.columns]

            print(f"✅ Yahoo Finance Success — {len(df)} records")
            self.last_source = "Yahoo Finance"
            return df
        except Exception as e:  # deliberate: any failure means "try next source"
            print(f"❌ Yahoo Finance failed: {e}")
            return None

    def get_stock_data(self, symbol, prefer_source=None):
        """
        Universal fetcher with auto-fallback

        Tries ``prefer_source`` first (when it names a known source), then
        the remaining sources in the order alpha_vantage, polygon, finnhub,
        yfinance. Each source is tried at most once and called with its
        default parameters.

        Args:
            symbol: Stock ticker symbol
            prefer_source: 'alpha_vantage', 'polygon', 'finnhub', or 'yfinance'.
                Unknown or empty values are ignored.

        Returns:
            The first non-empty DataFrame produced by a source, or None when
            every source fails. ``self.last_source`` is None after a failure.
        """
        fetchers = {
            'alpha_vantage': self.fetch_alpha_vantage,
            'polygon': self.fetch_polygon,
            'finnhub': self.fetch_finnhub,
            'yfinance': self.fetch_yfinance
        }

        # Forget the previous call's provider so a total failure cannot
        # leave a stale source name behind.
        self.last_source = None

        order = list(fetchers)
        if prefer_source and prefer_source in fetchers:
            order.remove(prefer_source)
            order.insert(0, prefer_source)

        for name in order:
            df = fetchers[name](symbol)
            if df is not None and not df.empty:
                print(f"\n✅ Data fetched successfully from: {self.last_source}")
                return df

        print("❌ All APIs failed — please check keys and connection.")
        return None


In [5]:
import numpy as np
import pandas as pd

class AdvancedFeatureEngineer:
    """Full-featured technical indicator generator for trading analysis.

    The public entry point is ``add_all_features``. The private ``_add_*``
    helpers each append their columns to the frame they are given (in
    place) and return that same frame. All helpers expect lower-case
    ``open``/``high``/``low``/``close``/``volume`` style column names and
    work with any index type.
    """

    # Small constant added to denominators to avoid division by zero.
    _EPS = 1e-10

    def __init__(self):
        # Names of the columns added by the last add_all_features call.
        self.feature_names = []

    # =============================================================
    # 📌 MASTER FEATURE FUNCTION
    # =============================================================
    def add_all_features(self, df):
        """Return a copy of ``df`` with every indicator column appended.

        Rows containing any NaN are dropped at the end. Because the longest
        look-back is the 200-bar SMA, inputs shorter than 200 rows (or with
        an all-NaN column such as a missing volume) produce an empty frame.

        Args:
            df: DataFrame with 'close', 'high', 'low' and 'volume' columns.

        Returns:
            A new DataFrame; the input is not modified.

        Raises:
            KeyError: if one of the required columns is missing.
        """
        df = df.copy()

        required = ('close', 'high', 'low', 'volume')
        missing = [name for name in required if name not in df.columns]
        if missing:
            raise KeyError(f"Missing required column(s): {missing}")

        original_columns = set(df.columns)

        steps = (
            self._add_sma_ema,
            self._add_macd,
            self._add_rsi,
            self._add_stochastic,
            self._add_adx,
            self._add_demarker,
            self._add_atr,
            self._add_bollinger,
            self._add_std_features,
            self._add_obv,
            self._add_accumulation_distribution,
            self._add_pivot_points,
            self._add_trendlines,
            self._add_market_profile,
            self._add_support_resistance,
        )
        for step in steps:
            df = step(df)

        self.feature_names = [c for c in df.columns if c not in original_columns]

        df.dropna(inplace=True)
        return df

    # =============================================================
    # 📊 SIMPLE MOVING AVERAGES (SMA / EMA)
    # =============================================================
    def _add_sma_ema(self, df):
        """Add sma_N / ema_N of the close for N in 5, 10, 20, 50, 100, 200."""
        for period in [5, 10, 20, 50, 100, 200]:
            df[f'sma_{period}'] = df['close'].rolling(period).mean()
            df[f'ema_{period}'] = df['close'].ewm(span=period, adjust=False).mean()
        return df

    # =============================================================
    # 💹 MACD
    # =============================================================
    def _add_macd(self, df):
        """Add macd (EMA12 - EMA26), its 9-span EMA signal and the histogram."""
        exp1 = df['close'].ewm(span=12, adjust=False).mean()
        exp2 = df['close'].ewm(span=26, adjust=False).mean()
        df['macd'] = exp1 - exp2
        df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()
        df['macd_hist'] = df['macd'] - df['macd_signal']
        return df

    # =============================================================
    # 💪 RSI
    # =============================================================
    def _add_rsi(self, df, period=14):
        """Add rsi_{period} using simple rolling means of gains and losses."""
        delta = df['close'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(period).mean()
        rs = gain / (loss + self._EPS)
        df[f'rsi_{period}'] = 100 - (100 / (1 + rs))
        return df

    # =============================================================
    # 🎯 STOCHASTIC OSCILLATOR
    # =============================================================
    def _add_stochastic(self, df, period=14):
        """Add stoch_k_{period} and its 3-bar mean stoch_d_{period}."""
        low_min = df['low'].rolling(period).min()
        high_max = df['high'].rolling(period).max()
        df[f'stoch_k_{period}'] = 100 * (df['close'] - low_min) / (high_max - low_min + self._EPS)
        df[f'stoch_d_{period}'] = df[f'stoch_k_{period}'].rolling(3).mean()
        return df

    # =============================================================
    # 📏 ADX (Average Directional Index)
    # =============================================================
    def _add_adx(self, df, period=14):
        """Add an 'adx' column from rolling-mean directional movement.

        NOTE(review): this is a simplified variant. Up and down moves are
        clipped at zero independently and smoothed with simple rolling
        means, rather than with Wilder's comparison and smoothing.
        """
        plus_dm = df['high'].diff().clip(lower=0)
        minus_dm = (-df['low'].diff()).clip(lower=0)
        atr = self._calculate_atr(df, period)
        # Epsilon keeps a completely flat window (ATR == 0) from yielding
        # 0/0 = NaN, consistent with the other indicators in this class.
        plus_di = 100 * plus_dm.rolling(period).mean() / (atr + self._EPS)
        minus_di = 100 * minus_dm.rolling(period).mean() / (atr + self._EPS)
        dx = 100 * (plus_di - minus_di).abs() / (plus_di + minus_di + self._EPS)
        df['adx'] = dx.rolling(period).mean()
        return df

    # =============================================================
    # 🧭 DEMARKER INDICATOR
    # =============================================================
    def _add_demarker(self, df, period=14):
        """Add a 'demarker' column in [0, 1].

        The intermediate series are kept on ``df``'s own index. Building
        them on a fresh 0..n-1 index would misalign with a datetime-indexed
        frame and yield an all-NaN column.
        """
        high_diff = df['high'].diff()
        low_diff = df['low'].diff()
        # Positive high moves / magnitude of negative low moves; everything
        # else (including the leading NaN from diff) counts as zero.
        demax = high_diff.where(high_diff > 0, 0.0)
        demin = (-low_diff).where(low_diff < 0, 0.0)
        demax_sma = demax.rolling(period).mean()
        demin_sma = demin.rolling(period).mean()
        df['demarker'] = demax_sma / (demax_sma + demin_sma + self._EPS)
        return df

    # =============================================================
    # 📉 ATR (Average True Range)
    # =============================================================
    def _add_atr(self, df, period=14):
        """Add an 'atr' column (see _calculate_atr)."""
        df['atr'] = self._calculate_atr(df, period)
        return df

    def _calculate_atr(self, df, period=14):
        """Return the simple rolling mean of the true range as a Series."""
        prev_close = df['close'].shift()
        tr1 = df['high'] - df['low']
        tr2 = (df['high'] - prev_close).abs()
        tr3 = (df['low'] - prev_close).abs()
        # Row-wise max skips the NaNs produced by the shift on the first row.
        tr = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
        return tr.rolling(period).mean()

    # =============================================================
    # 🎢 BOLLINGER BANDS
    # =============================================================
    def _add_bollinger(self, df, period=20):
        """Add bb_upper / bb_lower (SMA ± 2 std) and the relative bb_width."""
        sma = df['close'].rolling(period).mean()
        std = df['close'].rolling(period).std()
        df['bb_upper'] = sma + (2 * std)
        df['bb_lower'] = sma - (2 * std)
        df['bb_width'] = (df['bb_upper'] - df['bb_lower']) / sma
        return df

    # =============================================================
    # 🧮 STANDARD DEVIATION FEATURES
    # =============================================================
    def _add_std_features(self, df):
        """Add std_N, the rolling std of the close, for N in 10, 20, 50."""
        for period in [10, 20, 50]:
            df[f'std_{period}'] = df['close'].rolling(period).std()
        return df

    # =============================================================
    # 💰 ON-BALANCE VOLUME (OBV)
    # =============================================================
    def _add_obv(self, df):
        """Add an 'obv' column: running volume signed by the close change.

        Starts at 0 on the first row, adds the bar's volume when the close
        rose, subtracts it when the close fell and carries the previous
        value when it was unchanged. Vectorised, so it does not depend on
        the index type and also handles an empty frame.
        """
        # +1 / -1 / 0 per bar; the first bar has no predecessor -> 0.
        direction = np.sign(df['close'].diff()).fillna(0)
        df['obv'] = (direction * df['volume']).cumsum()
        return df

    # =============================================================
    # 🧩 ACCUMULATION/DISTRIBUTION LINE
    # =============================================================
    def _add_accumulation_distribution(self, df):
        """Add 'ad_line', the cumulative money-flow-multiplier * volume."""
        mfm = ((df['close'] - df['low']) - (df['high'] - df['close'])) / (df['high'] - df['low'] + self._EPS)
        df['ad_line'] = (mfm * df['volume']).cumsum()
        return df

    # =============================================================
    # 🪜 PIVOT POINTS
    # =============================================================
    def _add_pivot_points(self, df):
        """Add pivot, r1, s1, r2, s2 computed from each row's own H/L/C."""
        df['pivot'] = (df['high'] + df['low'] + df['close']) / 3
        df['r1'] = 2 * df['pivot'] - df['low']
        df['s1'] = 2 * df['pivot'] - df['high']
        df['r2'] = df['pivot'] + (df['high'] - df['low'])
        df['s2'] = df['pivot'] - (df['high'] - df['low'])
        return df

    # =============================================================
    # 📈 TRENDLINES (linear regression slope proxy)
    # =============================================================
    def _add_trendlines(self, df, period=20):
        """Add 'trend_slope', the least-squares slope of each close window."""
        # Build the x-axis once instead of once per window.
        x_axis = np.arange(period)
        df['trend_slope'] = df['close'].rolling(period).apply(
            lambda window: np.polyfit(x_axis[:len(window)], window, 1)[0], raw=True)
        return df

    # =============================================================
    # 🏗️ MARKET PROFILE (volume distribution)
    # =============================================================
    def _add_market_profile(self, df, bins=20):
        """Add 'price_bin' (quantile bin of the close) and the total volume
        traded in that bin as a float 'market_profile_strength'.

        NOTE(review): bins and volume totals are computed over the whole
        frame, so every row's value also reflects later rows.
        """
        df['price_bin'] = pd.qcut(df['close'], bins, duplicates='drop')
        volume_profile = df.groupby('price_bin', observed=False)['volume'].sum()
        # Mapping a categorical can yield a categorical; force a plain
        # numeric column so the feature is usable by numeric tooling.
        df['market_profile_strength'] = df['price_bin'].map(volume_profile).astype(float)
        return df

    # =============================================================
    # 🧭 SUPPORT/RESISTANCE LEVELS (local highs/lows)
    # =============================================================
    def _add_support_resistance(self, df, window=10):
        """Add rolling-min 'support', rolling-max 'resistance' and the
        close's relative 'price_position' between them."""
        df['support'] = df['low'].rolling(window).min()
        df['resistance'] = df['high'].rolling(window).max()
        df['price_position'] = (df['close'] - df['support']) / (df['resistance'] - df['support'] + self._EPS)
        return df
