<a href="https://colab.research.google.com/github/TechnicalClubRBU-CodeRush1-0/CNN-LSTM/blob/main/CNN_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# ==================== IMPORTS & CONFIGURATION ====================
import logging
import os
import warnings
import zlib
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Dict, List, Tuple, Optional, Union

import joblib
import numpy as np
import pandas as pd
import requests
import tensorflow as tf
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Environment setup: silence library warnings and route log records through a
# timestamped INFO-level root handler; `logger` is the logger used by the
# classes defined in later cells.
warnings.filterwarnings('ignore')
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Fix the NumPy and TensorFlow global seeds so repeated runs start from the
# same random state.
np.random.seed(42)
tf.random.set_seed(42)

# Startup banner: one line per fact, emitted with a single print call.
gpu_count = len(tf.config.list_physical_devices('GPU'))
print(
    "Intelligent Portfolio Manager Initializing...",
    f"TensorFlow version: {tf.__version__}",
    f"GPU acceleration: {'Enabled' if gpu_count > 0 else 'CPU mode'} ({gpu_count} devices)",
    "Random seeds set for reproducible results",
    sep="\n",
)


Intelligent Portfolio Manager Initializing...
TensorFlow version: 2.19.0
GPU acceleration: Enabled (1 devices)
Random seeds set for reproducible results


In [6]:
# ==================== DATA STRUCTURES ====================
@dataclass
class PredictionResult:
    """Trading signal for one symbol, in the notebook's standard output shape.

    The fields are plain values so the record can be serialised with
    ``to_dict`` without further conversion.
    """
    symbol: str
    action: str  # 'BUY', 'SELL', 'HOLD'
    confidence: float  # 0.0 to 1.0
    current_price: float
    technical_indicators: Dict[str, float]
    data_source: str
    timestamp: str
    model_version: str = "1.0.0"

    def to_dict(self):
        """Return the record as a plain dict with rounded numeric fields.

        ``confidence`` and every indicator value are rounded to 4 decimals,
        ``current_price`` to 2; all other fields are passed through as-is.
        Keys appear in field-declaration order.
        """
        rounded_indicators = {}
        for name, value in self.technical_indicators.items():
            rounded_indicators[name] = round(value, 4)

        payload = {}
        payload['symbol'] = self.symbol
        payload['action'] = self.action
        payload['confidence'] = round(self.confidence, 4)
        payload['current_price'] = round(self.current_price, 2)
        payload['technical_indicators'] = rounded_indicators
        payload['data_source'] = self.data_source
        payload['timestamp'] = self.timestamp
        payload['model_version'] = self.model_version
        return payload

@dataclass
class MarketData:
    """Market data container

    Pairs a price DataFrame with the symbol it was requested for and with
    provenance information (which source produced it, when, and whether it
    is synthetic). Instances are built by ``RobustDataManager.get_market_data``.
    """
    symbol: str  # symbol the frame was requested for
    data: pd.DataFrame  # price frame exactly as returned by the chosen source
    source: str  # name of the source that produced `data` (e.g. 'yfinance')
    timestamp: str  # creation time; the data manager stores datetime.now().isoformat()
    is_mock: bool = False  # True when `data` was generated locally instead of fetched

# Summary banner for this cell, emitted as one print call (one line per item).
print(
    "Core data structures configured:",
    "   - PredictionResult: BUY/SELL/HOLD with confidence scores",
    "   - MarketData: Multi-source market data container",
    "   - Standardized output format for API integration",
    sep="\n",
)


Core data structures configured:
   - PredictionResult: BUY/SELL/HOLD with confidence scores
   - MarketData: Multi-source market data container
   - Standardized output format for API integration


In [3]:
# ==================== ROBUST DATA MANAGER ====================
class RobustDataManager:
    """Multi-source daily price fetcher with an ordered fallback chain.

    ``get_market_data`` walks ``self.data_sources`` (YFinance, Alpha Vantage,
    then locally generated mock data) and returns the first non-empty frame
    wrapped in a ``MarketData`` record.

    NOTE(review): a later cell of this notebook defines another class named
    ``RobustDataManager``; once that cell runs it replaces this definition.
    """

    def __init__(self, alpha_vantage_key: Optional[str] = None):
        """Store the API key and the source order.

        Args:
            alpha_vantage_key: Alpha Vantage API key; the literal ``"demo"``
                is used when none is given.
        """
        self.alpha_vantage_key = alpha_vantage_key or "demo"
        self.data_sources = ['yfinance', 'alpha_vantage', 'mock']
        self.cache = {}  # not read or written by any method of this class

    @staticmethod
    def _flatten_columns(data: pd.DataFrame) -> pd.DataFrame:
        """Collapse MultiIndex columns to the level that holds the field names.

        Recent yfinance releases can return ``(field, ticker)`` MultiIndex
        columns even for a single symbol; downstream code expects plain
        ``'Open'``/``'Close'``/... labels. Frames with flat columns are
        returned unchanged.
        """
        if isinstance(data.columns, pd.MultiIndex):
            for level in range(data.columns.nlevels):
                labels = data.columns.get_level_values(level)
                if 'Close' in labels:
                    data = data.copy()
                    data.columns = labels
                    break
        return data

    @staticmethod
    def _alpha_vantage_error(payload) -> str:
        """Pull the most specific error text out of an Alpha Vantage reply."""
        if isinstance(payload, dict):
            # NOTE(review): the API appears to report failures under any of
            # these keys depending on the failure kind - confirm against docs.
            for key in ('Error Message', 'Note', 'Information'):
                if key in payload:
                    return payload[key]
        return 'Unknown error'

    def fetch_yfinance(self, symbol: str, period: int = 60) -> Optional[pd.DataFrame]:
        """Fetch data from YFinance

        Args:
            symbol: ticker to download.
            period: look-back window in days.

        Returns:
            The downloaded frame without the ``'Adj Close'`` column, or
            ``None`` when the download fails or comes back empty.
        """
        try:
            logger.info(f"Fetching {symbol} from YFinance...")
            data = yf.download(symbol, period=f'{period}d', progress=False, threads=True)

            if data is None or data.empty:
                raise ValueError("YFinance returned empty dataset")

            # Standardize column names
            data = self._flatten_columns(data)
            if 'Adj Close' in data.columns:
                data = data.drop(columns='Adj Close')

            return data

        except Exception as e:
            logger.warning(f"YFinance failed for {symbol}: {e}")
            return None

    def fetch_alpha_vantage(self, symbol: str) -> Optional[pd.DataFrame]:
        """Fetch data from Alpha Vantage

        Returns:
            The last 60 rows of the daily series as floats, sorted by date,
            or ``None`` on any failure (the reason is logged).
        """
        try:
            logger.info(f"Fetching {symbol} from Alpha Vantage...")
            url = "https://www.alphavantage.co/query"
            params = {
                'function': 'TIME_SERIES_DAILY',
                'symbol': symbol,
                'apikey': self.alpha_vantage_key,
                'outputsize': 'compact'
            }

            response = requests.get(url, params=params, timeout=15)
            payload = response.json()

            if 'Time Series (Daily)' not in payload:
                raise ValueError(f"Alpha Vantage API error: {self._alpha_vantage_error(payload)}")

            df = pd.DataFrame.from_dict(payload['Time Series (Daily)'], orient='index')
            # Columns are renamed by position.
            df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
            df.index = pd.to_datetime(df.index)
            df = df.astype(float).sort_index()

            return df.tail(60)

        except Exception as e:
            logger.warning(f"Alpha Vantage failed for {symbol}: {e}")
            return None

    def generate_mock_data(self, symbol: str, days: int = 60) -> pd.DataFrame:
        """Generate realistic mock data

        The series is a pure function of ``symbol`` and ``days`` (apart from
        the date index, which ends at the current time): the generator is
        seeded from a CRC32 of the symbol, which - unlike ``hash(str)`` - is
        identical in every Python process. A private generator is used so the
        global NumPy random state is left untouched.

        Args:
            symbol: ticker; selects the base price and the random seed.
            days: number of daily rows to produce (at least 1).

        Returns:
            Frame with ``Close, Open, High, Low, Volume`` columns, no NaNs,
            and ``Low <= min(Open, Close)``, ``High >= max(Open, Close)``.

        Raises:
            ValueError: if ``days`` is smaller than 1.
        """
        logger.info(f"Generating mock data for {symbol}")

        if days < 1:
            raise ValueError("days must be at least 1")

        # Set consistent seed based on symbol
        rng = np.random.default_rng(zlib.crc32(symbol.encode('utf-8')))

        # Base prices for different symbols
        base_prices = {
            'AAPL': 225, 'NVDA': 450, 'TSLA': 250, 'MSFT': 420, 'GOOGL': 175,
            'AMZN': 180, 'META': 520, 'PLTR': 25, 'SPY': 550, 'QQQ': 480
        }

        base_price = base_prices.get(symbol)
        if base_price is None:
            base_price = 150 + rng.random() * 200
        dates = pd.date_range(end=datetime.now(), periods=days, freq='D')

        # Generate realistic returns with volatility clustering
        returns = rng.normal(0.002, 0.025, days)

        # Add volatility clustering effect: a large move is followed by a
        # draw from a wider distribution. Sequential on purpose - each step
        # looks at the (possibly already replaced) previous return.
        for i in range(1, days):
            if abs(returns[i - 1]) > 0.03:
                returns[i] = rng.normal(0, 0.04)

        # Calculate prices: day 0 closes at the base price, later days
        # compound returns[1:].
        growth = np.concatenate(([1.0], 1 + returns[1:]))
        closes = base_price * np.cumprod(growth)

        # Create realistic OHLCV data
        df = pd.DataFrame(index=dates)
        df['Close'] = closes
        # Open is the previous close plus a small gap. Day 0 has no previous
        # close, so it is filled *before* High/Low are derived from it;
        # otherwise those would be computed from NaN.
        gapped_open = df['Close'].shift(1) * (1 + rng.normal(0, 0.005, days))
        df['Open'] = gapped_open.bfill().fillna(df['Close'])
        df['High'] = np.maximum(df['Open'], df['Close']) * (1 + np.abs(rng.normal(0, 0.01, days)))
        df['Low'] = np.minimum(df['Open'], df['Close']) * (1 - np.abs(rng.normal(0, 0.01, days)))
        df['Volume'] = rng.integers(20000000, 100000000, days)

        return df

    def get_market_data(self, symbol: str) -> MarketData:
        """Get market data with fallback chain

        Sources are tried in ``self.data_sources`` order; the first one that
        returns a non-empty frame is wrapped in ``MarketData`` and returned.

        Raises:
            RuntimeError: if every source fails or returns nothing.
        """
        fetchers = {
            'yfinance': self.fetch_yfinance,
            'alpha_vantage': self.fetch_alpha_vantage,
            'mock': self.generate_mock_data,
        }

        # Try data sources in order
        for source in self.data_sources:
            fetch = fetchers.get(source)
            if fetch is None:
                logger.warning(f"Unknown data source '{source}' skipped for {symbol}")
                continue

            try:
                data = fetch(symbol)

                if data is not None and not data.empty:
                    market_data = MarketData(
                        symbol=symbol,
                        data=data,
                        source=source,
                        timestamp=datetime.now().isoformat(),
                        is_mock=(source == 'mock')
                    )

                    logger.info(f"✅ Successfully fetched {symbol} from {source}")
                    return market_data

            except Exception as e:
                logger.error(f"Error with {source} for {symbol}: {e}")
                continue

        raise RuntimeError(f"All data sources failed for {symbol}")

# Cell-completion marker: printed once the class statement above has executed.
print("✅ RobustDataManager created")


✅ RobustDataManager created


In [8]:
# ==================== ROBUST DATA MANAGER ====================
class RobustDataManager:
    """Multi-source data fetcher with intelligent fallbacks - Real APIs Only

    Every ``fetch_*`` method returns a daily price frame or ``None`` on
    failure (the reason is logged, never raised). ``get_market_data`` tries
    the sources named in ``self.data_sources`` in order and returns the first
    frame with at least ``MIN_ROWS`` rows.
    """

    # Column order the JSON-based fetchers normalise their frames to.
    OHLCV_COLUMNS = ['Open', 'High', 'Low', 'Close', 'Volume']
    # A source must deliver at least this many rows to be accepted.
    MIN_ROWS = 20
    # Fetchers trim their result to this many most-recent rows.
    MAX_ROWS = 60
    # Calendar-day look-back used when a source needs explicit dates.
    LOOKBACK_DAYS = 65
    # Per-request timeout in seconds.
    REQUEST_TIMEOUT = 15
    # Lower-case vendor field names -> standard names (FMP, Twelve Data, IEX).
    _LOWERCASE_FIELDS = {
        'open': 'Open', 'high': 'High', 'low': 'Low',
        'close': 'Close', 'volume': 'Volume'
    }

    def __init__(self, alpha_vantage_key: Optional[str] = None, fmp_key: Optional[str] = None,
                 iex_key: Optional[str] = None, twelve_data_key: Optional[str] = None,
                 polygon_key: Optional[str] = None):
        """Store API keys and the source order.

        Args:
            alpha_vantage_key: Alpha Vantage key; ``"demo"`` when omitted.
            fmp_key: Financial Modeling Prep key; ``"demo"`` when omitted.
            iex_key: IEX Cloud token; IEX is skipped entirely when omitted.
            twelve_data_key: Twelve Data key; ``"demo"`` when omitted.
            polygon_key: Polygon.io key; ``"demo"`` when omitted.
        """
        self.alpha_vantage_key = alpha_vantage_key or "demo"
        self.fmp_key = fmp_key or "demo"
        self.iex_key = iex_key
        self.twelve_data_key = twelve_data_key or "demo"
        self.polygon_key = polygon_key or "demo"
        # Full fallback order; each name maps to a ``fetch_<name>`` method.
        # 'iex' is filtered out at call time when no token was supplied.
        self.data_sources = ['yfinance', 'alpha_vantage', 'fmp', 'stooq', 'twelve_data',
                             'iex', 'polygon', 'yahoo_query']
        self.cache = {}  # not read or written by any method of this class

    # ------------------------------------------------------------------ helpers

    def _lookback_window(self) -> Tuple[datetime, datetime]:
        """Return ``(start, end)`` covering the last ``LOOKBACK_DAYS`` days."""
        end = datetime.now()
        return end - timedelta(days=self.LOOKBACK_DAYS), end

    @classmethod
    def _standardize(cls, df: pd.DataFrame, mapping: Dict[str, str]) -> pd.DataFrame:
        """Rename vendor columns via ``mapping`` and keep only the OHLCV set."""
        return df.rename(columns=mapping)[cls.OHLCV_COLUMNS]

    @staticmethod
    def _flatten_columns(data: pd.DataFrame) -> pd.DataFrame:
        """Collapse MultiIndex columns to the level that holds the field names.

        Recent yfinance releases can return ``(field, ticker)`` MultiIndex
        columns even for a single symbol. Frames with flat columns are
        returned unchanged.
        """
        if isinstance(data.columns, pd.MultiIndex):
            for level in range(data.columns.nlevels):
                labels = data.columns.get_level_values(level)
                if 'Close' in labels:
                    data = data.copy()
                    data.columns = labels
                    break
        return data

    @staticmethod
    def _alpha_vantage_error(payload) -> str:
        """Pull the most specific error text out of an Alpha Vantage reply."""
        if isinstance(payload, dict):
            # NOTE(review): the API appears to report failures under any of
            # these keys depending on the failure kind - confirm against docs.
            for key in ('Error Message', 'Note', 'Information'):
                if key in payload:
                    return payload[key]
        return 'Unknown error'

    @classmethod
    def _parse_yahoo_chart(cls, payload) -> pd.DataFrame:
        """Turn a Yahoo chart JSON payload into a sorted, NaN-free OHLCV frame.

        Raises:
            ValueError: if the payload carries no chart result.
        """
        chart = payload.get('chart') if isinstance(payload, dict) else None
        if not chart or not chart.get('result'):
            raise ValueError("Yahoo Query returned no data")

        result = chart['result'][0]
        # 'quote' is a one-element list wrapping the dict of price arrays.
        quote = result['indicators']['quote'][0]

        df = pd.DataFrame({column: quote[column.lower()] for column in cls.OHLCV_COLUMNS})
        df.index = pd.to_datetime(result['timestamp'], unit='s')
        # Rows with any missing price are dropped.
        return df.dropna().sort_index()

    # ----------------------------------------------------------------- fetchers

    def fetch_yfinance(self, symbol: str, period: int = 60) -> Optional[pd.DataFrame]:
        """Fetch data from YFinance

        Args:
            symbol: ticker to download.
            period: look-back window in days.
        """
        try:
            logger.info(f"Fetching {symbol} from YFinance...")
            data = yf.download(symbol, period=f'{period}d', progress=False, threads=True)

            if data is None or data.empty:
                raise ValueError("YFinance returned empty dataset")

            # Standardize column names
            data = self._flatten_columns(data)
            if 'Adj Close' in data.columns:
                data = data.drop(columns='Adj Close')

            return data

        except Exception as e:
            logger.warning(f"YFinance failed for {symbol}: {e}")
            return None

    def fetch_alpha_vantage(self, symbol: str) -> Optional[pd.DataFrame]:
        """Fetch data from Alpha Vantage"""
        try:
            logger.info(f"Fetching {symbol} from Alpha Vantage...")
            url = "https://www.alphavantage.co/query"
            params = {
                'function': 'TIME_SERIES_DAILY',
                'symbol': symbol,
                'apikey': self.alpha_vantage_key,
                'outputsize': 'compact'
            }

            response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
            payload = response.json()

            if 'Time Series (Daily)' not in payload:
                raise ValueError(f"Alpha Vantage API error: {self._alpha_vantage_error(payload)}")

            df = pd.DataFrame.from_dict(payload['Time Series (Daily)'], orient='index')
            # Columns are renamed by position.
            df.columns = list(self.OHLCV_COLUMNS)
            df.index = pd.to_datetime(df.index)
            df = df.astype(float).sort_index()

            return df.tail(self.MAX_ROWS)

        except Exception as e:
            logger.warning(f"Alpha Vantage failed for {symbol}: {e}")
            return None

    def fetch_fmp(self, symbol: str) -> Optional[pd.DataFrame]:
        """Fetch data from Financial Modeling Prep"""
        try:
            logger.info(f"Fetching {symbol} from Financial Modeling Prep...")
            start, end = self._lookback_window()
            url = f"https://financialmodelingprep.com/api/v3/historical-price-full/{symbol}"
            params = {
                'apikey': self.fmp_key,
                'from': start.strftime('%Y-%m-%d'),
                'to': end.strftime('%Y-%m-%d')
            }

            response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
            payload = response.json()

            if 'historical' not in payload or not payload['historical']:
                raise ValueError("FMP returned no historical data")

            df = pd.DataFrame(payload['historical'])
            df['date'] = pd.to_datetime(df['date'])
            df = df.set_index('date').sort_index()

            return self._standardize(df, self._LOWERCASE_FIELDS).tail(self.MAX_ROWS)

        except Exception as e:
            logger.warning(f"FMP failed for {symbol}: {e}")
            return None

    def fetch_stooq(self, symbol: str) -> Optional[pd.DataFrame]:
        """Fetch data from Stooq (Free, no API key required)"""
        try:
            logger.info(f"Fetching {symbol} from Stooq...")
            # Optional dependency: imported lazily so the class works without it.
            try:
                import pandas_datareader.data as web
            except ImportError:
                logger.warning("pandas_datareader not available, skipping Stooq")
                return None

            start_date, end_date = self._lookback_window()

            stooq_symbol = f"{symbol}.US"
            data = web.DataReader(stooq_symbol, 'stooq', start_date, end_date)

            if data.empty:
                raise ValueError("Stooq returned empty dataset")

            return data.sort_index().tail(self.MAX_ROWS)

        except Exception as e:
            logger.warning(f"Stooq failed for {symbol}: {e}")
            return None

    def fetch_twelve_data(self, symbol: str) -> Optional[pd.DataFrame]:
        """Fetch data from Twelve Data"""
        try:
            logger.info(f"Fetching {symbol} from Twelve Data...")
            url = "https://api.twelvedata.com/time_series"
            params = {
                'symbol': symbol,
                'interval': '1day',
                'outputsize': str(self.MAX_ROWS),
                'apikey': self.twelve_data_key
            }

            response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
            payload = response.json()

            if 'values' not in payload or not payload['values']:
                raise ValueError(f"Twelve Data API error: {payload.get('message', 'Unknown error')}")

            df = pd.DataFrame(payload['values'])
            df['datetime'] = pd.to_datetime(df['datetime'])
            df = df.set_index('datetime').sort_index()

            # Convert to numeric before renaming; unparseable cells become NaN.
            for col in self._LOWERCASE_FIELDS:
                df[col] = pd.to_numeric(df[col], errors='coerce')

            return self._standardize(df, self._LOWERCASE_FIELDS)

        except Exception as e:
            logger.warning(f"Twelve Data failed for {symbol}: {e}")
            return None

    def fetch_iex(self, symbol: str) -> Optional[pd.DataFrame]:
        """Fetch data from IEX Cloud

        NOTE(review): IEX Cloud was reportedly retired in 2024 - confirm this
        endpoint is still reachable before relying on it.
        """
        try:
            if not self.iex_key:
                logger.warning("IEX API key not provided, skipping IEX")
                return None

            logger.info(f"Fetching {symbol} from IEX Cloud...")
            url = f"https://cloud.iexapis.com/stable/stock/{symbol}/chart/2m"
            params = {'token': self.iex_key}

            response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
            payload = response.json()

            if not payload:
                raise ValueError("IEX returned empty dataset")

            df = pd.DataFrame(payload)
            df['date'] = pd.to_datetime(df['date'])
            df = df.set_index('date').sort_index()

            return self._standardize(df, self._LOWERCASE_FIELDS).tail(self.MAX_ROWS)

        except Exception as e:
            logger.warning(f"IEX failed for {symbol}: {e}")
            return None

    def fetch_polygon(self, symbol: str) -> Optional[pd.DataFrame]:
        """Fetch data from Polygon.io"""
        try:
            logger.info(f"Fetching {symbol} from Polygon...")
            start, end = self._lookback_window()
            start_date = start.strftime('%Y-%m-%d')
            end_date = end.strftime('%Y-%m-%d')

            url = f"https://api.polygon.io/v2/aggs/ticker/{symbol}/range/1/day/{start_date}/{end_date}"
            # Key comes from the constructor ("demo" fallback) instead of
            # being hard-coded; parameter is spelled as in Polygon's docs.
            params = {'apiKey': self.polygon_key}

            response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
            payload = response.json()

            if 'results' not in payload or not payload['results']:
                raise ValueError("Polygon returned no results")

            df = pd.DataFrame(payload['results'])
            # 't' is parsed as a millisecond epoch timestamp.
            df['date'] = pd.to_datetime(df['t'], unit='ms')
            df = df.set_index('date').sort_index()

            # Rename columns to standard format
            column_mapping = {
                'o': 'Open', 'h': 'High', 'l': 'Low', 'c': 'Close', 'v': 'Volume'
            }

            return self._standardize(df, column_mapping).tail(self.MAX_ROWS)

        except Exception as e:
            logger.warning(f"Polygon failed for {symbol}: {e}")
            return None

    def fetch_yahoo_query(self, symbol: str) -> Optional[pd.DataFrame]:
        """Alternative Yahoo Finance query method"""
        try:
            logger.info(f"Fetching {symbol} from Yahoo Query...")
            start, end = self._lookback_window()

            # Yahoo Finance alternative endpoint
            url = f"https://query1.finance.yahoo.com/v8/finance/chart/{symbol}"
            params = {
                'period1': int(start.timestamp()),
                'period2': int(end.timestamp()),
                'interval': '1d',
                'includePrePost': 'false',
                # Plain comma on purpose: requests percent-encodes parameter
                # values itself, so a pre-encoded "%2C" would go out as "%252C".
                'events': 'div,split'
            }

            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
            response = requests.get(url, params=params, headers=headers, timeout=self.REQUEST_TIMEOUT)

            return self._parse_yahoo_chart(response.json()).tail(self.MAX_ROWS)

        except Exception as e:
            logger.warning(f"Yahoo Query failed for {symbol}: {e}")
            return None

    # --------------------------------------------------------------- entry point

    def get_market_data(self, symbol: str) -> MarketData:
        """Get market data with comprehensive fallback chain

        Walks ``self.data_sources`` in order (skipping ``'iex'`` when no IEX
        token was configured), calling the matching ``fetch_<source>`` method.
        The first frame with at least ``MIN_ROWS`` rows is returned.

        Raises:
            RuntimeError: if no source delivers enough data.
        """
        active_sources = [
            source for source in self.data_sources
            if source != 'iex' or self.iex_key
        ]

        # Try data sources in order
        for source in active_sources:
            fetcher = getattr(self, f'fetch_{source}', None)
            try:
                data = fetcher(symbol) if callable(fetcher) else None

                if data is not None and not data.empty and len(data) >= self.MIN_ROWS:  # Minimum viable data
                    market_data = MarketData(
                        symbol=symbol,
                        data=data,
                        source=source,
                        timestamp=datetime.now().isoformat(),
                        is_mock=False
                    )

                    logger.info(f"Successfully fetched {symbol} from {source} ({len(data)} rows)")
                    return market_data

            except Exception as e:
                logger.error(f"Error with {source} for {symbol}: {e}")
                continue

        raise RuntimeError(f"All {len(active_sources)} data sources failed for {symbol}. "
                         f"No market data available.")

# Summary banner for this cell, emitted as one print call (one line per item).
print(
    "Enhanced multi-source data pipeline configured:",
    "   - Primary: YFinance API (real-time market data)",
    "   - Backup 1: Alpha Vantage API (professional financial data)",
    "   - Backup 2: Financial Modeling Prep (comprehensive market data)",
    "   - Backup 3: Stooq (free, no API key required)",
    "   - Backup 4: Twelve Data API (real-time financial data)",
    "   - Backup 5: IEX Cloud (institutional-grade data)",
    "   - Backup 6: Polygon.io (market data aggregation)",
    "   - Backup 7: Yahoo Query (alternative Yahoo endpoint)",
    "   - 8 total data sources ensure maximum reliability",
    "   - Automatic failover with minimum data quality checks",
    "   - No mock data - 100% real market data",
    sep="\n",
)


Enhanced multi-source data pipeline configured:
   - Primary: YFinance API (real-time market data)
   - Backup 1: Alpha Vantage API (professional financial data)
   - Backup 2: Financial Modeling Prep (comprehensive market data)
   - Backup 3: Stooq (free, no API key required)
   - Backup 4: Twelve Data API (real-time financial data)
   - Backup 5: IEX Cloud (institutional-grade data)
   - Backup 6: Polygon.io (market data aggregation)
   - Backup 7: Yahoo Query (alternative Yahoo endpoint)
   - 8 total data sources ensure maximum reliability
   - Automatic failover with minimum data quality checks
   - No mock data - 100% real market data
