<a href="https://colab.research.google.com/github/TechnicalClubRBU-CodeRush1-0/CNN-LSTM/blob/main/CNN_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [123]:
# Environment setup and library imports for CNN-LSTM portfolio manager

import tensorflow as tf
import numpy as np
import pandas as pd
import yfinance as yf
import requests
import warnings
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Tuple, Optional, Union
from dataclasses import dataclass
from sklearn.preprocessing import MinMaxScaler
import joblib
import os

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/Keras.
warnings.filterwarnings('ignore')
# Root logging configuration: INFO level, "timestamp - level - message" lines.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by the classes and functions defined below.
logger = logging.getLogger(__name__)

# Fixed seeds for NumPy and TensorFlow random number generation.
np.random.seed(42)
tf.random.set_seed(42)

# Startup banner: TensorFlow version and number of visible GPU devices.
print("Intelligent Portfolio Manager Initializing...")
print(f"TensorFlow version: {tf.__version__}")
gpu_count = len(tf.config.list_physical_devices('GPU'))
print(f"GPU acceleration: {'Enabled' if gpu_count > 0 else 'CPU mode'} ({gpu_count} devices)")


Intelligent Portfolio Manager Initializing...
TensorFlow version: 2.19.0
GPU acceleration: Enabled (1 devices)


In [124]:
# Core data structures for predictions and market data

@dataclass
class PredictionResult:
    """Outcome of one model prediction for a single symbol."""

    symbol: str
    action: str
    confidence: float
    current_price: float
    technical_indicators: Dict[str, float]
    data_source: str
    timestamp: str
    model_version: str = "1.0.0"

    def to_dict(self):
        """Return a plain dict of the fields with numeric values rounded.

        ``confidence`` and every technical indicator are rounded to four
        decimals, ``current_price`` to two; all other fields are passed
        through unchanged.
        """
        rounded_indicators = {}
        for indicator_name, indicator_value in self.technical_indicators.items():
            rounded_indicators[indicator_name] = round(indicator_value, 4)

        return dict(
            symbol=self.symbol,
            action=self.action,
            confidence=round(self.confidence, 4),
            current_price=round(self.current_price, 2),
            technical_indicators=rounded_indicators,
            data_source=self.data_source,
            timestamp=self.timestamp,
            model_version=self.model_version,
        )

@dataclass
class MarketData:
    """Price history for one symbol together with where it came from."""

    # Ticker the data was requested for.
    symbol: str
    # Price table; the fetchers in this file return Open/High/Low/Close/Volume columns.
    data: pd.DataFrame
    # Name of the provider that supplied the data (e.g. 'yfinance').
    source: str
    # Fetch time as a string; get_market_data stores datetime.now().isoformat().
    timestamp: str
    # Flag for synthetic data; get_market_data always sets it to False.
    is_mock: bool = False

print("Core data structures configured")


Core data structures configured


In [125]:
# Enhanced data manager with NSE fix

class RobustDataManager:
    """Fetch daily OHLCV history for a symbol with provider fallback.

    Providers are tried in the order listed in ``self.data_sources``
    (YFinance, Alpha Vantage, Financial Modeling Prep by default). The first
    provider that returns at least 60 rows is used.

    ``twelve_data_key`` and ``cache`` are stored on the instance but are not
    used by any method of this class.
    """

    def __init__(self, alpha_vantage_key: Optional[str] = None, fmp_key: Optional[str] = None,
                 twelve_data_key: Optional[str] = None):
        """Store API keys (falling back to ``"demo"``) and the provider order."""
        self.alpha_vantage_key = alpha_vantage_key or "demo"
        self.fmp_key = fmp_key or "demo"
        self.twelve_data_key = twelve_data_key or "demo"
        self.data_sources = ['yfinance', 'alpha_vantage', 'fmp']
        self.cache = {}

    def fetch_yfinance(self, symbol: str, period: int = 120) -> Optional[pd.DataFrame]:
        """Download the last ``period`` days for ``symbol`` via yfinance.

        Returns:
            A DataFrame with flat column names and no 'Adj Close' column, or
            ``None`` if the download failed or came back empty.
        """
        try:
            logger.info(f"Fetching {symbol} from YFinance...")
            data = yf.download(symbol, period=f'{period}d', progress=False, threads=True)

            if data.empty:
                raise ValueError("YFinance returned empty dataset")

            # yfinance may return (field, ticker) MultiIndex columns; keep the
            # field level so df['Close'] is a Series rather than a DataFrame.
            if isinstance(data.columns, pd.MultiIndex):
                data.columns = data.columns.get_level_values(0)

            if 'Adj Close' in data.columns:
                data = data.drop('Adj Close', axis=1)

            return data

        except Exception as e:
            logger.warning(f"YFinance failed for {symbol}: {e}")
            return None

    def fetch_alpha_vantage(self, symbol: str) -> Optional[pd.DataFrame]:
        """Fetch daily history from Alpha Vantage, querying ``<base>.BSE``.

        Returns:
            The most recent 120 rows as floats, sorted by date, or ``None``
            on any failure (HTTP error, API error payload, parse error).
        """
        try:
            logger.info(f"Fetching {symbol} from Alpha Vantage...")
            base_symbol = symbol.replace('.NS', '')
            url = "https://www.alphavantage.co/query"
            params = {
                'function': 'TIME_SERIES_DAILY',
                'symbol': f'{base_symbol}.BSE',
                'apikey': self.alpha_vantage_key,
                'outputsize': 'full'
            }

            response = requests.get(url, params=params, timeout=15)
            # Surface HTTP-level failures instead of trying to parse an error page.
            response.raise_for_status()
            data = response.json()

            if 'Time Series (Daily)' not in data:
                # The API reports problems under different keys depending on
                # the failure (rate limit, bad key, bad symbol).
                reason = (data.get('Note') or data.get('Information')
                          or data.get('Error Message') or 'Unknown error')
                raise ValueError(f"Alpha Vantage API error: {reason}")

            time_series = data['Time Series (Daily)']
            df = pd.DataFrame.from_dict(time_series, orient='index')
            # Columns are renamed by position.
            # NOTE(review): assumes the payload order is open/high/low/close/volume - confirm.
            df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
            df.index = pd.to_datetime(df.index)
            df = df.astype(float).sort_index()

            return df.tail(120)

        except Exception as e:
            logger.warning(f"Alpha Vantage failed for {symbol}: {e}")
            return None

    def fetch_fmp(self, symbol: str) -> Optional[pd.DataFrame]:
        """Fetch roughly the last 130 calendar days from Financial Modeling Prep.

        Returns:
            Up to 120 rows with Open/High/Low/Close/Volume columns indexed by
            date, or ``None`` on any failure.
        """
        try:
            logger.info(f"Fetching {symbol} from Financial Modeling Prep...")
            base_symbol = symbol.replace('.NS', '.BSE')
            url = f"https://financialmodelingprep.com/api/v3/historical-price-full/{base_symbol}"
            now = datetime.now()
            params = {
                'apikey': self.fmp_key,
                'from': (now - timedelta(days=130)).strftime('%Y-%m-%d'),
                'to': now.strftime('%Y-%m-%d')
            }

            response = requests.get(url, params=params, timeout=15)
            # Surface HTTP-level failures (e.g. rejected API key) explicitly.
            response.raise_for_status()
            data = response.json()

            if 'historical' not in data or not data['historical']:
                raise ValueError("FMP returned no historical data")

            df = pd.DataFrame(data['historical'])
            df['date'] = pd.to_datetime(df['date'])
            df = df.set_index('date').sort_index()

            column_mapping = {
                'open': 'Open', 'high': 'High', 'low': 'Low',
                'close': 'Close', 'volume': 'Volume'
            }
            df = df.rename(columns=column_mapping)

            return df[['Open', 'High', 'Low', 'Close', 'Volume']].tail(120)

        except Exception as e:
            logger.warning(f"FMP failed for {symbol}: {e}")
            return None

    def get_market_data(self, symbol: str) -> MarketData:
        """Return data for ``symbol`` from the first provider that succeeds.

        Providers are taken from ``self.data_sources`` in order; a result is
        accepted only if it is non-empty and has at least 60 rows.

        Raises:
            RuntimeError: if no provider returned enough data.
        """
        fetchers = {
            'yfinance': self.fetch_yfinance,
            'alpha_vantage': self.fetch_alpha_vantage,
            'fmp': self.fetch_fmp,
        }

        # Honour the instance's configured provider order rather than a
        # hard-coded copy of it.
        for source in self.data_sources:
            fetch = fetchers.get(source)
            if fetch is None:
                logger.warning(f"Unknown data source '{source}' skipped")
                continue

            try:
                data = fetch(symbol)
            except Exception as e:
                logger.error(f"Error with {source} for {symbol}: {e}")
                continue

            if data is not None and not data.empty and len(data) >= 60:
                market_data = MarketData(
                    symbol=symbol,
                    data=data,
                    source=source,
                    timestamp=datetime.now().isoformat(),
                    is_mock=False
                )

                logger.info(f"Successfully fetched {symbol} from {source} ({len(data)} rows)")
                return market_data

        raise RuntimeError(f"All data sources failed for {symbol}")

print("Data manager configured")


Data manager configured


In [126]:
# Fixed advanced feature engineering with proper Series handling

class AdvancedFeatureEngineer:
    """Derive technical-indicator columns from OHLCV data and window them.

    ``engineer_features`` adds the indicator columns; ``create_sequences``
    turns an engineered frame into (window, label) training pairs.
    """

    def __init__(self):
        # Names of the feature columns used by the last create_sequences call.
        self.feature_names = []

    def _ensure_series(self, result, name="calculation"):
        """Collapse a DataFrame result to its first column; pass Series through.

        A warning is logged when the DataFrame does not have exactly one column.
        """
        if isinstance(result, pd.DataFrame):
            if len(result.columns) != 1:
                logger.warning(f"{name} returned DataFrame with multiple columns, using first column")
            return result.iloc[:, 0]
        return result

    def add_price_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add return, range, gap and candle-shape columns (on a copy)."""
        df = df.copy()
        df['Returns'] = df['Close'].pct_change()
        df['Log_Returns'] = np.log(df['Close'] / df['Close'].shift(1))
        df['Price_Range'] = (df['High'] - df['Low']) / df['Close']
        df['Gap'] = (df['Open'] - df['Close'].shift(1)) / df['Close'].shift(1)
        df['Body_Size'] = abs(df['Close'] - df['Open']) / df['Close']
        df['Upper_Shadow'] = (df['High'] - np.maximum(df['Open'], df['Close'])) / df['Close']
        df['Lower_Shadow'] = (np.minimum(df['Open'], df['Close']) - df['Low']) / df['Close']
        return df

    def add_moving_averages(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add SMA/EMA columns plus close-to-average ratios and SMA slopes."""
        df = df.copy()
        periods = [3, 5, 8, 13, 21, 34, 55]
        for period in periods:
            sma_col = df['Close'].rolling(window=period).mean()
            sma_col = self._ensure_series(sma_col, f"SMA_{period}")
            df[f'SMA_{period}'] = sma_col
            ratio_calc = df['Close'] / sma_col
            ratio_calc = self._ensure_series(ratio_calc, f"SMA_{period}_Ratio")
            df[f'SMA_{period}_Ratio'] = ratio_calc
            # Relative change of the average over five rows.
            slope_calc = sma_col.diff(5) / sma_col
            slope_calc = self._ensure_series(slope_calc, f"SMA_{period}_Slope")
            df[f'SMA_{period}_Slope'] = slope_calc

        ema_periods = [8, 13, 21, 34, 55]
        for period in ema_periods:
            ema_col = df['Close'].ewm(span=period).mean()
            ema_col = self._ensure_series(ema_col, f"EMA_{period}")
            df[f'EMA_{period}'] = ema_col
            ratio_calc = df['Close'] / ema_col
            ratio_calc = self._ensure_series(ratio_calc, f"EMA_{period}_Ratio")
            df[f'EMA_{period}_Ratio'] = ratio_calc
        return df

    def add_momentum_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add RSI, MACD, stochastic, Williams %R, ROC and momentum columns.

        RSI uses 21-row simple rolling means of gains and losses; the
        stochastic and Williams %R columns use a 21-row high/low window.
        """
        df = df.copy()
        delta = df['Close'].diff()
        gain = delta.where(delta > 0, 0).rolling(window=21).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=21).mean()
        gain = self._ensure_series(gain, "RSI_gain")
        loss = self._ensure_series(loss, "RSI_loss")
        rs = gain / loss
        rs = self._ensure_series(rs, "RS")
        df['RSI'] = 100 - (100 / (1 + rs))
        df['RSI_Momentum'] = df['RSI'].diff(5)

        ema_12 = self._ensure_series(df['Close'].ewm(span=12).mean(), "EMA_12")
        ema_26 = self._ensure_series(df['Close'].ewm(span=26).mean(), "EMA_26")
        df['EMA_12'] = ema_12
        df['EMA_26'] = ema_26
        df['MACD'] = ema_12 - ema_26
        df['MACD_Signal'] = df['MACD'].ewm(span=9).mean()
        df['MACD_Histogram'] = df['MACD'] - df['MACD_Signal']
        df['MACD_Slope'] = df['MACD'].diff(3)

        low_21 = self._ensure_series(df['Low'].rolling(21).min(), "Low_21")
        high_21 = self._ensure_series(df['High'].rolling(21).max(), "High_21")
        df['Stoch_K'] = 100 * ((df['Close'] - low_21) / (high_21 - low_21))
        df['Stoch_D'] = df['Stoch_K'].rolling(5).mean()
        df['Williams_R'] = -100 * ((high_21 - df['Close']) / (high_21 - low_21))
        df['ROC'] = ((df['Close'] - df['Close'].shift(13)) / df['Close'].shift(13)) * 100
        df['Momentum'] = df['Close'] - df['Close'].shift(13)
        return df

    def add_volatility_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add Bollinger-band, ATR and rolling-volatility columns.

        Requires the 'Returns' column from ``add_price_features``. Bands are
        a 21-row mean +/- 2.5 standard deviations.
        """
        df = df.copy()
        bb_middle = self._ensure_series(df['Close'].rolling(21).mean(), "BB_Middle")
        bb_std = self._ensure_series(df['Close'].rolling(21).std(), "BB_Std")
        df['BB_Middle'] = bb_middle
        df['BB_Upper'] = bb_middle + (bb_std * 2.5)
        df['BB_Lower'] = bb_middle - (bb_std * 2.5)
        bb_width_calc = (df['BB_Upper'] - df['BB_Lower']) / bb_middle
        df['BB_Width'] = self._ensure_series(bb_width_calc, "BB_Width")
        bb_position_calc = (df['Close'] - df['BB_Lower']) / (df['BB_Upper'] - df['BB_Lower'])
        df['BB_Position'] = self._ensure_series(bb_position_calc, "BB_Position")
        bb_squeeze_calc = bb_std / bb_middle
        df['BB_Squeeze'] = self._ensure_series(bb_squeeze_calc, "BB_Squeeze")

        # True range: the largest of the three candidate ranges per row.
        tr1 = df['High'] - df['Low']
        tr2 = abs(df['High'] - df['Close'].shift(1))
        tr3 = abs(df['Low'] - df['Close'].shift(1))
        tr_combined = pd.concat([tr1, tr2, tr3], axis=1)
        df['TR'] = tr_combined.max(axis=1)
        atr = self._ensure_series(df['TR'].rolling(21).mean(), "ATR")
        df['ATR'] = atr
        df['ATR_Ratio'] = self._ensure_series(atr / df['Close'], "ATR_Ratio")

        # Rolling standard deviation of returns scaled by sqrt(252).
        vol_5 = self._ensure_series(df['Returns'].rolling(5).std() * np.sqrt(252), "Vol_5")
        vol_13 = self._ensure_series(df['Returns'].rolling(13).std() * np.sqrt(252), "Vol_13")
        vol_21 = self._ensure_series(df['Returns'].rolling(21).std() * np.sqrt(252), "Vol_21")
        df['Volatility_5'] = vol_5
        df['Volatility_13'] = vol_13
        df['Volatility_21'] = vol_21
        df['Volatility_Ratio'] = self._ensure_series(vol_5 / vol_21, "Volatility_Ratio")
        return df

    def add_volume_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add volume averages, volume ratios and volume-weighted price columns.

        Requires the 'Returns' column from ``add_price_features``.
        """
        df = df.copy()
        vol_sma_5 = self._ensure_series(df['Volume'].rolling(5).mean(), "Volume_SMA_5")
        vol_sma_13 = self._ensure_series(df['Volume'].rolling(13).mean(), "Volume_SMA_13")
        vol_sma_21 = self._ensure_series(df['Volume'].rolling(21).mean(), "Volume_SMA_21")
        df['Volume_SMA_5'] = vol_sma_5
        df['Volume_SMA_13'] = vol_sma_13
        df['Volume_SMA_21'] = vol_sma_21
        df['Volume_Ratio_5'] = self._ensure_series(df['Volume'] / vol_sma_5, "Volume_Ratio_5")
        df['Volume_Ratio_21'] = self._ensure_series(df['Volume'] / vol_sma_21, "Volume_Ratio_21")
        df['Volume_Price_Trend'] = (df['Volume'] * df['Returns']).cumsum()
        df['Price_Volume'] = df['Close'] * df['Volume']
        volume_price_sum = (df['Close'] * df['Volume']).rolling(13).sum()
        volume_sum = df['Volume'].rolling(13).sum()
        vwp_calc = volume_price_sum / volume_sum
        df['Volume_Weighted_Price'] = self._ensure_series(vwp_calc, "Volume_Weighted_Price")
        return df

    def create_sequences(self, df: pd.DataFrame, sequence_length: int = 60) -> Tuple[np.ndarray, np.ndarray]:
        """Build sliding feature windows and next-row direction labels.

        For each row ``i`` the window holds the ``sequence_length`` rows that
        precede ``i`` (row ``i`` itself is not included), and the label comes
        from the close-to-close return between rows ``i`` and ``i + 1``:
        2 (BUY) above +2%, 0 (SELL) below -2%, otherwise 1 (HOLD).
        NOTE(review): confirm that excluding row ``i`` from the window matches
        how windows are built at prediction time.

        Args:
            df: Frame containing 'Close' and any subset of the feature
                columns; missing feature columns are skipped.
            sequence_length: Number of rows per window.

        Returns:
            ``(X, y)`` where ``X`` has shape
            ``(samples, sequence_length, n_features)`` and ``y`` is an int32
            vector of the same length. Both are empty (with consistent
            shapes) when ``df`` is too short to form a single window.
        """
        feature_cols = [
            'Returns', 'Log_Returns', 'Price_Range', 'Body_Size', 'Upper_Shadow', 'Lower_Shadow',
            'SMA_3_Ratio', 'SMA_5_Ratio', 'SMA_8_Ratio', 'SMA_13_Ratio', 'SMA_21_Ratio',
            'EMA_8_Ratio', 'EMA_13_Ratio', 'EMA_21_Ratio', 'EMA_34_Ratio',
            'RSI', 'RSI_Momentum', 'MACD', 'MACD_Signal', 'MACD_Histogram', 'MACD_Slope',
            'Stoch_K', 'Stoch_D', 'Williams_R', 'ROC', 'Momentum',
            'BB_Position', 'BB_Width', 'BB_Squeeze', 'ATR_Ratio',
            'Volatility_5', 'Volatility_13', 'Volatility_21', 'Volatility_Ratio',
            'Volume_Ratio_5', 'Volume_Ratio_21', 'Volume_Price_Trend'
        ]

        available_cols = [col for col in feature_cols if col in df.columns]
        self.feature_names = available_cols
        # Treat +/-inf (division by zero in ratio features) like missing data.
        # .bfill()/.ffill() replace the deprecated fillna(method=...).
        features = df[available_cols].replace([np.inf, -np.inf], np.nan).bfill().ffill()
        close = df['Close']

        X, y = [], []
        for i in range(sequence_length, len(features) - 1):
            current_price = close.iloc[i]
            future_price = close.iloc[i + 1]
            # Skip the sample BEFORE touching X so windows and labels stay
            # paired one-to-one.
            if pd.isna(current_price) or pd.isna(future_price) or current_price == 0:
                continue
            future_return = (future_price - current_price) / current_price
            if future_return > 0.02:
                label = 2
            elif future_return < -0.02:
                label = 0
            else:
                label = 1
            X.append(features.iloc[i - sequence_length:i].values)
            y.append(label)

        if X:
            X = np.array(X)
            y = np.array(y, dtype=np.int32)
            # Drop any window that still holds NaN/inf (e.g. an all-NaN column).
            valid_indices = np.isfinite(X).all(axis=(1, 2))
            X = X[valid_indices]
            y = y[valid_indices]
        else:
            # Keep the 3-D shape contract even when no window could be built.
            X = np.empty((0, sequence_length, len(available_cols)))
            y = np.empty((0,), dtype=np.int32)

        logger.info(f"Created {len(X)} valid sequences")
        logger.info(f"Label distribution: SELL:{np.sum(y==0)}, HOLD:{np.sum(y==1)}, BUY:{np.sum(y==2)}")
        return X, y

    def engineer_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Run every ``add_*`` step in order and fill the gaps that remain.

        Infinite values are converted to NaN, then gaps are back-filled and
        forward-filled. NOTE(review): back-filling copies later values into
        earlier rows (the indicator warm-up period).

        Raises:
            Exception: any error from a step is logged and re-raised.
        """
        try:
            df = self.add_price_features(df)
            df = self.add_moving_averages(df)
            df = self.add_momentum_indicators(df)
            df = self.add_volatility_indicators(df)
            df = self.add_volume_indicators(df)
            df = df.replace([np.inf, -np.inf], np.nan)
            df = df.bfill().ffill()
            return df
        except Exception as e:
            logger.error(f"Feature engineering failed: {e}")
            raise

print("Feature engineering configured")


Feature engineering configured


In [127]:
class CNNLSTMModel:
    """Conv1D + LSTM classifier over feature windows with three output classes.

    The wrapper owns the Keras model, a ``MinMaxScaler`` fitted on the
    training features, and the training history.

    NOTE(review): an identical declaration of this class appears in the next
    notebook cell and, being later, is the one in effect at runtime; keep the
    two in sync or remove one.
    """

    def __init__(self, sequence_length: int = 60, n_features: int = 35):
        """Store the expected window shape; the network is built in ``train``."""
        self.sequence_length = sequence_length
        self.n_features = n_features
        self.model = None
        self.scaler = MinMaxScaler()
        self.is_trained = False
        self.history = None

    def build_model(self) -> "tf.keras.Model":
        """Create and compile the network for ``(sequence_length, n_features)`` input.

        Returns:
            A compiled Sequential model with a 3-way softmax output, Adam
            (learning rate 0.001) and sparse categorical cross-entropy loss.
        """
        model = tf.keras.Sequential([
            # Explicit Input object instead of input_shape= on the first layer.
            tf.keras.Input(shape=(self.sequence_length, self.n_features)),
            tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.MaxPooling1D(pool_size=2),
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.LSTM(50, return_sequences=True, dropout=0.3),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.LSTM(25, dropout=0.3),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dropout(0.5),
            tf.keras.layers.Dense(32, activation='relu'),
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Dense(3, activation='softmax')
        ])

        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        model.compile(
            optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )
        return model

    def validate_data(self, X: np.ndarray, y: np.ndarray):
        """Check that ``X`` is finite and ``y`` holds only the labels 0, 1, 2.

        Raises:
            ValueError: on a length mismatch, NaN/inf in ``X``, NaN in ``y``
                or any label outside {0, 1, 2}.
        """
        logger.info("Validating training data...")
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(f"X and y must have the same length, got {len(X)} and {len(y)}")
        if np.any(np.isnan(X)) or np.any(np.isinf(X)):
            logger.error("Found NaN or infinite values in X")
            raise ValueError("Training data X contains NaN or infinite values")
        # Checked first so float labels with NaN fail with a clear message
        # instead of an unrelated error from the label statistics below.
        if np.issubdtype(y.dtype, np.floating) and np.any(np.isnan(y)):
            logger.error("Found NaN values in labels y")
            raise ValueError("Labels y contain NaN values")
        unique_labels = np.unique(y)
        valid_labels = {0, 1, 2}
        logger.info(f"Unique labels found: {unique_labels}")
        found_labels = set(unique_labels.tolist())
        if not found_labels.issubset(valid_labels):
            invalid_labels = found_labels - valid_labels
            logger.error(f"Invalid labels found: {invalid_labels}")
            raise ValueError(f"Labels must be 0, 1, or 2. Found invalid labels: {invalid_labels}")
        # Safe only now: bincount needs non-negative integers.
        logger.info(f"Label distribution: {np.bincount(y.astype(np.int64), minlength=3)}")
        logger.info("Data validation passed!")

    def train(self, X: np.ndarray, y: np.ndarray, validation_split: float = 0.25):
        """Fit the scaler and the network on ``X``/``y``.

        The network is sized from ``X`` itself, so ``sequence_length`` and
        ``n_features`` are overwritten with ``X.shape[1]`` and ``X.shape[2]``.
        NOTE(review): the scaler is fitted on all samples, including the ones
        Keras later holds out through ``validation_split``.

        Args:
            X: Array of shape (samples, timesteps, features).
            y: Integer labels in {0, 1, 2}, one per sample.
            validation_split: Fraction passed to ``Model.fit``.

        Returns:
            The Keras ``History`` object.

        Raises:
            ValueError: if ``X`` is not 3-D or validation fails.
        """
        logger.info(f"Training CNN-LSTM model on {len(X)} sequences...")
        logger.info(f"Feature dimensions: {X.shape}")
        if X.ndim != 3:
            raise ValueError(f"Expected X of shape (samples, timesteps, features), got {X.shape}")
        self.validate_data(X, y)
        # Build for the data actually supplied; the constructor defaults may
        # not match the number of engineered feature columns.
        self.sequence_length, self.n_features = int(X.shape[1]), int(X.shape[2])
        X_scaled = self.scaler.fit_transform(X.reshape(-1, X.shape[-1])).reshape(X.shape)
        y = y.astype(np.int32)
        self.model = self.build_model()
        logger.info(f"Model parameters: {self.model.count_params():,}")

        callbacks = [
            tf.keras.callbacks.EarlyStopping(
                patience=50,
                restore_best_weights=True,
                monitor='val_loss',
                mode='min'
            ),
            tf.keras.callbacks.ReduceLROnPlateau(
                patience=20,
                factor=0.5,
                min_lr=1e-6,
                monitor='val_loss'
            )
        ]

        self.history = self.model.fit(
            X_scaled, y,
            epochs=100,
            batch_size=32,
            validation_split=validation_split,
            callbacks=callbacks,
            verbose=1,
            shuffle=True
        )

        self.is_trained = True
        final_loss = self.history.history['loss'][-1]
        # Absent when training ran without a validation split.
        val_acc_history = self.history.history.get('val_accuracy')
        logger.info(f"Training completed!")
        if val_acc_history:
            logger.info(f"Best validation accuracy: {max(val_acc_history):.4f}")
        logger.info(f"Final loss: {final_loss:.4f}")
        return self.history

    def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Return the predicted class and its probability for each window.

        Args:
            X: Array of shape (samples, timesteps, features); a single 2-D
                window (timesteps, features) is accepted as a batch of one.

        Returns:
            ``(predicted_classes, confidence_scores)``: the argmax and the
            max of the softmax output per sample.

        Raises:
            ValueError: if the model has not been trained.
        """
        if not self.is_trained:
            raise ValueError("Model not trained yet!")
        if X.ndim == 2:
            X = X[np.newaxis, ...]
        X_scaled = self.scaler.transform(X.reshape(-1, X.shape[-1])).reshape(X.shape)
        predictions = self.model.predict(X_scaled, verbose=0)
        predicted_classes = np.argmax(predictions, axis=1)
        confidence_scores = np.max(predictions, axis=1)
        return predicted_classes, confidence_scores

print("CNN-LSTM model configured")


CNN-LSTM model configured


In [128]:
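# Main portfolio management system integrating all fixed components for NSE stock analysis
# NOTE: this cell only re-declares CNNLSTMModel (same code as the previous cell); the
# RobustPortfolioManager class used by the following cells is not defined in the cells above.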

class CNNLSTMModel:
    """Conv1D + LSTM classifier over feature windows with three output classes.

    The wrapper owns the Keras model, a ``MinMaxScaler`` fitted on the
    training features, and the training history.

    NOTE(review): this cell re-declares the class already declared in the
    previous notebook cell; being later, this declaration is the one in
    effect at runtime. Keep the two in sync or remove one.
    """

    def __init__(self, sequence_length: int = 60, n_features: int = 35):
        """Store the expected window shape; the network is built in ``train``."""
        self.sequence_length = sequence_length
        self.n_features = n_features
        self.model = None
        self.scaler = MinMaxScaler()
        self.is_trained = False
        self.history = None

    def build_model(self) -> "tf.keras.Model":
        """Create and compile the network for ``(sequence_length, n_features)`` input.

        Returns:
            A compiled Sequential model with a 3-way softmax output, Adam
            (learning rate 0.001) and sparse categorical cross-entropy loss.
        """
        model = tf.keras.Sequential([
            # Explicit Input object instead of input_shape= on the first layer.
            tf.keras.Input(shape=(self.sequence_length, self.n_features)),
            tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.MaxPooling1D(pool_size=2),
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.LSTM(50, return_sequences=True, dropout=0.3),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.LSTM(25, dropout=0.3),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dropout(0.5),
            tf.keras.layers.Dense(32, activation='relu'),
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Dense(3, activation='softmax')
        ])

        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        model.compile(
            optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )
        return model

    def validate_data(self, X: np.ndarray, y: np.ndarray):
        """Check that ``X`` is finite and ``y`` holds only the labels 0, 1, 2.

        Raises:
            ValueError: on a length mismatch, NaN/inf in ``X``, NaN in ``y``
                or any label outside {0, 1, 2}.
        """
        logger.info("Validating training data...")
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(f"X and y must have the same length, got {len(X)} and {len(y)}")
        if np.any(np.isnan(X)) or np.any(np.isinf(X)):
            logger.error("Found NaN or infinite values in X")
            raise ValueError("Training data X contains NaN or infinite values")
        # Checked first so float labels with NaN fail with a clear message
        # instead of an unrelated error from the label statistics below.
        if np.issubdtype(y.dtype, np.floating) and np.any(np.isnan(y)):
            logger.error("Found NaN values in labels y")
            raise ValueError("Labels y contain NaN values")
        unique_labels = np.unique(y)
        valid_labels = {0, 1, 2}
        logger.info(f"Unique labels found: {unique_labels}")
        found_labels = set(unique_labels.tolist())
        if not found_labels.issubset(valid_labels):
            invalid_labels = found_labels - valid_labels
            logger.error(f"Invalid labels found: {invalid_labels}")
            raise ValueError(f"Labels must be 0, 1, or 2. Found invalid labels: {invalid_labels}")
        # Safe only now: bincount needs non-negative integers.
        logger.info(f"Label distribution: {np.bincount(y.astype(np.int64), minlength=3)}")
        logger.info("Data validation passed!")

    def train(self, X: np.ndarray, y: np.ndarray, validation_split: float = 0.25):
        """Fit the scaler and the network on ``X``/``y``.

        The network is sized from ``X`` itself, so ``sequence_length`` and
        ``n_features`` are overwritten with ``X.shape[1]`` and ``X.shape[2]``.
        NOTE(review): the scaler is fitted on all samples, including the ones
        Keras later holds out through ``validation_split``.

        Args:
            X: Array of shape (samples, timesteps, features).
            y: Integer labels in {0, 1, 2}, one per sample.
            validation_split: Fraction passed to ``Model.fit``.

        Returns:
            The Keras ``History`` object.

        Raises:
            ValueError: if ``X`` is not 3-D or validation fails.
        """
        logger.info(f"Training CNN-LSTM model on {len(X)} sequences...")
        logger.info(f"Feature dimensions: {X.shape}")
        if X.ndim != 3:
            raise ValueError(f"Expected X of shape (samples, timesteps, features), got {X.shape}")
        self.validate_data(X, y)
        # Build for the data actually supplied; the constructor defaults may
        # not match the number of engineered feature columns.
        self.sequence_length, self.n_features = int(X.shape[1]), int(X.shape[2])
        X_scaled = self.scaler.fit_transform(X.reshape(-1, X.shape[-1])).reshape(X.shape)
        y = y.astype(np.int32)
        self.model = self.build_model()
        logger.info(f"Model parameters: {self.model.count_params():,}")

        callbacks = [
            tf.keras.callbacks.EarlyStopping(
                patience=50,
                restore_best_weights=True,
                monitor='val_loss',
                mode='min'
            ),
            tf.keras.callbacks.ReduceLROnPlateau(
                patience=20,
                factor=0.5,
                min_lr=1e-6,
                monitor='val_loss'
            )
        ]

        self.history = self.model.fit(
            X_scaled, y,
            epochs=100,
            batch_size=32,
            validation_split=validation_split,
            callbacks=callbacks,
            verbose=1,
            shuffle=True
        )

        self.is_trained = True
        final_loss = self.history.history['loss'][-1]
        # Absent when training ran without a validation split.
        val_acc_history = self.history.history.get('val_accuracy')
        logger.info(f"Training completed!")
        if val_acc_history:
            logger.info(f"Best validation accuracy: {max(val_acc_history):.4f}")
        logger.info(f"Final loss: {final_loss:.4f}")
        return self.history

    def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Return the predicted class and its probability for each window.

        Args:
            X: Array of shape (samples, timesteps, features); a single 2-D
                window (timesteps, features) is accepted as a batch of one.

        Returns:
            ``(predicted_classes, confidence_scores)``: the argmax and the
            max of the softmax output per sample.

        Raises:
            ValueError: if the model has not been trained.
        """
        if not self.is_trained:
            raise ValueError("Model not trained yet!")
        if X.ndim == 2:
            X = X[np.newaxis, ...]
        X_scaled = self.scaler.transform(X.reshape(-1, X.shape[-1])).reshape(X.shape)
        predictions = self.model.predict(X_scaled, verbose=0)
        predicted_classes = np.argmax(predictions, axis=1)
        confidence_scores = np.max(predictions, axis=1)
        return predicted_classes, confidence_scores

print("CNN-LSTM model configured")


CNN-LSTM model configured


In [129]:
# Model persistence and utility functions for deployment

def save_nse_portfolio_model(portfolio_manager: RobustPortfolioManager,
                           model_path: str = "nse_portfolio_model_v2") -> str:
    """Write the trained model, preprocessing artifacts and metadata to disk.

    Layout under ``model_path``:
      - ``saved_model/``: TensorFlow SavedModel export.
      - ``model.keras``: native Keras archive, used to restore a model that
        still supports ``.predict``.
      - ``artifacts/scaler.pkl``, ``artifacts/feature_names.pkl``.
      - ``model_metadata.json``.

    Args:
        portfolio_manager: Manager whose ``model`` has been trained.
        model_path: Target directory (created if missing).

    Returns:
        ``model_path``.

    Raises:
        ValueError: if the model has not been trained.
    """
    import json

    if not portfolio_manager.model.is_trained:
        raise ValueError("Cannot save untrained model!")

    logger.info(f"Saving NSE portfolio model to {model_path}/...")
    os.makedirs(model_path, exist_ok=True)
    os.makedirs(f"{model_path}/artifacts", exist_ok=True)

    keras_model = portfolio_manager.model.model
    tf.saved_model.save(keras_model, f"{model_path}/saved_model")
    logger.info("TensorFlow model saved in SavedModel format")

    # tf.saved_model.load() does not give back a Keras model, so also keep a
    # native Keras archive that load_nse_portfolio_model can restore fully.
    keras_model.save(f"{model_path}/model.keras")
    logger.info("Keras model archive saved")

    joblib.dump(portfolio_manager.model.scaler, f"{model_path}/artifacts/scaler.pkl")
    joblib.dump(portfolio_manager.feature_engineer.feature_names,
                f"{model_path}/artifacts/feature_names.pkl")
    logger.info("Preprocessing components saved")

    metadata = {
        'model_info': portfolio_manager.model_metadata,
        'training_history': portfolio_manager.training_history,
        'feature_engineering': {
            'sequence_length': portfolio_manager.model.sequence_length,
            'n_features': portfolio_manager.model.n_features,
            'feature_names': portfolio_manager.feature_engineer.feature_names
        },
        'model_architecture': {
            'type': 'CNN-LSTM',
            'action_map': portfolio_manager.action_map,
            'thresholds': {'buy': 0.02, 'sell': -0.02}
        },
        'deployment_info': {
            'saved_at': datetime.now().isoformat(),
            'tensorflow_version': tf.__version__,
            'fixes_applied': ['multi_level_columns', 'dataframe_series_conversion', 'savedmodel_format']
        }
    }

    # default=str stringifies anything json cannot encode natively.
    with open(f"{model_path}/model_metadata.json", 'w') as f:
        json.dump(metadata, f, indent=2, default=str)

    logger.info(f"Model package saved successfully to {model_path}/")
    logger.info(f"Ready for Google Cloud Vertex AI deployment")
    return model_path

def load_nse_portfolio_model(model_path: str = "nse_portfolio_model_v2") -> RobustPortfolioManager:
    """Rebuild a portfolio manager from a directory written by the saver.

    The native ``model.keras`` archive is preferred because it restores a
    real Keras model. Packages without it fall back to the SavedModel
    export, whose restored object has no ``.predict`` method.

    SECURITY: ``joblib.load`` unpickles the artifact files; only load
    directories from a trusted source.

    Args:
        model_path: Directory produced by ``save_nse_portfolio_model``.

    Returns:
        A ``RobustPortfolioManager`` with model, scaler, feature names and
        metadata restored and ``model.is_trained`` set to True.
    """
    import json

    logger.info(f"Loading NSE portfolio model from {model_path}/...")
    portfolio_manager = RobustPortfolioManager()

    keras_archive = f"{model_path}/model.keras"
    if os.path.exists(keras_archive):
        portfolio_manager.model.model = tf.keras.models.load_model(keras_archive)
    else:
        portfolio_manager.model.model = tf.saved_model.load(f"{model_path}/saved_model")
        logger.warning("No model.keras archive found; the SavedModel object does not "
                       "provide .predict(), so predictions through CNNLSTMModel will fail")
    logger.info("TensorFlow model loaded")

    portfolio_manager.model.scaler = joblib.load(f"{model_path}/artifacts/scaler.pkl")
    portfolio_manager.feature_engineer.feature_names = joblib.load(f"{model_path}/artifacts/feature_names.pkl")
    logger.info("Preprocessing components loaded")

    with open(f"{model_path}/model_metadata.json", 'r') as f:
        metadata = json.load(f)

    portfolio_manager.model.sequence_length = metadata['feature_engineering']['sequence_length']
    portfolio_manager.model.n_features = metadata['feature_engineering']['n_features']
    portfolio_manager.model.is_trained = True
    portfolio_manager.model_metadata = metadata['model_info']
    portfolio_manager.training_history = metadata['training_history']

    logger.info(f"NSE Portfolio Model loaded successfully!")
    return portfolio_manager

print("Model persistence system ready")


Model persistence system ready


In [None]:
# Final training execution for clean NSE stocks

# Ticker universe passed to both train_model and analyze_portfolio in
# run_final_nse_training. Every entry carries the '.NS' suffix; the data
# manager rewrites that suffix for the Alpha Vantage and FMP fallbacks.
CLEAN_NSE_STOCKS = [
    'JINDALSTEL.NS', 'JSWSTEEL.NS', 'TATASTEEL.NS', 'VEDL.NS', 'HINDALCO.NS',
    'HINDZINC.NS', 'SAIL.NS', 'NMDC.NS',
    'TATAMOTORS.NS', 'BAJAJ-AUTO.NS', 'EICHERMOT.NS', 'HEROMOTOCO.NS',
    'M&M.NS', 'ASHOKLEY.NS', 'MARUTI.NS', 'TVSMOTOR.NS',
    'BAJFINANCE.NS', 'MUTHOOTFIN.NS', 'ABFRL.NS', 'HDFCBANK.NS',
    'ICICIBANK.NS', 'SHRIRAMFIN.NS', 'SBIN.NS', 'LICI.NS',
    'JSWENERGY.NS', 'TATAPOWER.NS', 'ADANIPOWER.NS', 'RELIANCE.NS',
    'POWERGRID.NS', 'IOC.NS', 'BPCL.NS', 'NTPC.NS',
    'TCS.NS', 'WIPRO.NS', 'INFY.NS', 'TECHM.NS', 'HCLTECH.NS',
    'PERSISTENT.NS', 'LT.NS', 'TATAELXSI.NS',
    'BLUESTARCO.NS', 'VOLTAS.NS', 'CROMPTON.NS', 'HAVELLS.NS',
    'BAJAJELEC.NS', 'WHIRLPOOL.NS', 'TITAN.NS', 'ASIANPAINT.NS',
    'DABUR.NS', 'GODREJCP.NS', 'BRITANNIA.NS', 'ITC.NS',
    'NESTLEIND.NS', 'MARICO.NS', 'TATACONSUM.NS', 'COLPAL.NS',
    'CIPLA.NS', 'DRREDDY.NS', 'MANKIND.NS', 'SUNPHARMA.NS',
    'LUPIN.NS', 'ZYDUSLIFE.NS', 'BIOCON.NS'
]

def _print_training_report(training_results):
    """Print the headline metrics of a ``train_model`` result dict."""
    print("\nTRAINING RESULTS:")
    print(f"Status: {training_results['status'].upper()}")
    print(f"Best Validation Accuracy: {training_results['best_val_accuracy']:.4f}")
    metadata = training_results['model_metadata']
    print(f"Total Training Sequences: {metadata['total_sequences']:,}")
    print(f"Feature Count: {metadata['feature_count']}")
    print(f"Successful Symbols: {len(metadata['trained_symbols'])}")


def _print_portfolio_report(portfolio_results, top_n=10):
    """Print summary, action distribution and the ``top_n`` most confident calls.

    Per-symbol entries that lack any of 'action', 'confidence' or
    'current_price' are left out of the ranking instead of raising.
    """
    summary = portfolio_results['summary']
    successful = summary['successful_predictions']

    print("\nPORTFOLIO ANALYSIS:")
    print(f"Success Rate: {successful}/{summary['total_symbols']} ({summary['success_rate']:.1%})")
    print(f"Average Confidence: {summary['average_confidence']:.2%}")

    print("\nACTION DISTRIBUTION:")
    for action, count in summary['action_distribution'].items():
        # Guard the division: with zero successful predictions every share is 0.
        share = (count / successful * 100) if successful > 0 else 0
        print(f"   {action}: {count:>2} stocks ({share:>5.1f}%)")

    print(f"\nTOP {top_n} CONFIDENT PREDICTIONS:")
    required_keys = ('action', 'confidence', 'current_price')
    # sorted() is stable, so ties keep the order of the analysis dict.
    ranked = sorted(
        (
            (symbol, result['action'], result['confidence'], result['current_price'])
            for symbol, result in portfolio_results['portfolio_analysis'].items()
            if all(key in result for key in required_keys)
        ),
        key=lambda row: row[2],
        reverse=True,
    )
    for rank, (symbol, action, confidence, price) in enumerate(ranked[:top_n], start=1):
        print(f"{rank:>2}. {symbol:<15}: {action:<4} | {confidence:>6.2%} | Rs.{price:>8.2f}")


def run_final_nse_training(symbols: Optional[List[str]] = None) -> Dict[str, object]:
    """Train, analyse and save the NSE portfolio model, printing a report.

    Args:
        symbols: Tickers to train on and then analyse. When omitted, the
            module-level ``CLEAN_NSE_STOCKS`` list is used.

    Returns:
        On success, a dict with 'status' == 'success', 'training_results',
        'portfolio_analysis' and 'model_path'. On any exception, a dict with
        'status' == 'failed' and 'error' (the exception message); the
        traceback is printed and the exception is not re-raised.
    """
    banner = "=" * 80
    print(banner)
    print("NSE PORTFOLIO MANAGER - FINAL TRAINING EXECUTION")
    print("All fixes applied: Multi-level columns, DataFrame/Series conversion")
    print(banner)

    try:
        portfolio_manager = RobustPortfolioManager(
            alpha_vantage_key="demo",
            fmp_key="demo",
            twelve_data_key="demo"
        )

        # Resolve the default at call time so the current module-level list is used.
        if symbols is None:
            symbols = CLEAN_NSE_STOCKS

        print("\nTRAINING PHASE")
        print(f"Training on {len(symbols)} verified NSE stocks")
        training_results = portfolio_manager.train_model(symbols)
        _print_training_report(training_results)

        print("\nPREDICTION PHASE")
        portfolio_results = portfolio_manager.analyze_portfolio(symbols)
        summary = portfolio_results['summary']
        _print_portfolio_report(portfolio_results)

        print("\nSAVING MODEL...")
        model_path = save_nse_portfolio_model(portfolio_manager)

        print("\n" + banner)
        print("NSE PORTFOLIO MANAGER - TRAINING COMPLETED SUCCESSFULLY!")
        print(f"Model saved to: {model_path}")
        print(f"Validation Accuracy: {training_results['best_val_accuracy']:.1%}")
        print(f"Portfolio Success Rate: {summary['success_rate']:.1%}")
        print(f"Average Confidence: {summary['average_confidence']:.1%}")
        print("Ready for Google Cloud Vertex AI deployment")
        print(banner)

        return {
            'status': 'success',
            'training_results': training_results,
            'portfolio_analysis': portfolio_results,
            'model_path': model_path
        }

    except Exception as e:
        # Top-level boundary of the run: report the failure with its traceback
        # and return a failure record so callers can branch on 'status'.
        print(f"\nTRAINING FAILED: {e}")
        import traceback
        traceback.print_exc()
        return {'status': 'failed', 'error': str(e)}

# Kick off the full run. The outcome is bound to the module-level name
# `final_results`, which run_portfolio_demo() looks up later.
print("Starting NSE portfolio training with all fixes...")
final_results = run_final_nse_training()

if final_results['status'] != 'success':
    print(f"\nTraining failed: {final_results['error']}")
else:
    print("\nHACKATHON READY!")
    print(f"Model path: {final_results['model_path']}")


Starting NSE portfolio training with all fixes...
NSE PORTFOLIO MANAGER - FINAL TRAINING EXECUTION
All fixes applied: Multi-level columns, DataFrame/Series conversion

TRAINING PHASE
Training on 63 verified NSE stocks


ERROR:__main__:Failed processing JINDALSTEL.NS: only length-1 arrays can be converted to Python scalars
ERROR:__main__:Failed processing JSWSTEEL.NS: only length-1 arrays can be converted to Python scalars
ERROR:__main__:Failed processing TATASTEEL.NS: only length-1 arrays can be converted to Python scalars
ERROR:__main__:Failed processing VEDL.NS: only length-1 arrays can be converted to Python scalars
ERROR:__main__:Failed processing HINDALCO.NS: only length-1 arrays can be converted to Python scalars
ERROR:__main__:Failed processing HINDZINC.NS: only length-1 arrays can be converted to Python scalars
ERROR:__main__:Failed processing SAIL.NS: only length-1 arrays can be converted to Python scalars
ERROR:__main__:Failed processing NMDC.NS: only length-1 arrays can be converted to Python scalars
ERROR:__main__:Failed processing TATAMOTORS.NS: only length-1 arrays can be converted to Python scalars
ERROR:__main__:Failed processing BAJAJ-AUTO.NS: only length-1 arrays can be converted to 

Epoch 1/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 33ms/step - accuracy: 0.3451 - loss: 1.6623 - val_accuracy: 0.8925 - val_loss: 0.7025 - learning_rate: 0.0010
Epoch 2/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.7926 - loss: 0.6924 - val_accuracy: 0.8925 - val_loss: 0.5088 - learning_rate: 0.0010
Epoch 3/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.8429 - loss: 0.6151 - val_accuracy: 0.8925 - val_loss: 0.4730 - learning_rate: 0.0010
Epoch 4/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - accuracy: 0.8562 - loss: 0.5560 - val_accuracy: 0.8925 - val_loss: 0.4343 - learning_rate: 0.0010
Epoch 5/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.8562 - loss: 0.5772 - val_accuracy: 0.8925 - val_loss: 0.4351 - learning_rate: 0.0010
Epoch 6/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3

In [None]:
def run_portfolio_demo():
    """Print a short demo driven by the outcome of the training run.

    Looks up the module-level ``final_results``. If it is missing, or its
    'status' is not 'success', a "no model" notice is printed and the function
    returns. Otherwise the manager is reloaded from
    ``final_results['model_path']`` via ``load_nse_portfolio_model``, one
    prediction line is printed per demo ticker, and the stored training and
    portfolio metrics are printed afterwards. Returns None.
    """
    print(" NSE PORTFOLIO MANAGER - LIVE DEMO")
    print("=" * 60)

    demo_stocks = ['RELIANCE.NS', 'TCS.NS', 'HDFCBANK.NS', 'INFY.NS', 'TATASTEEL.NS']

    # Guard clause: nothing to demo without a successful training run.
    if 'final_results' not in globals() or final_results['status'] != 'success':
        print(" No trained model available")
        return

    manager = load_nse_portfolio_model(final_results['model_path'])

    print("\n INDIVIDUAL STOCK PREDICTIONS:")
    for ticker in demo_stocks:
        try:
            outcome = manager.predict_stock(ticker)
            print(
                f"{ticker:<15}: {outcome.action:<4} | "
                f"Confidence: {outcome.confidence:>6.2%} | "
                f"Price: ₹{outcome.current_price:>8.2f}"
            )
        except Exception as exc:
            # Best effort: a failure on one ticker is reported (message cut to
            # 40 characters) and the loop moves on to the next one.
            print(f"{ticker:<15}: ERROR - {str(exc)[:40]}...")

    training = final_results['training_results']
    portfolio = final_results['portfolio_analysis']['summary']

    print("\n MODEL PERFORMANCE:")
    print(f"Validation Accuracy: {training['best_val_accuracy']:>6.1%}")
    print(f"Portfolio Success Rate: {portfolio['success_rate']:>6.1%}")
    print(f"Average Confidence: {portfolio['average_confidence']:>6.1%}")
    print(f"Model Features: {training['model_metadata']['feature_count']}")
    print(f"Training Sequences: {training['model_metadata']['total_sequences']:,}")

# Run the demo right away; it prints a notice and returns if no successful
# training result is available.
run_portfolio_demo()
