In [21]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

from datetime import datetime, timedelta
import yfinance as yf
import ta

# Disable SSL verification
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

#### Machine Learning Models
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import TimeSeriesSplit

#### Deep Learning
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Bidirectional, BatchNormalization, GRU
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

#### Visualization
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.get_logger().setLevel('ERROR')

class AdvancedStockPredictor:
    def __init__(self):
        self.models = {}
        self.scalers = {}
        self.results = {}
        self.lstm_scaler = None
        self.lstm_model = None
        self.currency_info = {}
        self.feature_importance = {}
        
        self.STOCK_SYMBOL_MAP = {
            'AAPL': 'AAPL', 'APPLE': 'AAPL',
            'MSFT': 'MSFT', 'MICROSOFT': 'MSFT',
            'GOOGL': 'GOOGL', 'GOOGLE': 'GOOGL', 'ALPHABET': 'GOOGL',
            'AMZN': 'AMZN', 'AMAZON': 'AMZN',
            'TSLA': 'TSLA', 'TESLA': 'TSLA',
            'META': 'META', 'FACEBOOK': 'META',
            'NVDA': 'NVDA', 'NVIDIA': 'NVDA',
            'RELIANCE': 'RELIANCE.NS', 
            'TCS': 'TCS.NS', 
            'INFY': 'INFY.NS', 'INFOSYS': 'INFY.NS',
            'HDFC': 'HDFCBANK.NS', 'HDFCBANK': 'HDFCBANK.NS',
            'ICICI': 'ICICIBANK.NS', 'ICICIBANK': 'ICICIBANK.NS',
            'SBIN': 'SBIN.NS', 'SBI': 'SBIN.NS',
            'WIPRO': 'WIPRO.NS',
        }
        
        self.CURRENCY_MAP = {
            '.NS': 'INR ', '.KS': 'KRW ', 
            '.SW': 'CHF ', '.L': 'GBP ',
        }
        self.DEFAULT_CURRENCY = 'USD '
    
    def get_currency_symbol(self, symbol):
        for suffix, currency in self.CURRENCY_MAP.items():
            if symbol.endswith(suffix):
                return currency
        return self.DEFAULT_CURRENCY
    
    def get_correct_symbol(self, symbol_input):
        symbol_input = symbol_input.upper().strip()
        if symbol_input in self.STOCK_SYMBOL_MAP:
            return self.STOCK_SYMBOL_MAP[symbol_input]
        return symbol_input
    
    def fetch_and_prepare_data(self, symbol, years=8):
        """Fetch more historical data for better training"""
        end_date = datetime.now()
        start_date = end_date - timedelta(days=years*365)
        
        correct_symbol = self.get_correct_symbol(symbol)
        print(f"Fetching {years} years of data for {symbol} -> {correct_symbol}...")
        
        try:
            df = yf.download(correct_symbol, start=start_date, end=end_date, progress=False)
            
            if df.empty:
                ticker = yf.Ticker(correct_symbol)
                df = ticker.history(start=start_date, end=end_date)
            
            if df.empty:
                df = yf.download(correct_symbol, period="max", progress=False)
                
            if df.empty:
                raise ValueError(f"No data found for symbol: {symbol}")
            
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.droplevel(1)
            
            print(f"Fetched {len(df)} records from {df.index[0].date()} to {df.index[-1].date()}")
            
            self.currency_info[symbol] = self.get_currency_symbol(correct_symbol)
            
            # ADVANCED FEATURE ENGINEERING
            print("Computing advanced features...")
            
            # Basic price features
            df['Returns'] = df['Close'].pct_change()
            df['Log_Returns'] = np.log(df['Close'] / df['Close'].shift(1))
            df['High_Low_Pct'] = (df['High'] - df['Low']) / df['Low'] * 100
            df['Close_Open_Pct'] = (df['Close'] - df['Open']) / df['Open'] * 100
            
            # Trend strength
            df['Trend_Strength'] = df['Close'].rolling(20).apply(
                lambda x: np.polyfit(range(len(x)), x, 1)[0], raw=True
            )
            
            # Multiple timeframe MAs
            for window in [5, 10, 20, 50, 100, 200]:
                df[f'SMA_{window}'] = df['Close'].rolling(window).mean()
                df[f'EMA_{window}'] = df['Close'].ewm(span=window).mean()
                
                # Price position relative to MA
                df[f'Close_to_SMA_{window}'] = (df['Close'] - df[f'SMA_{window}']) / df[f'SMA_{window}'] * 100
            
            # MA crossovers (powerful signals)
            df['SMA_5_20_Cross'] = (df['SMA_5'] > df['SMA_20']).astype(int)
            df['SMA_10_50_Cross'] = (df['SMA_10'] > df['SMA_50']).astype(int)
            df['EMA_12_26_Cross'] = (df['EMA_12'] > df['EMA_26']).astype(int)
            
            # Volatility (critical for prediction)
            for window in [5, 10, 20, 30]:
                df[f'Volatility_{window}'] = df['Returns'].rolling(window).std() * np.sqrt(252)
                df[f'Return_Volatility_Ratio_{window}'] = df['Returns'].rolling(window).mean() / (
                    df['Returns'].rolling(window).std() + 1e-8
                )
            
            # Volume analysis
            df['Volume_SMA_20'] = df['Volume'].rolling(20).mean()
            df['Volume_Ratio'] = df['Volume'] / (df['Volume_SMA_20'] + 1)
            df['Volume_Price_Trend'] = df['Volume'] * df['Returns']
            
            # Price momentum across timeframes
            for period in [1, 3, 5, 10, 20, 30]:
                df[f'Momentum_{period}'] = df['Close'].pct_change(period) * 100
                df[f'ROC_{period}'] = ((df['Close'] - df['Close'].shift(period)) / 
                                        df['Close'].shift(period)) * 100
            
            # Technical indicators
            close = df['Close'].squeeze()
            high = df['High'].squeeze()
            low = df['Low'].squeeze()
            volume = df['Volume'].squeeze()
            
            # RSI with multiple timeframes
            for window in [7, 14, 21]:
                df[f'RSI_{window}'] = ta.momentum.rsi(close, window=window)
                df[f'RSI_{window}_SMA'] = df[f'RSI_{window}'].rolling(14).mean()
            
            # Stochastic Oscillator
            df['Stoch_K'] = ta.momentum.stoch(high, low, close, window=14)
            df['Stoch_D'] = df['Stoch_K'].rolling(3).mean()
            df['Stoch_Overbought'] = (df['Stoch_K'] > 80).astype(int)
            df['Stoch_Oversold'] = (df['Stoch_K'] < 20).astype(int)
            
            # Williams %R
            df['Williams_R'] = ta.momentum.williams_r(high, low, close, lbp=14)
            
            # MACD (trend and momentum)
            macd = ta.trend.MACD(close)
            df['MACD'] = macd.macd()
            df['MACD_Signal'] = macd.macd_signal()
            df['MACD_Diff'] = macd.macd_diff()
            df['MACD_Cross'] = (df['MACD'] > df['MACD_Signal']).astype(int)
            
            # ADX (trend strength)
            df['ADX'] = ta.trend.adx(high, low, close, window=14)
            df['ADX_Strong_Trend'] = (df['ADX'] > 25).astype(int)
            
            # Bollinger Bands
            bb = ta.volatility.BollingerBands(close, window=20, window_dev=2)
            df['BB_High'] = bb.bollinger_hband()
            df['BB_Low'] = bb.bollinger_lband()
            df['BB_Mid'] = bb.bollinger_mavg()
            df['BB_Width'] = (df['BB_High'] - df['BB_Low']) / df['BB_Mid']
            df['BB_Position'] = (close - df['BB_Low']) / (df['BB_High'] - df['BB_Low'])
            df['BB_Upper_Band_Touch'] = (close >= df['BB_High']).astype(int)
            df['BB_Lower_Band_Touch'] = (close <= df['BB_Low']).astype(int)
            
            # ATR (volatility)
            df['ATR'] = ta.volatility.average_true_range(high, low, close, window=14)
            df['ATR_Pct'] = df['ATR'] / close * 100
            
            # OBV (volume-price relationship)
            df['OBV'] = ta.volume.on_balance_volume(close, volume)
            df['OBV_EMA'] = df['OBV'].ewm(span=20).mean()
            df['OBV_Divergence'] = df['OBV'] - df['OBV_EMA']
            
            # Ichimoku Cloud components
            df['Tenkan'] = (df['High'].rolling(9).max() + df['Low'].rolling(9).min()) / 2
            df['Kijun'] = (df['High'].rolling(26).max() + df['Low'].rolling(26).min()) / 2
            df['Senkou_A'] = ((df['Tenkan'] + df['Kijun']) / 2).shift(26)
            df['Senkou_B'] = ((df['High'].rolling(52).max() + df['Low'].rolling(52).min()) / 2).shift(26)
            
            # Lagged features (critical for time series)
            for lag in [1, 2, 3, 5, 10, 15, 20, 30]:
                df[f'Close_Lag_{lag}'] = df['Close'].shift(lag)
                df[f'Volume_Lag_{lag}'] = df['Volume'].shift(lag)
                df[f'Returns_Lag_{lag}'] = df['Returns'].shift(lag)
                df[f'High_Lag_{lag}'] = df['High'].shift(lag)
                df[f'Low_Lag_{lag}'] = df['Low'].shift(lag)
            
            # Rolling statistics (capture trends)
            for window in [5, 10, 20, 30]:
                df[f'Close_Rolling_Mean_{window}'] = df['Close'].rolling(window).mean()
                df[f'Close_Rolling_Std_{window}'] = df['Close'].rolling(window).std()
                df[f'Close_Rolling_Min_{window}'] = df['Close'].rolling(window).min()
                df[f'Close_Rolling_Max_{window}'] = df['Close'].rolling(window).max()
                df[f'Close_Rolling_Skew_{window}'] = df['Close'].rolling(window).skew()
                df[f'Close_Rolling_Kurt_{window}'] = df['Close'].rolling(window).kurt()
            
            # Market regime indicators
            df['High_Volatility_Regime'] = (df['Volatility_20'] > df['Volatility_20'].rolling(100).mean()).astype(int)
            df['Uptrend_Regime'] = (df['SMA_50'] > df['SMA_200']).astype(int)
            
            # Distance from support/resistance
            df['Distance_from_52W_High'] = (df['Close'] / df['Close'].rolling(252).max() - 1) * 100
            df['Distance_from_52W_Low'] = (df['Close'] / df['Close'].rolling(252).min() - 1) * 100
            
            # Clean data
            initial_len = len(df)
            df = df.replace([np.inf, -np.inf], np.nan)
            df = df.dropna()
            dropped = initial_len - len(df)
            
            print(f"Dropped {dropped} rows. Final: {len(df)} records with {len(df.columns)} features")
            
            if len(df) < 300:
                raise ValueError(f"Insufficient data: {len(df)} records")
            
            return df
            
        except Exception as e:
            print(f"ERROR: {str(e)}")
            raise
    
    def prepare_features_for_ml(self, df, target_col='Close', n_ahead=1):
        """Prepare comprehensive feature set"""
        print(f"Preparing features (predicting {n_ahead} days ahead)...")
        
        # Exclude non-feature columns
        exclude_cols = ['Open', 'High', 'Low', 'Adj Close', 'Target']
        feature_cols = [col for col in df.columns if col not in exclude_cols]
        
        # Create target
        df_ml = df.copy()
        df_ml['Target'] = df_ml[target_col].shift(-n_ahead)
        df_ml = df_ml.dropna()
        
        X = df_ml[feature_cols]
        y = df_ml['Target']
        
        print(f"Features: {len(feature_cols)}, Samples: {len(X)}")
        return X, y, feature_cols
    
    def prepare_lstm_data(self, df, sequence_length=60):
        """Prepare LSTM data with optimal features"""
        print(f"Preparing LSTM data (sequence: {sequence_length})...")
        
        lstm_features = [
            'Close', 'Volume', 'Returns', 
            'RSI_14', 'MACD', 'ATR', 'ADX',
            'SMA_10', 'SMA_20', 'SMA_50',
            'EMA_12', 'EMA_26',
            'BB_Width', 'BB_Position',
            'Volatility_20', 'OBV',
            'Stoch_K', 'Williams_R'
        ]
        
        available_features = [col for col in lstm_features if col in df.columns]
        print(f"Using {len(available_features)} LSTM features")
        
        data = df[available_features].values
        
        self.lstm_scaler = MinMaxScaler()
        scaled_data = self.lstm_scaler.fit_transform(data)
        
        X, y = [], []
        for i in range(sequence_length, len(scaled_data)):
            X.append(scaled_data[i-sequence_length:i])
            y.append(scaled_data[i, 0])
        
        X, y = np.array(X), np.array(y)
        
        print(f"LSTM sequences: {len(X)} samples, shape {X.shape}")
        return X, y, available_features
    
    def train_models(self, X_ml, y_ml, X_lstm, y_lstm):
        """Train models with cross-validation"""
        print("\n" + "="*70)
        print("TRAINING ADVANCED MODELS")
        print("="*70)
        
        # Time series split for validation
        tscv = TimeSeriesSplit(n_splits=3)
        
        # Standard train-test split
        X_train_ml, X_test_ml, y_train_ml, y_test_ml = train_test_split(
            X_ml, y_ml, test_size=0.15, shuffle=False
        )
        
        split_idx = int(len(X_lstm) * 0.85)
        X_train_lstm = X_lstm[:split_idx]
        X_test_lstm = X_lstm[split_idx:]
        y_train_lstm = y_lstm[:split_idx]
        y_test_lstm = y_lstm[split_idx:]
        
        print(f"ML - Train: {len(X_train_ml)}, Test: {len(X_test_ml)}")
        print(f"LSTM - Train: {len(X_train_lstm)}, Test: {len(X_test_lstm)}\n")
        
        # === RANDOM FOREST (Optimized) ===
        print("[1/3] Training Random Forest...")
        rf_scaler = RobustScaler()
        X_train_rf = rf_scaler.fit_transform(X_train_ml)
        X_test_rf = rf_scaler.transform(X_test_ml)
        
        rf_model = RandomForestRegressor(
            n_estimators=500,
            max_depth=30,
            min_samples_split=3,
            min_samples_leaf=1,
            max_features='sqrt',
            bootstrap=True,
            oob_score=True,
            random_state=42,
            n_jobs=-1
        )
        
        rf_model.fit(X_train_rf, y_train_ml)
        rf_pred = rf_model.predict(X_test_rf)
        
        # Feature importance
        self.feature_importance['Random_Forest'] = dict(zip(
            X_ml.columns,
            rf_model.feature_importances_
        ))
        
        self.models['Random_Forest'] = rf_model
        self.scalers['Random_Forest'] = rf_scaler
        self.results['Random_Forest'] = {
            'predictions': rf_pred,
            'actual': y_test_ml.values
        }
        
        # === SVR (Optimized) ===
        print("[2/3] Training SVR...")
        svr_scaler = StandardScaler()
        X_train_svr = svr_scaler.fit_transform(X_train_ml)
        X_test_svr = svr_scaler.transform(X_test_ml)
        
        svr_model = SVR(
            kernel='rbf',
            C=200,
            gamma='scale',
            epsilon=0.005,
            cache_size=2000
        )
        
        svr_model.fit(X_train_svr, y_train_ml)
        svr_pred = svr_model.predict(X_test_svr)
        
        self.models['SVR'] = svr_model
        self.scalers['SVR'] = svr_scaler
        self.results['SVR'] = {
            'predictions': svr_pred,
            'actual': y_test_ml.values
        }
        
        # === ADVANCED LSTM ===
        print("[3/3] Training Advanced LSTM...")
        
        self.lstm_model = Sequential([
            Bidirectional(LSTM(150, return_sequences=True), 
                         input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])),
            Dropout(0.3),
            BatchNormalization(),
            
            Bidirectional(LSTM(100, return_sequences=True)),
            Dropout(0.3),
            BatchNormalization(),
            
            Bidirectional(GRU(75, return_sequences=False)),
            Dropout(0.25),
            BatchNormalization(),
            
            Dense(64, activation='relu'),
            Dropout(0.2),
            Dense(32, activation='relu'),
            Dropout(0.15),
            Dense(16, activation='relu'),
            Dense(1)
        ])
        
        optimizer = Adam(learning_rate=0.0005)
        self.lstm_model.compile(optimizer=optimizer, loss='huber', metrics=['mae', 'mse'])
        
        early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=7, min_lr=0.00001)
        
        history = self.lstm_model.fit(
            X_train_lstm, y_train_lstm,
            validation_data=(X_test_lstm, y_test_lstm),
            epochs=200,
            batch_size=32,
            callbacks=[early_stop, reduce_lr],
            verbose=0
        )
        
        lstm_pred_scaled = self.lstm_model.predict(X_test_lstm, verbose=0)
        
        # Inverse transform
        n_features = X_train_lstm.shape[2]
        lstm_pred_full = np.zeros((len(lstm_pred_scaled), n_features))
        lstm_pred_full[:, 0] = lstm_pred_scaled.flatten()
        lstm_pred = self.lstm_scaler.inverse_transform(lstm_pred_full)[:, 0]
        
        y_test_full = np.zeros((len(y_test_lstm), n_features))
        y_test_full[:, 0] = y_test_lstm
        y_test_actual = self.lstm_scaler.inverse_transform(y_test_full)[:, 0]
        
        self.results['LSTM'] = {
            'predictions': lstm_pred,
            'actual': y_test_actual
        }
        
        # Display metrics
        print("\n" + "="*70)
        print("MODEL PERFORMANCE")
        print("="*70)
        print(f"{'Model':<20} {'MAE':<12} {'RMSE':<12} {'R2':<10} {'Dir_Acc':<10}")
        print("-"*70)
        
        for model in ['Random_Forest', 'SVR', 'LSTM']:
            self._calculate_metrics(
                self.results[model]['actual'],
                self.results[model]['predictions'],
                model
            )
        
        print("="*70 + "\n")
    
    def _calculate_metrics(self, y_true, y_pred, model_name):
        """Calculate comprehensive metrics"""
        mae = mean_absolute_error(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        r2 = r2_score(y_true, y_pred)
        
        # Directional accuracy
        if len(y_true) > 1:
            true_dir = np.diff(y_true) > 0
            pred_dir = np.diff(y_pred) > 0
            dir_acc = np.mean(true_dir == pred_dir) * 100
        else:
            dir_acc = 0
        
        print(f"{model_name:<20} {mae:<12.2f} {rmse:<12.2f} {r2:<10.3f} {dir_acc:<10.1f}%")
        
        if model_name not in self.results:
            self.results[model_name] = {}
        
        self.results[model_name].update({
            'mae': mae, 'rmse': rmse, 'r2': r2,
            'directional_accuracy': dir_acc
        })
    
    def predict_future_7_days(self, df, feature_cols, lstm_feature_cols):
        """Generate 7-day predictions"""
        print("\nGenerating 7-day predictions...")
        
        predictions = {'Random_Forest': [], 'SVR': [], 'LSTM': []}
        
        last_date = df.index[-1]
        future_dates = []
        current = last_date + timedelta(days=1)
        
        while len(future_dates) < 7:
            if current.weekday() < 5:
                future_dates.append(current)
            current += timedelta(days=1)
        
        # ML predictions
        current_data = df.iloc[-1:][feature_cols].copy()
        
        for day in range(7):
            rf_scaled = self.scalers['Random_Forest'].transform(current_data)
            svr_scaled = self.scalers['SVR'].transform(current_data)
            
            rf_pred = self.models['Random_Forest'].predict(rf_scaled)[0]
            svr_pred = self.models['SVR'].predict(svr_scaled)[0]
            
            predictions['Random_Forest'].append(rf_pred)
            predictions['SVR'].append(svr_pred)
            
            if day < 6 and 'Close' in current_data.columns:
                current_data.iloc[0, current_data.columns.get_loc('Close')] = rf_pred
        
        # LSTM predictions
        last_seq = df[lstm_feature_cols].values[-60:]
        current_seq = self.lstm_scaler.transform(last_seq).copy()
        
        for day in range(7):
            X_pred = current_seq.reshape(1, 60, len(lstm_feature_cols))
            pred_scaled = self.lstm_model.predict(X_pred, verbose=0)[0, 0]
            
            pred_full = np.zeros((1, len(lstm_feature_cols)))
            pred_full[0, 0] = pred_scaled
            pred_actual = self.lstm_scaler.inverse_transform(pred_full)[0, 0]
            
            predictions['LSTM'].append(pred_actual)
            
            if day < 6:
                new_row = np.zeros((1, len(lstm_feature_cols)))
                new_row[0, 0] = pred_scaled
                for i in range(1, len(lstm_feature_cols)):
                    new_row[0, i] = current_seq[-1, i]
                current_seq = np.vstack([current_seq[1:], new_row])
        
        return future_dates, predictions
    
    def create_visualizations(self, symbol, current_price, future_dates, predictions, df):
        """Create all visualizations"""
        currency = self.currency_info.get(symbol, self.DEFAULT_CURRENCY)
        
        # Performance dashboard
        fig1 = make_subplots(
            rows=2, cols=2,
            subplot_titles=('MAE Comparison', 'R2 Score', 'Directional Accuracy', 'Rankings')
        )
        
        models = ['Random_Forest', 'SVR', 'LSTM']
        colors = ['#3498db', '#e74c3c', '#2ecc71']
        
        metrics_df = pd.DataFrame([
            {
                'Model': m,
                'MAE': self.results[m]['mae'],
                'R2': self.results[m]['r2'],
                'Dir_Acc': self.results[m]['directional_accuracy']
            }
            for m in models
        ])
        
        fig1.add_trace(go.Bar(x=metrics_df['Model'], y=metrics_df['MAE'], marker_color=colors), row=1, col=1)
        fig1.add_trace(go.Bar(x=metrics_df['Model'], y=metrics_df['R2'], marker_color=colors), row=1, col=2)
        fig1.add_trace(go.Bar(x=metrics_df['Model'], y=metrics_df['Dir_Acc'], marker_color=colors), row=2, col=1)
        
        ranked = metrics_df.sort_values('R2', ascending=False)
        fig1.add_trace(go.Table(
            header=dict(values=['Model', 'MAE', 'R2', 'Dir_Acc'], fill_color='#34495e', font=dict(color='white')),
            cells=dict(values=[ranked['Model'], ranked['MAE'].round(2), ranked['R2'].round(3), 
                              ranked['Dir_Acc'].round(1)])
        ), row=2, col=2)
        
        fig1.update_layout(title=f"{symbol} - Performance", height=700, showlegend=False)
        fig1.show()
        
        # Predictions chart
        fig2 = go.Figure()
        all_dates = [future_dates[0] - timedelta(days=1)] + future_dates
        
        for model, color in zip(models, colors):
            prices = [current_price] + predictions[model]
            fig2.add_trace(go.Scatter(
                x=all_dates, y=prices, mode='lines+markers',
                name=model, line=dict(color=color, width=3)
            ))
        
        fig2.add_hline(y=current_price, line_dash="dash", opacity=0.5)
        fig2.update_layout(
            title=f"{symbol} - 7-Day Predictions",
            xaxis_title='Date', yaxis_title=f'Price ({currency})',
            height=600, template='plotly_white'
        )
        fig2.show()
        
        return ranked


def main():
    """Main execution function"""
    print("="*70)
    print("ENHANCED STOCK PREDICTION SYSTEM")
    print("Models: Random Forest | SVR | LSTM")
    print("="*70)
    print()
    
    # Get user input
    symbol = input("Enter stock symbol (e.g., AAPL, TSLA, TCS, RELIANCE): ").strip().upper()
    
    if not symbol:
        symbol = "AAPL"
        print(f"Using default symbol: {symbol}")
    
    try:
        # Initialize predictor
        predictor = EnhancedStockPredictor()
        
        # Fetch data
        print(f"\nFetching historical data for {symbol}...")
        df = predictor.fetch_and_prepare_data(symbol, years=15)
        
        current_price = float(df['Close'].iloc[-1])
        currency = predictor.currency_info.get(symbol, predictor.DEFAULT_CURRENCY)
        
        print(f"\nCurrent {symbol} Price: {currency}{current_price:.2f}")
        print(f"Dataset Size: {len(df)} trading days")
        print(f"Date Range: {df.index[0].date()} to {df.index[-1].date()}")
        
        # Prepare features
        X_ml, y_ml, feature_cols = predictor.prepare_features_for_ml(df)
        X_lstm, y_lstm, lstm_feature_cols = predictor.prepare_lstm_data(df)
        
        # Train models
        predictor.train_models(X_ml, y_ml, X_lstm, y_lstm)
        
        # Generate predictions
        future_dates, future_predictions = predictor.predict_future_7_days(
            df, feature_cols, lstm_feature_cols
        )
        
        # Create visualizations
        print("\nGenerating visualizations...")
        df_ranked = predictor.create_performance_dashboard(symbol)
        predictor.create_predictions_chart(symbol, current_price, future_dates, future_predictions)
        predictor.create_actual_vs_predicted_chart(df, symbol)
        
        # Print summary
        print("\n" + "="*70)
        print("PREDICTION SUMMARY")
        print("="*70)
        print(f"\nStock: {symbol}")
        print(f"Current Price: {currency}{current_price:.2f}")
        print(f"Training Period: 6 years ({len(df)} trading days)")
        
        print(f"\n7-DAY PRICE PREDICTIONS:")
        print("-"*70)
        
        best_model = df_ranked.iloc[0]['Model']
        print(f"Best Performing Model: {best_model} (R2: {df_ranked.iloc[0]['R2']:.3f})\n")
        
        for i, date in enumerate(future_dates):
            rf_pred = future_predictions['Random_Forest'][i]
            svr_pred = future_predictions['SVR'][i]
            lstm_pred = future_predictions['LSTM'][i]
            
            # Calculate average
            avg_pred = (rf_pred + svr_pred + lstm_pred) / 3
            avg_change = ((avg_pred - current_price) / current_price) * 100
            
            print(f"{date.strftime('%Y-%m-%d (%a)')}:")
            print(f"  Random Forest: {currency}{rf_pred:>8.2f}  ({((rf_pred-current_price)/current_price*100):+6.2f}%)")
            print(f"  SVR:           {currency}{svr_pred:>8.2f}  ({((svr_pred-current_price)/current_price*100):+6.2f}%)")
            print(f"  LSTM:          {currency}{lstm_pred:>8.2f}  ({((lstm_pred-current_price)/current_price*100):+6.2f}%)")
            print(f"  Average:       {currency}{avg_pred:>8.2f}  ({avg_change:+6.2f}%)")
            print()
        
        # Final recommendations
        print("="*70)
        print("ANALYSIS COMPLETE")
        print("="*70)
        print(f"\nBest Model: {best_model}")
        print(f"Model Accuracy Metrics:")
        print(f"  - R-Squared: {df_ranked.iloc[0]['R2']:.3f}")
        print(f"  - MAE: {currency}{df_ranked.iloc[0]['MAE']:.2f}")
        print(f"  - Directional Accuracy: {df_ranked.iloc[0]['Dir_Acc']:.1f}%")
        
        print("\nNOTE: These predictions are for educational purposes only.")
        print("Always conduct thorough research before making investment decisions.")
        
    except Exception as e:
        print(f"\nERROR: {str(e)}")
        print("\nTroubleshooting Guide:")
        print("1. Verify the stock symbol is correct")
        print("2. For Indian stocks, use: TCS, INFY, RELIANCE, HDFC, etc.")
        print("3. For US stocks, use: AAPL, TSLA, MSFT, GOOGL, etc.")
        print("4. Check your internet connection")
        print("5. Some stocks may have limited historical data")
        
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()

ENHANCED STOCK PREDICTION SYSTEM
Models: Random Forest | SVR | LSTM


Fetching historical data for TCS...
Fetching 15 years of data for TCS -> TCS.NS...
Successfully fetched 3699 records from 2010-10-11 to 2025-10-06
Currency: INR 
Computing advanced technical indicators...
Dropped 199 rows with NaN values
Final dataset: 3500 records with 80 features

Current TCS Price: INR 2988.40
Dataset Size: 3500 trading days
Date Range: 2011-07-27 to 2025-10-06
Preparing enhanced features for ML (predicting 1 days ahead)...
Selected 80 features from 3499 samples
Preparing LSTM data with sequence length: 60
Using 13 features for LSTM
LSTM sequences created: 3440 samples with shape (3440, 60, 13)

TRAINING MODELS WITH ENHANCED PARAMETERS
ML Training: 2799, Testing: 700
LSTM Training: 2752, Testing: 688

[1/3] Training Enhanced Random Forest...
[2/3] Training Enhanced SVR...
[3/3] Training Enhanced Bidirectional LSTM...

MODEL PERFORMANCE METRICS
Model                MAE          RMSE         R2     

Creating predictions chart...


Creating actual vs predicted comparison...



PREDICTION SUMMARY

Stock: TCS
Current Price: INR 2988.40
Training Period: 6 years (3500 trading days)

7-DAY PRICE PREDICTIONS:
----------------------------------------------------------------------
Best Performing Model: Random_Forest (R2: 0.198)

2025-10-07 (Tue):
  Random Forest: INR  2947.53  ( -1.37%)
  SVR:           INR  2896.24  ( -3.08%)
  LSTM:          INR  3381.25  (+13.15%)
  Average:       INR  3075.00  ( +2.90%)

2025-10-08 (Wed):
  Random Forest: INR  2942.51  ( -1.54%)
  SVR:           INR  2895.21  ( -3.12%)
  LSTM:          INR  3386.68  (+13.33%)
  Average:       INR  3074.80  ( +2.89%)

2025-10-09 (Thu):
  Random Forest: INR  2940.96  ( -1.59%)
  SVR:           INR  2895.07  ( -3.12%)
  LSTM:          INR  3388.84  (+13.40%)
  Average:       INR  3074.96  ( +2.90%)

2025-10-10 (Fri):
  Random Forest: INR  2940.96  ( -1.59%)
  SVR:           INR  2895.03  ( -3.12%)
  LSTM:          INR  3390.32  (+13.45%)
  Average:       INR  3075.44  ( +2.91%)

2025-10-13 (Mon):