# Stock Price Prediction System

### Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

### Machine Learning libraries

In [2]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.svm import SVR

### Deep Learning libraries

In [3]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

### Technical Analysis library

In [4]:
import ta

### API libraries

In [5]:
import yfinance as yf
from alpha_vantage.timeseries import TimeSeries

### Visualization libraries

In [6]:
import mplfinance as mpf
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

import joblib
import os

print("All libraries imported successfully!")

All libraries imported successfully!


In [7]:
# Alpha Vantage API key.
# The ALPHA_VANTAGE_API_KEY environment variable takes precedence so the
# credential does not have to live in the notebook. The literal is kept only as
# a backward-compatible fallback for environments that have not set it yet.
# NOTE(review): a key committed to source should be treated as leaked - rotate
# it and drop the fallback once the environment variable is set everywhere.
ALPHA_KEY = os.environ.get("ALPHA_VANTAGE_API_KEY") or "909ZVCBKRQTZNM6M"

def get_market_status(now=None):
    """Print a rough open/closed status for the US equity session.

    The thresholds used below (09:30 open, 16:00 close) are US Eastern
    wall-clock times, so the current time is read in the ``America/New_York``
    zone instead of the machine's local zone. Exchange holidays are not
    taken into account.

    Args:
        now: Optional ``datetime`` to evaluate instead of the current time.
            It is used as given, i.e. interpreted as exchange wall-clock time.

    Returns:
        None. The status is written to stdout.
    """
    if now is None:
        try:
            # Imported locally so the module still loads where zoneinfo or the
            # tz database is unavailable.
            from zoneinfo import ZoneInfo
            now = datetime.now(ZoneInfo("America/New_York"))
        except (ImportError, KeyError):
            # ZoneInfoNotFoundError is a KeyError subclass; fall back to the
            # previous behaviour (machine-local time).
            now = datetime.now()

    current_time = now.strftime('%Y-%m-%d %H:%M:%S')
    weekday = now.strftime('%A')
    banner = '=' * 60

    print(f"\n{banner}")
    print("MARKET STATUS CHECK")
    print(banner)
    print(f"Current Date/Time: {current_time}")
    print(f"Today is: {weekday}")

    minutes_since_midnight = now.hour * 60 + now.minute

    if now.weekday() >= 5:  # Saturday = 5, Sunday = 6
        print("⚠️  Markets are CLOSED (Weekend)")
        print("💡 Most recent trading day data available")
    elif minutes_since_midnight < 9 * 60 + 30:  # before the 09:30 open
        print("🕐 Markets haven't opened yet today")
        print("💡 Yesterday's data is the most recent")
    elif now.hour >= 16:  # at or after the 16:00 close
        print("🔔 Markets closed for the day")
        print("💡 Today's data should be available")
    else:
        print("📈 Markets are currently OPEN")
        print("💡 Real-time data available with some delay")

    print(banner)

In [8]:
def fetch_yf(symbol, start=None, end=None, period='2y', interval='1d'):
    """Download price history for ``symbol`` via yfinance.

    An explicit date window is requested only when both ``start`` and ``end``
    are truthy; otherwise the rolling ``period`` is used. Only the standard
    OHLCV columns that are actually present are kept, and rows containing NaN
    are dropped.

    Returns:
        The cleaned DataFrame, or ``None`` when nothing was downloaded, nothing
        survived cleaning, or any exception was raised (a message is printed
        in each of those cases).
    """
    wanted = ('Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume')
    try:
        if start and end:
            window = {'start': start, 'end': end}
        else:
            window = {'period': period}
        raw = yf.download(symbol, interval=interval, progress=False, **window)

        if raw.empty:
            print(f"No data found for symbol: {symbol}")
            return None

        # MultiIndex columns: pick out this symbol's slice from the second level
        if isinstance(raw.columns, pd.MultiIndex):
            raw = raw.xs(symbol, axis=1, level=1)

        # 'Adj Close' (or others) may be absent, so filter against what exists
        present = [name for name in wanted if name in raw.columns]
        cleaned = raw[present].dropna()

        if cleaned.empty:
            print(f"No data available after cleaning for symbol: {symbol}")
            return None

        print(f"Successfully fetched data for {symbol}. Shape: {cleaned.shape}")
        return cleaned
    except Exception as exc:
        print(f"Error fetching data for {symbol}: {exc}")
        return None

In [9]:
def add_technical_indicators(df):
    """Return a copy of ``df`` extended with indicator columns derived from 'Close'.

    Columns added: SMA_10, SMA_50, EMA_20, RSI (window 14), MACD, MACD_SIGNAL,
    BB_H and BB_L (20-period bands, 2 deviations). Rows containing any NaN
    are removed before returning; the input frame is not modified.
    """
    enriched = df.copy()
    close = enriched['Close']

    # Trend: simple and exponential moving averages
    enriched['SMA_10'] = close.rolling(10).mean()
    enriched['SMA_50'] = close.rolling(50).mean()
    enriched['EMA_20'] = close.ewm(span=20, adjust=False).mean()

    # Momentum
    enriched['RSI'] = ta.momentum.rsi(close, window=14)
    macd_calc = ta.trend.MACD(close)
    enriched['MACD'] = macd_calc.macd()
    enriched['MACD_SIGNAL'] = macd_calc.macd_signal()

    # Volatility envelope
    bands = ta.volatility.BollingerBands(close, window=20, window_dev=2)
    enriched['BB_H'] = bands.bollinger_hband()
    enriched['BB_L'] = bands.bollinger_lband()

    return enriched.dropna()

def prepare_supervised(df, feature_cols, target_col='Close', n_ahead=5):
    """Frame the data as a supervised problem: features now, target ``n_ahead`` rows later.

    A 'Target' column holding ``target_col`` shifted back by ``n_ahead`` rows
    is added to a copy of ``df``; rows with any NaN (including the trailing
    rows that have no target) are dropped.

    Returns:
        ``(X, y, frame)`` - the feature matrix, the target vector, and the
        cleaned frame they were taken from.
    """
    framed = df.assign(Target=df[target_col].shift(-n_ahead)).dropna()
    return framed[feature_cols].values, framed['Target'].values, framed

def train_and_eval_models(X_train, X_test, y_train, y_test):
    """Fit three regressors and score each one on the held-out split.

    Models: LinearRegression, RandomForestRegressor (100 trees, seed 42) and
    an RBF-kernel SVR.

    Returns:
        Dict keyed by model name; each entry holds the fitted ``model``, its
        ``mse``, ``mae`` and ``r2`` on the test split, and the raw
        ``predictions`` for ``X_test``.
    """
    candidates = (
        ('LinearRegression', LinearRegression()),
        ('RandomForest', RandomForestRegressor(n_estimators=100, random_state=42)),
        ('SVR', SVR(kernel='rbf', C=1.0, epsilon=0.01)),
    )

    report = {}
    for label, estimator in candidates:
        estimator.fit(X_train, y_train)
        y_hat = estimator.predict(X_test)
        report[label] = dict(
            model=estimator,
            mse=mean_squared_error(y_test, y_hat),
            mae=mean_absolute_error(y_test, y_hat),
            r2=r2_score(y_test, y_hat),
            predictions=y_hat,
        )
    return report

In [10]:
def create_sequences(data, seq_length=60):
    """Slice ``data`` into overlapping windows for sequence models.

    For every row index ``i >= seq_length`` the window is the ``seq_length``
    rows before ``i`` and the label is column 0 of row ``i`` (the caller is
    expected to put the scaled close price in column 0).

    Returns:
        ``(X, y)`` as NumPy arrays.
    """
    endpoints = range(seq_length, len(data))
    windows = [data[stop - seq_length:stop] for stop in endpoints]
    labels = [data[stop, 0] for stop in endpoints]
    return np.array(windows), np.array(labels)

def build_lstm(input_shape):
    """Assemble and compile the two-layer LSTM regressor.

    Stack: LSTM(64, sequences) -> Dropout(0.2) -> LSTM(32) -> Dropout(0.2)
    -> Dense(16, relu) -> Dense(1). Compiled with Adam, MSE loss and MAE as
    an extra metric.

    Args:
        input_shape: Shape passed to the first LSTM layer's ``input_shape``.
    """
    stack = [
        LSTM(64, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(32, return_sequences=False),
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dense(1),
    ]
    network = Sequential(stack)
    network.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return network

def generate_future_dates_enhanced(last_date, days_ahead=7, include_weekends_info=True):
    """
    Return the next ``days_ahead`` weekdays (Mon-Fri) strictly after ``last_date``.

    Saturdays and Sundays are skipped; exchange holidays are not considered.
    Progress is printed for every calendar day inspected.

    Args:
        last_date: Date/datetime of the last row of data; the search starts on
            the following calendar day.
        days_ahead: Number of weekdays to produce. Zero or negative yields an
            empty list.
        include_weekends_info: When True, also print a line for each skipped
            weekend day.

    Returns:
        List of exactly ``days_ahead`` dates (same type as ``last_date``).
    """
    future_dates = []
    skipped_dates = []
    current_date = last_date + timedelta(days=1)

    print(f"\n📅 GENERATING FUTURE TRADING DATES")
    print(f"Last trading day in data: {last_date.strftime('%Y-%m-%d (%A)')}")

    # No iteration cap: at most two consecutive days are skipped, so the loop
    # always terminates. The former cap of ``days_ahead * 2`` checks was too
    # small for days_ahead=1 after a Friday (Sat + Sun used up the budget) and
    # made the function return an empty list.
    while len(future_dates) < days_ahead:
        day_name = current_date.strftime('%A')

        if current_date.weekday() < 5:  # Monday to Friday
            future_dates.append(current_date)
            print(f"✅ {current_date.strftime('%Y-%m-%d')} ({day_name}) - Trading Day")
        else:
            skipped_dates.append((current_date, day_name))
            if include_weekends_info:
                print(f"⏭️  {current_date.strftime('%Y-%m-%d')} ({day_name}) - Skipped (Weekend)")

        current_date += timedelta(days=1)

    print(f"\n📊 Generated {len(future_dates)} future trading days")
    if skipped_dates:
        print(f"🚫 Skipped {len(skipped_dates)} weekend days")

    return future_dates

In [11]:
def predict_future_prices(model, scaler, df, feature_cols, days_ahead=7):
    """Roll a fitted regressor forward ``days_ahead`` steps from the last row of ``df``.

    Each step scales the current feature row with ``scaler``, takes the first
    value of ``model.predict`` and feeds it back in as the new 'Close' value
    for the next step. Only the 'Close' feature is refreshed between steps;
    every other feature stays at its last observed value.

    Args:
        model: Object exposing ``predict(X)``.
        scaler: Object exposing ``transform(X)``, fitted on the same columns.
        df: Frame containing ``feature_cols``; only its last row is used.
        feature_cols: Feature column names, in the order the model was
            trained on.
        days_ahead: Number of predictions to produce.

    Returns:
        List of ``days_ahead`` predictions.
    """
    columns = list(feature_cols)
    # Locate 'Close' rather than assuming it is the first feature, so a
    # different column order does not overwrite the wrong feature. Falls back
    # to column 0 (the previous behaviour) when 'Close' is not a feature.
    close_idx = columns.index('Close') if 'Close' in columns else 0

    future_predictions = []
    current_data = df[columns].iloc[-1:].values.copy()

    for day in range(days_ahead):
        current_data_scaled = scaler.transform(current_data)
        prediction = model.predict(current_data_scaled)[0]
        future_predictions.append(prediction)

        # Feed the prediction back as the next step's close price
        if day < days_ahead - 1:
            current_data[0, close_idx] = prediction

    return future_predictions

def predict_future_lstm(model, scaler, df, sequence_length=60, days_ahead=7):
    """Forecast ``days_ahead`` closes by sliding an LSTM window forward.

    The window starts as the last ``sequence_length`` rows of
    ['Close', 'Volume'], scaled with ``scaler``. After each prediction the
    oldest row is dropped and a new row is appended that holds the predicted
    (scaled) close together with the previous row's scaled volume.

    Returns:
        List of predicted prices, mapped back to price units through
        ``scaler.inverse_transform``.
    """
    recent = df[['Close', 'Volume']].values[-sequence_length:]
    window = scaler.transform(recent).copy()

    forecasts = []
    final_step = days_ahead - 1
    for step in range(days_ahead):
        batch = window.reshape(1, sequence_length, 2)
        scaled_close = model.predict(batch, verbose=0)[0, 0]

        # The scaler works on two columns, so pad the volume slot with zero
        # and read back only the price column.
        padded = np.zeros((1, 2))
        padded[0, 0] = scaled_close
        forecasts.append(scaler.inverse_transform(padded)[0, 0])

        if step < final_step:
            carried_volume = window[-1, 1]
            appended = np.array([[scaled_close, carried_volume]])
            window = np.concatenate([window[1:], appended], axis=0)

    return forecasts

In [12]:
def create_future_predictions_table(symbol, today_price, future_dates, rf_predictions, lstm_predictions=None):
    """
    Render the per-day forecasts as a colour-coded Plotly table.

    Each forecast is expressed as a percentage move against ``today_price``
    and tagged with a direction icon. RandomForest columns are always shown;
    LSTM columns are added only when ``lstm_predictions`` is truthy.

    Returns:
        ``(fig, df_predictions)`` - the Plotly figure and the underlying
        DataFrame (one row per future date).
    """
    with_lstm = bool(lstm_predictions)

    def pct_vs_today(price):
        return ((price - today_price) / today_price) * 100

    def trend_icon(move):
        if move > 0:
            return '📈'
        if move < 0:
            return '📉'
        return '➡️'

    def tint(move):
        if move > 0:
            return '#d4edda'  # light green: gain
        if move < 0:
            return '#f8d7da'  # light red: loss
        return '#fff3cd'      # light yellow: flat

    # One record per future date
    records = []
    for offset, day in enumerate(future_dates):
        rf_price = rf_predictions[offset]
        rf_move = pct_vs_today(rf_price)
        record = {
            'Date': day.strftime('%Y-%m-%d'),
            'Day': day.strftime('%A'),
            'Days_Ahead': offset + 1,
            'RF_Price': rf_price,
            'RF_Change_%': rf_move,
            'RF_Direction': trend_icon(rf_move),
        }
        if with_lstm:
            lstm_price = lstm_predictions[offset]
            lstm_move = pct_vs_today(lstm_price)
            record['LSTM_Price'] = lstm_price
            record['LSTM_Change_%'] = lstm_move
            record['LSTM_Direction'] = trend_icon(lstm_move)
        records.append(record)

    df_predictions = pd.DataFrame(records)
    n_rows = len(df_predictions)

    # Shared leading columns (uncoloured)
    header_values = ['Date', 'Day', 'Days<br>Ahead']
    cell_values = [df_predictions[col].tolist() for col in ('Date', 'Day', 'Days_Ahead')]
    fill_colors = [['white'] * n_rows for _ in range(3)]

    # One price / change / trend triple per model, all shaded by that model's move
    for tag in (('RF', 'LSTM') if with_lstm else ('RF',)):
        moves = df_predictions[f'{tag}_Change_%']
        header_values.extend([f'{tag} Price<br>($)', f'{tag} Change<br>(%)', f'{tag}<br>Trend'])
        cell_values.extend([
            [f"${price:.2f}" for price in df_predictions[f'{tag}_Price']],
            [f"{move:+.2f}%" for move in moves],
            df_predictions[f'{tag}_Direction'].tolist(),
        ])
        shades = [tint(move) for move in moves]
        fill_colors.extend([shades, shades, shades])

    table = go.Table(
        header=dict(
            values=header_values,
            fill_color='#1f2937',
            font=dict(color='white', size=14, family="Arial Black"),
            align='center',
            height=40,
        ),
        cells=dict(
            values=cell_values,
            fill_color=fill_colors,
            font=dict(size=12),
            align='center',
            height=35,
        ),
    )
    fig = go.Figure(data=[table])

    fig.update_layout(
        title=dict(
            text=f'{symbol} - Future Price Predictions Table<br><sub>Current Price: ${today_price:.2f}</sub>',
            x=0.5,
            font=dict(size=18, color='#1f2937'),
        ),
        height=400,
        margin=dict(l=20, r=20, t=80, b=20),
    )

    return fig, df_predictions

In [13]:
def create_prediction_comparison_chart(symbol, today_price, future_dates, rf_predictions, lstm_predictions=None):
    """
    Plot the model forecasts as lines that start from the current price.

    The current price is anchored one calendar day before the first future
    date, so every forecast line begins at the same point. The LSTM line is
    drawn only when ``lstm_predictions`` is truthy.

    Returns:
        The Plotly figure.
    """
    anchor_date = future_dates[0] - timedelta(days=1)
    all_dates = [anchor_date] + future_dates

    traces = [
        go.Scatter(
            x=all_dates,
            y=[today_price] + rf_predictions,
            name='RandomForest Prediction',
            line=dict(color='#2E86AB', width=3),
            marker=dict(size=8, symbol='circle'),
        )
    ]

    if lstm_predictions:
        traces.append(go.Scatter(
            x=all_dates,
            y=[today_price] + lstm_predictions,
            name='LSTM Prediction',
            line=dict(color='#F24236', width=3, dash='dot'),
            marker=dict(size=8, symbol='diamond'),
        ))

    # Star marker highlighting the starting point
    traces.append(go.Scatter(
        x=[anchor_date],
        y=[today_price],
        name='Current Price',
        mode='markers',
        marker=dict(size=15, color='green', symbol='star'),
    ))

    fig = go.Figure(data=traces)

    # Dashed reference line at the current price
    fig.add_hline(
        y=today_price,
        line_dash="dash",
        line_color="gray",
        annotation_text=f"Current: ${today_price:.2f}",
    )

    fig.update_layout(
        title=f'{symbol} - Future Price Predictions Comparison',
        xaxis_title='Date',
        yaxis_title='Price ($)',
        template='plotly_white',
        height=500,
        hovermode='x unified',
    )

    return fig


In [14]:

def create_professional_stock_visualization(df, actual_prices, predicted_prices, symbol, model_name, future_dates=None, future_prices=None):
    """Build a two-panel figure: actual vs. predicted price on top, volume below.

    The x-axis uses the last ``len(actual_prices)`` index entries of ``df``.
    Future forecasts are drawn when both ``future_dates`` and
    ``future_prices`` are given, and the SMA 50 line when ``df`` has that
    column. MAE and MAPE between the two price series are shown in an
    annotation box.

    Returns:
        The Plotly figure.
    """
    window = len(actual_prices)
    val_dates = df.index[-window:]
    price_panel = dict(row=1, col=1)
    volume_panel = dict(row=2, col=1)

    fig = make_subplots(
        rows=2, cols=1,
        shared_xaxes=True,
        vertical_spacing=0.1,
        subplot_titles=(f'{symbol} Price Prediction with {model_name}', 'Volume'),
        row_width=[0.2, 0.7],
    )

    # Validation-period series
    fig.add_trace(
        go.Scatter(x=val_dates, y=actual_prices, name="Actual Price",
                   line=dict(color='#2E86AB', width=2)),
        **price_panel
    )
    fig.add_trace(
        go.Scatter(x=val_dates, y=predicted_prices, name="Predicted Price",
                   line=dict(color='#F24236', width=2, dash='dot')),
        **price_panel
    )

    # Optional forward-looking series
    have_future = not (future_dates is None or future_prices is None)
    if have_future:
        fig.add_trace(
            go.Scatter(x=future_dates, y=future_prices, name="Future Prediction",
                       line=dict(color='#00FF00', width=3, dash='dash'),
                       marker=dict(size=8, symbol='star')),
            **price_panel
        )

    # Volume bars for the same window
    fig.add_trace(
        go.Bar(x=val_dates, y=df['Volume'].values[-window:],
               name="Volume", marker_color='#A23B72'),
        **volume_panel
    )

    # Moving-average overlay when the column exists
    if 'SMA_50' in df.columns:
        fig.add_trace(
            go.Scatter(x=val_dates, y=df['SMA_50'].values[-window:],
                       name="SMA 50", line=dict(color='orange', width=1)),
            **price_panel
        )

    fig.update_layout(
        title=f'{symbol} Stock Price Prediction with {model_name}',
        yaxis_title='Price ($)',
        xaxis_rangeslider_visible=False,
        template='plotly_white',
        height=700,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    )
    fig.update_yaxes(title_text="Price ($)", **price_panel)
    fig.update_yaxes(title_text="Volume", **volume_panel)

    # Accuracy summary shown in the top-left corner
    mae = mean_absolute_error(actual_prices, predicted_prices)
    relative_errors = np.abs((actual_prices - predicted_prices) / actual_prices)
    mape = np.mean(relative_errors) * 100

    fig.add_annotation(
        x=0.02, y=0.98,
        xref="paper", yref="paper",
        text=f"MAE: ${mae:.2f}<br>MAPE: {mape:.2f}%",
        showarrow=False,
        font=dict(size=12),
        align="left",
        bordercolor="black",
        borderwidth=1,
        borderpad=4,
        bgcolor="white",
        opacity=0.8,
    )

    return fig

In [15]:
def plot_technical_indicators(df, symbol):
    """Draw a three-panel indicator figure: price with bands, RSI, and MACD.

    The close price is always plotted. Bollinger bands, RSI (with 70/30
    guide lines) and MACD/signal are added only when their columns are
    present in ``df``.

    Returns:
        The Plotly figure.
    """
    dates = df.index
    columns = df.columns

    def line(column, label, **style):
        # Small factory for the line traces used throughout this figure
        return go.Scatter(x=dates, y=df[column], name=label, line=dict(**style))

    fig = make_subplots(
        rows=3, cols=1,
        shared_xaxes=True,
        vertical_spacing=0.05,
        subplot_titles=('Price with Bollinger Bands', 'RSI', 'MACD'),
        row_width=[0.2, 0.2, 0.6],
    )

    # Panel 1: close price, optionally wrapped by the Bollinger envelope
    fig.add_trace(line('Close', "Close", color='#2E86AB'), row=1, col=1)

    if 'BB_H' in columns and 'BB_L' in columns:
        fig.add_trace(line('BB_H', "Upper Band", color='gray', dash='dash'), row=1, col=1)
        fig.add_trace(
            go.Scatter(x=dates, y=df['BB_L'], name="Lower Band", line=dict(color='gray', dash='dash'),
                       fill='tonexty', fillcolor='rgba(128,128,128,0.1)'),
            row=1, col=1
        )

    # Panel 2: RSI with overbought / oversold guides
    if 'RSI' in columns:
        fig.add_trace(line('RSI', "RSI", color='purple'), row=2, col=1)
        fig.add_hline(y=70, line_dash="dash", line_color="red", row=2, col=1)
        fig.add_hline(y=30, line_dash="dash", line_color="green", row=2, col=1)

    # Panel 3: MACD and its signal line
    if 'MACD' in columns and 'MACD_SIGNAL' in columns:
        fig.add_trace(line('MACD', "MACD", color='blue'), row=3, col=1)
        fig.add_trace(line('MACD_SIGNAL', "Signal", color='red'), row=3, col=1)

    fig.update_layout(
        title=f'{symbol} Technical Indicators',
        height=800,
        showlegend=True,
        template='plotly_white',
    )

    return fig

# Alias -> Yahoo Finance ticker lookup for commonly requested stocks.
# Keys are upper-case names or tickers as a user might type them; values are
# the ticker strings handed to the data fetcher. Insertion order matters:
# suggestion lookups return the first matches in this order.
STOCK_SYMBOL_MAP = {
    # --- US listings ---
    'AAPL': 'AAPL',
    'APPLE': 'AAPL',
    'MSFT': 'MSFT',
    'MICROSOFT': 'MSFT',
    'GOOGL': 'GOOGL',
    'GOOGLE': 'GOOGL',
    'ALPHABET': 'GOOGL',
    'AMZN': 'AMZN',
    'AMAZON': 'AMZN',
    'TSLA': 'TSLA',
    'TESLA': 'TSLA',
    'META': 'META',
    'FACEBOOK': 'META',
    'NFLX': 'NFLX',
    'NETFLIX': 'NFLX',
    'NVDA': 'NVDA',
    'NVIDIA': 'NVDA',

    # --- Indian listings (NSE, '.NS' suffix) ---
    'RELIANCE': 'RELIANCE.NS',
    'L&T': 'LT.NS',
    'LT': 'LT.NS',
    'LARSEN': 'LT.NS',
    'TCS': 'TCS.NS',
    'INFY': 'INFY.NS',
    'INFOSYS': 'INFY.NS',
    'HDFC': 'HDFCBANK.NS',
    'HDFCBANK': 'HDFCBANK.NS',
    'ICICI': 'ICICIBANK.NS',
    'ICICIBANK': 'ICICIBANK.NS',
    'SBIN': 'SBIN.NS',
    'SBI': 'SBIN.NS',
    'WIPRO': 'WIPRO.NS',
    'AXISBANK': 'AXISBANK.NS',
    'KOTAKBANK': 'KOTAKBANK.NS',
    'KOTAK': 'KOTAKBANK.NS',
    'BHARTI': 'BHARTIARTL.NS',
    'AIRTEL': 'BHARTIARTL.NS',

    # --- Other international listings ---
    'SAMSUNG': '005930.KS',
    'TOYOTA': 'TM',
    'SONY': 'SONY',
    'SHELL': 'SHEL',
    'BP': 'BP',
    'TOTAL': 'TTE',
}


In [16]:
def get_correct_symbol(symbol_input):
    """Resolve free-form user input to a Yahoo Finance ticker.

    Resolution order, after upper-casing and trimming the input:
      1. direct alias lookup in ``STOCK_SYMBOL_MAP``;
      2. the input with '.NS' appended, if that ticker is one of the map's values;
      3. for inputs containing '&', the alias obtained by removing '&' and spaces;
      4. otherwise the normalised input itself.
    """
    key = symbol_input.upper().strip()

    try:
        return STOCK_SYMBOL_MAP[key]
    except KeyError:
        pass

    # Bare NSE ticker typed without its exchange suffix
    nse_ticker = f"{key}.NS"
    if nse_ticker in STOCK_SYMBOL_MAP.values():
        return nse_ticker

    # Inputs such as "L & T": strip the ampersand and spaces, then retry
    if '&' in key:
        squeezed = key.replace('&', '').replace(' ', '')
        if squeezed in STOCK_SYMBOL_MAP:
            return STOCK_SYMBOL_MAP[squeezed]

    # Unknown alias: assume the caller supplied an exact ticker
    return key

def suggest_symbols(partial_input):
    """Return up to five ``(alias, ticker)`` pairs that contain the input.

    Matching is a case-insensitive substring test against both the alias and
    the ticker; results follow the order of ``STOCK_SYMBOL_MAP``.
    """
    needle = partial_input.upper()
    matches = [
        (alias, ticker)
        for alias, ticker in STOCK_SYMBOL_MAP.items()
        if needle in alias.upper() or needle in ticker.upper()
    ]
    return matches[:5]

def predict_stock_price_enhanced(symbol_input, period='3y', n_ahead=5, future_days=7):
    """
    Run the full prediction pipeline for one stock and report the results.

    Steps: print market status, resolve the symbol, download history, add
    technical indicators, train the classical models and (when enough
    sequences exist) the LSTM, forecast the next ``future_days`` weekdays,
    show the figures, print a console summary, and save the models.

    Args:
        symbol_input: User-supplied name or ticker; resolved with
            ``get_correct_symbol``.
        period: History length passed to ``fetch_yf``.
        n_ahead: Number of rows the supervised target is shifted ahead for the
            classical models.
        future_days: Number of future weekdays to forecast.

    Returns:
        ``None`` when the download fails or fewer than 100 rows remain after
        the indicators are added. Otherwise a dict with the keys
        ``classical_results``, ``lstm_results`` (``None`` when the LSTM was
        skipped), ``dataframe``, ``original_symbol``, ``correct_symbol``,
        ``future_predictions``, ``predictions_table`` and ``current_price``.

    Side effects:
        Prints progress to stdout, calls ``.show()`` on each figure, and
        writes the RandomForest model, both scalers and the LSTM model to
        files named after the symbol (relative paths, so the current working
        directory).
    """
    # Check market status first
    get_market_status()
    
    # Get correct symbol format
    symbol = get_correct_symbol(symbol_input)
    
    print(f"\n{'='*60}")
    print(f"PREDICTING STOCK PRICE FOR: {symbol_input} -> {symbol}")
    print(f"{'='*60}")
    
    # Step 1: Fetch data with date information
    df = fetch_yf(symbol, period=period)
    if df is None:
        print(f"Could not fetch data for {symbol}. Please check the symbol and try again.")
        
        # Suggest similar symbols
        suggestions = suggest_symbols(symbol_input)
        if suggestions:
            print(f"\nDid you mean one of these?")
            for name, correct_symbol in suggestions:
                print(f"  {name} -> {correct_symbol}")
        
        return None
    
    # Show data date range
    print(f"📊 Data Range: {df.index[0].strftime('%Y-%m-%d')} to {df.index[-1].strftime('%Y-%m-%d')}")
    print(f"📈 Most recent price: ${df['Close'].iloc[-1]:.2f}")
    
    # Step 2: Add technical indicators
    df = add_technical_indicators(df)
    print(f"Data shape after technical indicators: {df.shape}")
    
    # Step 3: Prepare features
    # 'Close' is listed first; the LSTM helpers and create_sequences read the
    # close price from column 0 of their own two-column frame.
    features = ['Close','SMA_10','SMA_50','EMA_20','RSI','MACD','MACD_SIGNAL','BB_H','BB_L','Volume']
    available_features = [f for f in features if f in df.columns]
    df = df.dropna(subset=available_features)
    
    # Minimum history required before any model is trained
    if len(df) < 100:
        print(f"Not enough data for {symbol}. Only {len(df)} records available.")
        return None
    
    # Step 4: Train models
    print("\n🤖 Training Classical Models...")
    X, y, df_supervised = prepare_supervised(df, feature_cols=available_features, n_ahead=n_ahead)
    
    # shuffle=False keeps chronological order: the last 20% of rows form the test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    
    # Scaler is fitted on the training split only, then applied to the test split
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    results = train_and_eval_models(X_train_scaled, X_test_scaled, y_train, y_test)
    
    print("\n📊 Classical Models Performance:")
    for k, v in results.items():
        print(f"{k:15} MSE: {v['mse']:.4f}, MAE: {v['mae']:.4f}, R²: {v['r2']:.4f}")
    
    # Step 5: LSTM Model
    print("\n🧠 Training LSTM Model...")
    lstm_df = df[['Close','Volume']].copy()
    # NOTE(review): this scaler is fitted on the whole series before the
    # train/validation split below, so validation rows influence the scaling.
    scaler_lstm = MinMaxScaler()
    scaled = scaler_lstm.fit_transform(lstm_df)
    
    SEQ_LEN = 60
    X_seq, y_seq = create_sequences(scaled, seq_length=SEQ_LEN)
    
    lstm_results = None
    if len(X_seq) >= 100:
        # Chronological 80/20 split of the sequences
        split = int(0.8 * len(X_seq))
        X_tr, X_val = X_seq[:split], X_seq[split:]
        y_tr, y_val = y_seq[:split], y_seq[split:]
        
        lstm = build_lstm(input_shape=(X_tr.shape[1], X_tr.shape[2]))
        early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)
        
        history = lstm.fit(X_tr, y_tr, validation_data=(X_val, y_val), 
                          epochs=50, batch_size=32, callbacks=[early], verbose=0)
        
        val_preds = lstm.predict(X_val, verbose=0).flatten()
        
        # The scaler expects two columns, so a zero volume column is appended
        # and only the price column of the inverse transform is kept.
        y_val_un = scaler_lstm.inverse_transform(
            np.concatenate([y_val.reshape(-1,1), np.zeros((len(y_val),1))], axis=1)
        )[:,0]
        preds_un = scaler_lstm.inverse_transform(
            np.concatenate([val_preds.reshape(-1,1), np.zeros((len(val_preds),1))], axis=1)
        )[:,0]
        
        lstm_mae = mean_absolute_error(y_val_un, preds_un)
        lstm_mse = mean_squared_error(y_val_un, preds_un)
        print(f"LSTM Performance: MSE: {lstm_mse:.4f}, MAE: {lstm_mae:.4f}")
        
        lstm_results = {
            'actual': y_val_un,
            'predicted': preds_un,
            'model': lstm,
            'scaler': scaler_lstm
        }
    else:
        print(f"Not enough sequences for LSTM training. Need at least 100, got {len(X_seq)}")
    
    # Step 6: Enhanced Future Predictions
    print(f"\n🔮 Generating {future_days}-Day Future Predictions...")
    
    last_date = df.index[-1]
    future_dates = generate_future_dates_enhanced(last_date, days_ahead=future_days)
    
    # Future predictions
    # NOTE(review): the classical models are trained on a target n_ahead rows
    # in the future, while predict_future_prices feeds each output back as the
    # next step's close - confirm the intended horizon of each forecast.
    rf_model = results['RandomForest']['model']
    rf_future_predictions = predict_future_prices(rf_model, scaler, df, available_features, days_ahead=future_days)
    
    lstm_future_predictions = None
    if lstm_results is not None:
        lstm_future_predictions = predict_future_lstm(lstm_results['model'], lstm_results['scaler'], 
                                                     df, sequence_length=SEQ_LEN, days_ahead=future_days)
    
    # Step 7: Create Enhanced Visualizations
    today_price = df['Close'].iloc[-1]
    
    print("\n🎨 Creating Enhanced Visualizations...")
    
    # 1. Future Predictions Table
    table_fig, predictions_df = create_future_predictions_table(
        symbol, today_price, future_dates, rf_future_predictions, lstm_future_predictions
    )
    table_fig.show()
    
    # 2. Comparison Chart
    comparison_fig = create_prediction_comparison_chart(
        symbol, today_price, future_dates, rf_future_predictions, lstm_future_predictions
    )
    comparison_fig.show()
    
    # 3. Classical models visualization with future predictions
    # RandomForest is always the model shown here; it is not selected by score.
    # NOTE(review): y_test holds targets shifted n_ahead rows ahead, but the
    # visualization plots them against the last len(y_test) dates of df -
    # verify the date alignment.
    best_classical = results['RandomForest']
    classical_fig = create_professional_stock_visualization(
        df, y_test, best_classical['predictions'], symbol, "RandomForest",
        future_dates, rf_future_predictions
    )
    classical_fig.show()
    
    # 4. LSTM visualization if available
    if lstm_results is not None:
        lstm_fig = create_professional_stock_visualization(
            df, lstm_results['actual'], lstm_results['predicted'], symbol, "LSTM",
            future_dates, lstm_future_predictions
        )
        lstm_fig.show()
    
    # 5. Technical indicators visualization
    tech_fig = plot_technical_indicators(df, symbol)
    tech_fig.show()
    
    # Display detailed predictions in console
    print(f"\n{'='*80}")
    print(f"📈 DETAILED FUTURE PREDICTIONS FOR {symbol}")
    print(f"{'='*80}")
    print(f"📅 Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"💰 Current Price: ${today_price:.2f}")
    print(f"📊 Last Trading Day: {last_date.strftime('%Y-%m-%d (%A)')}")
    print(f"\n🔮 Next {future_days} Trading Days:")
    print("-" * 80)
    
    # One console line per future date; every change is relative to today_price
    for i, date in enumerate(future_dates):
        rf_pred = rf_future_predictions[i]
        rf_change = ((rf_pred - today_price) / today_price) * 100
        
        print(f"📅 {date.strftime('%Y-%m-%d (%A)')}: ", end="")
        print(f"RF: ${rf_pred:.2f} ({rf_change:+.2f}%)", end="")
        
        if lstm_future_predictions:
            lstm_pred = lstm_future_predictions[i]
            lstm_change = ((lstm_pred - today_price) / today_price) * 100
            print(f" | LSTM: ${lstm_pred:.2f} ({lstm_change:+.2f}%)", end="")
            
            # Label whichever model predicts the larger absolute move
            if abs(rf_change) > abs(lstm_change):
                stronger = "RF more volatile"
            elif abs(lstm_change) > abs(rf_change):
                stronger = "LSTM more volatile"
            else:
                stronger = "Similar volatility"
            print(f" | {stronger}", end="")
        
        # Trend emoji is based on the RandomForest change only (2% thresholds);
        # this print also terminates the line started above
        if rf_change > 2:
            print(" 🚀")
        elif rf_change > 0:
            print(" 📈")
        elif rf_change < -2:
            print(" 📉")
        elif rf_change < 0:
            print(" 🔻")
        else:
            print(" ➡️")
    
    # Summary statistics
    rf_avg_change = np.mean([((p - today_price) / today_price) * 100 for p in rf_future_predictions])
    rf_max_price = max(rf_future_predictions)
    rf_min_price = min(rf_future_predictions)
    
    print(f"\n📊 SUMMARY STATISTICS:")
    print(f"   RandomForest Average Change: {rf_avg_change:+.2f}%")
    print(f"   Predicted Range: ${rf_min_price:.2f} - ${rf_max_price:.2f}")
    # "Volatility" here is the predicted max-min spread as a share of today's price
    print(f"   Price Volatility: {((rf_max_price - rf_min_price) / today_price) * 100:.2f}%")
    
    if lstm_future_predictions:
        lstm_avg_change = np.mean([((p - today_price) / today_price) * 100 for p in lstm_future_predictions])
        print(f"   LSTM Average Change: {lstm_avg_change:+.2f}%")
    
    # Step 8: Save models
    print("\n💾 Saving Models...")
    
    # Save RandomForest model ('.' in the ticker is replaced by '_' in file names)
    rf_filename = f"{symbol.replace('.', '_')}_rf_model.joblib"
    joblib.dump(results['RandomForest']['model'], rf_filename)
    print(f"RandomForest model saved as {rf_filename}")
    
    # Save the MinMaxScaler used for classical models
    scaler_filename = f"{symbol.replace('.', '_')}_scaler.joblib"
    joblib.dump(scaler, scaler_filename)
    print(f"Scaler saved as {scaler_filename}")
    
    # Save LSTM model and its scaler
    if lstm_results is not None:
        lstm_filename = f"{symbol.replace('.', '_')}_lstm_model.keras"
        lstm_results['model'].save(lstm_filename)
        print(f"LSTM model saved as {lstm_filename}")
        
        lstm_scaler_filename = f"{symbol.replace('.', '_')}_lstm_scaler.joblib"
        joblib.dump(lstm_results['scaler'], lstm_scaler_filename)
        print(f"LSTM scaler saved as {lstm_scaler_filename}")
    
    print(f"\n✅ Prediction completed for {symbol}!")
    
    return {
        'classical_results': results,
        'lstm_results': lstm_results,
        'dataframe': df,
        'original_symbol': symbol_input,
        'correct_symbol': symbol,
        'future_predictions': {
            'dates': future_dates,
            'rf_predictions': rf_future_predictions,
            'lstm_predictions': lstm_future_predictions
        },
        'predictions_table': predictions_df,
        'current_price': today_price
    }


In [17]:

def create_portfolio_analysis(symbols_list, future_days=7):
    """
    Analyze multiple stocks and create a portfolio comparison.

    Calls ``predict_stock_price_enhanced`` for every entry of ``symbols_list``,
    condenses each stock's RandomForest forecast into an average percentage
    change and its standard deviation (reported as "volatility"), shows the
    comparison as a Plotly table and prints the best/worst performer and the
    lowest/highest volatility stock.

    Args:
        symbols_list: Iterable of stock symbols as entered by the user.
        future_days: Forwarded unchanged to ``predict_stock_price_enhanced``
            as its ``future_days`` argument.

    Returns:
        tuple: ``(all_results, portfolio_df)``.
            ``all_results`` maps each input symbol to the non-None result
            returned by ``predict_stock_price_enhanced``.
            ``portfolio_df`` is a DataFrame with the columns ``Symbol``,
            ``Current_Price``, ``RF_Avg_Change_%``, ``RF_Volatility_%`` and
            ``Trend``, or ``None`` when no stock produced a usable row.
    """
    print(f"\n{'='*80}")
    print(f"📊 PORTFOLIO ANALYSIS FOR MULTIPLE STOCKS")
    print(f"{'='*80}")

    portfolio_data = []
    all_results = {}

    for symbol_input in symbols_list:
        # Best-effort per symbol: one failing stock must not abort the others.
        try:
            print(f"\n🔄 Processing {symbol_input}...")
            result = predict_stock_price_enhanced(symbol_input, future_days=future_days)
            if result is None:
                continue
            all_results[symbol_input] = result

            # Extract key data for portfolio comparison
            current_price = result['current_price']
            rf_predictions = result['future_predictions']['rf_predictions']

            # With no forecast or a zero base price the percentage metrics are
            # undefined (NaN/inf) and would corrupt the table and the
            # idxmax/idxmin ranking below, so the row is left out. The full
            # result is still kept in all_results.
            if len(rf_predictions) == 0 or current_price == 0:
                print(f"⚠️  No usable RandomForest forecast for {symbol_input}; skipping portfolio row")
                continue

            # Percentage change of every predicted price relative to the
            # current price; computed once and reused for both metrics.
            rf_changes = [((p - current_price) / current_price) * 100 for p in rf_predictions]
            rf_avg_change = np.mean(rf_changes)
            rf_volatility = np.std(rf_changes)

            portfolio_data.append({
                'Symbol': result['correct_symbol'],
                'Current_Price': current_price,
                'RF_Avg_Change_%': rf_avg_change,
                'RF_Volatility_%': rf_volatility,
                'Trend': '📈' if rf_avg_change > 0 else '📉' if rf_avg_change < 0 else '➡️'
            })

        except Exception as e:
            print(f"❌ Error processing {symbol_input}: {str(e)}")

    if not portfolio_data:
        return all_results, None

    # Create portfolio comparison table
    portfolio_df = pd.DataFrame(portfolio_data)

    # Prepare portfolio table (one list per column, as go.Table expects)
    header_values = ['Symbol', 'Current Price ($)', 'Avg Change (%)', 'Volatility (%)', 'Trend']
    cell_values = [
        portfolio_df['Symbol'].tolist(),
        [f"${x:.2f}" for x in portfolio_df['Current_Price']],
        [f"{x:+.2f}%" for x in portfolio_df['RF_Avg_Change_%']],
        [f"{x:.2f}%" for x in portfolio_df['RF_Volatility_%']],
        portfolio_df['Trend'].tolist()
    ]

    # Color code based on performance
    def get_portfolio_colors(changes):
        """Map each average change (%) to a background color for its row."""
        colors = []
        for change in changes:
            if change > 1:
                colors.append('#d4edda')  # Light green for strong positive
            elif change > 0:
                colors.append('#f0f9f0')  # Very light green for positive
            elif change < -1:
                colors.append('#f8d7da')  # Light red for strong negative
            elif change < 0:
                colors.append('#fdf2f2')  # Very light red for negative
            else:
                colors.append('#fff3cd')  # Light yellow for neutral
        return colors

    change_colors = get_portfolio_colors(portfolio_df['RF_Avg_Change_%'])
    plain_column = ['white'] * len(portfolio_df)

    # Create portfolio comparison visualization
    fig = go.Figure()
    fig.add_trace(go.Table(
        header=dict(
            values=header_values,
            fill_color='#1f2937',
            font=dict(color='white', size=14, family="Arial Black"),
            align='center',
            height=40
        ),
        cells=dict(
            values=cell_values,
            fill_color=[plain_column,     # Symbol
                        plain_column,     # Current Price
                        change_colors,    # Avg Change
                        plain_column,     # Volatility
                        change_colors],   # Trend
            font=dict(size=12),
            align='center',
            height=35
        )
    ))

    fig.update_layout(
        title='📊 Portfolio Analysis - Stock Performance Comparison',
        # Grow the figure with the number of rows (35 px per table row).
        height=300 + len(portfolio_df) * 35,
        margin=dict(l=20, r=20, t=80, b=20)
    )

    fig.show()

    # Display portfolio summary
    print(f"\n{'='*60}")
    print(f"📊 PORTFOLIO SUMMARY")
    print(f"{'='*60}")
    print(f"Total Stocks Analyzed: {len(portfolio_df)}")

    # Best and worst performers
    best_performer = portfolio_df.loc[portfolio_df['RF_Avg_Change_%'].idxmax()]
    worst_performer = portfolio_df.loc[portfolio_df['RF_Avg_Change_%'].idxmin()]

    print(f" Best Performer: {best_performer['Symbol']} ({best_performer['RF_Avg_Change_%']:+.2f}%)")
    print(f" Worst Performer: {worst_performer['Symbol']} ({worst_performer['RF_Avg_Change_%']:+.2f}%)")

    # Risk analysis
    lowest_risk = portfolio_df.loc[portfolio_df['RF_Volatility_%'].idxmin()]
    highest_risk = portfolio_df.loc[portfolio_df['RF_Volatility_%'].idxmax()]

    print(f" Lowest Risk: {lowest_risk['Symbol']} ({lowest_risk['RF_Volatility_%']:.2f}% volatility)")
    print(f" Highest Risk: {highest_risk['Symbol']} ({highest_risk['RF_Volatility_%']:.2f}% volatility)")

    return all_results, portfolio_df


In [20]:
# Main execution block: interactive entry point of the prediction system.
if __name__ == "__main__":
    rule = "=" * 60

    # Banner, feature list and input instructions.
    for line in (
        "🚀 ENHANCED STOCK PRICE PREDICTION SYSTEM",
        rule,
        "Features:",
        "✅ Market status checking",
        "✅ Enhanced date handling (weekend awareness)",
        "✅ Interactive prediction tables",
        "✅ Multiple model comparisons",
        "✅ Portfolio analysis",
        "✅ Professional visualizations",
        rule,
        "\nYou can enter:",
        "- US Stocks: AAPL, MSFT, GOOGL, TSLA, AMZN, etc.",
        "- Indian Stocks: RELIANCE, TCS, INFY, L&T, HDFC, etc.",
        "- Other: Use exact Yahoo Finance symbols",
        "\nEnter stock symbols (type 'done' when finished)",
        rule,
    ):
        print(line)

    # Collect symbols until the user types 'done' (any letter case);
    # blank entries are ignored.
    stocks_to_predict = []
    while True:
        entered = input("\nEnter stock symbol: ").strip()
        if not entered:
            continue
        if entered.lower() == 'done':
            break
        stocks_to_predict.append(entered)

    # Fall back to a default list when nothing was entered.
    if not stocks_to_predict:
        stocks_to_predict = ['AAPL', 'MSFT', 'RELIANCE', 'TCS']
        print(f"\nNo symbols entered. Using default stocks: {stocks_to_predict}")

    # The analysis type is only asked for when more than one symbol is given;
    # a single symbol always gets the individual analysis.
    use_portfolio = False
    if len(stocks_to_predict) > 1:
        analysis_type = input(
            "\nChoose analysis type:"
            "\n1. Individual analysis"
            "\n2. Portfolio analysis"
            "\nEnter choice (1/2): "
        ).strip()
        use_portfolio = analysis_type == '2'

    if use_portfolio:
        all_results, portfolio_df = create_portfolio_analysis(stocks_to_predict, future_days=7)
    else:
        # Individual analysis: one prediction run per symbol; a failing
        # symbol is reported and the remaining ones are still processed.
        all_results = {}
        for symbol_input in stocks_to_predict:
            try:
                result = predict_stock_price_enhanced(symbol_input, future_days=7)
            except Exception as e:
                print(f"Error processing {symbol_input}: {e}")
                continue
            if result is not None:
                all_results[symbol_input] = result

    # Final summary report
    wide_rule = '=' * 80
    print(f"\n{wide_rule}")
    print("📊 FINAL PREDICTION SUMMARY REPORT")
    print(wide_rule)

    for requested, summary in all_results.items():
        rf_metrics = summary['classical_results']['RandomForest']
        resolved = summary['correct_symbol']
        forecast = summary['future_predictions']

        print(f"\n📈 {requested} -> {resolved}:")
        print(f"   Current Price: ${summary['current_price']:.2f}")
        print(f"   RandomForest - MAE: ${rf_metrics['mae']:.2f}, R²: {rf_metrics['r2']:.4f}")

        # LSTM results are optional (None when no LSTM result is available).
        lstm_info = summary['lstm_results']
        if lstm_info is not None:
            lstm_mae = mean_absolute_error(lstm_info['actual'], lstm_info['predicted'])
            print(f"   LSTM - MAE: ${lstm_mae:.2f}")

        # Show next day prediction (first entry of the forecast horizon).
        if forecast['dates']:
            first_date = forecast['dates'][0].strftime('%Y-%m-%d')
            first_rf_price = forecast['rf_predictions'][0]
            base_price = summary['current_price']
            pct_move = ((first_rf_price - base_price) / base_price) * 100

            print(f"   Next Trading Day ({first_date}): ${first_rf_price:.2f} ({pct_move:+.2f}%)")

    print(f"\nAnalysis completed for {len(all_results)} stocks!")
    for line in (
        "\nAvailable symbols for future analysis:",
        "🇺🇸 US Stocks: AAPL, MSFT, GOOGL, AMZN, TSLA, META, NFLX, NVDA",
        "🇮🇳 Indian Stocks: RELIANCE, TCS, INFY, L&T, HDFC, SBIN, WIPRO",
        "International: Use Yahoo Finance symbols",
        "\nModels and scalers have been saved for future use!",
        "Run the script again to analyze different stocks!",
    ):
        print(line)

🚀 ENHANCED STOCK PRICE PREDICTION SYSTEM
Features:
✅ Market status checking
✅ Enhanced date handling (weekend awareness)
✅ Interactive prediction tables
✅ Multiple model comparisons
✅ Portfolio analysis
✅ Professional visualizations

You can enter:
- US Stocks: AAPL, MSFT, GOOGL, TSLA, AMZN, etc.
- Indian Stocks: RELIANCE, TCS, INFY, L&T, HDFC, etc.
- Other: Use exact Yahoo Finance symbols

Enter stock symbols (type 'done' when finished)



MARKET STATUS CHECK
Current Date/Time: 2025-09-28 13:42:34
Today is: Sunday
⚠️  Markets are CLOSED (Weekend)
💡 Most recent trading day data available

PREDICTING STOCK PRICE FOR: TCS -> TCS.NS
Successfully fetched data for TCS.NS. Shape: (744, 5)
📊 Data Range: 2022-09-26 to 2025-09-26
📈 Most recent price: $2899.10
Data shape after technical indicators: (695, 13)

🤖 Training Classical Models...

📊 Classical Models Performance:
LinearRegression MSE: 11540.1254, MAE: 82.0311, R²: 0.6450
RandomForest    MSE: 13922.3589, MAE: 88.8116, R²: 0.5717
SVR             MSE: 94596.1703, MAE: 263.7846, R²: -1.9103

🧠 Training LSTM Model...
LSTM Performance: MSE: 5861.9144, MAE: 56.5265

🔮 Generating 7-Day Future Predictions...

📅 GENERATING FUTURE TRADING DATES
Last trading day in data: 2025-09-26 (Friday)
⏭️  2025-09-27 (Saturday) - Skipped (Weekend)
⏭️  2025-09-28 (Sunday) - Skipped (Weekend)
✅ 2025-09-29 (Monday) - Trading Day
✅ 2025-09-30 (Tuesday) - Trading Day
✅ 2025-10-01 (Wednesday) - Tradin


📈 DETAILED FUTURE PREDICTIONS FOR TCS.NS
📅 Analysis Date: 2025-09-28 13:43:47
💰 Current Price: $2899.10
📊 Last Trading Day: 2025-09-26 (Friday)

🔮 Next 7 Trading Days:
--------------------------------------------------------------------------------
📅 2025-09-29 (Monday): RF: $3041.15 (+4.90%) | LSTM: $3064.30 (+5.70%) | LSTM more volatile 🚀
📅 2025-09-30 (Tuesday): RF: $3035.24 (+4.70%) | LSTM: $3056.95 (+5.44%) | LSTM more volatile 🚀
📅 2025-10-01 (Wednesday): RF: $3035.45 (+4.70%) | LSTM: $3053.12 (+5.31%) | LSTM more volatile 🚀
📅 2025-10-02 (Thursday): RF: $3035.45 (+4.70%) | LSTM: $3051.64 (+5.26%) | LSTM more volatile 🚀
📅 2025-10-03 (Friday): RF: $3035.45 (+4.70%) | LSTM: $3052.09 (+5.28%) | LSTM more volatile 🚀
📅 2025-10-06 (Monday): RF: $3035.45 (+4.70%) | LSTM: $3053.97 (+5.34%) | LSTM more volatile 🚀
📅 2025-10-07 (Tuesday): RF: $3035.45 (+4.70%) | LSTM: $3056.37 (+5.42%) | LSTM more volatile 🚀

📊 SUMMARY STATISTICS:
   RandomForest Average Change: +4.73%
   Predicted Range: $30