<a href="https://colab.research.google.com/github/Shashank-agastya/Stock_1/blob/main/stock_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Complete Stock Market Time Series Forecasting Project
# ALL REQUIRED MODELS: ARIMA, SARIMA, Prophet, LSTM

# Install required packages
#!pip install yfinance prophet plotly streamlit statsmodels tensorflow scikit-learn -q

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from prophet import Prophet
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')

# For ARIMA/SARIMA models
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller

# For LSTM model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

class CompleteStockForecaster:
    def __init__(self, symbol='AAPL', period='2y'):
        self.symbol = symbol
        self.period = period
        self.data = None
        self.models = {}
        self.forecasts = {}
        self.metrics = {}

    def fetch_data(self):
        """Download price history for ``self.symbol`` using yfinance.

        The frame returned by ``Ticker.history(period=self.period)`` is stored
        on ``self.data`` and also returned to the caller.

        Returns:
            The downloaded price history.

        Raises:
            ValueError: If the download contains no rows. Previously this
                surfaced as a bare ``IndexError`` from ``index[0]`` after a
                misleading "Data fetched: 0 records" message.
        """
        print(f"📈 Fetching data for {self.symbol}...")
        ticker = yf.Ticker(self.symbol)
        self.data = ticker.history(period=self.period)

        # NOTE(review): yfinance appears to return an empty frame (rather than
        # raising) for unknown symbols or failed requests - confirm against the
        # installed version. Either way, nothing downstream can work without rows.
        if self.data is None or len(self.data) == 0:
            raise ValueError(
                f"No price data returned for symbol '{self.symbol}' "
                f"(period='{self.period}'); check the ticker and the network connection."
            )

        first_day = self.data.index[0].strftime('%Y-%m-%d')
        last_day = self.data.index[-1].strftime('%Y-%m-%d')
        print(f"✅ Data fetched: {len(self.data)} records")
        print(f"📅 Date range: {first_day} to {last_day}")
        return self.data

    def analyze_time_series(self):
        """Print an ADF stationarity test and attempt a seasonal decomposition.

        Both steps run on ``self.data['Close']`` with missing values dropped.

        Returns:
            The ``seasonal_decompose`` result, or ``None`` when the series is
            too short for a seasonal period of at least 2.
        """
        print("\n🔍 TIME SERIES ANALYSIS:")

        # Feed the same NaN-free series to both statsmodels routines.
        close = self.data['Close'].dropna()

        # Check stationarity
        result = adfuller(close)
        print(f"📊 Stationarity Test (ADF):")
        print(f"   - ADF Statistic: {result[0]:.4f}")
        print(f"   - p-value: {result[1]:.4f}")
        print(f"   - Is Stationary: {'Yes' if result[1] < 0.05 else 'No'}")

        # Seasonal decomposition: use 50 observations or a third of the data,
        # whichever is smaller. Because period <= len // 3, two full cycles
        # always fit, so the meaningful guard is on the period itself: a
        # period below 2 describes no seasonality at all.
        min_period = 2
        period = min(50, len(close) // 3)
        if period >= min_period:
            decomposition = seasonal_decompose(close, model='multiplicative', period=period)
            print(f"📈 Seasonal decomposition completed with period={period}")
        else:
            print(f"⚠️  Insufficient data for seasonal decomposition (need {3 * min_period}, have {len(close)})")
            decomposition = None

        return decomposition

    def prepare_data_differencing(self, data, d=1):
        """Make data stationary for ARIMA"""
        diff_data = data.copy()
        for i in range(d):
            diff_data = diff_data.diff().dropna()
        return diff_data

    # MODEL 1: ARIMA
    def train_arima(self, order=(5,1,0)):
        """Fit an ARIMA model to the closing prices.

        On success the fitted results object is stored in
        ``self.models['ARIMA']``.

        Args:
            order: ``(p, d, q)`` tuple passed unchanged to statsmodels' ``ARIMA``.

        Returns:
            The fitted results object, or ``None`` if anything raised while
            fitting (the error is printed rather than propagated).
        """
        print(f"\n🔄 Training ARIMA{order} model...")

        try:
            # The raw price series is passed on purpose: the d term of `order`
            # makes ARIMA difference internally, so differencing it by hand
            # first would either be wasted work or difference it twice.
            model = ARIMA(self.data['Close'], order=order)
            fitted_model = model.fit()

            self.models['ARIMA'] = fitted_model
            print(f"✅ ARIMA model trained successfully!")
            print(f"   - AIC: {fitted_model.aic:.2f}")

            return fitted_model

        except Exception as e:
            # Deliberately best-effort: a failed model must not stop the others.
            print(f"❌ ARIMA training failed: {e}")
            return None

    # MODEL 2: SARIMA
    def train_sarima(self, order=(1,1,1), seasonal_order=(1,1,1,12)):
        """Fit a seasonal ARIMA (statsmodels ``SARIMAX``) to the closing prices.

        On success the fitted results object is stored in
        ``self.models['SARIMA']``.

        Args:
            order: Non-seasonal order tuple handed to ``SARIMAX``.
            seasonal_order: Seasonal order tuple handed to ``SARIMAX``.

        Returns:
            The fitted results object, or ``None`` when any step raised (the
            error is printed, not propagated).
        """
        print(f"\n🔄 Training SARIMA{order}x{seasonal_order} model...")

        try:
            close_prices = self.data['Close']
            sarima_spec = SARIMAX(close_prices, order=order, seasonal_order=seasonal_order)

            # disp=False keeps the optimiser's convergence log off the console.
            sarima_fit = sarima_spec.fit(disp=False)
            self.models['SARIMA'] = sarima_fit

            aic_value = sarima_fit.aic
            print(f"✅ SARIMA model trained successfully!")
            print(f"   - AIC: {aic_value:.2f}")
            return sarima_fit
        except Exception as e:
            print(f"❌ SARIMA training failed: {e}")
            return None

    # MODEL 3: Prophet
    def train_prophet(self):
        """Fit a Prophet model to the closing prices.

        The price frame is reshaped into the two columns Prophet is given here
        (``ds`` for the date, ``y`` for the close). On success
        ``self.models['Prophet']`` holds a dict with the fitted ``'model'`` and
        the training ``'data'`` frame.

        Returns:
            The fitted Prophet model, or ``None`` when any step raised (the
            error is printed, not propagated).
        """
        print(f"\n🔄 Training Prophet model...")

        try:
            # Date index -> column, keep only what is needed, rename for Prophet.
            column_map = {'Date': 'ds', 'Close': 'y'}
            flat = self.data.reset_index()
            training_frame = flat[['Date', 'Close']].rename(columns=column_map)

            # Strip the timezone from the date column before fitting.
            naive_dates = training_frame['ds'].dt.tz_localize(None)
            training_frame['ds'] = naive_dates

            prophet_options = {
                'daily_seasonality': True,
                'weekly_seasonality': True,
                'yearly_seasonality': True,
                'changepoint_prior_scale': 0.05,
            }
            prophet_model = Prophet(**prophet_options)
            prophet_model.fit(training_frame)

            self.models['Prophet'] = {'model': prophet_model, 'data': training_frame}
            print(f"✅ Prophet model trained successfully!")
            return prophet_model
        except Exception as e:
            print(f"❌ Prophet training failed: {e}")
            return None

    # MODEL 4: LSTM
    def prepare_lstm_data(self, lookback=60):
        """Prepare data for LSTM model"""
        # Scale the data
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = scaler.fit_transform(self.data['Close'].values.reshape(-1, 1))

        # Create sequences
        X, y = [], []
        for i in range(lookback, len(scaled_data)):
            X.append(scaled_data[i-lookback:i, 0])
            y.append(scaled_data[i, 0])

        X, y = np.array(X), np.array(y)
        X = X.reshape((X.shape[0], X.shape[1], 1))

        return X, y, scaler

    def train_lstm(self, lookback=60, epochs=50):
        """Build and fit a three-layer stacked LSTM on the scaled closing prices.

        The windows from ``prepare_lstm_data`` are split in order, the first
        80% for training and the remainder passed to Keras as validation data.
        On success ``self.models['LSTM']`` holds a dict with the ``'model'``,
        the ``'scaler'``, the ``'lookback'`` used and the training ``'history'``.

        Args:
            lookback: Window length forwarded to ``prepare_lstm_data`` and used
                as the network's input length.
            epochs: Number of training epochs.

        Returns:
            The fitted Keras model, or ``None`` when any step raised (the
            error is printed, not propagated).
        """
        print(f"\n🔄 Training LSTM model...")

        try:
            windows, targets, scaler = self.prepare_lstm_data(lookback)

            # Ordered split: earliest 80% trains, latest 20% validates.
            cut = int(len(windows) * 0.8)
            X_train, y_train = windows[:cut], targets[:cut]
            X_test, y_test = windows[cut:], targets[cut:]

            # Three 50-unit LSTM layers, each followed by 20% dropout, then a
            # single-unit regression head.
            layer_stack = [
                LSTM(50, return_sequences=True, input_shape=(lookback, 1)),
                Dropout(0.2),
                LSTM(50, return_sequences=True),
                Dropout(0.2),
                LSTM(50),
                Dropout(0.2),
                Dense(1),
            ]
            network = Sequential(layer_stack)
            network.compile(loss='mean_squared_error', optimizer='adam')

            fit_history = network.fit(
                X_train,
                y_train,
                validation_data=(X_test, y_test),
                batch_size=32,
                epochs=epochs,
                verbose=0,
            )

            self.models['LSTM'] = dict(
                model=network,
                scaler=scaler,
                lookback=lookback,
                history=fit_history,
            )

            final_loss = fit_history.history['loss'][-1]
            print(f"✅ LSTM model trained successfully!")
            print(f"   - Final Loss: {final_loss:.6f}")
            return network
        except Exception as e:
            print(f"❌ LSTM training failed: {e}")
            return None

    def forecast_all_models(self, days=30):
        """Generate forecasts from all models"""
        print(f"\n🔮 Generating {days}-day forecasts...")

        forecasts = {}

        # ARIMA Forecast
        if 'ARIMA' in self.models and self.models['ARIMA'] is not None:
            try:
                forecast = self.models['ARIMA'].forecast(steps=days)
                forecasts['ARIMA'] = forecast
                print("✅ ARIMA forecast generated")
            except Exception as e:
                print(f"❌ ARIMA forecast failed: {e}")

        # SARIMA Forecast
        if 'SARIMA' in self.models and self.models['SARIMA'] is not None:
            try:
                forecast = self.models['SARIMA'].forecast(steps=days)
                forecasts['SARIMA'] = forecast
                print("✅ SARIMA forecast generated")
            except Exception as e:
                print(f"❌ SARIMA forecast failed: {e}")

        # Prophet Forecast
        if 'Prophet' in self.models and self.models['Prophet'] is not None:
            try:
                model = self.models['Prophet']['model']
                future = model.make_future_dataframe(periods=days)
                forecast = model.predict(future)
                forecasts['Prophet'] = forecast.tail(days)['yhat'].values
                print("✅ Prophet forecast generated")
            except Exception as e:
                print(f"❌ Prophet forecast failed: {e}")

        # LSTM Forecast
        if 'LSTM' in self.models and self.models['LSTM'] is not None:
            try:
                lstm_info = self.models['LSTM']
                model = lstm_info['model']
                scaler = lstm_info['scaler']
                lookback = lstm_info['lookback']

                # Get last sequence
                last_sequence = scaler.transform(self.data['Close'].tail(lookback).values.reshape(-1, 1))

                predictions = []
                current_sequence = last_sequence.reshape(1, lookback, 1)

                for _ in range(days):
                    pred = model.predict(current_sequence, verbose=0)
                    predictions.append(pred[0, 0])

                    # Update sequence
                    current_sequence = np.roll(current_sequence, -1, axis=1)
                    current_sequence[0, -1, 0] = pred[0, 0]

                # Inverse transform
                predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()
                forecasts['LSTM'] = predictions
                print("✅ LSTM forecast generated")

            except Exception as e:
                print(f"❌ LSTM forecast failed: {e}")

        self.forecasts = forecasts
        return forecasts

    def evaluate_models(self):
        """Evaluate all models"""
        print(f"\n📊 EVALUATING MODEL PERFORMANCE...")

        metrics = {}
        actual_prices = self.data['Close'].tail(50).values  # Last 50 days for evaluation

        for model_name, model in self.models.items():
            if model is None:
                continue

            try:
                if model_name == 'ARIMA':
                    fitted_values = model.fittedvalues.tail(50).values
                elif model_name == 'SARIMA':
                    fitted_values = model.fittedvalues.tail(50).values
                elif model_name == 'Prophet':
                    # Get fitted values from Prophet
                    df = model['data']
                    prophet_model = model['model']
                    fitted = prophet_model.predict(df)
                    fitted_values = fitted['yhat'].tail(50).values
                elif model_name == 'LSTM':
                    # Evaluate LSTM on test data
                    X, y, scaler = self.prepare_lstm_data()
                    test_start = int(len(X) * 0.8)
                    X_test = X[test_start:]
                    y_test = y[test_start:]

                    predictions = model['model'].predict(X_test, verbose=0)
                    fitted_values = scaler.inverse_transform(predictions).flatten()
                    actual_prices = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()

                # Calculate metrics
                mape = mean_absolute_percentage_error(actual_prices, fitted_values) * 100
                rmse = np.sqrt(mean_squared_error(actual_prices, fitted_values))

                metrics[model_name] = {
                    'MAPE': round(mape, 2),
                    'RMSE': round(rmse, 2)
                }

                print(f"✅ {model_name}: MAPE={mape:.2f}%, RMSE={rmse:.2f}")

            except Exception as e:
                print(f"❌ {model_name} evaluation failed: {e}")
                metrics[model_name] = {'MAPE': 'N/A', 'RMSE': 'N/A'}

        self.metrics = metrics
        return metrics

    def plot_comprehensive_results(self, days=30):
        """Show a three-panel Plotly figure of history, forecasts and accuracy.

        The top panel (full width) draws the closing prices followed by each
        forecast; the bottom-left panel is a bar chart of MAPE per model; the
        bottom-right panel overlays the forecasts against their step number.

        Args:
            days: Maximum number of forecast steps to draw. Forecasts shorter
                than this are drawn in full, so x and y always have matching
                lengths.
        """
        # Three subplots exist (the first spans both columns), so exactly
        # three titles are supplied.
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=(
                'Historical Data & Forecasts',
                'Model Performance Comparison',
                'Forecast Comparison',
            ),
            specs=[[{"colspan": 2}, None],
                   [{"type": "bar"}, {"type": "scatter"}]]
        )

        # Historical data
        dates = self.data.index
        fig.add_trace(
            go.Scatter(x=dates, y=self.data['Close'],
                      name='Historical', line=dict(color='blue')),
            row=1, col=1
        )

        # Normalise every usable forecast to a flat array of at most `days` values.
        horizon = max(days, 0)
        shown = {}
        for model_name, forecast in self.forecasts.items():
            if forecast is None:
                continue
            values = np.asarray(forecast).ravel()[:horizon]
            if len(values) > 0:
                shown[model_name] = values

        # Forecasts
        colors = {'ARIMA': 'red', 'SARIMA': 'green', 'Prophet': 'purple', 'LSTM': 'orange'}
        first_future_day = dates[-1] + pd.Timedelta(days=1)
        for model_name, values in shown.items():
            # The models are trained on one row per trading day, so each
            # forecast step is placed on a business day instead of a calendar
            # day (exchange holidays are not excluded).
            future_dates = pd.date_range(start=first_future_day, periods=len(values), freq='B')
            fig.add_trace(
                go.Scatter(x=future_dates, y=values,
                          name=f'{model_name} Forecast',
                          line=dict(color=colors.get(model_name, 'black'), dash='dash')),
                row=1, col=1
            )

        # Model performance bar chart
        if self.metrics:
            models = list(self.metrics.keys())
            # A failed evaluation becomes a gap: drawing it as 0 would make
            # the failed model look like the most accurate one.
            mapes = [
                None if self.metrics[m]['MAPE'] == 'N/A' else self.metrics[m]['MAPE']
                for m in models
            ]

            fig.add_trace(
                go.Bar(x=models, y=mapes, name='MAPE (%)', marker_color='lightblue'),
                row=2, col=1
            )

        # Forecast values comparison, indexed by step number.
        for model_name, values in shown.items():
            fig.add_trace(
                go.Scatter(x=list(range(1, len(values) + 1)), y=values,
                          name=f'{model_name}', mode='lines+markers'),
                row=2, col=2
            )

        fig.update_layout(height=800, title_text=f"Complete Stock Analysis: {self.symbol}")
        fig.show()

    def generate_comprehensive_report(self):
        """Generate detailed analysis report"""
        current_price = self.data['Close'].iloc[-1]

        report = f"""
        📈 COMPREHENSIVE STOCK MARKET ANALYSIS REPORT
        {'='*60}

        🏢 STOCK INFORMATION:
        - Symbol: {self.symbol}
        - Current Price: ${current_price:.2f}
        - Analysis Period: {self.period}
        - Data Points: {len(self.data)} records
        - Date Range: {self.data.index[0].strftime('%Y-%m-%d')} to {self.data.index[-1].strftime('%Y-%m-%d')}

        🤖 MODELS IMPLEMENTED:
        """

        for model_name in ['ARIMA', 'SARIMA', 'Prophet', 'LSTM']:
            status = "✅ Trained" if model_name in self.models and self.models[model_name] is not None else "❌ Failed"
            report += f"        - {model_name}: {status}\n"

        report += f"""
        📊 MODEL PERFORMANCE METRICS:
        """

        for model_name, metrics in self.metrics.items():
            report += f"        - {model_name}: MAPE={metrics['MAPE']}%, RMSE={metrics['RMSE']}\n"

        if self.forecasts:
            report += f"""
        🔮 FORECAST SUMMARY (30 days):
        """
            for model_name, forecast in self.forecasts.items():
                if forecast is not None and len(forecast) > 0:
                    forecast_price = forecast.iloc[-1] if isinstance(forecast, pd.Series) else forecast[-1]
                    change = ((forecast_price - current_price) / current_price) * 100
                    report += f"        - {model_name}: ${forecast_price:.2f} ({change:+.2f}%)\n"

        # Find best model
        best_model = None
        best_mape = float('inf')

        for model_name, metrics in self.metrics.items():
            if metrics['MAPE'] != 'N/A' and metrics['MAPE'] < best_mape:
                best_mape = metrics['MAPE']
                best_model = model_name

        report += f"""
        🏆 BEST PERFORMING MODEL: {best_model} (MAPE: {best_mape}%)

        📋 TECHNICAL ANALYSIS SUMMARY:
        - Trend: {'Upward' if self.data['Close'].iloc[-1] > self.data['Close'].iloc[-30] else 'Downward'}
        - Volatility: {((self.data['Close'].std() / self.data['Close'].mean()) * 100):.2f}%
        - Volume Trend: {self.data['Volume'].tail(10).mean()/1000000:.2f}M avg (last 10 days)

        ⚠️  DISCLAIMER:
        This analysis is for educational purposes only.
        Not financial advice. Past performance doesn't guarantee future results.
        Always consult with financial professionals before making investment decisions.
        """

        return report

# MAIN EXECUTION
if __name__ == "__main__":
    # End-to-end run: download, analyse, train, forecast, evaluate, plot,
    # report, and export a one-row-per-model CSV summary.
    print("🚀 STARTING COMPLETE STOCK MARKET ANALYSIS...")
    print("=" * 60)

    # Forecaster for Apple over a two-year window.
    forecaster = CompleteStockForecaster('AAPL', '2y')

    # Step 1: download prices, then run the exploratory analysis.
    data = forecaster.fetch_data()
    decomposition = forecaster.analyze_time_series()

    # Step 2: fit the four models in a fixed order.
    print("\n🤖 TRAINING ALL REQUIRED MODELS:")
    print("-" * 40)

    forecaster.train_arima(order=(5,1,0))
    forecaster.train_sarima(order=(1,1,1), seasonal_order=(1,1,1,12))
    forecaster.train_prophet()
    # Fewer epochs than the method default, to keep the run short.
    forecaster.train_lstm(epochs=20)

    # Step 3: forecasts, Step 4: metrics.
    forecasts = forecaster.forecast_all_models(days=30)
    metrics = forecaster.evaluate_models()

    # Step 5: interactive figure.
    forecaster.plot_comprehensive_results()

    # Step 6: text report.
    report = forecaster.generate_comprehensive_report()
    print(report)

    # Assemble the summary table column by column.
    model_names = list(forecasts.keys())

    final_values = []
    for model in model_names:
        model_forecast = forecasts[model]
        if isinstance(model_forecast, pd.Series) and len(model_forecast) > 0:
            # Series need positional access for the last element.
            final_values.append(model_forecast.iloc[-1])
        elif len(model_forecast) > 0:
            final_values.append(model_forecast[-1])
        else:
            final_values.append(None)

    mape_column = []
    rmse_column = []
    for model in model_names:
        model_metrics = metrics.get(model, {})
        mape_column.append(model_metrics.get('MAPE', 'N/A'))
        rmse_column.append(model_metrics.get('RMSE', 'N/A'))

    results_df = pd.DataFrame({
        'Model': model_names,
        'Forecast_30_days': final_values,
        'MAPE': mape_column,
        'RMSE': rmse_column,
    })

    results_df.to_csv(f'{forecaster.symbol}_complete_analysis.csv', index=False)

    print(f"\n💾 Complete analysis saved to {forecaster.symbol}_complete_analysis.csv")
    print("\n✅ ALL ANALYSIS COMPLETE! Ready for presentation.")
    print("\n📋 DELIVERABLES GENERATED:")
    for deliverable_line in (
        "   ✓ Historical data analysis",
        "   ✓ ARIMA model implementation",
        "   ✓ SARIMA model implementation",
        "   ✓ Prophet model implementation",
        "   ✓ LSTM model implementation",
        "   ✓ Model comparison & evaluation",
        "   ✓ Interactive visualizations",
        "   ✓ Comprehensive report",
        "   ✓ CSV results export",
    ):
        print(deliverable_line)

🚀 STARTING COMPLETE STOCK MARKET ANALYSIS...
📈 Fetching data for AAPL...
✅ Data fetched: 500 records
📅 Date range: 2023-09-05 to 2025-09-02

🔍 TIME SERIES ANALYSIS:
📊 Stationarity Test (ADF):
   - ADF Statistic: -1.8602
   - p-value: 0.3510
   - Is Stationary: No
📈 Seasonal decomposition completed with period=50

🤖 TRAINING ALL REQUIRED MODELS:
----------------------------------------

🔄 Training ARIMA(5, 1, 0) model...


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


✅ ARIMA model trained successfully!
   - AIC: 2671.43

🔄 Training SARIMA(1, 1, 1)x(1, 1, 1, 12) model...


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


✅ SARIMA model trained successfully!
   - AIC: 2666.82

🔄 Training Prophet model...


DEBUG:cmdstanpy:input tempfile: /tmp/tmplcona97e/c7t7ourt.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmplcona97e/prdg0ygo.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.12/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=29663', 'data', 'file=/tmp/tmplcona97e/c7t7ourt.json', 'init=/tmp/tmplcona97e/prdg0ygo.json', 'output', 'file=/tmp/tmplcona97e/prophet_model_oabaedg/prophet_model-20250902145952.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
14:59:52 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:59:52 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


✅ Prophet model trained successfully!

🔄 Training LSTM model...
✅ LSTM model trained successfully!
   - Final Loss: 0.011047

🔮 Generating 30-day forecasts...
✅ ARIMA forecast generated
✅ SARIMA forecast generated
✅ Prophet forecast generated


  return get_prediction_index(
  return get_prediction_index(


✅ LSTM forecast generated

📊 EVALUATING MODEL PERFORMANCE...
✅ ARIMA: MAPE=0.94%, RMSE=2.85
✅ SARIMA: MAPE=0.93%, RMSE=2.98
✅ Prophet: MAPE=2.65%, RMSE=6.95
✅ LSTM: MAPE=3.42%, RMSE=8.40



        📈 COMPREHENSIVE STOCK MARKET ANALYSIS REPORT
        
        🏢 STOCK INFORMATION:
        - Symbol: AAPL
        - Current Price: $228.09
        - Analysis Period: 2y
        - Data Points: 500 records
        - Date Range: 2023-09-05 to 2025-09-02
        
        🤖 MODELS IMPLEMENTED:
                - ARIMA: ✅ Trained
        - SARIMA: ✅ Trained
        - Prophet: ✅ Trained
        - LSTM: ✅ Trained

        📊 MODEL PERFORMANCE METRICS:
                - ARIMA: MAPE=0.94%, RMSE=2.85
        - SARIMA: MAPE=0.93%, RMSE=2.98
        - Prophet: MAPE=2.65%, RMSE=6.95
        - LSTM: MAPE=3.42%, RMSE=8.4

        🔮 FORECAST SUMMARY (30 days):
                - ARIMA: $227.60 (-0.21%)
        - SARIMA: $228.75 (+0.29%)
        - Prophet: $218.35 (-4.27%)
        - LSTM: $250.80 (+9.96%)

        🏆 BEST PERFORMING MODEL: SARIMA (MAPE: 0.93%)
        
        📋 TECHNICAL ANALYSIS SUMMARY:
        - Trend: Upward
        - Volatility: 11.57%
        - Volume Trend: 36.14M avg (last