In [13]:
import numpy as np
import pandas as pd
import yfinance as yf
import pandas_ta as ta
from sklearn.ensemble import RandomForestClassifier
from datetime import date

# Workaround for numpy NaN attribute if missing
if not hasattr(np, 'NaN'):
    np.NaN = np.nan

def predict_stock_direction(ticker):
    """
    Predict next-day stock direction using Random Forest and technical indicators.
    """
    try:
        print(f"📥 Downloading data for: {ticker}")
        data = yf.download(ticker, start="2020-01-01", end=date.today(), progress=False)

        if data.empty:
            print("❌ No data returned. Please check the ticker symbol.")
            return

        # Handle MultiIndex columns from yfinance
        if isinstance(data.columns, pd.MultiIndex):
            print("⚙️ Flattening MultiIndex columns...")
            data.columns = ['_'.join(col).lower() for col in data.columns.values]
        else:
            data.columns = data.columns.str.lower()

        # Rename columns to standard names if needed (handles yfinance with ticker suffix)
        expected = {
            f'open_{ticker.lower()}': 'open',
            f'high_{ticker.lower()}': 'high',
            f'low_{ticker.lower()}': 'low',
            f'close_{ticker.lower()}': 'close',
            f'volume_{ticker.lower()}': 'volume',
        }
        data.rename(columns=expected, inplace=True)

        print("✅ Cleaned columns:", list(data.columns))

        required_cols = ['open', 'high', 'low', 'close', 'volume']
        if not all(col in data.columns for col in required_cols):
            missing = [col for col in required_cols if col not in data.columns]
            print(f"❌ Missing required columns: {missing}")
            return

        # Calculate indicators
        print("📊 Calculating indicators (RSI, MACD, EMA)...")
        data.ta.rsi(length=14, append=True)
        data.ta.macd(fast=12, slow=26, signal=9, append=True)
        data.ta.ema(length=50, append=True)
        data.ta.ema(length=200, append=True)

        print("🔍 Available columns after indicators:")
        print(data.columns.tolist())

        # Dynamically find the exact indicator column names (case insensitive)
        def find_column(substr):
            found = [col for col in data.columns if substr.lower() in col.lower()]
            return found[0] if found else None

        rsi_col = find_column('rsi_14')
        macd_col = find_column('macd_12_26_9')
        ema_50_col = find_column('ema_50')
        ema_200_col = find_column('ema_200')

        if not all([rsi_col, macd_col, ema_50_col, ema_200_col]):
            print(f"❌ Missing indicator columns! Found: RSI({rsi_col}), MACD({macd_col}), EMA50({ema_50_col}), EMA200({ema_200_col})")
            return

        # Add engineered feature: EMA difference
        data['ema_diff'] = data[ema_50_col] - data[ema_200_col]

        # Add lagged return as a feature
        data['lag1_return'] = data['close'].pct_change().shift(1)

        # Create target variable: 1 if next day's close > today's close else 0
        data['target'] = (data['close'].shift(-1) > data['close']).astype(int)

        # Drop rows with missing values (due to indicators and shifts)
        data.dropna(inplace=True)

        # Define features list dynamically
        features = ['open', 'high', 'low', 'close', 'volume', rsi_col, macd_col, 'ema_diff', 'lag1_return']

        print(f"🤖 Training model on {len(data)} rows with features: {features}")

        X = data[features]
        y = data['target']

        model = RandomForestClassifier(n_estimators=100, min_samples_split=50, random_state=42)
        model.fit(X, y)
        print("✅ Model training complete.")

        # Use last available row for prediction
        latest = X.iloc[-1].values.reshape(1, -1)
        prediction = model.predict(latest)[0]
        confidence = model.predict_proba(latest)[0][prediction]

        direction = "UP" if prediction == 1 else "DOWN"

        print("\n" + "="*60)
        print(f"🔮 PREDICTION FOR {ticker.upper()} - NEXT TRADING DAY")
        print(f"📈 The price is predicted to go: {direction}")
        print(f"📊 Confidence: {confidence:.2%}")
        print("="*60)

    except Exception as e:
        print(f"❌ ERROR: {e}")

# --- Run the prediction ---
if __name__ == "__main__":
    predict_stock_direction("AAPL")  # Change ticker here as needed


📥 Downloading data for: AAPL


  data = yf.download(ticker, start="2020-01-01", end=date.today(), progress=False)


⚙️ Flattening MultiIndex columns...
✅ Cleaned columns: ['close', 'high', 'low', 'open', 'volume']
📊 Calculating indicators (RSI, MACD, EMA)...
🔍 Available columns after indicators:
['close', 'high', 'low', 'open', 'volume', 'RSI_14', 'MACD_12_26_9', 'MACDh_12_26_9', 'MACDs_12_26_9', 'EMA_50', 'EMA_200']
🤖 Training model on 1199 rows with features: ['open', 'high', 'low', 'close', 'volume', 'RSI_14', 'MACD_12_26_9', 'ema_diff', 'lag1_return']
✅ Model training complete.

🔮 PREDICTION FOR AAPL - NEXT TRADING DAY
📈 The price is predicted to go: DOWN
📊 Confidence: 53.70%


