In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import seaborn as sns
from datetime import datetime
import sys

def get_user_input():
    """Interactively collect and validate the stock download parameters.

    Each prompt is repeated until a valid answer is given: the symbol must be
    non-empty, both dates must parse as YYYY-MM-DD, the end date must be
    strictly after the start date, and the timeframe must be menu item 1-3.
    Surrounding whitespace is ignored on every answer.

    Returns:
        tuple: ``(stock_symbol, start_date, end_date, timeframe)`` where the
        symbol is upper-cased, the dates are the entered strings, and the
        timeframe is one of ``'1d'``, ``'1wk'`` or ``'1mo'``.

    Raises:
        SystemExit: with status 0 when the user interrupts with Ctrl-C.
    """
    date_format = '%Y-%m-%d'
    valid_timeframes = {'1': '1d', '2': '1wk', '3': '1mo'}

    try:
        # Get stock symbol; an empty symbol can never be fetched, so re-prompt.
        while True:
            stock_symbol = input("Enter stock symbol (e.g., AAPL, GOOGL): ").upper().strip()
            if stock_symbol:
                break
            print("Stock symbol cannot be empty.")

        # Get start date (parsed once and reused for the range check below).
        while True:
            start_date = input("Enter start date (YYYY-MM-DD): ").strip()
            try:
                start_datetime = datetime.strptime(start_date, date_format)
            except ValueError:
                print("Invalid date format. Please use YYYY-MM-DD format.")
            else:
                break

        # Get end date; it must fall strictly after the start date.
        while True:
            end_date = input("Enter end date (YYYY-MM-DD): ").strip()
            try:
                end_datetime = datetime.strptime(end_date, date_format)
            except ValueError:
                print("Invalid date format. Please use YYYY-MM-DD format.")
                continue
            if end_datetime <= start_datetime:
                print("End date must be after start date.")
                continue
            break

        # Get timeframe
        while True:
            print("\nSelect timeframe:")
            print("1. Daily")
            print("2. Weekly")
            print("3. Monthly")
            timeframe_choice = input("Enter your choice (1-3): ").strip()

            if timeframe_choice in valid_timeframes:
                timeframe = valid_timeframes[timeframe_choice]
                break
            print("Invalid choice. Please select 1, 2, or 3.")

        return stock_symbol, start_date, end_date, timeframe

    except KeyboardInterrupt:
        print("\nOperation cancelled by user.")
        sys.exit(0)

def validate_stock_data(data, stock_symbol, min_points=60):
    """Check that fetched stock data is present and long enough to use.

    Args:
        data: The fetched table (anything exposing ``.empty`` and ``len()``,
            such as a pandas DataFrame), or ``None``.
        stock_symbol: Ticker symbol, used only in the printed messages.
        min_points: Minimum number of rows required. Defaults to 60, the
            minimum this script requires for sequence generation.

    Returns:
        bool: ``True`` when ``data`` is non-empty and has at least
        ``min_points`` rows; otherwise ``False`` after printing the reason.
    """
    if data is None or data.empty:
        print(f"No data found for stock symbol: {stock_symbol}")
        return False

    if len(data) < min_points:  # Minimum required for sequence generation
        print(f"Insufficient data points for {stock_symbol}. Need at least {min_points} data points.")
        return False

    return True

def fetch_stock_data(stock_symbol, start_date, end_date, timeframe):
    """Download price history for one symbol and return it if it validates.

    Args:
        stock_symbol: Ticker symbol passed to ``yf.download``.
        start_date: Value forwarded as the ``start`` argument.
        end_date: Value forwarded as the ``end`` argument.
        timeframe: Value forwarded as the ``interval`` argument.

    Returns:
        The downloaded data when ``validate_stock_data`` accepts it, otherwise
        ``None``. Any exception raised along the way is printed and also
        results in ``None``.
    """
    try:
        print(f"\nFetching data for {stock_symbol}...")
        frame = yf.download(
            stock_symbol,
            start=start_date,
            end=end_date,
            interval=timeframe,
        )

        # Guard clause: bail out early when the download is unusable.
        if not validate_stock_data(frame, stock_symbol):
            return None

        print(f"Successfully fetched {len(frame)} data points for {stock_symbol}")
        return frame

    except Exception as err:
        # Best-effort boundary: report the failure and signal it with None.
        print(f"Error fetching data: {err}")
        return None

def calculate_technical_indicators(data, price_col):
    """Compute MACD, its signal line, and RSI for one price column.

    Args:
        data: Table indexable by ``price_col`` (e.g. a pandas DataFrame).
        price_col: Name of the column holding the price series.

    Returns:
        tuple: ``(macd, macd_signal, rsi)``.
        MACD is the 12-span EMA minus the 26-span EMA, and the signal line is
        the 9-span EMA of MACD. RSI uses 14-period simple rolling means of
        gains and losses, so its first 13 entries are NaN.
    """
    print("Calculating technical indicators...")
    close = data[price_col]

    # MACD: fast EMA minus slow EMA, then smooth the difference.
    ema_fast = close.ewm(span=12, adjust=False).mean()
    ema_slow = close.ewm(span=26, adjust=False).mean()
    macd_line = ema_fast - ema_slow
    signal_line = macd_line.ewm(span=9, adjust=False).mean()

    # RSI: split one-step changes into gains and losses. `where` replaces
    # every non-matching entry (including the leading NaN) with 0.
    change = close.diff()
    ups = change.where(change > 0, 0)
    downs = -change.where(change < 0, 0)
    mean_up = ups.rolling(window=14).mean()
    mean_down = downs.rolling(window=14).mean()
    relative_strength = mean_up / mean_down
    rsi_line = 100 - (100 / (1 + relative_strength))

    return macd_line, signal_line, rsi_line

def plot_indicators(data, price_col, macd, macd_signal, rsi, stock_symbol):
    """Draw price, MACD and RSI as three stacked charts and show the figure.

    Args:
        data: Table whose index supplies the x-axis and whose ``price_col``
            column supplies the price curve.
        price_col: Name of the price column to plot.
        macd: MACD values aligned with ``data.index``.
        macd_signal: MACD signal-line values aligned with ``data.index``.
        rsi: RSI values aligned with ``data.index``.
        stock_symbol: Ticker symbol used in the chart titles.
    """
    figure, axes = plt.subplots(3, 1, figsize=(12, 12))
    price_ax, macd_ax, rsi_ax = axes
    x_values = data.index

    # Top panel: price
    price_ax.plot(x_values, data[price_col])
    price_ax.set_title(f"{stock_symbol} - {price_col}")
    price_ax.grid(True)

    # Middle panel: MACD with its signal line
    macd_ax.plot(x_values, macd, label='MACD', color='blue')
    macd_ax.plot(x_values, macd_signal, label='Signal Line', color='red')
    macd_ax.set_title(f"{stock_symbol} - MACD")
    macd_ax.legend()
    macd_ax.grid(True)

    # Bottom panel: RSI with dashed reference lines at 70 and 30
    rsi_ax.plot(x_values, rsi, label='RSI', color='green')
    for level in (70, 30):
        rsi_ax.axhline(y=level, color='r', linestyle='--')
    rsi_ax.set_title(f"{stock_symbol} - RSI")
    rsi_ax.legend()
    rsi_ax.grid(True)

    plt.tight_layout()
    plt.show()

def prepare_sequences(scaled_data, sequence_length):
    """Build sliding-window inputs and next-step targets for the LSTM.

    Window ``k`` holds rows ``k .. k + sequence_length - 1`` of
    ``scaled_data``; its target is column 0 of the row right after it.

    Args:
        scaled_data: 2-D array-like of shape ``(n_rows, n_features)`` with the
            value to predict in column 0.
        sequence_length: Number of consecutive rows per input window.

    Returns:
        tuple: ``(X, y)`` where ``X`` has shape
        ``(n_rows - sequence_length, sequence_length, n_features)`` and ``y``
        has shape ``(n_rows - sequence_length,)``. When there are not enough
        rows for a single window, both arrays are empty but ``X`` keeps its
        trailing dimensions so shape-based code still works.

    Raises:
        ValueError: if ``sequence_length`` is less than 1.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be a positive integer")

    scaled_data = np.asarray(scaled_data)
    n_rows = len(scaled_data)

    if n_rows <= sequence_length:
        # No complete window plus target exists. Return well-shaped empties
        # rather than 1-D ones so callers can still read X.shape[2].
        empty_X = np.empty(
            (0, sequence_length) + scaled_data.shape[1:], dtype=scaled_data.dtype
        )
        return empty_X, np.empty((0,), dtype=scaled_data.dtype)

    X = np.stack(
        [scaled_data[end - sequence_length:end] for end in range(sequence_length, n_rows)]
    )
    # Targets are column 0 of every row that has a full window before it.
    y = scaled_data[sequence_length:, 0].copy()
    return X, y

def build_lstm_model(sequence_length, n_features):
    """Assemble and compile the stacked-LSTM regression network.

    The stack is three LSTM layers (100, 100, 50 units), each followed by
    20% dropout, then a 25-unit dense layer and a single-unit output layer.
    It is compiled with the Adam optimizer and Huber loss.

    Args:
        sequence_length: Number of time steps in each input window.
        n_features: Number of features per time step.

    Returns:
        The compiled ``Sequential`` model.
    """
    network = Sequential()

    # First recurrent layer declares the (time steps, features) input shape.
    network.add(LSTM(100, return_sequences=True, input_shape=(sequence_length, n_features)))
    network.add(Dropout(0.2))

    network.add(LSTM(100, return_sequences=True))
    network.add(Dropout(0.2))

    # Last recurrent layer emits only its final state for the dense head.
    network.add(LSTM(50, return_sequences=False))
    network.add(Dropout(0.2))

    network.add(Dense(25))
    network.add(Dense(1))

    network.compile(optimizer='adam', loss='huber')
    return network

def _inverse_price(scaler, scaled_prices, n_features):
    """Convert scaled price values back to price units.

    The scaler was fitted on ``n_features`` columns with price in column 0,
    so the values are placed in column 0 of a zero-padded matrix purely to
    satisfy the scaler's expected shape; only column 0 is returned.
    """
    padded = np.zeros((len(scaled_prices), n_features))
    padded[:, 0] = np.asarray(scaled_prices).reshape(-1)
    return scaler.inverse_transform(padded)[:, 0]


def main():
    """Run the interactive pipeline: fetch, plot, train, predict, report.

    Steps: prompt for parameters, download the data, compute and plot MACD
    and RSI, build a scaled five-feature table (price, MACD, RSI, volume,
    high-low range), cut it into 60-step windows, train the LSTM on the first
    80% of windows, then plot predictions and print R² / RMSE for both splits.

    Returns early (after printing a message) when the download fails or when
    too few rows survive indicator warm-up to form a training and a test
    window.
    """
    sequence_length = 60

    print("Stock Price Prediction using LSTM\n")

    # Get user input
    stock_symbol, start_date, end_date, timeframe = get_user_input()

    # Fetch data
    data = fetch_stock_data(stock_symbol, start_date, end_date, timeframe)
    if data is None:
        return

    # Determine price column
    price_col = 'Adj Close' if 'Adj Close' in data.columns else 'Close'

    # Calculate indicators
    macd, macd_signal, rsi = calculate_technical_indicators(data, price_col)

    # Plot indicators
    plot_indicators(data, price_col, macd, macd_signal, rsi, stock_symbol)

    print("\nPreparing data for LSTM model...")

    # Ensure all data is 1-dimensional
    price = data[price_col].values.flatten()
    volume = data['Volume'].values.flatten()
    high_low_diff = (data['High'] - data['Low']).values.flatten()
    macd = macd.values.flatten()
    rsi = rsi.values.flatten()

    # Create DataFrame with flattened arrays
    df = pd.DataFrame(
        {
            'price': price,
            'macd': macd,
            'rsi': rsi,
            'volume': volume,
            'high_low_diff': high_low_diff
        },
        index=data.index
    )

    # Drop any rows with NaN values (e.g. the RSI warm-up rows)
    df = df.dropna()
    n_features = df.shape[1]

    # Each window consumes `sequence_length` rows plus one target row, and the
    # 80/20 split needs at least two windows to leave both sides non-empty.
    min_rows = sequence_length + 2
    if len(df) < min_rows:
        print(
            f"Not enough usable data for {stock_symbol} after computing indicators: "
            f"need at least {min_rows} rows, got {len(df)}."
        )
        return

    # Scale data
    # NOTE(review): the scaler is fitted on every row, including rows that end
    # up in the test split, so test-set min/max influence the training inputs.
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df)

    # Prepare sequences
    X, y = prepare_sequences(scaled_data, sequence_length=sequence_length)

    # Split data chronologically: first 80% of windows train, the rest test
    train_size = int(len(X) * 0.8)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    print("\nTraining LSTM model...")

    # Build and train model
    model = build_lstm_model(sequence_length=sequence_length, n_features=X.shape[2])
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # NOTE(review): the test split doubles as the validation set, so early
    # stopping selects weights using the same data the metrics are reported on.
    history = model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        batch_size=32,
        callbacks=[early_stopping],
        verbose=1
    )

    print("\nMaking predictions...")

    # Make predictions
    train_pred = model.predict(X_train)
    test_pred = model.predict(X_test)

    # Inverse transform predictions
    train_pred_prices = _inverse_price(scaler, train_pred, n_features)
    test_pred_prices = _inverse_price(scaler, test_pred, n_features)

    # Calculate actual prices
    train_actual_prices = _inverse_price(scaler, y_train, n_features)
    test_actual_prices = _inverse_price(scaler, y_test, n_features)

    # Plot results; test curves are offset so they continue after training
    test_start = len(train_actual_prices)
    plt.figure(figsize=(12, 6))
    plt.plot(train_actual_prices, label='Training Actual')
    plt.plot(train_pred_prices, label='Training Predicted')
    plt.plot(range(test_start, test_start + len(test_actual_prices)),
             test_actual_prices, label='Testing Actual')
    plt.plot(range(test_start, test_start + len(test_pred_prices)),
             test_pred_prices, label='Testing Predicted')
    plt.title(f'{stock_symbol} Stock Price Prediction')
    plt.legend()
    plt.grid(True)
    plt.show()

    # Calculate and display metrics
    train_r2 = r2_score(train_actual_prices, train_pred_prices)
    test_r2 = r2_score(test_actual_prices, test_pred_prices)
    train_rmse = np.sqrt(mean_squared_error(train_actual_prices, train_pred_prices))
    test_rmse = np.sqrt(mean_squared_error(test_actual_prices, test_pred_prices))

    print("\nModel Performance Metrics:")
    print(f"Training R² Score: {train_r2:.4f}")
    print(f"Testing R² Score: {test_r2:.4f}")
    print(f"Training RMSE: ${train_rmse:.2f}")
    print(f"Testing RMSE: ${test_rmse:.2f}")

# Start the interactive pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Stock Price Prediction using LSTM

