# Stock Price Prediction System

This notebook implements an advanced stock price prediction system using machine learning models, including an XGBoost ensemble and LSTM neural networks.

## Import Libraries

In [1]:
!pip install yfinance xgboost tensorflow scikit-learn pandas numpy matplotlib plotly seaborn --quiet



In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

In [3]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split, TimeSeriesSplit
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import Ridge, ElasticNet

In [4]:
# TensorFlow is treated as optional: LSTM_AVAILABLE records whether the
# imports below succeeded so later code can check the flag.
try:
    import tensorflow as tf
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, GRU
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
except ImportError:
    LSTM_AVAILABLE = False
    print("TensorFlow not available. LSTM model will be skipped.")
else:
    LSTM_AVAILABLE = True
    print("TensorFlow available for LSTM models")

TensorFlow available for LSTM models


In [5]:
from datetime import datetime, timedelta

## Configuration

In [6]:
# Configuration parameters
# NOTE(review): none of these constants is referenced by the cells visible in
# this notebook yet; presumably they feed the main execution once it exists.
STOCK_TICKER = "AAPL"  # Default stock ticker
MODEL_CHOICE = 1  # 1: XGBoost, 2: LSTM, 3: Both (same numbering as the get_model_choice menu)
FORECAST_HORIZON = 1  # Days to predict ahead

## Data Fetching Functions

In [None]:
## Data Fetching Functions

def fetch_stock_data(ticker_symbol, period="max"):
    """Fetch price history and the company name for a ticker from Yahoo Finance.

    Args:
        ticker_symbol: Ticker to look up, e.g. "AAPL".
        period: History window forwarded to ``Ticker.history`` (default "max").

    Returns:
        ``(data, company_name)`` where ``data`` is the frame returned by
        ``Ticker.history``, or ``None`` when that frame is empty. Callers
        must check for ``None`` before unpacking.
    """
    ticker = yf.Ticker(ticker_symbol)
    data = ticker.history(period=period)

    if data.empty:
        print(f"No data found for {ticker_symbol}")
        return None

    # The company name is only used for display. If reading ``ticker.info``
    # fails, or it has no usable 'longName', fall back to the ticker symbol
    # instead of discarding the price data that was already fetched.
    try:
        info = ticker.info or {}
        company_name = info.get('longName') or ticker_symbol
    except Exception as exc:
        print(f"Could not fetch company info for {ticker_symbol}: {exc}")
        company_name = ticker_symbol

    print(f"\nFetched data for {company_name} ({ticker_symbol})")
    print(f"Data range: {data.index[0].date()} to {data.index[-1].date()}")
    print(f"Total trading days: {len(data)}")

    return data, company_name

def get_valid_ticker():
    """Prompt until the user enters a ticker for which Yahoo Finance returns data.

    Returns:
        The accepted ticker, stripped of surrounding whitespace and upper-cased.
    """
    while True:
        stock = input("Enter a valid stock ticker (e.g., AAPL, TSLA, MSFT): ").strip().upper()
        if not stock:
            # Blank input cannot be a ticker; re-prompt without a lookup.
            print("Invalid ticker. Please try again.")
            continue
        try:
            test = yf.Ticker(stock)
            if test.history(period="1d").empty:
                print("Invalid ticker. Please try again.")
            else:
                return stock
        except Exception as e:
            # Best-effort validation: report the problem and ask again.
            print(f"Error validating ticker: {e}")
            print("Invalid input. Please try again.")

def get_model_choice():
    """Print the model menu and prompt until the user enters 1, 2 or 3.

    Returns:
        The selected option as an int.
    """
    menu_lines = (
        "\nChoose prediction model:",
        "1. XGBoost (Less time, High accuracy)",
        "2. LSTM (More time, Most accuracy)",
        "3. Both (Conclusion with both models)",
    )
    for menu_line in menu_lines:
        print(menu_line)

    accepted = ('1', '2', '3')
    answer = input("Enter your choice (1/2/3): ")
    # Anything other than an exact '1', '2' or '3' triggers another prompt.
    while answer not in accepted:
        print("Invalid choice. Please enter 1, 2, or 3.")
        answer = input("Enter your choice (1/2/3): ")
    return int(answer)

## Data Exploration

# Smoke-test the fetcher on one year of AAPL data and print a quick overview.
print("Testing data fetching...")
sample_data = fetch_stock_data("AAPL", "1y")
if sample_data is not None:
    data, company_name = sample_data
    print(f"\nSample data shape: {data.shape}")
    print(f"Columns: {list(data.columns)}")
    # Each preview is built only when its turn comes, right after its heading.
    for heading, make_preview in (
        ("\nFirst few rows:", data.head),
        ("\nLast few rows:", data.tail),
        ("\nBasic statistics:", data.describe),
    ):
        print(heading)
        print(make_preview())

## Basic Visualization

# Plot the closing price for the sample data, if any was fetched.
# The figure is created only inside the data branch; creating it up front
# leaves an empty figure in the output when there is nothing to plot.
if sample_data is not None:
    # Unpack both values here so the title does not rely on a name left over
    # from an earlier block.
    data, company_name = sample_data
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(data.index, data['Close'], label='Close Price', linewidth=2)
    ax.set_title(f'{company_name} Stock Price - Last Year')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price ($)')
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()
else:
    print("No data available for visualization")

Testing data fetching...


$AAPL: possibly delisted; no price data found  (period=1y)


No data found for AAPL
No data available for visualization


<Figure size 1200x600 with 0 Axes>

## Technical Indicators

In [14]:
## Technical Indicators

def calculate_rsi(prices, window=14):
    """Relative Strength Index from simple rolling means of gains and losses.

    Args:
        prices: Price series (must support ``diff``, ``where`` and ``rolling``).
        window: Length of the rolling window used for both averages.

    Returns:
        ``100 - 100 / (1 + avg_gain / avg_loss)`` evaluated element-wise.
    """
    change = prices.diff()
    # Entries that do not satisfy the condition (including the leading NaN
    # produced by diff) are replaced with 0 before averaging.
    ups = change.where(change > 0, 0)
    downs = change.where(change < 0, 0)
    avg_gain = ups.rolling(window=window).mean()
    avg_loss = (-downs).rolling(window=window).mean()
    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))

def calculate_macd(prices, fast=12, slow=26, signal=9):
    """MACD line, signal line and histogram from exponentially weighted means.

    Args:
        prices: Price series (must support ``ewm``).
        fast: Span of the fast EMA.
        slow: Span of the slow EMA.
        signal: Span of the EMA applied to the MACD line.

    Returns:
        Tuple ``(macd, macd_signal, macd_hist)`` where ``macd`` is fast EMA
        minus slow EMA and ``macd_hist`` is ``macd - macd_signal``.
    """
    def smooth(series, span):
        # Exponentially weighted mean with pandas' default ewm settings.
        return series.ewm(span=span).mean()

    macd_line = smooth(prices, fast) - smooth(prices, slow)
    signal_line = smooth(macd_line, signal)
    return macd_line, signal_line, macd_line - signal_line

def calculate_bollinger_bands(prices, window=20, num_std=2):
    """Bollinger Bands: a rolling mean plus/minus a multiple of the rolling std.

    Args:
        prices: Price series (must support ``rolling``).
        window: Rolling window length for the mean and standard deviation.
        num_std: Number of standard deviations between the mean and each band.

    Returns:
        Tuple ``(upper, middle, lower)``; ``middle`` is the rolling mean.
    """
    roller = prices.rolling(window=window)
    middle = roller.mean()
    band_offset = roller.std() * num_std
    return middle + band_offset, middle, middle - band_offset

def calculate_stochastic(high, low, close, k_window=14, d_window=3):
    """Stochastic Oscillator (%K and its rolling mean %D).

    Args:
        high: Series of highs.
        low: Series of lows.
        close: Series of closes.
        k_window: Window for the rolling lowest low / highest high.
        d_window: Window for the rolling mean applied to %K.

    Returns:
        Tuple ``(stoch_k, stoch_d)``; %K is the close's position inside the
        rolling low-high range, scaled by 100.
    """
    floor = low.rolling(window=k_window).min()
    ceiling = high.rolling(window=k_window).max()
    percent_k = 100 * ((close - floor) / (ceiling - floor))
    percent_d = percent_k.rolling(window=d_window).mean()
    return percent_k, percent_d

def calculate_atr(high, low, close, window=14):
    """Average True Range as a simple rolling mean of the true range.

    Args:
        high: Series of highs.
        low: Series of lows.
        close: Series of closes.
        window: Rolling window length for the mean.

    Returns:
        Rolling mean of the row-wise maximum of ``high - low``,
        ``|high - previous close|`` and ``|low - previous close|``.
    """
    prev_close = close.shift()
    candidates = pd.concat(
        [
            high - low,
            (high - prev_close).abs(),
            (low - prev_close).abs(),
        ],
        axis=1,
    )
    true_range = candidates.max(axis=1)
    return true_range.rolling(window=window).mean()

def calculate_williams_r(high, low, close, window=14):
    """Williams %R over a rolling high/low range.

    Args:
        high: Series of highs.
        low: Series of lows.
        close: Series of closes.
        window: Window for the rolling highest high / lowest low.

    Returns:
        ``-100 * (highest_high - close) / (highest_high - lowest_low)``.
    """
    top = high.rolling(window=window).max()
    bottom = low.rolling(window=window).min()
    return -100 * ((top - close) / (top - bottom))

def add_technical_indicators(data):
    """Return a copy of ``data`` with derived price, volume and indicator columns.

    Args:
        data: DataFrame with 'Open', 'High', 'Low', 'Close' and 'Volume' columns.

    Returns:
        A new DataFrame; the input frame is not modified. Rolling-window
        columns hold NaN until their window is filled.
    """
    df = data.copy()
    open_, high, low = df['Open'], df['High'], df['Low']
    close, volume = df['Close'], df['Volume']

    # Basic price features
    df['Returns'] = close.pct_change()
    df['Log_Returns'] = np.log(close / close.shift(1))
    df['High_Low_Pct'] = (high - low) / close
    df['Open_Close_Pct'] = (close - open_) / open_
    df['Price_Volume'] = close * volume
    df['Volume_Rate'] = volume / volume.rolling(window=20).mean()

    # Simple and exponential moving averages, plus the close relative to each
    for span in (3, 5, 10, 20, 50, 100):
        simple_avg = close.rolling(window=span).mean()
        df[f'MA_{span}'] = simple_avg
        df[f'MA_{span}_ratio'] = close / simple_avg
        exp_avg = close.ewm(span=span).mean()
        df[f'EMA_{span}'] = exp_avg
        df[f'EMA_{span}_ratio'] = close / exp_avg
        df[f'Close_MA_{span}_diff'] = close - simple_avg

    # RSI at the helper's default window and at two explicit windows
    df['RSI'] = calculate_rsi(close)
    for rsi_window in (7, 21):
        df[f'RSI_{rsi_window}'] = calculate_rsi(close, rsi_window)

    macd_line, macd_signal, macd_hist = calculate_macd(close)
    df['MACD'] = macd_line
    df['MACD_Signal'] = macd_signal
    df['MACD_Hist'] = macd_hist

    band_upper, band_middle, band_lower = calculate_bollinger_bands(close)
    df['BB_Upper'] = band_upper
    df['BB_Middle'] = band_middle
    df['BB_Lower'] = band_lower
    band_range = band_upper - band_lower
    df['BB_Width'] = band_range / band_middle
    df['BB_Position'] = (close - band_lower) / band_range

    stoch_k, stoch_d = calculate_stochastic(high, low, close)
    df['Stoch_K'] = stoch_k
    df['Stoch_D'] = stoch_d
    df['ATR'] = calculate_atr(high, low, close)
    df['Williams_R'] = calculate_williams_r(high, low, close)

    # Volume and volatility indicators
    for span in (5, 10, 20):
        volume_window = volume.rolling(window=span)
        df[f'Vol_MA_{span}'] = volume_window.mean()
        df[f'Vol_Std_{span}'] = volume_window.std()
        df[f'Price_Volatility_{span}'] = close.rolling(window=span).std()
        df[f'High_MA_{span}'] = high.rolling(window=span).mean()
        df[f'Low_MA_{span}'] = low.rolling(window=span).mean()

    return df

## Test Technical Indicators

# Smoke-test the indicator pipeline on the sample data, when it was fetched.
if sample_data is not None:
    data, company_name = sample_data
    print("Adding technical indicators...")
    data_with_indicators = add_technical_indicators(data)
    print(f"Dataset shape after adding indicators: {data_with_indicators.shape}")
    print(f"New columns added: {len(data_with_indicators.columns) - len(data.columns)}")

    # Show the tail of a few representative indicators that are present.
    print("\nSample technical indicators:")
    indicator_cols = ['RSI', 'MACD', 'BB_Position', 'Stoch_K', 'ATR']
    available_cols = []
    for col in indicator_cols:
        if col in data_with_indicators.columns:
            available_cols.append(col)
    if len(available_cols) > 0:
        print(data_with_indicators[available_cols].tail())

## Model Training Functions

In [9]:
def train_xgboost_model(data, forecast_horizon=1):
    """Placeholder for XGBoost ensemble training (not implemented yet).

    Args:
        data: Currently unused.
        forecast_horizon: Currently unused.

    Returns:
        None.
    """
    # TODO: Implement XGBoost training
    return None

In [10]:
def train_lstm_model(data, forecast_horizon=1):
    """Placeholder for LSTM training (not implemented yet).

    Args:
        data: Currently unused.
        forecast_horizon: Currently unused.

    Returns:
        None.
    """
    # TODO: Implement LSTM training
    return None

## Visualization Functions

In [11]:
def create_interactive_plot(data_with_signals, company_name, ticker_symbol):
    """Placeholder for the interactive Plotly visualization (not implemented yet).

    Args:
        data_with_signals: Currently unused.
        company_name: Currently unused.
        ticker_symbol: Currently unused.

    Returns:
        None.
    """
    # TODO: Implement interactive plotting
    return None

## Main Execution

In [12]:
# Main execution will be implemented in later commits
# For now this cell only prints a fixed message; it does not call any of the
# functions defined above.
print("Stock Prediction System initialized")

Stock Prediction System initialized
