In [83]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import plotly.express as px

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense 

In [84]:
tickers = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'META', 'TSLA', 'NFLX', 'NVDA', 'INTC', 'AMD']

### Download 2 Years of Stock Data

In [85]:
data = {}

for ticker in tickers:
    df = yf.download(ticker, period="2y")
    df.dropna(inplace=True)
    data[ticker] = df[['Open', 'High', 'Low', 'Close', 'Volume']]

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Save Data to .csv

In [86]:
# Save the data in df to CSV
for ticker, df in data.items():
    df.to_csv(f'Stocks/{ticker}_data.csv')

Load Data from .csv

In [87]:
# Load the data from CSV
data = {}
for ticker in tickers:
    df = pd.read_csv(f'Stocks/{ticker}_data.csv', index_col=0, parse_dates=True)
    # Convert columns to numeric, coercing errors to NaN, and drop rows with NaN values
    df[['Open', 'High', 'Low', 'Close']] = df[['Open', 'High', 'Low', 'Close']].apply(pd.to_numeric, errors='coerce')
    df.dropna(subset=['Open', 'High', 'Low', 'Close', 'Volume'], inplace=True)
    data[ticker] = df[['Open', 'High', 'Low', 'Close', 'Volume']].astype({'Open': 'float64', 'High': 'float64', 'Low': 'float64', 'Close': 'float64', 'Volume': 'int64'})


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and a

In [88]:
fig = px.line()

for ticker, df in data.items():
    fig.add_scatter(x=df.index, y=df['Close'], mode='lines', name=ticker)

fig.update_layout(
    title="Closing Prices of 10 Stocks",
    xaxis_title="Date",
    yaxis_title="Close Price",
    legend_title="Stocks"
)

fig.show()

In [89]:
# Apply Percentage Change, Z-score Normalization
for ticker, df in data.items():
    df['Pct_Change'] = df['Close'].pct_change() * 100
    df['Z_Score'] = (df['Pct_Change'] - df['Pct_Change'].mean()) / df['Pct_Change'].std()

In [90]:
fig = px.line()

fig_pct = px.line()
fig_z = px.line()

for ticker, df in data.items():
    fig_pct.add_scatter(x=df.index, y=df['Pct_Change'], mode='lines', name=f'{ticker} - Pct Change')
    fig_z.add_scatter(x=df.index, y=df['Z_Score'], mode='lines', name=f'{ticker} - Z Score')

fig_pct.update_layout(
    title="Percentage Change for All Stocks",
    xaxis_title="Date",
    yaxis_title="Percentage Change",
    legend_title="Stocks"
)

fig_z.update_layout(
    title="Z Score for All Stocks",
    xaxis_title="Date",
    yaxis_title="Z Score",
    legend_title="Stocks"
)

fig_pct.show()
fig_z.show()

In [91]:
def add_technical_indicators(df):
    # Copy the dataframe to avoid modifying the original
    df_new = df.copy()
    
    # Moving averages
    df_new['MA5'] = df_new['Close'].rolling(window=5).mean()
    df_new['MA20'] = df_new['Close'].rolling(window=20).mean()
    df_new['MA50'] = df_new['Close'].rolling(window=50).mean()
    
    # Price momentum
    df_new['Price_Change'] = df_new['Close'].pct_change()
    df_new['Price_Change_5d'] = df_new['Close'].pct_change(periods=5)
    
    # Volatility
    df_new['Volatility_5d'] = df_new['Close'].pct_change().rolling(window=5).std()
    
    # Volume indicators
    df_new['Volume_Change'] = df_new['Volume'].pct_change()
    df_new['Volume_MA5'] = df_new['Volume'].rolling(window=5).mean()
    
    # Relative strength index (simplified)
    delta = df_new['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df_new['RSI'] = 100 - (100 / (1 + rs))
    
    # MACD
    exp1 = df_new['Close'].ewm(span=12, adjust=False).mean()
    exp2 = df_new['Close'].ewm(span=26, adjust=False).mean()
    df_new['MACD'] = exp1 - exp2
    df_new['MACD_Signal'] = df_new['MACD'].ewm(span=9, adjust=False).mean()
    
    # Drop NaN values
    df_new.dropna(inplace=True)
    
    return df_new

# Apply technical indicators to all stocks
for ticker in tickers:
    data[ticker] = add_technical_indicators(data[ticker])

In [92]:
# Define the technical indicators to plot
technical_indicators = ['MA5', 'MA20', 'MA50', 'Price_Change', 'Price_Change_5d', 'Volatility_5d', 'Volume_Change', 'Volume_MA5', 'RSI', 'MACD', 'MACD_Signal']

# Iterate through each technical indicator and plot
for indicator in technical_indicators:
    fig = px.line()
    for ticker, df in data.items():
        if indicator in df.columns:
            fig.add_scatter(x=df.index, y=df[indicator], mode='lines', name=f'{ticker} - {indicator}')
    
    fig.update_layout(
        title=f"{indicator} for All Stocks",
        xaxis_title="Date",
        yaxis_title=indicator,
        legend_title="Stocks"
    )
    
    fig.show()

In [93]:
# Define the technical indicators to plot
indicators_to_plot = ['Close', 'MA5', 'MA20', 'MA50', 'Price_Change', 'Price_Change_5d', 'Volatility_5d', 'Volume_Change', 'Volume_MA5', 'RSI', 'MACD', 'MACD_Signal']

# Iterate through each stock and plot the selected indicators
for ticker in tickers:
    fig = px.line()
    for indicator in indicators_to_plot:
        if indicator in data[ticker].columns:
            if indicator == 'Volume_MA5':
                fig.add_scatter(x=data[ticker].index, y=data[ticker][indicator] / 1_000_000, mode='lines', name=f'{ticker} - {indicator} (in Millions)')
            else:
                fig.add_scatter(x=data[ticker].index, y=data[ticker][indicator], mode='lines', name=f'{ticker} - {indicator}')
    
    # Update layout
    fig.update_layout(
        title=f"{ticker} - Selected Technical Indicators",
        xaxis_title="Date",
        yaxis_title="Value",
        legend_title="Indicators"
    )
    
    # Show the plot
    fig.show()


Build Dataset

In [94]:
# Prepare the dataset for LSTM
def prepare_lstm_data(df, features, target, lookback=60):
    data = df[features + [target]].dropna()
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(data)
    
    X, y = [], []
    for i in range(lookback, len(scaled_data)):
        X.append(scaled_data[i-lookback:i, :-1])  # Features
        y.append(scaled_data[i, -1])  # Target
    
    X, y = np.array(X), np.array(y)
    return X, y, scaler

# Train and evaluate LSTM for each stock
results = {}
lookback = 60
features = technical_indicators
target = 'Close'

for ticker in tickers:
    df = data[ticker]
    X, y, scaler = prepare_lstm_data(df, features, target, lookback)
    
    # Split into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)
    
    # Build LSTM model
    model = Sequential([
        LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
        LSTM(50, return_sequences=False),
        Dense(25),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    # Train the model
    model.fit(X_train, y_train, batch_size=32, epochs=10, verbose=0)
    
    # Evaluate the model
    predictions = model.predict(X_test)
    predictions = scaler.inverse_transform(np.concatenate([np.zeros((predictions.shape[0], len(features))), predictions], axis=1))[:, -1]
    y_test_actual = scaler.inverse_transform(np.concatenate([np.zeros((y_test.shape[0], len(features))), y_test.reshape(-1, 1)], axis=1))[:, -1]
    
    # Calculate RMSE
    rmse = np.sqrt(np.mean((predictions - y_test_actual) ** 2))
    results[ticker] = rmse

# Compare results
for ticker, rmse in results.items():
    print(f"{ticker}: RMSE = {rmse}")


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
AAPL: RMSE = 15.998531039657406
GOOGL: RMSE = 7.245227484208112
MSFT: RMSE = 15.229704245542369
AMZN: RMSE = 7.642323882019205
META: RMSE = 42.69238912523588
TSLA: RMSE = 32.34672432807931
NFLX: RMSE = 65.86409824993648
NVDA: RMSE = 9.799113639284965
INTC: RMSE = 2.9313379599212466
AMD: RMSE = 12.186310223142915


In [95]:
# Predict future prices using the trained LSTM model
def predict_future_prices(model, data, scaler, lookback, n_days):
    # Get the most recent data for prediction
    recent_data = data[-lookback:]
    scaled_recent_data = scaler.transform(recent_data)

    # Prepare the input for prediction
    X_input = np.array([scaled_recent_data[:, :-1]])  # Exclude the target column

    # Predict future prices
    future_prices = []
    for _ in range(n_days):
        prediction = model.predict(X_input, verbose=0)
        future_prices.append(prediction[0, 0])

        # Update the input with the new prediction
        new_row = np.append(X_input[0, -1, :-1], prediction[0, 0])
        X_input = np.append(X_input[:, 1:, :], [[new_row]], axis=1)

    # Inverse transform the predictions to get actual prices
    future_prices = scaler.inverse_transform(
        np.concatenate([np.zeros((n_days, recent_data.shape[1] - 1)), np.array(future_prices).reshape(-1, 1)], axis=1)
    )[:, -1]

    return future_prices

n_days = 30  # Number of days to predict

# Predict future prices for each ticker
for ticker in tickers:
    df = data[ticker]
    features = technical_indicators
    target = 'Close'

    recent_data = df[features + [target]].dropna().values
    future_prices = predict_future_prices(model, recent_data, scaler, lookback, n_days)

    # Print the predicted future prices
    # print(f"Predicted future prices for {ticker} for the next {n_days} days:")
    # print(future_prices)


X does not have valid feature names, but MinMaxScaler was fitted with feature names


X does not have valid feature names, but MinMaxScaler was fitted with feature names


X does not have valid feature names, but MinMaxScaler was fitted with feature names


X does not have valid feature names, but MinMaxScaler was fitted with feature names


X does not have valid feature names, but MinMaxScaler was fitted with feature names


X does not have valid feature names, but MinMaxScaler was fitted with feature names


X does not have valid feature names, but MinMaxScaler was fitted with feature names


X does not have valid feature names, but MinMaxScaler was fitted with feature names


X does not have valid feature names, but MinMaxScaler was fitted with feature names


X does not have valid feature names, but MinMaxScaler was fitted with feature names



In [96]:
# Plot the predicted future prices
for ticker in tickers:
    df = data[ticker]
    features = technical_indicators
    target = 'Close'

    recent_data = df[features + [target]].dropna().values
    future_prices = predict_future_prices(model, recent_data, scaler, lookback, n_days)

    # Create a date range for the future prices
    last_date = pd.to_datetime(df.index[-1])
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=n_days)

    # Plot the predicted future prices
    fig = px.line()
    fig.add_scatter(x=df.index, y=df['Close'], mode='lines', name=f'{ticker} - Historical Prices')
    fig.add_scatter(x=future_dates, y=future_prices, mode='lines', name=f'{ticker} - Predicted Future Prices')

    fig.update_layout(
        title=f"{ticker} - Predicted Future Prices for the Next {n_days} Days",
        xaxis_title="Date",
        yaxis_title="Price",
        legend_title="Price Type"
    )

    fig.show()



X does not have valid feature names, but MinMaxScaler was fitted with feature names




X does not have valid feature names, but MinMaxScaler was fitted with feature names




X does not have valid feature names, but MinMaxScaler was fitted with feature names




X does not have valid feature names, but MinMaxScaler was fitted with feature names




X does not have valid feature names, but MinMaxScaler was fitted with feature names




X does not have valid feature names, but MinMaxScaler was fitted with feature names




X does not have valid feature names, but MinMaxScaler was fitted with feature names




X does not have valid feature names, but MinMaxScaler was fitted with feature names




X does not have valid feature names, but MinMaxScaler was fitted with feature names




X does not have valid feature names, but MinMaxScaler was fitted with feature names

