In [8]:
pip install pandas-datareader




In [9]:

# Set up Yahoo Finance API
# Override the pandas datareader with Yahoo Finance
# Define the stock symbol and date range
# Fetch historical stock price data

# Display the first few rows of the dataset

import pandas as pd
from pandas_datareader import data as pdr
import yfinance as yf

yf.pdr_override()

stock_symbol = "AAPL"
start_date = "2022-01-01"
end_date = "2022-12-31"

stock_data = pdr.get_data_yahoo(stock_symbol, start=start_date, end=end_date)

print(stock_data.head())


[*********************100%%**********************]  1 of 1 completed
                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2022-01-03  177.830002  182.880005  177.710007  182.009995  180.190948   
2022-01-04  182.630005  182.940002  179.119995  179.699997  177.904053   
2022-01-05  179.610001  180.169998  174.639999  174.919998  173.171844   
2022-01-06  172.699997  175.300003  171.639999  172.000000  170.281006   
2022-01-07  172.889999  174.139999  171.029999  172.169998  170.449310   

               Volume  
Date                   
2022-01-03  104487900  
2022-01-04   99310400  
2022-01-05   94537600  
2022-01-06   96904000  
2022-01-07   86709100  


In [10]:
stock_data.fillna(stock_data.mean(), inplace=True)

In [11]:
# Min-Max Scaling: This method scales the data to a specific range, often between 0 and 1. It's suitable for features with clear boundaries, like stock prices.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(stock_data)


In [12]:
# Standardization (Z-Score Scaling): This method scales the data to have a mean of 0 and a standard deviation of 1. It's suitable for features that may not have clear boundaries.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
standardized_data = scaler.fit_transform(stock_data)


In [13]:
# Input sequence: OHLC prices and volumes for N days
 # Target sequence: Close price for the next day
import numpy as np

def create_sequences(data, input_seq_length):
    X, y = [], []
    for i in range(len(data) - input_seq_length):
        input_seq = data[i:i+input_seq_length, :]


        target = data[i+input_seq_length, -1]  # Assuming the Close price is the last column

        X.append(input_seq)
        y.append(target)

    return np.array(X), np.array(y)

input_seq_length = 10
X, y = create_sequences(scaled_data, input_seq_length)


In [15]:
# using LSTM with 64 units
from tensorflow.keras.layers import LSTM


def build_lstm_model(input_shape, lstm_units):
    model = Sequential()
    model.add(LSTM(lstm_units, input_shape=input_shape))
    model.add(Dense(1))

    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

lstm_model = build_lstm_model((input_seq_length, X.shape[2]), lstm_units=64)


In [16]:
# Define hyperparameters
batch_size = 64
learning_rate = 0.001
epochs = 50

# Compile the model
lstm_model.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adam(learning_rate=learning_rate))


In [17]:
from sklearn.model_selection import train_test_split

# Split data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


In [18]:
history = lstm_model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_val, y_val), verbose=2)


Epoch 1/50
3/3 - 2s - loss: 0.0685 - val_loss: 0.0456 - 2s/epoch - 794ms/step
Epoch 2/50
3/3 - 0s - loss: 0.0483 - val_loss: 0.0754 - 77ms/epoch - 26ms/step
Epoch 3/50
3/3 - 0s - loss: 0.0504 - val_loss: 0.0486 - 57ms/epoch - 19ms/step
Epoch 4/50
3/3 - 0s - loss: 0.0371 - val_loss: 0.0317 - 51ms/epoch - 17ms/step
Epoch 5/50
3/3 - 0s - loss: 0.0338 - val_loss: 0.0276 - 53ms/epoch - 18ms/step
Epoch 6/50
3/3 - 0s - loss: 0.0335 - val_loss: 0.0256 - 52ms/epoch - 17ms/step
Epoch 7/50
3/3 - 0s - loss: 0.0297 - val_loss: 0.0258 - 52ms/epoch - 17ms/step
Epoch 8/50
3/3 - 0s - loss: 0.0262 - val_loss: 0.0291 - 45ms/epoch - 15ms/step
Epoch 9/50
3/3 - 0s - loss: 0.0261 - val_loss: 0.0303 - 44ms/epoch - 15ms/step
Epoch 10/50
3/3 - 0s - loss: 0.0254 - val_loss: 0.0257 - 45ms/epoch - 15ms/step
Epoch 11/50
3/3 - 0s - loss: 0.0239 - val_loss: 0.0205 - 52ms/epoch - 17ms/step
Epoch 12/50
3/3 - 0s - loss: 0.0227 - val_loss: 0.0196 - 49ms/epoch - 16ms/step
Epoch 13/50
3/3 - 0s - loss: 0.0223 - val_loss: 0.

In [19]:
#Find accuracy on test data using MSE
from sklearn.metrics import mean_squared_error

y_pred = lstm_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("Test MSE:", mse)


Test MSE: 0.011446811965379694
