In [4]:
# Import libraries for data manipulation and visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Import libraries for model building
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Import libraries for ARIMA model
from statsmodels.tsa.arima.model import ARIMA

# For fetching financial data
import yfinance as yf


In [5]:
# Fetch historical data for Apple (AAPL) from Yahoo Finance
data = yf.download('AAPL', start='2020-01-01', end='2024-01-01')

# Display the first few rows of the dataset to understand its structure
data.head()


[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,74.059998,75.150002,73.797501,75.087502,72.876114,135480400
2020-01-03,74.287498,75.144997,74.125,74.357498,72.167603,146322800
2020-01-06,73.447502,74.989998,73.1875,74.949997,72.742668,118387200
2020-01-07,74.959999,75.224998,74.370003,74.597504,72.400536,108872000
2020-01-08,74.290001,76.110001,74.290001,75.797501,73.565208,132079200


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1006 entries, 2020-01-02 to 2023-12-29
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       1006 non-null   float64
 1   High       1006 non-null   float64
 2   Low        1006 non-null   float64
 3   Close      1006 non-null   float64
 4   Adj Close  1006 non-null   float64
 5   Volume     1006 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 55.0 KB


In [7]:
data.describe()


Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,1006.0,1006.0,1006.0,1006.0,1006.0,1006.0
mean,140.675507,142.321389,139.143536,140.808131,138.864225,98952110.0
std,33.310018,33.430571,33.179199,33.313857,33.576954,54396530.0
min,57.02,57.125,53.1525,56.092499,54.569733,24048300.0
25%,123.682503,125.030003,122.157499,123.592501,121.187897,64076750.0
50%,145.540001,147.264999,144.120003,145.860001,143.754524,84675400.0
75%,166.302498,168.147503,164.815002,166.214996,164.267044,115506900.0
max,198.020004,199.619995,197.0,198.110001,197.361084,426510000.0


In [8]:
data.corr()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
Open,1.0,0.999188,0.999027,0.997894,0.997758,-0.64462
High,0.999188,1.0,0.998884,0.999008,0.99884,-0.63571
Low,0.999027,0.998884,1.0,0.999073,0.998993,-0.656454
Close,0.997894,0.999008,0.999073,1.0,0.999861,-0.646689
Adj Close,0.997758,0.99884,0.998993,0.999861,1.0,-0.648157
Volume,-0.64462,-0.63571,-0.656454,-0.646689,-0.648157,1.0


In [9]:
# Select the 'Close' price column for prediction
close_prices = data['Close']

# Convert the close prices into a numpy array
close_prices = close_prices.values

# Reshape the data to be a 2D array
close_prices = close_prices.reshape(-1, 1)

# Scale the data to be within the range (0,1)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(close_prices)

# View the first few rows of the scaled data
scaled_data[:5]


array([[0.13375114],
       [0.1286109 ],
       [0.13278292],
       [0.13030088],
       [0.13875052]])

In [10]:
# Define the number of previous days to consider for prediction
look_back = 60

# Create empty lists to hold the training data
X_train, y_train = [], []

# Loop over the data to create sequences
for i in range(look_back, len(scaled_data)):
    X_train.append(scaled_data[i-look_back:i, 0])
    y_train.append(scaled_data[i, 0])

# Convert the training data into numpy arrays
X_train, y_train = np.array(X_train), np.array(y_train)

# Reshape the data to be in the format [samples, time steps, features]
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

# Display the shape of the training data
X_train.shape, y_train.shape


((946, 60, 1), (946,))

In [11]:
# Initialize the Sequential model
model = Sequential()

# Add the first LSTM layer with 50 units and return sequences for stacking
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))

# Add a second LSTM layer with 50 units
model.add(LSTM(units=50))

# Add the output layer with 1 unit for the predicted price
model.add(Dense(1))

# Compile the model with mean squared error loss and the Adam optimizer
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model on the training data for 1 epoch
model.fit(X_train, y_train, epochs=1, batch_size=1)


  super().__init__(**kwargs)


[1m946/946[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 18ms/step - loss: 0.0143


<keras.src.callbacks.history.History at 0x257a1992610>

In [12]:
# Fetch new data for prediction (e.g., the last 60 days)
test_data = scaled_data[-look_back:]

# Reshape the test data to match the input shape of the model
test_data = test_data.reshape(1, look_back, 1)

# Predict the next day's closing price
predicted_price = model.predict(test_data)

# Inverse transform the predicted price to the original scale
predicted_price = scaler.inverse_transform(predicted_price)

# Display the predicted price
predicted_price[0][0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 369ms/step


195.04662

In [13]:

# Save the trained model to a file
model.save('lstm_model.h5')

