<a href="https://colab.research.google.com/github/Asmaaad37/ARCH-TECH-PROJECTS/blob/main/Proj_2_Stock_Price_Prediction_with_LSTM_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Stock Price Prediction with LSTM

LSTM stands for Long Short Term Memory Networks. It is a type of recurrent neural network that is commonly used for regression and time series forecasting in machine learning. It can memorize data for long periods, which differentiates LSTM neural networks from other neural networks.

In [1]:
# Importing Necessary Libraries.
import pandas as pd
import yfinance as yf
import datetime
from datetime import date, timedelta
today = date.today()

*Collecting the latest stock price data of Apple*

In [2]:
d1 = today.strftime("%Y-%m-%d")
end_date = d1
d2 = date.today() - timedelta(days=5000)
d2 = d2.strftime("%Y-%m-%d")
start_date = d2

In [7]:
data = yf.download("AAPL",
                   start=start_date,
                   end=end_date,
                   progress=False)
data["Date"] = data.index
data = data[["Date", "Open", "High", "Low", "Close", "Volume"]]
data.reset_index(drop=True, inplace=True)
data.tail()


YF.download() has changed argument auto_adjust default to True



Price,Date,Open,High,Low,Close,Volume
Ticker,Unnamed: 1_level_1,AAPL,AAPL,AAPL,AAPL,AAPL
3434,2025-07-21,212.100006,215.779999,211.630005,212.479996,51377400
3435,2025-07-22,213.139999,214.949997,212.229996,214.399994,46404100
3436,2025-07-23,215.0,215.149994,212.410004,214.149994,46989300
3437,2025-07-24,213.899994,215.690002,213.529999,213.759995,46022600
3438,2025-07-25,214.699997,215.240005,213.399994,213.880005,40219700


**Visualizing Apple Stock Price Analysis**

In [11]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3439 entries, 0 to 3438
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    3439 non-null   datetime64[ns]
 1   Open    3439 non-null   float64       
 2   High    3439 non-null   float64       
 3   Low     3439 non-null   float64       
 4   Close   3439 non-null   float64       
 5   Volume  3439 non-null   int64         
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 161.3 KB


In [10]:
import plotly.graph_objects as go

# Flatten column names
data.columns = [col[0] if isinstance(col, tuple) else col for col in data.columns]

# Ensure Date is a column (already is in your case, but make sure)
data.reset_index(drop=True, inplace=True)

# Plot candlestick
fig = go.Figure(data=[go.Candlestick(x=data["Date"],
                                     open=data["Open"],
                                     high=data["High"],
                                     low=data["Low"],
                                     close=data["Close"])])

fig.update_layout(title="Apple Stock Price Analysis",
                  xaxis_title="Date",
                  yaxis_title="Stock Price (USD)",
                  xaxis_rangeslider_visible=False)

fig.show()


# Moving Averages (e.g., 50-day and 200-day)

In [12]:
# Add moving averages to your data
data["MA50"] = data["Close"].rolling(window=50).mean()
data["MA200"] = data["Close"].rolling(window=200).mean()

# Add moving averages to the figure
fig.add_trace(go.Scatter(x=data["Date"], y=data["MA50"],
                         mode='lines', name='50-Day MA',
                         line=dict(color='blue', width=1.5)))

fig.add_trace(go.Scatter(x=data["Date"], y=data["MA200"],
                         mode='lines', name='200-Day MA',
                         line=dict(color='orange', width=1.5)))

**Now let’s have a look at the correlation of all the columns with the Close column as it is the target column:**

In [13]:
correlation = data.corr()
print(correlation["Close"].sort_values(ascending=False))

Close     1.000000
High      0.999884
Low       0.999879
Open      0.999747
MA50      0.995078
MA200     0.984912
Date      0.924651
Volume   -0.529883
Name: Close, dtype: float64


# Training LSTM for Stock Price Prediction

In [17]:
# Splitting
X = data[["Open", "High", "Low", "Volume"]]
y = data["Close"]
X = X.to_numpy()
y = y.to_numpy()
y = y.reshape(-1, 1)

In [18]:
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
# Preparing a simple NN Architecture for LSTM
from keras.models import Sequential
from keras.layers import Dense, LSTM
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape= (xtrain.shape[1], 1)))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))
model.summary()


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [20]:
# Training our NN
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(xtrain, ytrain, batch_size=1, epochs=30)

Epoch 1/30
[1m2751/2751[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 6ms/step - loss: 1973.6185
Epoch 2/30
[1m2751/2751[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 7ms/step - loss: 31.6772
Epoch 3/30
[1m2751/2751[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 6ms/step - loss: 14.3956
Epoch 4/30
[1m2751/2751[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 7ms/step - loss: 25.5695
Epoch 5/30
[1m2751/2751[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - loss: 14.9820
Epoch 6/30
[1m2751/2751[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - loss: 23.4465
Epoch 7/30
[1m2751/2751[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - loss: 24.8695
Epoch 8/30
[1m2751/2751[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 6ms/step - loss: 19.1661
Epoch 9/30
[1m2751/2751[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 7ms/step - loss: 16.1900
Epoch 10/30
[1m2751/2751[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x797c596aca90>

In [21]:
# Testing this model
import numpy as np
#features = [Open, High, Low, Adj Close, Volume]
features = np.array([[177.089996, 180.419998, 177.070007, 74919600]])
model.predict(features)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 635ms/step


array([[179.29782]], dtype=float32)