**Project outline:**
- Load data
- Preprocess it
- Feature engineering
- Build LSTM model (Long Short-Term Memory)
- Train it
- Evaluate and predict
- Insights

In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import yfinance as yf

%matplotlib qt

# **Download the data**

In [10]:
# Download configuration: the symbol and the date window handed to yfinance.
ticker = 'AAPL'  # swap in any ticker symbol you want to model
start_date = '2015-01-01'
end_date = '2025-01-01'

# Fetch the price history for that window and preview the first rows.
df = yf.download(ticker, start=start_date, end=end_date)
print(df.head())

Downloading stock data...


[*********************100%***********************]  1 of 1 completed

Price           Close       High        Low       Open     Volume
Ticker           AAPL       AAPL       AAPL       AAPL       AAPL
Date                                                             
2015-01-02  24.347172  24.817057  23.906236  24.805922  212818400
2015-01-05  23.661270  24.195737  23.474208  24.115567  257142000
2015-01-06  23.663496  23.924048  23.300503  23.725850  263188400
2015-01-07  23.995319  24.095531  23.761490  23.872837  160423600
2015-01-08  24.917269  24.975170  24.206873  24.324903  237458000





In [11]:
# Display the last rows of the downloaded frame.
df.tail()

Price,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2024-12-24,258.200012,258.209991,255.289993,255.490005,23234700
2024-12-26,259.019989,260.100006,257.630005,258.190002,27237100
2024-12-27,255.589996,258.700012,253.059998,257.829987,42355300
2024-12-30,252.199997,253.5,250.75,252.229996,35557500
2024-12-31,250.419998,253.279999,249.429993,252.440002,39480700


# **Preprocess the data**

In [12]:
# Keep only the 'Close' price and drop rows with missing values.
# The selection and dropna are chained so a fresh frame is produced; calling
# dropna(inplace=True) on a column subset of the downloaded frame can trigger
# pandas' SettingWithCopyWarning.
df = df[['Close']].dropna()

Preprocessing data...


In [13]:
# Rescale the closing prices into the [0, 1] range.
# The fitted scaler is kept so predictions can be mapped back to prices later.
# NOTE(review): the scaler is fit on the full series, before the train/test
# split further down, so the min/max include rows that end up in the test set.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df)
scaled_df = scaler.transform(df)

# **Create sequences for training**

In [14]:
# Each sample is a window of `sequence_length` consecutive scaled values;
# its target is the value that immediately follows the window.
sequence_length = 60  # number of past days fed to the model per prediction

# Index of the target day for every sample (first usable target is day 60).
target_positions = range(sequence_length, len(scaled_df))

X = np.array([scaled_df[pos - sequence_length:pos, 0] for pos in target_positions])
y = np.array([scaled_df[pos, 0] for pos in target_positions])

# **Splitting the data**

In [18]:
# Chronological hold-out: the first 80% of windows train the model and the
# last 20% are reserved for testing.
# shuffle=False matters here: train_test_split shuffles by default, which
# would mix windows from the future into the training set (overlapping windows
# share most of their values) and would leave the test samples in random
# order, so plotting them against a time axis would not be meaningful.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Reshape for LSTM input: add a trailing feature axis of size 1 so each array
# is (samples, timesteps, 1).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

Splitting data...


# **Build LSTM model**

In [19]:
# Two stacked LSTM layers followed by a single-unit dense output.
# The first LSTM returns its full output sequence so the second LSTM
# receives one vector per timestep.
timesteps = X_train.shape[1]
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(timesteps, 1)),
    LSTM(units=50),
    Dense(units=1),
])

# Adam optimizer with mean squared error as the training loss.
model.compile(optimizer='adam', loss='mean_squared_error')

# **Train the model**

In [20]:
# Train for 10 epochs using mini-batches of 32 samples.
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1dbf42f3e50>

# **Make a prediction**

In [21]:
# Predict on the held-out windows; the model outputs values on the scaled
# axis, so map them back to price units with the fitted scaler.
scaled_predictions = model.predict(X_test)
predicted_prices = scaler.inverse_transform(scaled_predictions)



# **Results**

In [28]:
# Overlay the actual and predicted prices for the test samples.
fig, ax = plt.subplots(figsize=(14, 6))

# Targets are stored scaled; convert them back to price units (needs a column vector).
actual_prices = scaler.inverse_transform(y_test.reshape(-1, 1))
ax.plot(range(len(actual_prices)), actual_prices, color='blue', label='Actual Prices')

# Flatten predictions to 1D so the x and y shapes line up when plotting,
# avoiding mismatches such as (492,) versus (1, 492).
predicted_prices = predicted_prices.flatten()
ax.plot(range(len(predicted_prices)), predicted_prices, color='red', label='Predicted Prices')

ax.set_title(f'{ticker} Stock Price Prediction')
ax.set_xlabel('Time')
ax.set_ylabel('Stock Price')
ax.legend()
plt.show()