# 📉 Dimensionality Reduction for Stock Price Prediction using PCA + LSTM

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split
import yfinance as yf

In [None]:
# Download stock data
df = yf.download('AAPL', start='2018-01-01', end='2023-12-31')

# Fail early with a clear message if nothing came back; otherwise the
# problem would only surface later as an unrelated scaler/shape error.
if df.empty:
    raise RuntimeError(
        "No price data was downloaded for AAPL; "
        "check the ticker, date range, and network connection."
    )

# Keep the OHLCV columns (in this order) and drop incomplete rows.
# dropna() returns a new frame, so no column slice is mutated in place.
df = df[['Open', 'High', 'Low', 'Close', 'Volume']].dropna()

In [None]:
# Normalize data
# Fit the scaler and PCA on the leading (training) portion of the series only,
# then apply them to every row. Fitting on the full series would leak
# test-period statistics (min/max, principal axes) into the training features.
# The later train-test split is chronological (shuffle=False, test_size=0.2),
# so the first 80% of rows always lies inside the training windows.
train_fraction = 0.8  # keep in sync with test_size=0.2 in the train-test split
n_fit = int(len(df) * train_fraction)

scaler = MinMaxScaler()
scaler.fit(df.iloc[:n_fit])
# Rows after n_fit may fall slightly outside [0, 1]; that is expected.
scaled_data = scaler.transform(df)

# Apply PCA
pca = PCA(n_components=3)
pca.fit(scaled_data[:n_fit])
pca_data = pca.transform(scaled_data)

In [None]:
# Build sliding windows for the LSTM:
# each sample is the n_steps PCA rows that precede the target row.
n_steps = 60
window_ends = range(n_steps, len(pca_data))

X = np.array([pca_data[end - n_steps:end] for end in window_ends])
# Column 3 of scaled_data is 'Close' (columns are Open, High, Low, Close, Volume).
y = np.array([scaled_data[end, 3] for end in window_ends])

# Chronological hold-out: the last 20% of the windows become the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

In [None]:
# Two stacked LSTM layers followed by a single-unit regression head.
timesteps = X_train.shape[1]
n_features = X_train.shape[2]

model = Sequential([
    # The first LSTM returns the full sequence so the second LSTM can consume it.
    LSTM(50, return_sequences=True, input_shape=(timesteps, n_features)),
    LSTM(50),
    Dense(1),
])

model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, y_train, batch_size=32, epochs=10)

In [None]:
# Run the model on the held-out windows and compare against the true targets.
predictions = model.predict(X_test)
actual = y_test

# Draw both series on the current axes (values are in the scaler's normalized units).
ax = plt.gca()
ax.plot(actual, color='blue', label='Actual Close Price')
ax.plot(predictions, color='red', label='Predicted Close Price')
ax.set_title('Stock Price Prediction using PCA + LSTM')
ax.set_xlabel('Time')
ax.set_ylabel('Normalized Close Price')
ax.legend()
plt.show()