# In [None]:  (notebook cell marker left over from the export)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import yfinance as yf

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Download three years of daily AAPL prices.
# auto_adjust=False is requested explicitly: yfinance releases that adjust
# prices by default do not return the 'Adj Close' column used below.
data = yf.download(
    "AAPL", start="2021-01-01", end="2024-01-01", auto_adjust=False
)

# Some yfinance versions return (field, ticker) MultiIndex columns even for a
# single ticker; flatten them so data['Adj Close'] is a plain Series.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# Keep only the adjusted close, as a one-column DataFrame.
data = data[['Adj Close']]

# Outside a notebook a bare expression displays nothing, so print the
# sanity checks explicitly.
print(data.head())
print(data.isnull().sum())

# Chronological 80/20 split (no shuffling: this is a time series).
train_size = int(len(data) * 0.8)

train = data[:train_size]
test = data[train_size:]

# Augmented Dickey-Fuller test on the training prices.
adf_result = adfuller(train['Adj Close'])

print("ADF Statistic:", adf_result[0])
print("p-value:", adf_result[1])

# ARIMA parameters selected using ACF & PACF
arima_model = ARIMA(train['Adj Close'], order=(5, 1, 0))
arima_fitted = arima_model.fit()
# Multi-step forecast covering the whole test period in one go.
arima_predictions = arima_fitted.forecast(steps=len(test))

# Fit the scaler on the training rows only, so that the test period's
# min/max do not leak into the features the LSTM is trained on; then apply
# that same transform to the full series. Test values may fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0,1))
scaler.fit(train)
scaled_data = scaler.transform(data)
def create_sequences(data, window_size=60):
    """Build sliding-window samples from the first column of ``data``.

    Sample ``k`` consists of rows ``k .. k + window_size - 1`` of column 0
    as the input window and row ``k + window_size`` of column 0 as the target.

    Args:
        data: 2-D array-like of shape ``(n_rows, n_columns)``; only column 0
            is used.
        window_size: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        ``(n_samples, window_size)`` and ``y`` has shape ``(n_samples,)``,
        with ``n_samples = max(n_rows - window_size, 0)``. When there are not
        enough rows for a single window, ``X`` still has shape
        ``(0, window_size)`` so downstream reshapes keep working.

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError("window_size must be non-negative")

    values = np.asarray(data)
    series = values[:, 0]

    n_samples = max(len(series) - window_size, 0)

    # Row k of the index matrix is [k, k + 1, ..., k + window_size - 1];
    # fancy indexing then gathers every window in one vectorized step.
    starts = np.arange(n_samples)[:, None]
    offsets = np.arange(window_size)[None, :]
    X = series[starts + offsets]

    # Copy so the returned targets do not alias the caller's array.
    y = series[window_size:window_size + n_samples].copy()
    return X, y

# Build the supervised (window -> next value) samples from the scaled series.
X, y = create_sequences(scaled_data)

# Sample k predicts row k + window_size of the original series, so the
# boundary between training and test samples sits at
# train_size - window_size, not at train_size. Cutting at train_size would
# put the first window_size test-period targets into the training set and
# leave the LSTM test set shorter than the ARIMA one.
window_size = X.shape[1]
split_idx = max(train_size - window_size, 0)

X_train = X[:split_idx]
X_test = X[split_idx:]

y_train = y[:split_idx]
y_test = y[split_idx:]

# Keras LSTM layers expect (samples, timesteps, features); add the trailing
# single-feature axis.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Two stacked LSTM layers followed by a single-unit regression head.
lstm_model = Sequential()
lstm_model.add(LSTM(50, return_sequences=True, input_shape=(window_size, 1)))
lstm_model.add(LSTM(50))
lstm_model.add(Dense(1))

lstm_model.compile(optimizer='adam', loss='mean_squared_error')

# NOTE: the test set doubles as validation data here. It is only used to
# record val_loss per epoch (no early stopping or model selection).
history = lstm_model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test, y_test)
)
lstm_predictions = lstm_model.predict(X_test)

# Map predictions and targets back from the scaled range to price units.
lstm_predictions = scaler.inverse_transform(lstm_predictions)
y_test_actual = scaler.inverse_transform(y_test.reshape(-1,1))
def evaluate_model(actual, predicted):
    """Score predictions against ground truth.

    Args:
        actual: Observed values.
        predicted: Predicted values, in the same order as ``actual``.

    Returns:
        A ``(mae, mse, rmse)`` tuple: mean absolute error, mean squared
        error, and the square root of the mean squared error.
    """
    abs_error = mean_absolute_error(actual, predicted)
    sq_error = mean_squared_error(actual, predicted)
    return abs_error, sq_error, np.sqrt(sq_error)


# --- Error metrics -----------------------------------------------------------
arima_mae, arima_mse, arima_rmse = evaluate_model(
    test['Adj Close'], arima_predictions
)


lstm_mae, lstm_mse, lstm_rmse = evaluate_model(
    y_test_actual, lstm_predictions
)

print("ARIMA Performance:")
print("MAE:", arima_mae)
print("MSE:", arima_mse)
print("RMSE:", arima_rmse)

print("\nLSTM Performance:")
print("MAE:", lstm_mae)
print("MSE:", lstm_mse)
print("RMSE:", lstm_rmse)

# --- Actual vs. predicted prices ---------------------------------------------
plt.figure(figsize=(12,6))
plt.plot(test.index, test['Adj Close'], label='Actual Price')
plt.plot(test.index, arima_predictions, label='ARIMA Prediction')
# The LSTM predictions cover the trailing part of the test period, so they
# are aligned to the last len(lstm_predictions) test dates.
plt.plot(test.index[-len(lstm_predictions):], lstm_predictions, label='LSTM Prediction')
plt.legend()
plt.title("Stock Price Prediction: ARIMA vs LSTM")
plt.xlabel("Date")
plt.ylabel("Price")
plt.show()

# --- Residuals ---------------------------------------------------------------
# Subtract positionally rather than by index label: the forecast's index
# generally does not match the trading-day index of `test` (statsmodels
# cannot use a date index without a frequency), and a label-aligned
# Series subtraction would then yield NaN on a mismatched index instead of residuals.
actual_test = test['Adj Close'].to_numpy().ravel()
arima_residuals = pd.Series(
    actual_test - np.asarray(arima_predictions).ravel(),
    index=test.index,
    name='ARIMA Residuals',
)
lstm_residuals = y_test_actual.flatten() - lstm_predictions.flatten()

plt.figure(figsize=(12,5))
plt.plot(arima_residuals, label='ARIMA Residuals')
# Put the LSTM residuals on the same date axis as the ARIMA residuals.
plt.plot(test.index[-len(lstm_residuals):], lstm_residuals, label='LSTM Residuals')
plt.legend()
plt.title("Residual Comparison")
plt.show()

# --- LSTM learning curve -----------------------------------------------------
plt.figure(figsize=(8,5))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.legend()
plt.title("LSTM Learning Curve")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.show()
