In [75]:
import pandas as pd
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Load the data
data = pd.read_csv('yahoo_stock.csv')



In [2]:
# Set the date column as the index
data.set_index('Date', inplace=True)

# Split the data into train and test sets
train_data, test_data = train_test_split(data, test_size=0.2, shuffle=False)



In [63]:
# ARIMA forecasting
model = ARIMA(train_data['Adj Close'], order=(1, 1, 1))
model_fit = model.fit()
arima_forecast_test = model_fit.forecast(steps=365)
arima_forecast_df = pd.DataFrame(arima_forecast_test, columns=['yhat'], index=arima_forecast_test.index)


# Prophet forecasting
def prophet_forecast(data):
    m = Prophet()
    data_with_ds_y = pd.DataFrame({'ds': data.index, 'y': data['Adj Close']})
    m.fit(data_with_ds_y)
    future = m.make_future_dataframe(periods=365, freq='D')
    forecast = m.predict(future)
    return forecast




prophet_forecast_test= prophet_forecast(train_data)


No frequency information was provided, so inferred frequency D will be used.


No frequency information was provided, so inferred frequency D will be used.


No frequency information was provided, so inferred frequency D will be used.

16:11:37 - cmdstanpy - INFO - Chain [1] start processing
16:11:37 - cmdstanpy - INFO - Chain [1] done processing


In [85]:
#LSTM forecasting
prices = data["Adj Close"].values.reshape(-1, 1)
#Scale Data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_prices = scaler.fit_transform(prices)

LOOKBACK = 60

X, y = [], []

for i in range(LOOKBACK, len(scaled_prices)):
    X.append(scaled_prices[i-LOOKBACK:i, 0])
    y.append(scaled_prices[i, 0])

X = np.array(X)
y = np.array(y)
X = np.reshape(X, (X.shape[0], X.shape[1], 1))

train_size = int(len(X) - 365)

X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(LOOKBACK, 1)),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(1)
])

model.compile(
    optimizer="adam",
    loss="mean_squared_error"
)

model.fit(
    X_train,
    y_train,
    epochs=25,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1
)


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.src.callbacks.History at 0x33eaeddd0>

In [86]:
pred_scaled = model.predict(X_test)

predictions = scaler.inverse_transform(pred_scaled)
actuals = scaler.inverse_transform(y_test.reshape(-1, 1))




In [87]:
test_dates = test_data.index

lstm_forecast = pd.Series(
    predictions.flatten(),
    index=test_dates,
    name="LSTM"
)

lstm_forecast

Date
2019-11-22    3066.595703
2019-11-23    3067.002197
2019-11-24    3067.052002
2019-11-25    3066.872803
2019-11-26    3068.035400
                 ...     
2020-11-16    3463.477783
2020-11-17    3475.557373
2020-11-18    3485.763672
2020-11-19    3491.388428
2020-11-20    3494.692139
Name: LSTM, Length: 365, dtype: float32

In [100]:
predicted_values = pd.DataFrame({
    "Date": prophet_forecast_test["ds"].tail(365).values,
    "Actual": test_data["Adj Close"].values,
    "ARIMA": arima_forecast_test.values,
    "LSTM": lstm_forecast.values,
    "Prophet": prophet_forecast_test["yhat"].tail(365).values
})
predicted_values

Unnamed: 0,Date,Actual,ARIMA,LSTM,Prophet
0,2019-11-22,3110.290039,3103.802830,3066.595703,3100.114948
1,2019-11-23,3110.290039,3103.794236,3067.002197,3102.558847
2,2019-11-24,3110.290039,3103.794517,3067.052002,3105.045461
3,2019-11-25,3133.639893,3103.794508,3066.872803,3107.506093
4,2019-11-26,3140.520020,3103.794508,3068.035400,3111.546245
...,...,...,...,...,...
360,2020-11-16,3626.909912,3103.794508,3463.477783,3656.940472
361,2020-11-17,3609.530029,3103.794508,3475.557373,3660.771186
362,2020-11-18,3567.790039,3103.794508,3485.763672,3664.680665
363,2020-11-19,3581.870117,3103.794508,3491.388428,3667.327661


In [106]:
import plotly.express as px

df_long = predicted_values.melt(
    id_vars="Date",
    value_vars=["Actual", "ARIMA", "Prophet", "LSTM"],
    var_name="Model",
    value_name="Value"
)


fig = px.line(
    df_long,
    x="Date",
    y="Value",
    color="Model",
    title="Actual vs Forecasts",
    labels={"Value": "Price", "Date": "Date"}
)#fig = px.line(predicted_values, x="Date", y="ARIMA", title='ARIMA')
fig.show()


In [108]:
#Mean Squared Error
from sklearn.metrics import mean_squared_error
mse_Prophet = mean_squared_error(predicted_values["Actual"], predicted_values["Prophet"])
mse_ARIMA = mean_squared_error(predicted_values["Actual"], predicted_values["ARIMA"])
mse_LSTM = mean_squared_error(predicted_values["Actual"], predicted_values["LSTM"])
mse_overall = (mse_Prophet + mse_ARIMA + mse_LSTM) / 3

mse_df = pd.DataFrame({"Model": ["Prophet", "ARIMA", "LSTM", "Mean"], "MSE": [mse_Prophet, mse_ARIMA, mse_LSTM, mse_overall]})

fig = px.bar(mse_df, x="Model", y="MSE", color="Model", title="Mean Squared Errors")
fig.show()

In [107]:
#Root Mean Squared Error
from sklearn.metrics import mean_squared_error
rmse_Prophet = mean_squared_error(predicted_values["Actual"], predicted_values["Prophet"], squared=False)
rmse_ARIMA = mean_squared_error(predicted_values["Actual"], predicted_values["ARIMA"], squared=False)
rmse_LSTM = mean_squared_error(predicted_values["Actual"], predicted_values["LSTM"], squared=False)
rmse_overall = (rmse_Prophet + rmse_ARIMA + rmse_LSTM) / 3

rmse_df = pd.DataFrame({"Model": ["Prophet", "ARIMA", "LSTM", "Mean"], "RMSE": [rmse_Prophet, rmse_ARIMA, rmse_LSTM, rmse_overall]})

fig = px.bar(rmse_df, x="Model", y="RMSE", color="Model", title="Root Mean Squared Errors")
fig.show()


'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.


'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.


'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.

