In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error, mean_absolute_error


In [None]:
# Silence every warning category so library chatter does not clutter cell output
warnings.simplefilter('ignore')


In [None]:
# Read the AAPL price history (parsing 'Date' as datetimes) and keep only
# the two columns used by the rest of the notebook
df = pd.read_csv('Dataset/AAPL.csv', parse_dates=['Date']).loc[:, ['Date', 'Close']]


In [None]:
# Index the frame by date and put the rows in chronological order
df = df.set_index('Date').sort_index()


In [None]:
# Fill missing values using forward fill.
# The result is assigned back to the column rather than calling
# fillna(..., inplace=True) on the df['Close'] selection: an in-place call on
# a selected column is chained assignment, which does not update `df` when
# pandas Copy-on-Write is active, and fillna(method=...) is deprecated in
# favour of .ffill().
df['Close'] = df['Close'].ffill()


In [None]:
# Visualise the closing-price history
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df['Close'], label='Close Price')
ax.set_title('Apple Stock Prices')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
ax.legend()
plt.show()


In [None]:
# Checking for stationarity using the Dickey-Fuller test
def test_stationarity(timeseries):
    result = adfuller(timeseries)
    print('ADF Statistic: %f' % result[0])
    print('p-value: %f' % result[1])
    print('Critical Values:')
    for key, value in result[4].items():
        print('\t%s: %.3f' % (key, value))
        
    return result[1]


In [None]:
# Perform the Dickey-Fuller test and, if the series looks non-stationary,
# store its first difference in df['Close_Diff'] and test that as well.
SIGNIFICANCE_LEVEL = 0.05  # threshold the ADF p-value is compared against

p_value = test_stationarity(df['Close'])
if p_value > SIGNIFICANCE_LEVEL:
    print("Series is not stationary, differencing required.")
    # Assign the full-length difference directly. Dropping NaNs before the
    # assignment has no effect on the stored column (the assignment realigns
    # to df's index and reinserts the NaN) and the forced reindex fails when
    # the index contains duplicate labels.
    df['Close_Diff'] = df['Close'].diff()
    # The first difference is NaN by construction; remove NaNs for the test only.
    test_stationarity(df['Close_Diff'].dropna())
else:
    print("Series is stationary, no differencing required.")
    df['Close_Diff'] = df['Close']


In [None]:
# Split the closing price into trend, seasonal and residual components
# (multiplicative model, period of 30 observations)
decomposition = seasonal_decompose(df['Close'], model='multiplicative', period=30)
trend, seasonal, residual = (
    decomposition.trend,
    decomposition.seasonal,
    decomposition.resid,
)

# One stacked panel per series, with the original on top
panels = [
    (df['Close'], 'Original'),
    (trend, 'Trend'),
    (seasonal, 'Seasonality'),
    (residual, 'Residuals'),
]

plt.figure(figsize=(14, 7))
for position, (component, caption) in enumerate(panels, start=1):
    plt.subplot(4, 1, position)
    plt.plot(component, label=caption)
    plt.legend(loc='best')
plt.tight_layout()
plt.show()


In [None]:
# Auto ARIMA order selection.
# The search runs on the training portion only. Selecting the order on the
# full series would let the later hold-out observations influence the model
# that is then evaluated on them. The 0.8 fraction must stay in sync with
# the train/test split used for evaluation.
order_selection_close = df['Close'].iloc[:int(len(df) * 0.8)]

model = auto_arima(order_selection_close,
                   start_p=1, start_q=1,
                   test='adf',  # Use ADF test to find optimal 'd'
                   max_p=3, max_q=3,
                   m=1,              # Frequency of series
                   d=None,           # Let model determine 'd'
                   seasonal=False,   # No Seasonality
                   trace=True,       # Print status
                   error_action='ignore',
                   suppress_warnings=True,
                   stepwise=True)


In [None]:
# Show the fitted model's summary table
model_summary = model.summary()
print(model_summary)


In [None]:
# Chronological 80/20 split: the first 80% of rows form the training set,
# the remaining rows form the test set
train_size = int(len(df) * 0.8)
train = df.iloc[:train_size]
test = df.iloc[train_size:]


In [None]:
# Fit the model on the training closes only
train_close = train['Close']
model.fit(train_close)


In [None]:
# Make predictions over the test horizon, with confidence bounds
forecast, conf_int = model.predict(n_periods=len(test), return_conf_int=True)

# Depending on the pmdarima version, `forecast` can be a pandas Series that
# carries its own index. Passing such a Series to pd.Series(..., index=...)
# aligns by label instead of by position and produces all-NaN values, so
# convert to plain arrays before attaching the test dates.
forecast_values = np.asarray(forecast)
conf_int = np.asarray(conf_int)

forecast_series = pd.Series(forecast_values, index=test.index)
lower_series = pd.Series(conf_int[:, 0], index=test.index)
upper_series = pd.Series(conf_int[:, 1], index=test.index)


In [None]:
# Overlay the forecast and its confidence band on the train/test data
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(train['Close'], label='Training Data')
ax.plot(test['Close'], label='Test Data')
ax.plot(forecast_series, label='Predicted Data')
ax.fill_between(lower_series.index, lower_series, upper_series, color='k', alpha=.15)
ax.set_title('Apple Stock Price Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
ax.legend()
plt.show()


In [None]:
# Score the forecast against the held-out closing prices
actual_close = test['Close']
mae = mean_absolute_error(actual_close, forecast_series)
mse = mean_squared_error(actual_close, forecast_series)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE
print(f'Mean Absolute Error: {mae}')
print(f'Root Mean Squared Error: {rmse}')


In [None]:
# Forecasting future prices.
# The model was last fitted on the training slice, so predicting from it
# would return the steps that immediately follow the training data, not the
# days after the end of the dataset. Refit on the complete series first so
# the forecast origin is the last observed date.
# NOTE: this refits `model` in place; re-run the train-fit cell before
# re-running the test-set prediction cells.
model.fit(df['Close'])

n_future = 30  # number of business days to forecast
future_forecast, future_conf_int = model.predict(n_periods=n_future, return_conf_int=True)

# Label the forecast with the business days following the last observation.
future_index = pd.date_range(start=df.index[-1] + pd.Timedelta(days=1), periods=n_future, freq='B')

# Convert to plain arrays so values are attached by position; a Series
# returned by predict() would otherwise be aligned by label and yield NaN.
future_conf_int = np.asarray(future_conf_int)
future_forecast_series = pd.Series(np.asarray(future_forecast), index=future_index)
lower_future_series = pd.Series(future_conf_int[:, 0], index=future_forecast_series.index)
upper_future_series = pd.Series(future_conf_int[:, 1], index=future_forecast_series.index)


In [None]:
# Show the historical closes followed by the forecast and its confidence band
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df['Close'], label='Historical Data')
ax.plot(future_forecast_series, label='Future Forecast')
ax.fill_between(lower_future_series.index, lower_future_series, upper_future_series, color='k', alpha=.15)
ax.set_title('Apple Stock Price Future Forecast')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
ax.legend()
plt.show()
