In [None]:
#import related libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Widen pandas' display limits: up to 200 columns and 400 characters per column.
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_colwidth', 400)
# Seaborn theme: notebook context, white grid, and a wide 18x4 default figure size.
sns.set(context='notebook', style='whitegrid', rc={"figure.figsize": (18,4)})
# Visualization settings: draw figures inline and request the 'retina' figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from matplotlib import rcParams
# Same 18x4 size that was passed to sns.set above, written to matplotlib's rcParams again.
# NOTE(review): presumably repeated because activating the inline backend can reset
# figure.figsize -- confirm before removing either of the two settings.
rcParams['figure.figsize'] = 18,4
# Silence warnings (the motivation was noisy statsmodels warnings).
# NOTE: the filter is given no category or module, so it hides ALL warnings.
import warnings
warnings.filterwarnings("ignore")
# Seed NumPy's global random number generator for notebook reproducibility.
# np.random.seed is a function and has to be *called*: assigning to it (or to
# np.random.set_state) seeds nothing and replaces the function with an int.
# np.random.set_state expects a full generator-state tuple, not a seed, so it
# is not used here.
rnd_seed = 42
np.random.seed(rnd_seed)
#import ARIMA library
from pmdarima import auto_arima

In [None]:
# Load the CSV and discard its 'Unnamed: 0' column in one chained expression.
# NOTE(review): 'Unnamed: 0' is presumably a row index written out by an
# earlier to_csv call -- confirm against the file.
dataset = (
    pd.read_csv("SNP500Dataset.csv")
    .drop(columns=['Unnamed: 0'])
)
# Preview the first rows.
dataset.head()

In [None]:
# True if at least one cell anywhere in the frame is missing.
dataset.isna().to_numpy().any()

In [None]:
# Convert the 'Date' column to datetimes, then move it out of the columns and
# make it the index of the frame (done in place on `dataset`).
dataset['Date'] = dataset['Date'].pipe(pd.to_datetime)
dataset.set_index('Date', inplace=True)

In [None]:
# Replace zero and negative values with 0.01 so that the time series can be
# decomposed. One vectorised DataFrame.where call replaces the per-column
# loop over a per-element Python lambda. Entries for which `> 0` is False are
# overwritten; that includes NaN, which the element-wise version also mapped
# to 0.01.
dataset = dataset.where(dataset > 0, 0.01)

In [None]:
def forecast(timeseries_df, days_forecasted, freq='MS'):
    """Fit a seasonal ARIMA with auto_arima, forecast ahead, and plot the result.

    The stepwise search is run on ``timeseries_df``, the selected model's AIC
    is printed, and the observed data is plotted together with the forecast
    in a 'Prediction' column.

    Parameters
    ----------
    timeseries_df : pandas.Series (or other datetime-indexed pandas object
        accepted by ``auto_arima``)
        Observations to model. Its index must hold timestamps, because the
        forecast index starts one day after ``timeseries_df.index[-1]``.
    days_forecasted : int
        Number of future periods to predict. Despite the name, each period is
        one step of ``freq`` (month start by default), not one day.
    freq : str, default 'MS'
        pandas offset alias for the spacing of the forecast index.

    Returns
    -------
    None
        The function prints and plots; it does not return the forecast.
    """
    # m=12 sets a seasonal cycle of 12 observations (a yearly cycle if the
    # data is monthly); d=1 and D=1 fix the differencing orders.
    stepwise_model = auto_arima(timeseries_df, start_p=1, start_q=1,
                                max_p=3, max_q=3, m=12,
                                start_P=0, seasonal=True,
                                d=1, D=1, trace=True,
                                error_action='ignore',
                                suppress_warnings=True,
                                stepwise=True)
    print("Final Model's AIC=", stepwise_model.aic())

    # auto_arima returns the selected model already fitted on timeseries_df,
    # so it can predict directly without being refitted on the same data.
    predicted = stepwise_model.predict(n_periods=days_forecasted)

    # The forecast index starts after the last observed *timestamp* (the
    # index, not the last value) and has one entry per predicted period.
    future_index = pd.date_range(
        start=timeseries_df.index[-1] + pd.DateOffset(days=1),
        periods=days_forecasted,
        freq=freq,
    )

    # np.asarray drops any index attached to the prediction, so the values are
    # placed positionally instead of being re-aligned (to NaN) on future_index.
    future_forecast = pd.DataFrame({'Prediction': np.asarray(predicted)},
                                   index=future_index)
    pd.concat([timeseries_df, future_forecast], axis=1).plot()