In [None]:
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
import seaborn as sns

from sklearn.metrics import mean_absolute_percentage_error,mean_absolute_error
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller

rcParams['figure.figsize'] = (14,8)
sns.set_style('darkgrid')

warnings.filterwarnings('ignore')

In [None]:
# Load the gold price history, index it by date and put it on a business-day grid.
# NOTE(review): the path below is absolute and machine-specific; point it at a
# project-relative data directory before sharing the notebook.
df = pd.read_csv("C:/Users/Abhishek/anaconda3/DS/gold.csv")
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
df = (
    df.set_index('Date')
      # 'B' is the documented business-day alias (the lowercase spelling is
      # deprecated in recent pandas); missing business days are forward-filled.
      .asfreq('B', method='ffill')
      .drop(columns='Currency')
)
df.head(5)

In [None]:
# Structural overview of the loaded frame (index, columns, dtypes, non-null counts).
df.info()

In [None]:
# Summary statistics, transposed so that each column of df becomes one row.
df.describe().T

In [None]:
# Restrict the analysis to the rows whose index is strictly greater than '2021'.
# NOTE(review): '2021' is presumably parsed as 2021-01-01 00:00, so that exact
# date is excluded by the strict comparison — confirm this is intended.
data = df[df.index > '2021']

In [None]:
# Closing price at four sampling resolutions. Every panel except "Daily" shows
# the mean close within the corresponding resampling bucket.
fig, axes = plt.subplots(2, 2, figsize=[15, 7])

panels = [
    ("Daily", data.Close),
    ("Monthly", data.Close.resample('M').mean()),
    ("Quarterly", data.Close.resample('Q').mean()),
    ("Annually", data.Close.resample('A').mean()),
]

# axes.flat walks the grid row by row, matching the order of `panels`.
for ax, (title, series) in zip(axes.flat, panels):
    ax.plot(series)
    ax.set_title(title, size=16)

plt.tight_layout()
plt.show()


In [None]:
# Decompose the closing price and keep each component under its own name.
decompose_result = seasonal_decompose(data.Close)
trend, seasonal, residual = (
    decompose_result.trend,
    decompose_result.seasonal,
    decompose_result.resid,
)
# Trailing semicolon keeps the figure repr out of the cell output.
decompose_result.plot();

In [None]:
def stationarity_check(series):
    """Run ``adfuller`` on ``series`` and print a labelled summary.

    The printout contains the first four entries of the test result (test
    statistic, p-value, lags used, observations used) followed by one
    ``Critical Value (<level>)`` row per entry of the critical-value mapping.
    Nothing is returned.
    """
    print('Results of Dickey Fuller Test:')
    adf_result = adfuller(series, autolag='AIC')

    labels = ['Test Statistic', 'p-value',
              '#Lags Used', 'Number of Observations Used']
    figures = list(adf_result[:4])

    # adf_result[4] maps a significance level to its critical value.
    for level, threshold in adf_result[4].items():
        labels.append(f'Critical Value ({level})')
        figures.append(threshold)

    print(pd.Series(figures, index=labels))

In [None]:
def plot_rolling_stats(series,window):
    """Plot ``series`` with its rolling mean and rolling standard deviation.

    series: object exposing ``.rolling`` (a pandas Series in this notebook).
    window: forwarded unchanged to ``series.rolling``.
    """
    roller = series.rolling(window)
    curves = [
        (series, 'blue', 'Original'),
        (roller.mean(), 'red', 'Rolling mean'),
        (roller.std(), 'black', 'Rolling std'),
    ]

    plt.figure(figsize=(10,5))
    for values, colour, name in curves:
        plt.plot(values, color=colour, label=name)

    plt.title('Rolling Mean/Standard Deviation',size=20)
    plt.legend(loc='best')
    plt.show(block=False)

In [None]:
# Stationarity diagnostics on the raw closing price: 30-observation rolling
# statistics followed by the Dickey-Fuller summary.
plot_rolling_stats(data.Close,30)
stationarity_check(data.Close)

In [None]:
# Repeat the diagnostics on the first-differenced closing price.
# The leading element of diff() has no predecessor, so it is skipped with [1:].
close_diff = data.Close.diff()[1:]
plot_rolling_stats(close_diff, 30)
stationarity_check(close_diff)

In [None]:
# Partial autocorrelation (left) and autocorrelation (right) of the closing
# price for the first 20 lags; lag 0 is omitted from both panels.
fig, (ax_1, ax_2) = plt.subplots(1, 2, figsize=(14,5))

lag_options = dict(lags=20, zero=False)
plot_pacf(data.Close, ax=ax_1, **lag_options)
plot_acf(data.Close, ax=ax_2, **lag_options);

In [None]:
class Data:
    """Rolling-window view over a pandas object for walk-forward forecasting.

    Each call to ``get_data`` returns the current training window and the
    ``forecast_steps`` observations that immediately follow it, then slides
    the window forward by ``forecast_steps`` positions.

    Parameters
    ----------
    dataset : pandas Series or DataFrame
        Source observations; must expose ``.iloc`` and ``.values``.
    window : int
        Number of observations in each training window.
    forecast_steps : int, default 1
        Number of observations in each test slice and the stride of the window.
    """

    def __init__(self,dataset,window,forecast_steps=1):
        self.dataset = dataset
        self.window = window
        self.steps = forecast_steps
        self.index = 0  # position of the first observation of the current window

    def _slice(self, start, stop):
        """Return ``dataset[start:stop]`` by position as a NumPy array."""
        # .iloc keeps the slice positional whatever the index labels are;
        # bare [] slicing is label-based on float-labelled indexes.
        return self.dataset.iloc[start:stop].values

    def get_train(self):
        """Return the current training window (``window`` values at most)."""
        return self._slice(self.index, self.index + self.window)

    def get_test(self):
        """Return the ``steps`` values right after the training window.

        The result is shorter (possibly empty) once the window reaches the
        end of the dataset.
        """
        test_index = self.index + self.window
        return self._slice(test_index, test_index + self.steps)

    def get_data(self):
        """Return ``(train, test)`` for the current position, then advance."""
        train = self.get_train()
        test = self.get_test()
        self.index += self.steps
        return train, test

In [None]:
steps = 1 ## day ahead
window = 5 ## business week

# One iteration per complete `steps`-sized block that follows the first window.
iterations = len(data.Close[window:]) // steps
predictions = []

# Walk-forward evaluation: refit on each rolling window and keep the first
# forecast value produced for that window.
# NOTE(review): a (2,1,1) model is fitted on only `window` = 5 observations and
# warnings are silenced notebook-wide; consider a longer window.
dataset = Data(data.Close,window,steps)
for _ in range(iterations):
    train,test = dataset.get_data()
    model = ARIMA(train,order=(2,1,1)).fit()
    # No exception handler here on purpose: a failure must surface with its own
    # traceback instead of leaving `predictions` silently shorter than expected.
    predictions.append(model.forecast(steps=steps)[0])

In [None]:
# Forecast i targets the first observation after its training window, i.e.
# positions window, window + steps, ... of `data`. Taking those timestamps from
# the data keeps the index correct if the date filter or `window` changes.
date_index = data.index[window::steps][:len(predictions)]
df_pred = pd.DataFrame({'predictions':predictions},index=date_index)

In [None]:
# Overlay the walk-forward forecasts on the observed closing prices they target.
ax = plt.gca()
ax.plot(df_pred, color='red', label='predictions')
ax.plot(data.Close[window:], color='blue', label='original')
ax.set_title('Original vs Predictions', size=20)
ax.legend(loc='best');

In [None]:
# Score the forecasts against the observed closes that follow the first window.
# sklearn reports MAPE as a fraction (0.01 means 1%), not as a percentage.
actual = data.Close[window:]
print(f"Mean Absolute Error: {mean_absolute_error(actual,predictions)}")
print(f"Mean Absolute Percentage Error: {mean_absolute_percentage_error(actual,predictions)}")