In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pmdarima import auto_arima
from sklearn import metrics
from statsmodels.tsa.stattools import adfuller



In [2]:
# Location of the FB.csv input file.
# NOTE(review): absolute, machine-specific Windows path - point this at your
# own copy of the data before running the notebook elsewhere.
fb_csv_path = r'G:\Study and WorkRelated\Book Proposal\book\Code and Data\Data\FB.csv'
df = pd.read_csv(fb_csv_path)


In [3]:
# Preview the first 10 rows to sanity-check the columns that were loaded.
df.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-12-08,76.18,77.25,75.400002,76.519997,76.519997,25733900
1,2014-12-09,75.199997,76.93,74.779999,76.839996,76.839996,25358600
2,2014-12-10,76.650002,77.550003,76.07,76.18,76.18,32210500
3,2014-12-11,76.519997,78.519997,76.480003,77.730003,77.730003,33462100
4,2014-12-12,77.160004,78.879997,77.019997,77.830002,77.830002,28091600
5,2014-12-15,78.459999,78.580002,76.559998,76.989998,76.989998,29396500
6,2014-12-16,76.190002,77.389999,74.589996,74.690002,74.690002,31554600
7,2014-12-17,75.010002,76.410004,74.900002,76.110001,76.110001,29203900
8,2014-12-18,76.889999,78.400002,76.510002,78.400002,78.400002,34222100
9,2014-12-19,78.75,80.0,78.330002,79.879997,79.879997,43335000


In [30]:
def timeseries_evaluation_metrics_func(y_true, y_pred):
    """Print MSE, MAE, RMSE, MAPE and R2 for predictions against actuals.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, one per observation in ``y_true``.

    Returns
    -------
    None
        The metrics are only written to stdout.
    """

    def mean_absolute_percentage_error(y_true, y_pred):
        """Return the mean of |(actual - predicted) / actual| as a percentage."""
        # Flatten both inputs so that a 1-D target and an (n, 1) prediction
        # frame are compared element-wise instead of broadcasting to (n, n).
        actual = np.asarray(y_true).ravel()
        predicted = np.asarray(y_pred).ravel()
        # NOTE: a zero in `actual` makes the ratio inf/nan; that behaviour is
        # left unchanged here.
        return np.mean(np.abs((actual - predicted) / actual)) * 100

    print('Evaluation metric results:-')
    print(f'MSE is : {metrics.mean_squared_error(y_true, y_pred)}')
    # This line reports the mean absolute error; it was previously mislabelled "MSE".
    print(f'MAE is : {metrics.mean_absolute_error(y_true, y_pred)}')
    print(f'RMSE is : {np.sqrt(metrics.mean_squared_error(y_true, y_pred))}')
    print(f'MAPE is : {mean_absolute_percentage_error(y_true, y_pred)}')
    print(f'R2 is : {metrics.r2_score(y_true, y_pred)}',end='\n\n')

In [21]:
def Augmented_Dickey_Fuller_Test_func(series, column_name=''):
    """Run ``adfuller`` on ``series`` and print a short report with a verdict.

    Parameters
    ----------
    series : pandas.Series
        Values to test; missing values are dropped before the test runs.
    column_name : str, optional
        Label shown in the report header. Defaults to an empty string.

    Returns
    -------
    None
        The first four entries of the test result and the verdict are printed.
    """
    adf_output = adfuller(series.dropna(), autolag='AIC')
    summary = pd.Series(
        adf_output[:4],
        index=['ADF test statistic', 'p value', 'No lags used', 'No observations'],
    )
    print(f'Performing Augmented Dickey Fuller Test for: {column_name}')
    print(summary.to_string())

    # The second entry of the result is the p-value; 0.05 is the cut-off
    # used to decide between the two verdicts.
    if adf_output[1] <= 0.05:
        verdict = ("Reject the null hypothesis", "Data is stationary")
    else:
        verdict = ("Fail to reject the null hypothesis", "Data is non-stationary")
    for message in verdict:
        print(message)
        

In [22]:
# Pass the column name so the report header says which series was tested;
# without it the header ends with an empty label.
Augmented_Dickey_Fuller_Test_func(df['Close'], column_name='Close')


Performing Augmented Dickey Fuller Test for: 
ADF test statistic      -1.338096
p value                  0.611568
No lags used             0.000000
No observations       1258.000000
Fail to reject the null hypothesis
Data is non-stationary


In [23]:
# First-difference the closing price: the ADF test on the raw 'Close' column
# failed to reject non-stationarity, so the change between rows is modelled.
df['Close_diff'] = df['Close'].diff()

In [24]:
# diff() leaves a NaN in the first row of 'Close_diff'; drop it.
# NOTE(review): with no `subset`, this removes rows that have a NaN in ANY
# column, and it mutates `df` in place.
df.dropna(inplace=True)

In [25]:
# Model only the differenced closing price, kept as a one-column DataFrame.
X = df.loc[:, ['Close_diff']]
# Hold out the final 30 rows for evaluation; everything before them is
# used for fitting.
train = X.iloc[:-30]
test = X.iloc[-30:]

In [26]:
# Stepwise, non-seasonal order search on the training differences.
# trace=True prints one line per candidate model that is fitted.
stepwise_model = auto_arima(
    train,
    seasonal=False,
    stepwise=True,
    trace=True,
    suppress_warnings=True,
)



Fit ARIMA: order=(2, 0, 2); AIC=nan, BIC=nan, Fit time=nan seconds
Fit ARIMA: order=(0, 0, 0); AIC=5929.302, BIC=5939.528, Fit time=0.002 seconds
Fit ARIMA: order=(1, 0, 0); AIC=5930.825, BIC=5946.165, Fit time=0.011 seconds
Fit ARIMA: order=(0, 0, 1); AIC=5930.810, BIC=5946.149, Fit time=0.016 seconds
Fit ARIMA: order=(1, 0, 1); AIC=5926.792, BIC=5947.245, Fit time=0.251 seconds
Fit ARIMA: order=(2, 0, 1); AIC=5928.521, BIC=5954.087, Fit time=0.345 seconds
Fit ARIMA: order=(1, 0, 2); AIC=nan, BIC=nan, Fit time=nan seconds
Total fit time: 0.746 seconds




In [27]:
# Show the fit summary (coefficients, information criteria) of the selected model.
stepwise_model.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,1228.0
Model:,"ARMA(1, 1)",Log Likelihood,-2959.396
Method:,css-mle,S.D. of innovations,2.694
Date:,"Wed, 18 Dec 2019",AIC,5926.792
Time:,21:42:26,BIC,5947.245
Sample:,0,HQIC,5934.488
,,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0892,0.056,1.583,0.114,-0.021,0.200
ar.L1.y,0.8630,0.072,11.942,0.000,0.721,1.005
ma.L1.y,-0.8998,0.062,-14.514,0.000,-1.021,-0.778

0,1,2,3,4
,Real,Imaginary,Modulus,Frequency
AR.1,1.1587,+0.0000j,1.1587,0.0000
MA.1,1.1113,+0.0000j,1.1113,0.0000


In [28]:
# Forecast exactly one step per hold-out row instead of repeating the
# literal hold-out size.
forecast = stepwise_model.predict(n_periods=len(test))
# Flatten to a plain array first so construction does not depend on whether
# predict() hands back an ndarray or a labelled pandas object, then reuse the
# hold-out index so each prediction lines up row-for-row with `test`.
forecast = pd.DataFrame(
    {'close_pred': np.asarray(forecast).ravel()},
    index=test.index,
)

In [31]:
# Score the forecast against the 30 held-out rows. Both inputs are on the
# differenced ('Close_diff') scale, not the price level.
# NOTE(review): differences close to zero presumably inflate the MAPE - confirm
# before reading much into that figure.
timeseries_evaluation_metrics_func(test, forecast)

Evaluation metric results:-
MSE is : 3.0708298637484064
MSE is : 1.4125073866548123
RMSE is : 1.752378344921098
MAPE is : 111.66497928414312
