In [2]:
#library
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sms
import pandas as pd

%matplotlib inline

In [None]:
# Load the dataset and parse the time column into real datetimes.
# Drop the 'Title' column, which is not used by the analysis.
df = pd.read_excel('abc.xlsx').drop(columns='Title')
# Keep a datetime64 dtype (the previous strftime call produced plain strings);
# floor('min') preserves the former '%Y-%m-%d %H:%M' minute resolution.
df['time'] = pd.to_datetime(df['time']).dt.floor('min')
df.head()

In [None]:
# Use the 'time' column as the index.
# Guarded so that re-running this cell does not raise a KeyError once
# 'time' has already been moved into the index.
if 'time' in df.columns:
    df = df.set_index('time')
df.head()


In [None]:
### Testing for stationarity
# Method 1 - Rolling statistics
# Method 2 - Dickey-Fuller test   <----- (used below)

from statsmodels.tsa.stattools import adfuller 
def adf_test(series):
    """Run ``adfuller`` on ``series`` and print the outcome.

    Prints the test statistic (first element of the result) and the
    p-value (second element), followed by a message stating whether the
    unit-root null hypothesis is rejected at the 0.05 threshold.
    Nothing is returned.
    """
    outcome = adfuller(series)
    adf_statistic, p_value = outcome[0], outcome[1]
    print('ADF Statistics: {}'.format(adf_statistic))
    print('p- value: {}'.format(p_value))
    # Guard clause: a p-value above the threshold means the null is kept.
    if p_value > 0.05:
        print("weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary ")
        return
    print("strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root and is stationary")
        
# Drop missing values first, as every other adf_test call in this notebook
# does: adfuller cannot be run on a series containing NaN.
adf_test(df['views'].dropna())

In [None]:
# If the series is already stationary, use the ACF and PACF plots below
# to choose the p, d and q values.
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
acf = df["views"].dropna().pipe(plot_acf)
pacf = df["views"].dropna().pipe(plot_pacf)

In [None]:
# First differencing (each value minus the previous one) to make the data stationary
df['Views f Difference'] = df['views'].diff()
# Show the lagged series that was subtracted
df['views'].shift(1)

In [None]:
## Re-run the Dickey-Fuller test on the first difference (leading NaN removed)
adf_test(series=df['Views f Difference'].dropna())

In [None]:
# Second differencing, in case first differencing did not make the data stationary
df['Views s Difference'] = df['Views f Difference'].diff()
# Show the lagged first difference that was subtracted
df['Views f Difference'].shift(1)

In [None]:
## Re-run the Dickey-Fuller test on the second difference (leading NaNs removed)
adf_test(series=df['Views s Difference'].dropna())

In [None]:
# Third differencing, in case second differencing did not make the data stationary
df['Views t Difference'] = df['Views s Difference'].diff()
# Show the lagged second difference that was subtracted
df['Views s Difference'].shift(1)

In [None]:
## Re-run the Dickey-Fuller test on the third difference (leading NaNs removed)
adf_test(series=df['Views t Difference'].dropna())

In [None]:
# Once the data is stationary, read the p, d and q values off the ACF and
# PACF plots of the first-differenced series.
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
acf = df["Views f Difference"].dropna().pipe(plot_acf)
pacf = df["Views f Difference"].dropna().pipe(plot_pacf)

In [None]:
# Number of rows in the frame
df.shape[0]

In [17]:
# Chronological 80/20 split. The boundary is computed from the data instead
# of reusing `x`/`y` as slice bounds (they are not defined before this cell,
# and `x` would already be a DataFrame when used as the second slice's start).
TRAIN_FRACTION = 0.8
split_at = int(len(df) * TRAIN_FRACTION)
x = df.iloc[:split_at, :]         # train dataset: first 80% of the rows
# test dataset: remaining 20% of the rows, held out for testing.
# Copied because later cells add a column to `y` and reset its index in place.
y = df.iloc[split_at:, :].copy()


In [18]:
## Prediction window: first and last index labels of `y`
pred_start_date, pred_end_date = y.index[0], y.index[-1]

In [None]:
## Create and fit an ARIMA model
# Import from the maintained location. The legacy
# `statsmodels.tsa.arima_model.ARIMA` previously imported here was unused and
# has been superseded by `statsmodels.tsa.arima.model.ARIMA`, which is the
# same class that `sms.tsa.arima.ARIMA` resolves to.
from statsmodels.tsa.arima.model import ARIMA

# `p`, `d` and `q` are not assigned anywhere in this notebook: set them
# (e.g. from the stationarity tests and the ACF/PACF plots) before running.
# NOTE(review): the model is fitted on `y` (the hold-out slice), not on the
# training slice `x` - confirm this is intended.
model_ARIMA = ARIMA(y['views'], order=(p, d, q))
model = model_ARIMA.fit()
print(model.summary())

In [None]:
## Prediction window: recompute the bounds from `y` and echo them
pred_end_date = y.index[-1]
pred_start_date = y.index[0]
print(pred_start_date, pred_end_date, sep='\n')

In [None]:
# Predict over the chosen window, then take observed minus predicted
pred = model.predict(end=pred_end_date, start=pred_start_date)
residuals = y['views'].sub(pred)

In [None]:
# Display the predictions held in `pred`
pred

In [None]:
# Attach the predictions to `y`, aligned on the index.
# `assign` returns a new frame, which avoids the SettingWithCopyWarning
# raised when a column is written into a slice of `df`.
y = y.assign(Predicted_ARIMA=pred)
print(pred)

In [24]:
# Move 'time' from the index back into a column, then collect the
# timestamp, the observed views and the prediction into one frame.
y.reset_index(level='time', inplace=True)
df1 = y[['time', 'views', 'Predicted_ARIMA']].rename(columns={'views': 'View'})


In [None]:

!pip install xlsxwriter

In [None]:

# Future dates
# The model was fitted on the same rows that df1 holds, so position len(df1)
# is the first step after the last observed timestamp.
FORECAST_STEPS = 26      # same horizon as before: positions len(df1) .. len(df1)+25 inclusive
FORECAST_FREQ = '5min'   # sampling interval used by the original date_range call
last_observed = pd.Timestamp(df1['time'].iloc[-1])
# Build exactly FORECAST_STEPS timestamps so the index always matches the
# number of predicted values (the former start=''/end='' were placeholders).
index_future_dates = pd.date_range(start=last_observed + pd.Timedelta(FORECAST_FREQ),
                                   periods=FORECAST_STEPS, freq=FORECAST_FREQ)
# `typ='levels'` belonged to the legacy ARIMA API and is not a parameter of
# the current results' predict(), so it is no longer passed.
pred = model.predict(start=len(df1), end=len(df1) + FORECAST_STEPS - 1).rename('ARIMA PREDICTION')
pred.index = index_future_dates
print(pred)

In [None]:
# Write the forecast to result.xlsx. The context manager closes the writer
# (and so saves the file) even if the export raises.
with pd.ExcelWriter('result.xlsx', engine='xlsxwriter') as writer:
    pred.to_excel(writer, sheet_name='Sheet1', index=True)