In [None]:
# Import all the required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
pd.set_option('display.max_columns', None)
import io
from google.colab import files
# Import Statsmodels
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic
from pandas.tseries.offsets import DateOffset

In [None]:
# Upload the data via google.colab's `files.upload()`; the result is kept in
# `uploaded` and looked up by file name when the CSV is read.
uploaded = files.upload()

Saving priority.csv to priority (3).csv


In [None]:
# Pull the uploaded bytes out of `uploaded` and parse them as CSV.
# `.get()` would silently hand None to BytesIO when the key is missing (an empty
# buffer, hence an opaque parser error), so look the file up explicitly instead.
csv_name = 'priority.csv'
if csv_name in uploaded:
    csv_bytes = uploaded[csv_name]
elif len(uploaded) == 1:
    # Exactly one file was uploaded but under a different key; use that file.
    csv_bytes = next(iter(uploaded.values()))
else:
    raise KeyError(f"{csv_name!r} not found in the upload; got {sorted(uploaded)}")
df = pd.read_csv(io.BytesIO(csv_bytes))

In [None]:
# Parse the 'date' column into pandas datetimes (overwrites the column on `df`),
# then display the column dtypes to confirm the conversion.
df['date'] = pd.to_datetime(df['date'])
df.dtypes

date            datetime64[ns]
Critical                 int64
High                     int64
low                      int64
Moderate                 int64
Schedule SLA             int64
dtype: object

In [None]:
# Make the date column the index. `set_index` returns a new frame here, so `df`
# itself keeps its 'date' column; `final_df` is what the later cells work on.
final_df = df.set_index('date')
final_df.head(10)

Unnamed: 0_level_0,Critical,High,low,Moderate,Schedule SLA
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-13,0,4,115,27,0
2018-01-14,0,7,122,39,1
2018-01-15,0,3,256,117,11
2018-01-16,0,5,255,113,4
2018-01-17,0,5,338,110,10
2018-01-18,0,0,283,83,14
2018-01-19,1,3,246,69,11
2018-01-20,1,3,186,30,3
2018-01-21,1,4,86,39,1
2018-01-22,0,3,278,110,9


Check for Stationarity and Make the Time Series Stationary
Since the VAR model requires the time series you want to forecast to be stationary, it is customary to check all the time series in the system for stationarity.

Just to refresh, a stationary time series is one whose characteristics, such as mean and variance, do not change over time.

In [None]:
# To check whether a time series is stationary we run the Augmented Dickey-Fuller (ADF) test, which is a hypothesis test.
# If a column's p-value is greater than the significance level (0.05), the series is treated as NOT stationary; otherwise it is treated as stationary.


def adfuller_test(series, signif=0.05, name='', verbose=False):
    """Run the Augmented Dickey-Fuller test on one series and return its p-value.

    Parameters
    ----------
    series : array-like
        Observations of a single time series.
    signif : float, default 0.05
        Significance level; used only for the verdict in the verbose report.
    name : str, default ''
        Label for the series in the verbose report.
    verbose : bool, default False
        When True, print the test statistic, p-value, number of lags, number
        of observations and a stationarity verdict.

    Returns
    -------
    float
        The ADF p-value rounded to 4 decimal places.
    """
    # The first four entries of the result are read as
    # (test statistic, p-value, lags used, observations used).
    test_stat, raw_p, used_lags, n_obs = adfuller(series, autolag='AIC')[:4]
    p_value = round(raw_p, 4)
    if verbose:
        # A p-value above the significance level means non-stationary.
        verdict = 'non-stationary' if p_value > signif else 'stationary'
        print(f'ADF test on "{name}": test_statistic={round(test_stat, 4)}, '
              f'pvalue={p_value}, n_lags={used_lags}, n_obs={n_obs} '
              f'=> {verdict} at signif={signif}')
    return p_value

In [None]:
# If any series is not stationary, difference the data and test again, repeating until every series is stationary.
# The function below does this and returns the (possibly differenced) frame.

def p_values(df, signif=0.05):
  """Difference `df` until every column passes the ADF stationarity test.

  Each pass runs `adfuller_test` on every column. If any p-value is greater
  than `signif`, the whole frame is replaced by `df.diff().dropna()` and
  tested again.

  Parameters
  ----------
  df : pandas.DataFrame
      One time series per column.
  signif : float, default 0.05
      Significance level a column's p-value must not exceed.

  Returns
  -------
  pandas.DataFrame
      `df` itself if already stationary, otherwise the differenced frame.
  """
  while True:
    # Pass the column label by keyword: positionally it would land in `signif`.
    column_p_values = [
        adfuller_test(df[col], signif=signif, name=col) for col in df.columns
    ]
    if not any(p > signif for p in column_p_values):
      return df
    # At least one column failed the test: difference every column once more.
    df = df.diff().dropna()


p_values(final_df)

Unnamed: 0_level_0,Critical,High,low,Moderate,Schedule SLA
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-14,0.0,3.0,7.0,12.0,1.0
2018-01-15,0.0,-4.0,134.0,78.0,10.0
2018-01-16,0.0,2.0,-1.0,-4.0,-7.0
2018-01-17,0.0,0.0,83.0,-3.0,6.0
2018-01-18,0.0,-5.0,-55.0,-27.0,4.0
...,...,...,...,...,...
2018-11-23,0.0,2.0,137.0,63.0,4.0
2018-11-24,0.0,-2.0,-73.0,-48.0,-5.0
2018-11-25,0.0,1.0,-114.0,-22.0,-1.0
2018-11-27,0.0,-5.0,-2.0,-6.0,0.0


In [None]:
def list_aic(count):
  """Return the VAR lag order with the lowest AIC (ties broken by BIC).

  The model is built on `p_values(final_df)` and fitted once for every lag
  in `np.arange(1, count)`, i.e. lags 1 .. count-1.
  """
  stationary = p_values(final_df)
  var_model = VAR(stationary)
  score_rows = []
  for lag in np.arange(1, count):
    fit = var_model.fit(lag)
    score_rows.append(
        {'count': lag, 'aic': fit.aic, 'bic': fit.bic, 'fpe': fit.fpe, 'hqic': fit.hqic}
    )
  scores = pd.DataFrame(score_rows, columns=['count', 'aic', 'bic', 'fpe', 'hqic'])
  # Best candidate first: ascending AIC, then ascending BIC.
  ranked = scores.sort_values(by=['aic', 'bic']).reset_index(drop=True)
  return ranked.loc[0, 'count']

list_aic(9) 



7

In [None]:
def future_date(df, steps=7):
  """Return the last index value of `df` followed by the next `steps` days.

  Parameters
  ----------
  df : pandas.DataFrame
      Frame whose last index value is the starting date.
  steps : int, default 7
      Number of future daily dates to generate.

  Returns
  -------
  list
      `steps + 1` dates. Element 0 is `df.index[-1]` itself (offset 0 days),
      so callers slice with `[1:]` to keep only the future dates.
  """
  last_seen = df.index[-1]
  return [last_seen + DateOffset(days=offset) for offset in range(steps + 1)]

# Element 0 is the last observed date itself (offset of 0 days), so slice it
# off to show only the future dates.
future_date(final_df)[1:]

[Timestamp('2018-11-29 00:00:00'),
 Timestamp('2018-11-30 00:00:00'),
 Timestamp('2018-12-01 00:00:00'),
 Timestamp('2018-12-02 00:00:00'),
 Timestamp('2018-12-03 00:00:00'),
 Timestamp('2018-12-04 00:00:00'),
 Timestamp('2018-12-05 00:00:00')]

In [None]:
def forecast_at_1d():
  """Forecast 7 steps ahead on the differenced scale.

  Fits a VAR on `p_values(final_df)` using the lag order returned by
  `list_aic(9)`, then forecasts from the last `k_ar` rows of that frame.
  The result is indexed by `future_date(final_df)[1:]` and its columns are
  the `final_df` column names with a '_1d' suffix.
  """
  stationary = p_values(final_df)
  fitted = VAR(stationary).fit(list_aic(9))

  # The forecast is seeded with as many trailing rows as the fitted lag order.
  seed_rows = stationary.to_numpy()[-fitted.k_ar:]
  predicted = fitted.forecast(y=seed_rows, steps=7)

  horizon = future_date(final_df)[1:]
  labels = [f'{col}_1d' for col in final_df.columns]
  return pd.DataFrame(predicted, index=horizon, columns=labels)


forecast_at_1d()




Unnamed: 0,Critical_1d,High_1d,low_1d,Moderate_1d,Schedule SLA_1d
2018-11-29,-0.028297,2.95155,62.367056,12.338334,1.774094
2018-11-30,-0.059157,1.477447,-51.156532,-19.356412,-3.586661
2018-12-01,0.109174,-1.585122,105.568283,47.486559,6.863094
2018-12-02,-0.135909,0.210426,-54.165835,-28.440754,-2.396148
2018-12-03,-0.117382,0.536573,-82.466936,-28.614637,-0.835129
2018-12-04,0.160487,-0.95279,17.177639,3.150482,-5.314238
2018-12-05,-0.070449,-0.291012,23.642516,14.114442,4.131463


In [None]:
def forecasted(df):
    """Revert the differencing to bring the forecast back to the original scale.

    `p_values` may difference the data zero, one or several times, so the
    inversion is repeated once per differencing pass instead of assuming
    exactly one. With a single pass the result is
    `df[col].iloc[-1] + forecast.cumsum()`.

    Parameters
    ----------
    df : pandas.DataFrame
        The undifferenced series, one per column.
        NOTE(review): `forecast_at_1d()` builds its forecast from the
        module-level `final_df`, so `df` is expected to be that same frame.

    Returns
    -------
    pandas.DataFrame
        The frame from `forecast_at_1d()` with one extra '<col>_forecast'
        column per column of `df`.
    """
    df_fc = forecast_at_1d()

    # Rebuild the chain df, diff(df), diff(diff(df)), ... with the same
    # `diff().dropna()` step `p_values` uses, stopping once the length matches
    # the frame `p_values` returns. levels[k] is then the k-th difference.
    stationary_len = len(p_values(df))
    levels = [df]
    while len(levels[-1]) > stationary_len:
        levels.append(levels[-1].diff().dropna())

    for col in df.columns:
        restored = df_fc[str(col) + '_1d']
        # Integrate from the highest-order difference down to the raw series:
        # last observed value at that level plus the running sum above it.
        for level in reversed(levels[:-1]):
            restored = level[col].iloc[-1] + restored.cumsum()
        df_fc[str(col) + '_forecast'] = restored
    return df_fc

In [None]:
def forecast_feature(df):
  """Return the names of the columns of `forecasted(df)` that end in 'forecast'.

  Uses the frame passed in as `df` rather than the module-level `final_df`,
  so the parameter is no longer ignored.
  """
  return [col for col in forecasted(df).columns if col.endswith('forecast')]

In [None]:
def VAR_forecast(df):
  """Return the original-scale forecast as non-negative integers.

  Keeps only the '*forecast' columns of `forecasted(df)`, rounds each value
  up with `np.ceil`, takes the absolute value and casts to int.
  """
  restored = forecasted(df)
  # Derive the column list here rather than reading a global `forecast_col`.
  forecast_col = [col for col in restored.columns if col.endswith('forecast')]
  # NOTE(review): ceil followed by abs turns a negative forecast into a
  # positive count (e.g. -3.2 -> 3) - confirm that is intended rather than
  # clipping at zero.
  forecast_result = restored.loc[:, forecast_col].apply(np.ceil).abs().astype(int)
  return forecast_result


In [None]:
# Final 7-day forecast on the original scale, as whole numbers.
VAR_forecast(final_df)



Unnamed: 0,Critical_forecast,High_forecast,low_forecast,Moderate_forecast,Schedule SLA_forecast
2018-11-29,0,3,63,14,2
2018-11-30,0,5,12,6,1
2018-12-01,1,3,117,42,6
2018-12-02,0,4,63,14,3
2018-12-03,0,4,19,15,2
2018-12-04,0,3,2,12,3
2018-12-05,0,3,21,2,1


In [None]:
# NOTE(review): the output saved under this cell has columns A, B, C, D, A_diff,
# which no visible cell produces for `df` - looks like leftover scratch state;
# confirm and remove.
df

Unnamed: 0,A,B,C,D,A_diff
0,5,11,4,5,
1,3,2,3,4,2.0
2,6,4,8,2,5.0
3,4,3,5,8,3.0


In [None]:
# NOTE(review): `df_diff` is not assigned in any visible cell, so this
# presumably raises NameError on a fresh kernel - leftover scratch; confirm and
# remove.
df_diff

Unnamed: 0,A,B,C,D
0,,,,
1,-2.0,-9.0,-1.0,-1.0
2,3.0,2.0,5.0,-2.0
3,-2.0,-1.0,-3.0,6.0


In [None]:
def list_aic(df,count=None):
  """Return the VAR lag order with the lowest AIC (ties broken by BIC).

  Parameters
  ----------
  df : pandas.DataFrame
      Stationary series to fit the VAR on. For compatibility with the earlier
      one-argument definition `list_aic(count)`, a lone first argument is
      treated as `count` and the data defaults to `p_values(final_df)`.
  count : int
      Exclusive upper bound on the lag: lags 1 .. count-1 are fitted.

  Returns
  -------
  The lag value from the best-scoring row.

  Raises
  ------
  ValueError
      If `count` is smaller than 2, which leaves no lag to fit.
  """
  if count is None:
    # Legacy call shape `list_aic(9)`: this definition shadows the earlier
    # one, and `forecast_at_1d` still calls it with a single argument.
    df, count = p_values(final_df), df
  if count < 2:
    raise ValueError(f'count must be at least 2 to try any lag, got {count}')
  scores=[]
  model = VAR(df)
  for i in np.arange(1,count):
    result = model.fit(i)
    scores.append([i,result.aic,result.bic,result.fpe,result.hqic])
  aic_df= pd.DataFrame(scores,columns=['count','aic','bic','fpe','hqic'])
  # Best candidate first: ascending AIC, then ascending BIC.
  aic_df = aic_df.sort_values(by=['aic','bic']).reset_index(drop=True)
  lag = aic_df['count'][0]
  return lag

# `df_differenced` only exists as a local inside `forecast_at_1d`, so build the
# differenced frame here instead of relying on leftover kernel state.
list_aic(p_values(final_df),9)