# **Model Forecast - Comparison and Improvement**

In [None]:
#Necessary Pip Installs
# pmdarima supplies auto_arima and prophet supplies the Prophet model; both are
# imported in the imports cell of this notebook.
# NOTE(review): no versions are pinned here, so a fresh environment may pull
# different releases from the ones this notebook was written against.
!pip install pmdarima
!pip install prophet

In [None]:
#Imports
import pandas as pd
import numpy as np
import math
%matplotlib inline

# Load specific forecasting tools
# from statsmodels.tsa.arima_model import ARMA,ARMAResults,ARIMA,ARIMAResults
import statsmodels.api as sm
from matplotlib import pyplot
#ARIMA
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf # for determining (p,q) orders
from statsmodels.tsa.statespace.tools import diff
from pmdarima import auto_arima # for determining ARIMA orders  
from statsmodels.tsa.arima.model import ARIMA
#Prophet
from prophet import Prophet
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics
from prophet.plot import plot_cross_validation_metric
from prophet.plot import plot_plotly, plot_components_plotly
#Metrics
from sklearn.metrics import mean_squared_error
from statsmodels.tools.eval_measures import rmse
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error

from statsmodels.tsa.stattools import adfuller

In [None]:
#Import the Dataset
# The workbook is read from a hard-coded absolute path (presumably a mounted
# Google Drive folder in Colab - adjust when running elsewhere).
df = pd.read_excel('/content/drive/MyDrive/Github/m_tech/Datasets/input.xlsx')
# Drop every row that contains a missing value; the raw frame is overwritten.
df = df.dropna()
# arima_pred_df, arima_metrics_df = arima_model(df, 'base')
# NOTE(review): arima_model is defined in a later cell, so on a fresh kernel
# that cell has to be executed first or this call raises NameError.
auto_arima_summary = arima_model(df, 'base')
print(auto_arima_summary)

In [None]:
def arima_model(arima_df,model_type):
  """
  Difference the sales series until it passes the ADF stationarity check,
  then return the auto_arima summary for the resulting series.

  arima_df   : raw input frame; it is run through arima_preprocess and the
               'SalesVolume' column of the result is the series being modelled.
  model_type : label supplied by the caller; it is currently not used here.

  Returns whatever auto_arima(..., seasonal=False).summary() produces.
  """
  sales = arima_preprocess(arima_df)['SalesVolume']
  # Work on a standalone Series that is rebound on every pass. Writing the
  # differenced column back into a frame obtained from .iloc slicing can
  # trigger pandas' SettingWithCopyWarning from the second pass onwards.
  while not adf_test(sales):
    # statsmodels' diff() hands back the first difference without the leading
    # element, so the series gets one observation shorter per pass.
    sales = diff(sales,k_diff=1)
  return auto_arima(sales,seasonal=False).summary()
  

In [None]:
#Preprocessing before feeding into the ARIMA models
def arima_preprocess(input_df):
  """
  Aggregate the input frame to one row per date on a month-start index.

  input_df : DataFrame with a 'Date' column that pd.DatetimeIndex can parse.
             The frame passed in is left unmodified.

  Returns a new DataFrame indexed by 'Date', with the remaining columns summed
  per date and the index expanded to month-start ('MS') frequency; months
  without any rows show up as NaN.
  """
  # assign() works on a copy, so the caller's 'Date' column is not overwritten
  dated = input_df.assign(Date=pd.DatetimeIndex(input_df['Date']))
  totals = dated.groupby('Date').sum()
  # Setting a monthly frequency anchored on the first day of the month
  return totals.asfreq('MS')

In [None]:
#Stationarity Check
def adf_test(series,title=''):
    """
    Run an Augmented Dickey-Fuller test on a time series and print the report.

    series : the time series to test; missing values are dropped first.
    title  : optional text appended to the report heading.

    Returns True when the p-value is at most 0.05 (reported as stationary),
    otherwise False.
    """
    print(f'Augmented Dickey-Fuller Test: {title}')
    # Differenced data carries missing values, hence the dropna()
    adf_output = adfuller(series.dropna(),autolag='AIC')

    # First four entries of the result, followed by the critical values
    report = dict(zip(
        ['ADF test statistic','p-value','# lags used','# observations'],
        adf_output[0:4],
    ))
    report.update(
        (f'critical value ({level})', threshold)
        for level, threshold in adf_output[4].items()
    )
    # .to_string() keeps the "dtype: float64" line out of the printout
    print(pd.Series(report).to_string())

    stationary = bool(adf_output[1] <= 0.05)
    if stationary:
        conclusion = (
            "Strong evidence against the null hypothesis",
            "Reject the null hypothesis",
            "Data has no unit root and is stationary",
        )
    else:
        conclusion = (
            "Weak evidence against the null hypothesis",
            "Fail to reject the null hypothesis",
            "Data has a unit root and is non-stationary",
        )
    for sentence in conclusion:
        print(sentence)
    return stationary

In [None]:
#Metrics Evaluation
def metrics(model,test,predictions):
  """
  Print the MAPE, MSE, RMSE and MAE of a set of predictions.

  model       : label that prefixes every printed line.
  test        : hold-out frame; its 'SalesVolume' column supplies the actual values.
  predictions : predicted values for the same rows as `test`.

  Returns None; the scores are only printed.
  """
  # Score all four metrics against the same column. MAPE used to read
  # test['d1'] while the other three read test['SalesVolume'], so the printed
  # numbers did not describe the same comparison.
  actual = test['SalesVolume']
  print(f'{model} MAPE Error:',  mean_absolute_percentage_error(actual, predictions))
  print(f'{model} MSE Error: ',mean_squared_error(actual, predictions))
  print(f'{model} RMSE Error: ',rmse(actual, predictions))
  print(f'{model} MAE Error: ',mean_absolute_error(actual, predictions))

In [None]:
def auto_arima_train(data=None, train_size=309, order=(4, 0, 2)):
  """
  Fit a SARIMAX model on the training rows and forecast the hold-out rows.

  data       : frame with a 'SalesVolume' column. When omitted, the
               notebook-level variable `arima_df` is used, as before.
  train_size : number of leading rows used for fitting; everything after
               them is the hold-out (default 309, as before).
  order      : (p, d, q) order handed to SARIMAX (default (4, 0, 2), as before).

  Returns (test, predictions): the hold-out frame and the predicted series
  covering the same positions.
  """
  frame = arima_df if data is None else data
  # Everything after the first `train_size` rows is held out for testing
  train = frame.iloc[:train_size]
  test = frame.iloc[train_size:]
  # Fit on the training rows only. Fitting on the whole series lets the model
  # see the very hold-out values it is scored on afterwards.
  model=sm.tsa.statespace.SARIMAX(train['SalesVolume'],order=order)
  results=model.fit()
  start=len(train)
  end=len(train)+len(test)-1
  predictions = results.predict(start=start, end=end).rename(f'SARIMAX{tuple(order)} Predictions')
  # Hand both objects back so the scoring and plotting cells can use them
  return test, predictions

In [None]:
# Print the error scores of the SARIMAX predictions against the hold-out data.
# NOTE(review): in the visible cells `test` and `predictions_SARIMAX` are only
# assigned inside auto_arima_train(), so they must already exist as
# notebook-level variables for this cell to run.
metrics('SARIMAX(4, 0, 2)',test,predictions_SARIMAX)

In [None]:
# Plot the hold-out values together with the SARIMAX predictions
title = 'Sales volume'
ylabel='Volume'
xlabel='' # we don't really need a label here

# NOTE(review): this plots test['d1'], whereas metrics() scores MSE/RMSE/MAE
# against test['SalesVolume'], and no visible cell creates a 'd1' column -
# confirm which column is intended.
ax = test['d1'].plot(legend=True,figsize=(12,6),title=title)
# Drawn without an explicit ax; relies on matplotlib's current axes being the one created above
predictions_SARIMAX.plot(legend=True)
ax.autoscale(axis='x',tight=True)
# The trailing semicolon suppresses the return value in the cell output
ax.set(xlabel=xlabel, ylabel=ylabel);

In [None]:
# Prophet training frame: the first 311 rows, with the date and target
# columns renamed to 'ds' and 'y'
input_df = df.iloc[:311].rename(columns={'Date':'ds', 'SalesVolume':'y'})
# Show the last five training rows
input_df.tail()

In [None]:
#Train and fit the model
def model_train_predict(input_df,params,periods=9,freq='MS'):
  """
  Fit a Prophet model and forecast beyond the end of the training data.

  input_df : training frame handed to Prophet.fit (the notebook passes a
             frame with 'ds' and 'y' columns).
  params   : extra keyword arguments for the Prophet constructor. Keys given
             here take precedence over the fixed settings below.
  periods  : number of future steps requested from make_future_dataframe
             (default 9, as before).
  freq     : pandas frequency string for those steps (default 'MS', as before).

  Returns (m, forecast): the fitted model and the frame produced by m.predict.
  """
  settings = {
      'seasonality_mode': 'additive',
      'yearly_seasonality': True,
      'daily_seasonality': False,
      'weekly_seasonality': False,
  }
  # Merge rather than pass both sets as keywords: a params dict repeating one
  # of the fixed keys previously failed with a duplicate-keyword TypeError.
  settings.update(params)
  m = Prophet(**settings)
  m.fit(input_df)
  # Forecasting into the future
  future = m.make_future_dataframe(periods=periods,freq=freq)
  forecast = m.predict(future)
  return m, forecast

In [None]:
# Keyword arguments passed to the Prophet constructor for the baseline run
# NOTE(review): these values look like Prophet's documented defaults - confirm
# against the installed version.
default_params = {  
    'changepoint_prior_scale': 0.05,
    'changepoint_range' : 0.8,
    'seasonality_prior_scale': 10.0,
}

In [None]:
# Fit Prophet with the baseline settings; keep both the fitted model and its forecast frame
model , forecast = model_train_predict(input_df,default_params)

In [None]:
def evaluation_metrics(forecast, start=310):
  """
  Print MAE, MAPE and RMSE of the Prophet forecast against the actual sales.

  forecast : frame returned by Prophet.predict; its 'ds' and 'yhat' columns are used.
  start    : first row position that is scored (default 310, as before).

  The actual values come from the notebook-level `df['SalesVolume']` and are
  matched to the forecast rows by index label. Returns None.
  """
  # NOTE(review): the Prophet training cell slices df[:311], so with the
  # default start the row at position 310 is both trained on and scored - confirm.
  # .copy() yields an independent frame, so adding a column below does not
  # write into a slice of `forecast`.
  metric_df = forecast[['ds','yhat']][start:].copy()
  metric_df['Actual'] = df['SalesVolume'][start:]
  actual = metric_df['Actual']
  predicted = metric_df['yhat']
  # scikit-learn's metric functions take (y_true, y_pred). The order matters
  # for MAPE, which divides by the true values; it was swapped before.
  mae = mean_absolute_error(actual, predicted)
  mape = mean_absolute_percentage_error(actual, predicted)
  # Named root_mse so the `rmse` helper imported from statsmodels is not shadowed
  root_mse = math.sqrt(mean_squared_error(actual, predicted))
  print("Mean absolute error:", mae)
  print("Mean absolute percentage error:", mape)
  print("Root mean squared error:", root_mse)

In [None]:
# Print the error scores of the baseline Prophet forecast
evaluation_metrics(forecast)

In [None]:
# Display the SARIMAX prediction series
# NOTE(review): in the visible cells this name is only assigned inside
# auto_arima_train(), so it must already exist at notebook level.
predictions_SARIMAX

In [None]:
# Forecast dates and predicted values from row position 310 onwards
metric_df = forecast[['ds','yhat']][310:]

In [None]:
# Display the forecast slice built in the previous cell
metric_df

In [None]:
# Rows of the cleaned dataset from position 310 onwards (presumably shown to
# compare against the forecast slice above)
df[310:]