In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import statsmodels.api as sm
import numpy as np
import scipy.stats as scs
from datetime import datetime
from statsmodels.tsa.arima_model import ARIMA
from dateutil.relativedelta import relativedelta
import warnings
import itertools
plt.style.use('fivethirtyeight')

# Load the raw sales table; only its 'Month' and 'Sales' columns are used below.
df = pd.read_csv('MonthWiseSalesSum2.csv')
dates, sales = df['Month'], df['Sales']

# Forecast settings read by the later forecasting / plotting code.
months_forecast = 24
forecast_start_date = "2019-08-01"
date_format = "%Y-%m-%d"

# Working frame: float sales keyed by a DatetimeIndex that carries whatever
# frequency pandas infers from the dates (None when nothing can be inferred).
train = pd.DataFrame({'Month': pd.to_datetime(dates), 'Sales': sales.astype("float64")})
train = train.set_index("Month")
train.index = pd.DatetimeIndex(train.index.values, freq=train.index.inferred_freq)
train['Dates'] = train.index

# Differenced views of the series: lag 1 is the previous row, lag 12 is
# twelve rows back. The first `lag` rows of each column are NaN.
train['first_difference'] = train['Sales'].diff(1)
train['seasonal_difference'] = train['Sales'].diff(12)
train['seasonal_first_difference'] = train['first_difference'].diff(12)
# NOTE(review): this is computed exactly like 'seasonal_difference' above, so
# the two columns are identical. Kept as-is; confirm whether a true second
# seasonal difference was intended.
train['seasonal_second_difference'] = train['Sales'].diff(12)

# Sales-only frame on the same date index (with inferred frequency); this is
# the input handed to the SARIMAX grid search.
train2 = pd.DataFrame({'Sales': train["Sales"].astype("float64")})
train2.index = pd.DatetimeIndex(train2.index.values, freq=train2.index.inferred_freq)

# Candidate non-seasonal orders: p, d and q each take the values 0 and 1
# (range(0, 2) excludes 2), which gives 8 (p, d, q) triplets.
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))

# Seasonal counterparts (P, D, Q, s) with a fixed season length of 12 periods.
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in itertools.product(p, d, q)]

# Parallel lists: AIC[i] is the score of the model fitted with order parm_[i]
# and seasonal order parm_s[i].
AIC = []
parm_ = []
parm_s = []
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(train2, order=param, seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            results = mod.fit()
        except Exception:
            # A combination that cannot be built or fitted is skipped.
            # `Exception` (rather than a bare `except`) lets KeyboardInterrupt
            # and SystemExit still stop a long-running search.
            continue

        # A NaN/inf AIC cannot be ranked: min() over a list containing NaN can
        # return the NaN itself, which would select a degenerate fit.
        if not np.isfinite(results.aic):
            continue

        AIC.append(results.aic)
        parm_.append(param)
        parm_s.append(param_seasonal)

if not AIC:
    # Same exception type min() would raise on an empty list, but with a
    # message that says what actually went wrong.
    raise ValueError("SARIMAX grid search produced no usable model: every "
                     "(order, seasonal_order) candidate failed to fit or had a non-finite AIC")

# Lowest AIC wins; keep its orders for the final model.
pos = AIC.index(min(AIC))
order_param = parm_[pos]
seasonal_param = parm_s[pos]

# Refit the selected specification on the sales series.
# NOTE(review): the grid search fits with enforce_stationarity=False and
# enforce_invertibility=False, while this fit uses the SARIMAX defaults, so
# the chosen orders were ranked under different constraints — confirm intent.
mod = sm.tsa.statespace.SARIMAX(train.Sales, trend='n', order=order_param, seasonal_order=seasonal_param)
results = mod.fit()

# Build the forecast horizon: `months_forecast` timestamps one calendar month
# apart, starting at `forecast_start_date`.
# `pd.datetime` was removed in pandas 2.0; the `datetime` class imported at
# the top of the notebook is the same object it used to alias.
# NOTE(review): the start date is configured by hand — verify it is the month
# directly after the last observation in the data.
start = datetime.strptime(forecast_start_date, date_format)
date_list = [start + relativedelta(months=x) for x in range(months_forecast)]

# Append empty rows for the horizon so history and forecast share one frame.
future = pd.DataFrame(index=date_list, columns=train.columns)
train = pd.concat([train, future])

# Out-of-sample prediction starting at the first step after the fitted data.
# `end` is inclusive, so this asks for one step more than `months_forecast`;
# the column assignment aligns on the index, so a prediction whose label is
# not in `train` is not stored.
train['forecast'] = results.predict(start=len(df), end=len(train), dynamic=True)

ax = train[['Sales', 'forecast']].plot(figsize=(12, 5), kind='line')
ax.set_ylabel("Sales")
ax.set_xlabel("Years")
ax.set_title("Forecasted Sales For The Next "+str(months_forecast)+" months")

# Save BEFORE showing: once plt.show() has rendered the figure, pyplot no
# longer has it as the current figure and a later plt.savefig() writes a
# blank image.
ax.figure.savefig("sarimax_model_predictions.png")
plt.show()

# Assemble the export table from the extended frame: one row per month with
# the observed sales and the model forecast side by side.
dates = train.index
sales = train['Sales']
future = train['forecast']

out_df = (
    pd.DataFrame({'Month': dates, 'Sales': sales, 'Forecast': future})
    .astype({'Sales': 'float64', 'Forecast': 'float64'})
)
out_df["Month"] = pd.to_datetime(out_df["Month"])
# Replace the date index with a plain 0..n-1 index; the dates live in 'Month'.
out_df = out_df.reset_index(drop=True)

# NOTE(review): hard-coded absolute path on one user's machine — this cell
# only works where that directory exists; consider a configurable output path.
# to_csv returns None when given a path, so `export_csv` is always None.
export_csv = out_df.to_csv(
    r'C:\Users\samva\Desktop\export_dataframe_of_sales_forecast2.csv',
    index=None,
    header=True,
)

FileNotFoundError: [Errno 2] File b'MonthWiseSalesSum2.csv' does not exist: b'MonthWiseSalesSum2.csv'

In [None]:
# Print the text summary of `results`, the final SARIMAX fit produced in the
# forecasting cell (not one of the grid-search candidates).
print(results.summary())

In [None]:
# Show the last 48 rows of the export table; with months_forecast = 24 that is
# the 24 appended forecast rows plus the 24 rows immediately before them.
out_df.tail(48)