In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot

from sklearn.metrics import mean_squared_error
from plotly.subplots import make_subplots

from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA

In [None]:
# Load the raw sales extract. The "Year" and "Month" columns are combined
# by pandas into one parsed column named "date".
# NOTE(review): combining columns through `parse_dates` is deprecated in
# recent pandas releases — confirm against the pinned pandas version.
data = pd.read_csv(
    filepath_or_buffer='sales_data.csv',
    parse_dates={"date": ["Year", "Month"]},
)

In [None]:
# Keep only rows sold through the ECOM channel in Slovakia whose
# product-group description contains the character "4".
FID2C0_data = data.loc[
    (data['Channel'] == 'ECOM')
    & (data['Country'] == 'Slovakia')
    & (data['Product Group Desc.'].str.contains("4"))
]

In [None]:
# Keep only the columns at positions 0 and 5 and rename the quantity column
# to 'value'.
# NOTE(review): position 0 is presumably 'date' and position 5 presumably
# 'Sum of Sold QTY' — confirm against the CSV layout; selecting by column
# name would be more robust than selecting by position.
# rename() is chained (no inplace=True) so a fresh frame is produced instead
# of mutating a slice of the filtered data in place, which is what provokes
# pandas' chained-assignment warning.
FID2C0 = (
    FID2C0_data
    .iloc[:, [0, 5]]
    .rename(columns={'Sum of Sold QTY': 'value'})
)

In [None]:
# Build a new frame indexed by the 'date' column; FID2C0 itself is left untouched.
FID2C0_sales = FID2C0.set_index('date')

In [None]:
# Aggregate the series into quarterly totals.
# NOTE(review): despite the "_mon" suffix, this frame holds quarterly sums.
FID2C0_mon = FID2C0_sales.resample(rule='Q').sum()
# Show (rows, columns) of the resampled frame as the cell output.
FID2C0_mon.shape

In [None]:
# Decompose the quarterly series into trend / seasonal / residual components.
# `period=4` (four observations per seasonal cycle) replaces the `freq`
# keyword, which statsmodels deprecated in favour of `period` and no longer
# accepts in later releases.
result = seasonal_decompose(FID2C0_mon.value, period=4)
# The trailing semicolon suppresses the returned Figure's repr so the
# notebook does not render the same chart twice.
result.plot();

In [None]:
# evaluate an ARIMA model for a given order (p,d,q)
def evaluate_arima_model(X, arima_order):
    """Walk-forward validate a single ARIMA configuration.

    The first 70% of ``X`` seeds the fitting history. For every remaining
    observation an ARIMA model of order ``arima_order`` is refit on the
    history, its one-step forecast is recorded, and the true observation is
    then appended to the history before the next refit.

    Parameters
    ----------
    X : sequence
        Observations in time order; must support ``len``, slicing and
        positional indexing.
    arima_order : tuple
        The ``(p, d, q)`` order handed to ``ARIMA``.

    Returns
    -------
    The value of ``mean_squared_error`` computed between the held-out
    observations and the collected one-step forecasts.
    """
    split_at = int(len(X) * 0.7)
    train = X[0:split_at]
    test = X[split_at:]
    # The rolling history starts out as the training observations.
    history = list(train)
    predictions = []
    for step in range(len(test)):
        fitted = ARIMA(history, order=arima_order).fit()
        # forecast() with no arguments is indexed at [0] for the next-step value.
        predictions.append(fitted.forecast()[0])
        # Reveal the true observation so the next refit can use it.
        history.append(test[step])
    return mean_squared_error(test, predictions)

In [None]:
import warnings
# Install a blanket "ignore" filter so every warning category is silenced
# for the rest of the session.
warnings.simplefilter("ignore")

In [None]:
# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values):
    """Grid-search ARIMA orders and print the one with the lowest MSE.

    Every ``(p, d, q)`` combination drawn from the three collections is
    scored with ``evaluate_arima_model``. Each successfully scored order is
    printed as it completes, followed by a final line naming the best one.

    Parameters
    ----------
    dataset : array-like
        Observations; must provide ``astype`` (it is converted to float32
        before scoring).
    p_values, d_values, q_values : iterable
        Candidate values for the three components of the ARIMA order.
        ``d_values`` and ``q_values`` are iterated repeatedly, so they must
        be re-iterable (e.g. a list or ``range``).

    Returns
    -------
    None
        Results are reported via ``print`` only. If no configuration could
        be scored, the final line reports ``None`` with an MSE of ``inf``.
    """
    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                try:
                    mse = evaluate_arima_model(dataset, order)
                    if mse < best_score:
                        best_score, best_cfg = mse, order
                    print('ARIMA%s MSE=%.3f' % (order,mse))
                except Exception:
                    # Best-effort search: an order that cannot be scored is
                    # skipped silently. Catching Exception rather than using
                    # a bare except lets KeyboardInterrupt / SystemExit
                    # propagate, so a long search can still be interrupted.
                    continue
    print('Best ARIMA%s MSE=%.3f' % (best_cfg, best_score))

In [None]:
# Candidate values for each component of the ARIMA (p, d, q) order.
p_values = [0, 1, 2, 4, 6, 8, 10]
d_values = range(3)
q_values = range(3)
# Silence all warnings while the candidate models are fitted.
warnings.simplefilter("ignore")
# Score every combination on the quarterly values and print the results.
evaluate_models(
    dataset=FID2C0_mon.values,
    p_values=p_values,
    d_values=d_values,
    q_values=q_values,
)

In [None]:
import numpy
# Fit the final ARIMA model on the full quarterly 'value' series.
# NOTE(review): order (0, 0, 2) is presumably the best configuration printed
# by the grid search — confirm, since it is hard-coded here.
model = ARIMA(endog=FID2C0_mon['value'], order=(0, 0, 2))
model_fit = model.fit()

In [None]:
# Forecast 8 steps beyond the end of the fitted series and print the
# values rounded to whole numbers.
forecast = model_fit.forecast(8)
print(forecast.round())

In [None]:
# Export the forecast to a legacy Excel (.xls) workbook
import xlwt
from tempfile import TemporaryFile
book = xlwt.Workbook()
sheet1 = book.add_sheet('sheet1')

# One forecast value (rounded to 2 decimals) per row, written to column index 1.
for i, e in enumerate(forecast.round(2)):
    sheet1.write(i, 1, e)

name = "random.xls"
book.save(name)
# NOTE(review): this second save into a throwaway temporary file looks like a
# leftover from an xlwt example and is probably unnecessary — confirm before
# removing. The `with` block closes the temp file handle deterministically
# instead of leaving it open until garbage collection.
with TemporaryFile() as scratch:
    book.save(scratch)