In [1]:

pip install matplotlib

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
import itertools
warnings.filterwarnings("ignore")
plt.style.use('fivethirtyeight')
import statsmodels.api as sm




In [None]:
# Load the raw order data from SuperStore.csv (path is relative to the
# notebook's working directory) and preview the first rows.
df = pd.read_csv("SuperStore.csv")
df.head()


In [None]:
# Size of the raw frame as (rows, columns).
df.shape

In [None]:
# Per-column dtypes and non-null counts of the raw frame.
df.info()

In [None]:
# Summary statistics of the raw frame's numeric columns.
df.describe()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Keep only the rows whose Category is 'Furniture'.
# .copy() makes `furniture` an independent frame, so later column drops and
# assignments act on it directly instead of on a slice of `df`.
furniture = df.loc[df['Category'] == 'Furniture'].copy()

# Preview a few rows only; printing the whole frame floods the cell output.
furniture.head()

In [None]:
# Size of the furniture subset as (rows, columns).
furniture.shape

In [None]:
# Distinct values of the Category column in the raw frame.
df['Category'].unique()

In [None]:
# 'Order Date' is still a string column at this point (read_csv was called
# without date parsing), so min()/max() on it would compare text
# lexicographically. Parse to datetimes first to get the real date range.
order_dates = pd.to_datetime(furniture['Order Date'])
order_dates.min(), order_dates.max()


# Data preprocessing (removing columns that we don't need)



In [None]:
# Columns that play no part in the sales time series.
cols = ['Row ID', 'Order ID', 'Ship Date', 'Ship Mode', 'Customer ID', 'Customer Name', 'Segment', 'Country', 'City', 'State', 'Postal Code', 'Region', 'Product ID', 'Category', 'Sub-Category', 'Product Name', 'Quantity', 'Discount', 'Profit']

# Build a new frame instead of dropping in place:
#  * errors='ignore' keeps the cell re-runnable (an in-place drop raises
#    KeyError the second time because the columns are already gone);
#  * 'Order Date' is parsed to datetimes before sorting, so the sort is
#    chronological rather than a lexicographic sort of date strings.
furniture = (
    furniture
    .drop(columns=cols, errors='ignore')
    .assign(**{'Order Date': lambda frame: pd.to_datetime(frame['Order Date'])})
    .sort_values('Order Date')
)


In [None]:
# Preview the first ten rows after the column drop and the sort on 'Order Date'.
furniture.head(10)

In [None]:
# Collapse to one row per 'Order Date' holding the summed 'Sales' for that date;
# as_index=False keeps 'Order Date' as a regular column.
sales_by_date = furniture.groupby('Order Date', as_index=False)['Sales']
furniture = sales_by_date.sum()


In [None]:
# Preview the per-date sales totals.
furniture.head()

# Indexing with time series data

In [None]:
# Parse 'Order Date' into datetimes and promote it to the index so the frame
# can be resampled by time.
parsed_dates = pd.to_datetime(furniture["Order Date"])
furniture = furniture.assign(**{"Order Date": parsed_dates}).set_index("Order Date")
furniture.index

In [None]:
# Bucket the per-date sales by month ('MS' = month-start labels) and take the
# mean within each month.
sales_by_month = furniture['Sales'].resample('MS')
y = sales_by_month.mean()

# Visualizing furniture sales time series data


In [None]:

# Plot the monthly series with a title and y-label so the figure stands on
# its own; plt.show() also suppresses the Axes repr in the cell output.
ax = y.plot(title='Furniture sales: monthly mean of daily totals')
ax.set_ylabel('Sales')
plt.show()


In [None]:
# Same rcParams object that pylab re-exports, taken from matplotlib directly
# (the pylab namespace is discouraged by matplotlib).
from matplotlib import rcParams

# Default figure size for this and all later figures in the session.
rcParams['figure.figsize'] = 18, 8

# Additive decomposition of the monthly series; plot() draws the components.
decomposition = sm.tsa.seasonal_decompose(y, model='additive')
fig = decomposition.plot()

In [None]:
from statsmodels.tsa.stattools import adfuller



In [None]:
# Augmented Dickey-Fuller test on the monthly series, with the lag length
# selected by AIC.
dftest = adfuller(y, autolag='AIC')

# The first four entries of the result are the headline numbers; entry 4 maps
# each significance level to its critical value.
headline_labels = ['ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations']
adf_report = dict(zip(headline_labels, dftest[0:4]))
adf_report.update(
    {'critical value(%s)' % level: threshold for level, threshold in dftest[4].items()}
)
dfoutput = pd.Series(adf_report)



In [None]:
# Show the labelled ADF results (statistic, p-value, lags, observations,
# critical values).
print(dfoutput)

In [None]:
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
import statsmodels.api as sm

# Cap the requested 40 lags by what the series can support: the ACF needs
# fewer lags than observations, and statsmodels only computes the PACF for
# lags below half the sample size (it raises ValueError otherwise). Long
# series still get the full 40 lags.
acf_lags = min(40, len(y) - 1)
pacf_lags = min(40, len(y) // 2 - 1)

# Two stacked panels on one figure: ACF on top, PACF below.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
plot_acf(y, lags=acf_lags, ax=ax1)
plot_pacf(y, lags=pacf_lags, ax=ax2)
plt.show()

From the autocorrelation plots we can read off the p and q values required for the ARIMA model:
* For parameter p we look at the PACF plot; the value is 1.
* For parameter q we look at the ACF plot; the value is 2, i.e. the first lag at which the autocorrelation drops to zero.


In [None]:
import statsmodels.api as sm


In [None]:

# Seasonal ARIMA specification: non-seasonal (p, d, q) = (3, 0, 2) and
# seasonal (P, D, Q, s) = (1, 1, 1, 12); the stationarity and invertibility
# constraints are switched off.
# NOTE(review): the markdown above reads p = 1 from the PACF, but the order
# here uses p = 3 — confirm which is intended.
sarimax_options = dict(
    order=(3, 0, 2),
    seasonal_order=(1, 1, 1, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
mod = sm.tsa.statespace.SARIMAX(y, **sarimax_options)
results = mod.fit()

# Table 1 of the summary is the parameter/coefficient table.
coefficient_table = results.summary().tables[1]
print(coefficient_table)


In [None]:
# Draw the fitted model's built-in diagnostic plots on a 12x10 inch figure.
results.plot_diagnostics(figsize=(12, 10))
plt.show()

In [None]:
# In-sample predictions from 2017-01-01 onward with dynamic=False, together
# with their confidence intervals.
forecast_start = pd.to_datetime('2017-01-01')
pred = results.get_prediction(start=forecast_start, dynamic=False)
pred_ci = pred.conf_int()
band_low, band_high = pred_ci.iloc[:, 0], pred_ci.iloc[:, 1]

# Observed series from 2014 onward, with the predictions overlaid.
observed = y['2014':]
ax = observed.plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7, figsize=(14, 7))

# Shade the region between the two confidence-interval columns.
ax.fill_between(pred_ci.index, band_low, band_high, color='k', alpha=.2)

ax.set(xlabel='Date', ylabel='Furniture Sales')
plt.legend()

In [None]:
# Predictions and the matching observed values from 2017-01-01 onward.
y_forecasted = pred.predicted_mean
y_truth = y['2017-01-01':]

# Mean squared error: average of the squared differences between the two
# (pandas aligns them on their date index).
squared_errors = (y_forecasted - y_truth) ** 2
mse = squared_errors.mean()
print('The Mean Squared Error of our forecasts is {}'.format(round(mse, 2)))

In [None]:
# Root mean squared error: square root of the MSE computed earlier.
rmse = np.sqrt(mse)
print('The Root Mean Squared Error of our forecasts is {}'.format(round(rmse, 2)))

In [None]:
# Out-of-sample forecast for the next 100 periods, with confidence intervals.
pred_uc = results.get_forecast(steps=100)
pred_ci = pred_uc.conf_int()
band_low, band_high = pred_ci.iloc[:, 0], pred_ci.iloc[:, 1]

# Full observed history with the forecast drawn on the same axes.
ax = y.plot(label='observed', figsize=(14, 7))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')

# Shade the region between the two confidence-interval columns.
ax.fill_between(pred_ci.index, band_low, band_high, color='k', alpha=.25)
ax.set(xlabel='Date', ylabel='Furniture Sales')

plt.legend()
plt.show()

# fb prophet

In [None]:
# The library was published as `fbprophet` before v1.0 and as `prophet` since;
# try the current name first and fall back to the legacy one.
try:
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet

# Prophet.fit() needs the dates in a 'ds' column and the values in a 'y' column.
# 'Order Date' was moved into the index earlier in the notebook, so it must be
# restored as a column before renaming — renaming columns alone would leave the
# frame without any 'ds' column and fit() would fail.
# Selecting exactly ['ds', 'y'] at the end keeps this cell safe to re-run.
furniture = (
    furniture
    .reset_index()
    .rename(columns={'Order Date': 'ds', 'Sales': 'y'})
    .loc[:, ['ds', 'y']]
)

# Fit Prophet with a 95% uncertainty interval.
furniture_model = Prophet(interval_width=0.95)
furniture_model.fit(furniture)

# Extend the history by 36 month-start dates, then predict over history + future.
# The future frame gets its own name so it is not confused with the forecast.
furniture_future = furniture_model.make_future_dataframe(periods=36, freq='MS')
furniture_forecast = furniture_model.predict(furniture_future)

# Prophet.plot() creates its own figure, so the size is passed to it directly;
# a separate plt.figure(...) call would only leave an empty extra figure.
furniture_model.plot(furniture_forecast, xlabel='Date', ylabel='Sales', figsize=(18, 6))
plt.title('Furniture Sales');