In [1]:
# Importing the libraries
# NOTE: explanations are written as `#` comments rather than bare string
# literals. A bare string is an expression statement, and when it is the last
# expression in a notebook cell it gets echoed as the cell's output.

# Standard library
import itertools  # iterator helpers; per the original note, itertools.product builds the SARIMA parameter combinations
import warnings

# Third-party
import matplotlib
import matplotlib.pyplot as plt  # Matplotlib's pyplot plotting interface, aliased as plt
import numpy as np
import pandas as pd

# Ignore all warning messages so they are not printed during execution.
# NOTE(review): this is a blanket filter, so it also hides useful diagnostics
# such as pandas' SettingWithCopyWarning. Consider narrowing it.
# It is kept ahead of the statsmodels import, as in the original ordering, so
# any import-time warnings stay suppressed.
warnings.filterwarnings("ignore")

# Statsmodels: statistical modelling and hypothesis testing, aliased as sm.
import statsmodels.api as sm

# Use the "fivethirtyeight" plot style (mimics the FiveThirtyEight website).
# Applied before the rcParams overrides below so that they take precedence.
plt.style.use('fivethirtyeight')

# Default font sizes for axis labels and tick labels, and black text
# (labels, annotations) in all Matplotlib plots.
matplotlib.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'text.color': 'k',
})

# Time series analysis and forecasting for furniture sales


In [3]:
# Load the Superstore workbook into a DataFrame
df = pd.read_excel(io='Superstore.xls')

In [4]:
# Preview the first five rows of the raw data
df.head(n=5)

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,3,CA-2016-138688,2016-06-12,2016-06-16,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,4,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,5,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


In [5]:
# Keep only the rows whose 'Category' is 'Furniture'.
#   df['Category'] == 'Furniture' builds a boolean Series (True where the
#   category matches), and df.loc[...] returns just the rows where it is True.
# .copy() makes `furniture` an independent DataFrame rather than a selection
# tied to `df`, so modifying it later neither touches `df` nor raises
# SettingWithCopyWarning.
furniture = df.loc[df['Category'] == 'Furniture'].copy()

In [6]:
# Earliest furniture order date in the data
furniture.loc[:, 'Order Date'].min()

Timestamp('2014-01-06 00:00:00')

In [7]:
# Latest furniture order date in the data
furniture.loc[:, 'Order Date'].max()

Timestamp('2017-12-30 00:00:00')

In [8]:
# Columns that are not needed for the sales time series; after dropping them
# only 'Order Date' and 'Sales' remain.
cols = [
    'Row ID', 'Order ID', 'Ship Date', 'Ship Mode', 'Customer ID',
    'Customer Name', 'Segment', 'Country', 'City', 'State', 'Postal Code',
    'Region', 'Product ID', 'Category', 'Sub-Category', 'Product Name',
    'Quantity', 'Discount', 'Profit',
]

# Drop without inplace=True: `furniture` comes from a filtered selection of
# `df`, and mutating such a selection in place triggers pandas'
# SettingWithCopyWarning. Returning a new frame avoids that and lets the drop
# chain into the chronological sort.
furniture = furniture.drop(columns=cols).sort_values('Order Date')

In [9]:
# Count missing values per remaining column
furniture.isna().sum()

Order Date    0
Sales         0
dtype: int64

In [10]:
# Collapse multiple orders on the same date into one row per 'Order Date'
# holding the summed 'Sales', then restore 'Order Date' as a regular column.
furniture = (
    furniture
    .groupby(by='Order Date')['Sales']
    .sum()
    .reset_index()
)


In [11]:
# Preview the first five rows of the per-date sales totals
furniture.head(n=5)


Unnamed: 0,Order Date,Sales
0,2014-01-06,2573.82
1,2014-01-07,76.728
2,2014-01-10,51.94
3,2014-01-11,9.94
4,2014-01-13,879.939
