Introduction

Importing libraries pandas, numpy, matplotlib, seaborn

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

Reading sales table data

In [None]:
# Load the cleaned sales sample into a DataFrame, decoding the CSV as CP1252.
sales_table = pd.read_csv(
    '../data/generated/sales_data_sample_CLEAN.csv',
    encoding='CP1252',
)

Exploring orders stats

In [None]:
# Mean of the ORDERLINENUMBER column over all rows, shown to two decimals.
mean_order_line_number = sales_table['ORDERLINENUMBER'].mean()
round(mean_order_line_number, 2)

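The mean `ORDERLINENUMBER` across all order lines is 6.47 (this is the average line position within an order, not the number of lines per order)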

In [None]:
# Sum SALES within each ORDERNUMBER, then average those per-order totals
# and show the result to two decimals.
sales_per_order = sales_table.groupby(['ORDERNUMBER'])['SALES'].sum()
round(sales_per_order.mean(), 2)

Average sales per order is 32679.57

Checking year to year sales

In [None]:
# Total SALES for each YEAR_ID, as a Series indexed by year.
sales_per_year = (
    sales_table
    .groupby(['YEAR_ID'])['SALES']
    .sum()
)
sales_per_year

In [None]:
# Bar chart of total sales per year: one bar per YEAR_ID, each annotated with
# its dollar amount.
fig, ax = plt.subplots(figsize=(10, 6))

# Pass x and y explicitly (years as string labels) so every year gets its own
# bar regardless of how the installed seaborn version interprets a bare Series
# handed to `data=`.
sns.barplot(x=sales_per_year.index.astype(str), y=sales_per_year.values, ax=ax)
ax.set_title('Total sales per year')
ax.set_xlabel('Year')
ax.set_ylabel('Sales ($)')
ax.tick_params(axis='x', labelrotation=45)

# Categorical bars sit at integer positions 0..n-1 in index order, so the
# enumeration position is the x coordinate of each label.
for position, sales in enumerate(sales_per_year):
    ax.text(position, sales, f"${sales:,.0f}",
            ha='center', va='bottom', fontsize=9)
fig.tight_layout()
plt.show()

In [None]:
# Distinct MONTH_ID values present in the rows whose YEAR_ID is 2005.
is_2005 = sales_table['YEAR_ID'] == 2005
sales_table.loc[is_2005, 'MONTH_ID'].unique()

The 2005 data only runs through May

Projecting 2005 sales to the end of the year

In [None]:
# Project 2005 to a full year by scaling its partial-year total linearly.
# NOTE: this assumes sales are spread evenly across months; it does not account
# for any seasonality in the data.
MONTHS_IN_YEAR = 12
MONTHS_OBSERVED_2005 = 5  # number of months of 2005 data used for the scaling

# Rebuild the yearly totals from sales_table before scaling so that re-running
# this cell does not multiply the 2005 figure a second time.
sales_per_year = sales_table.groupby(['YEAR_ID'])['SALES'].sum()
sales_per_year[2005] = sales_per_year[2005] * (MONTHS_IN_YEAR / MONTHS_OBSERVED_2005)
sales_per_year

In [None]:
# Bar chart of sales per year after the 2005 figure has been scaled to a
# full-year projection: one bar per YEAR_ID, each annotated with its dollar
# amount.
fig, ax = plt.subplots(figsize=(10, 6))

# Pass x and y explicitly (years as string labels) so every year gets its own
# bar regardless of how the installed seaborn version interprets a bare Series
# handed to `data=`.
sns.barplot(x=sales_per_year.index.astype(str), y=sales_per_year.values, ax=ax)
ax.set_title('Total sales per year (2005 projected to a full year)')
ax.set_xlabel('Year')
ax.set_ylabel('Sales ($)')
ax.tick_params(axis='x', labelrotation=45)

# Categorical bars sit at integer positions 0..n-1 in index order, so the
# enumeration position is the x coordinate of each label.
for position, sales in enumerate(sales_per_year):
    ax.text(position, sales, f"${sales:,.0f}",
            ha='center', va='bottom', fontsize=9)
fig.tight_layout()
plt.show()

In [None]:
# Total SALES for each (YEAR_ID, QTR_ID) pair, with the group keys turned back
# into ordinary columns.
quarterly_totals = sales_table.groupby(['YEAR_ID', 'QTR_ID'])['SALES'].sum()
sales_per_quarter = quarterly_totals.reset_index()
sales_per_quarter

It looks like sales rose from 2003 to 2004, but 2005 sales are projected to be slightly lower than 2004 sales

In [None]:
# Build a "<year> Q<quarter>" label for each row to use as the x-axis category.
sales_per_quarter['Year_Quarter'] = (
    sales_per_quarter['YEAR_ID'].astype(str)
    + ' Q'
    + sales_per_quarter['QTR_ID'].astype(str)
)

# Bar chart of total sales per quarter, each bar annotated with its dollar amount.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=sales_per_quarter, x='Year_Quarter', y='SALES', ax=ax)
ax.set_title('Total sales per quarter')
ax.set_xlabel('Quarter')
ax.set_ylabel('Sales ($)')
ax.tick_params(axis='x', labelrotation=45)

# Bars are drawn at positions 0..n-1 in row order; enumerate by position rather
# than relying on the DataFrame's index labels matching those positions.
for position, sales in enumerate(sales_per_quarter['SALES']):
    ax.text(position, sales, f"${sales:,.0f}",
            ha='center', va='bottom', fontsize=9)
fig.tight_layout()
plt.show()


There appears to be a trend of higher sales in Q3 than in Q1 and Q2, and Q4 sales are then double Q3 sales

In [None]:
# Total SALES for each (YEAR_ID, MONTH_ID) pair, with the group keys turned
# back into ordinary columns.
monthly_totals = sales_table.groupby(['YEAR_ID', 'MONTH_ID'])['SALES'].sum()
sales_per_month = monthly_totals.reset_index()
sales_per_month

In [None]:
# Build a "<year> M<month>" label for each row to use as the x-axis category.
sales_per_month['Year_Month'] = (
    sales_per_month['YEAR_ID'].astype(str)
    + ' M'
    + sales_per_month['MONTH_ID'].astype(str)
)

# Bar chart of total sales per month, each bar annotated with its dollar amount.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=sales_per_month, x='Year_Month', y='SALES', ax=ax)
ax.set_title('Total sales per month')
ax.set_xlabel('Month')
ax.set_ylabel('Sales ($)')
ax.tick_params(axis='x', labelrotation=90)

# Bars are drawn at positions 0..n-1 in row order; enumerate by position rather
# than relying on the DataFrame's index labels matching those positions.
for position, sales in enumerate(sales_per_month['SALES']):
    ax.text(position, sales, f"${sales:,.0f}",
            ha='center', va='bottom', fontsize=9)
fig.tight_layout()
plt.show()


It looks like October and November are the best-selling months, with November sales double those of October.