In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import sys
sys.path.insert(0, '..')
from fashion import utils

# Set the default matplotlib font size to 15 for the figures drawn below.
font = {'size': 15}
matplotlib.rc('font', **font)

# load the data

In [None]:
def load_sales(path):
    """Read a sales CSV and replace its column headers with English names.

    The headers found in the file are paired, by position, with the names
    Store_Key, Receipt_Key, Date, Hour, EAN, Volume and Net_Income. Pairing
    stops at the shorter of the two sequences, so any columns beyond the
    seventh keep their original header.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file; handed directly to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The loaded table carrying the renamed columns.
    """
    english_names = ('Store_Key', 'Receipt_Key', 'Date', 'Hour', 'EAN', 'Volume', 'Net_Income')

    raw = pd.read_csv(path)

    # map every original header onto its positional English counterpart
    translation = {old: new for old, new in zip(raw.columns, english_names)}
    return raw.rename(columns=translation)

# Load the sales file from the project's data directory.
# NOTE(review): assumes utils.loc is a pathlib.Path pointing at the project root - confirm in fashion.utils.
sales = load_sales(utils.loc / 'data' / '20200120_sales17.csv')

# initial sales analysis

In [None]:
# Net income per sale, histogrammed first over the full observed range and
# then over a restricted 0-1000 range; one two-panel figure per range.
bins=100
income_ranges = [
    (np.min(sales.Net_Income), np.max(sales.Net_Income)),
    (0, 1000),
]
for sale_min, sale_max in income_ranges:
    # bin once with numpy, then redraw the binned counts by weighting the bin centres
    hist, edges = np.histogram(sales.Net_Income, bins=bins, range=(sale_min, sale_max))
    centres = 0.5*(edges[:-1] + edges[1:])

    fig, ax = plt.subplots(1, 2, figsize=(12,6))

    # left panel: number of sales per bin
    # right panel: |count x bin centre|, an approximation of the income held in each bin
    panel_specs = [
        (ax[0], hist, 'Number of sales'),
        (ax[1], abs(hist*centres), '(Number of sales) x (Net Income of the sale)'),
    ]
    for panel_ax, bin_weights, y_label in panel_specs:
        panel_ax.hist(centres, bins=bins, range=(sale_min,sale_max), weights=bin_weights)
        panel_ax.set_yscale('log')
        panel_ax.set_xlabel('Net income per sale (€)')
        panel_ax.set_ylabel(y_label)
    plt.show()


Notes:
- There are some sales with VERY high net income, in the tens of thousands of euros. Are these errors, e.g. a decimal comma left out so that 100,00 was recorded as 10000?
- Still, the majority of income comes from the low net income transactions, not the large ones. (Note y-axis scale)
- Similarly, in the restricted range it looks as if the only purchases that matter on the large scale are the sub-100 ones.
- Note that the top-right plot is very skewed, as the centre of a bin is not representative of the mean cost of the items inside that bin.

- This must be a smaller company: the total sales reach 200-300M euros in 2017, which is small in comparison to the Italian market of 50B per year in 2014.


In [None]:
# Show the loaded sales table as this cell's output for a quick visual check.
sales