# Clean Go at Par Formation

In [1]:
import pandas as pd
#import matplotlib.pyplot as plt
#import matplotlib.dates as mdates
import numpy as np
from datetime import datetime
from datetime import date

## Read the file in, from the "inputs" folder to Pandas

In [2]:
#DEAR -> REPORTS -> SALES BY PRODUCT DETAILS -> [select time frame] -> EXPORT -> EXCEL WITHOUT TOTALS

df = pd.read_excel('inputs/2022startthrough29may22.xlsx',engine='openpyxl',header=0,skiprows=5, parse_dates=False)#parse_dates didn't work, see below

## Start Cleaning and Featuring

In [3]:
#initial cleaning setup
retail_names = ['Airport','Pleasant Valley','Oak Hill'] # make sure this matches the above list length - no known automation possible
list_of_retail_stores = ['Customer Square ' + str(x+1) for x in range(len(retail_names))] #we have 3 stores active, we won't count the Warehouse
store_map = dict(zip(list_of_retail_stores,retail_names))

#ws = df[df.Customer.isin(list_of_retail_stores) == False] #WHOLESALE
df = df[df.Customer.isin(list_of_retail_stores) & df.Product.notnull()].reset_index(drop=True) #remove wholesale and weird null anomoly entries

### Check for duplicate products / DEAR errors (Backend inventory management system)

In [4]:
#df['SKU'] = df.SKU.apply(str)
temp = df.groupby(["Product","SKU"]).count().reset_index()
temp[temp['Product'].duplicated()]

Unnamed: 0,Product,SKU,Category,Invoice Date,Customer,Quantity,Sale,COGS,Profit


### Features Adding and Dropping

In [5]:
#start cleaning some of the unused or needed columns / rows, converting "Invoice Date" to datetime objects
df = df.drop(['SKU', 'COGS','Profit'], axis=1, inplace=False)#not trusted data columns or useful, we'll ignore them for now
df = df.rename(columns={'Invoice Date':'Date'})#renaming for legibility 

In [6]:
df['Date'] = pd.to_datetime(df['Date'], format='%d-%b-%Y')#get these dates converted from string to something more powerful
df['Month'] = df.Date.apply(lambda x: x.month)#.month_name() also an option but i dont trust yet
df['Weekday'] = df.Date.apply(lambda x: x.dayofweek)#Monday is 0, Sunday is 6, day_name() also an option but i dont trust yet
df['Week'] = df.Date.apply(lambda x: x.isocalendar()[1])#x.isocalendar().week
df["Unit"] = df.Sale / df.Quantity
df.head(2)

Unnamed: 0,Product,Category,Date,Customer,Quantity,Sale,Month,Weekday,Week,Unit
0,Matthiasson Cabernet Sauvignon 750ml,Red New World,2022-01-14,Customer Square 2,1,67.99,1,4,2,67.99
1,Matthiasson Cabernet Sauvignon 750ml,Red New World,2022-01-21,Customer Square 3,1,67.99,1,4,3,67.99


In [7]:
itnl_whiskey = ['Whiskey French', 'Whiskey Indian', 'Whiskey Mexican', 'Whiskey Taiwaneese', 'Whisky German']
df['Category'] = df.apply(lambda row: "Itnl Whiskey" if row.Category in itnl_whiskey else row.Category,axis=1)
#df[df.Category == 'Itnl Whiskey']

## Make the Par Suggestion DataFrame

In [8]:
df.head(2)

Unnamed: 0,Product,Category,Date,Customer,Quantity,Sale,Month,Weekday,Week,Unit
0,Matthiasson Cabernet Sauvignon 750ml,Red New World,2022-01-14,Customer Square 2,1,67.99,1,4,2,67.99
1,Matthiasson Cabernet Sauvignon 750ml,Red New World,2022-01-21,Customer Square 3,1,67.99,1,4,3,67.99


In [9]:
##explain
pars = df[df.Product != 'eGift Card'].groupby(['Product','Week','Customer']).Quantity.sum().reset_index()
pars[(pars.Customer == 'Customer Square 2') & (pars.Product == 'El Jimador Reposado 375ml')]

Unnamed: 0,Product,Week,Customer,Quantity
11689,El Jimador Reposado 375ml,2,Customer Square 2,1
11691,El Jimador Reposado 375ml,3,Customer Square 2,4
11692,El Jimador Reposado 375ml,4,Customer Square 2,3
11694,El Jimador Reposado 375ml,5,Customer Square 2,1
11696,El Jimador Reposado 375ml,6,Customer Square 2,1
11698,El Jimador Reposado 375ml,7,Customer Square 2,1
11703,El Jimador Reposado 375ml,11,Customer Square 2,1
11708,El Jimador Reposado 375ml,15,Customer Square 2,1
11710,El Jimador Reposado 375ml,16,Customer Square 2,1
11713,El Jimador Reposado 375ml,18,Customer Square 2,2


In [10]:
gather = []
for product in pars.Product.unique():
    for store in list_of_retail_stores:
        qs = pars[(pars.Product == product) & (pars.Customer == store)].Quantity.to_list()
        tl = len(qs)#temporary length
        while tl < 52: # eventually future proof this for any date range given
            qs.append(0)
        qs = np.array(qs)
        gather.append([product,store,tl,qs.mean(),qs.std(),qs.min(),qs.max(),np.percentile(qs,25),np.percentile(qs,50),np.percentile(qs,75)])
        
par_suggest = pd.DataFrame(gather,columns=['Product','Customer','Count','Mean','Std','Min','Max','25','50','75'])
#output.to_csv('parguide.csv',index=False)
#par_suggest.head()

MemoryError: 

In [None]:
par_suggest[(par_suggest.Customer == 'Customer Square 2') & (par_suggest.Product == 'El Jimador Reposado 375ml')].head(10)

## Read in current pars

In [None]:
#DEAR -> PRODUCTS -> EXPORT -> STOCK REORDER LOCATIONS

read_pars = pd.read_csv('StockReorderlocations_2022-01-21.csv',header=0,dtype=str)#parse_dates
read_pars.rename(columns={'ProductName':'Product','Location':'Customer'},inplace=True)
read_pars.drop(['StockLocator','PickZones'],inplace=True,axis=1)
read_pars.MinimumBeforeReorder = read_pars.MinimumBeforeReorder.astype(float)
read_pars.MinimumBeforeReorder = read_pars.MinimumBeforeReorder.astype(int)
read_pars.ReorderQuantity = read_pars.ReorderQuantity.astype(float)
read_pars.ReorderQuantity = read_pars.ReorderQuantity.astype(int)
#read_pars.info()

In [None]:
fixer = dict(zip(retail_names,list_of_retail_stores))
read_pars['Customer'] = read_pars.apply(lambda x: fixer[x.Customer] if x.Customer != 'Warehouse' else 'Warehouse',axis=1 )
#read_pars

In [None]:
#print(par_suggest.head())
#print(read_pars.head())
mess = par_suggest.merge(right=read_pars,on=['Product','Customer'])
mess['Unit'] = mess.apply(lambda x: df.loc[df.Product == x.Product].iloc[0].Unit,axis=1)
mess['RoundedMean'] = mess['Mean'].round()
mess['DiffMean'] = (mess['MinimumBeforeReorder'] - mess['RoundedMean']).round() 
mess['Diff75'] = (mess['MinimumBeforeReorder'] - mess['75']).round()
mess['MeanSavings'] = mess['DiffMean'] * mess['Unit']
mess['75Savings'] = mess['Diff75'] * mess['Unit']
mess.to_csv('outputs/paranalysis.csv',index=False)
#print(mess.info())
mess.head()

In [None]:
#let's generate something we can upload to DEAR
par_gen = mess[['Action','ProductSKU','Product','Customer','75','ReorderQuantity','75Savings']].copy()
par_gen = par_gen.rename(columns={'Product':'ProductName','Customer':'Location','75':'MinimumBeforeReorder'})

In [None]:
par_gen = par_gen[par_gen.Location == 'Customer Square 2']
par_gen.MinimumBeforeReorder = par_gen.MinimumBeforeReorder.round().astype(int)
par_gen['Stock Locator'] = par_gen['PickZones'] = ''
par_gen.to_csv('outputs/PVParGenerator.csv',index=False)
#par_gen.info()