In [59]:
# https://preppindata.blogspot.com/2021/05/2021-week-21-getting-trolleyed.html

import pandas as pd
import numpy as np

### Input data and Bring all the sheets together

In [72]:

xls = pd.ExcelFile('data\PD 2021 Wk 21 Input.xlsx')
sheet_names = xls.sheet_names  # Get all sheet names
sheet_names

['Month 1',
 'Month 2',
 'Month 3',
 'Month 4',
 'Month 5',
 'Month 6',
 'Month 7',
 'Month 8',
 'Month 9',
 'Month 10']

In [61]:
df = pd.concat(pd.read_excel('data\PD 2021 Wk 21 Input.xlsx', sheet_name=None))
df = df.reset_index(level=1, drop=True).rename_axis('Month').reset_index()
df

Unnamed: 0,Month,Day of Month,first_name,last_name,email,Product,Price,Destination
0,Month 1,9,Daffie,Clemont,dclemont0@unc.edu,Emulsifier,$10.14,New York
1,Month 1,19,Lucio,Muzzall,lmuzzall1@dell.com,Chambord Royal,$33.89,London
2,Month 1,25,Corbie,Shrigley,cshrigley2@sourceforge.net,Apples - Sliced / Wedge,$1.64,Perth
3,Month 1,9,Sioux,Couth,scouth3@bluehost.com,Vinegar - White Wine,$19.84,Paris
4,Month 1,21,Almira,Rickards,arickards4@godaddy.com,Food Colouring - Pink,$20.15,Edinburgh
...,...,...,...,...,...,...,...,...
9995,Month 10,25,Lock,Begbie,lbegbiern@springer.com,Wine - Redchard Merritt,$49.38,London
9996,Month 10,3,Halsey,Joris,hjorisro@virginia.edu,Wine - Charddonnay Errazuriz,$12.34,Perth
9997,Month 10,10,Selma,Benoy,sbenoyrp@hubpages.com,Mushroom - Crimini,$33.77,Paris
9998,Month 10,13,Philippine,Gerriets,pgerrietsrq@foxnews.com,"C - Plus, Orange",$29.72,Edinburgh


### Use the Day of Month and Table Names (sheet name in other tools) to form a date field for the purchase called 'Date'

In [62]:
df['Month_Num'] = df['Month'].str.extract(r'(\d+)')
df['Date'] = pd.to_datetime(df['Month_Num'] + '/' + df['Day of Month'].astype(str) + '/2021')
df

Unnamed: 0,Month,Day of Month,first_name,last_name,email,Product,Price,Destination,Month_Num,Date
0,Month 1,9,Daffie,Clemont,dclemont0@unc.edu,Emulsifier,$10.14,New York,1,2021-01-09
1,Month 1,19,Lucio,Muzzall,lmuzzall1@dell.com,Chambord Royal,$33.89,London,1,2021-01-19
2,Month 1,25,Corbie,Shrigley,cshrigley2@sourceforge.net,Apples - Sliced / Wedge,$1.64,Perth,1,2021-01-25
3,Month 1,9,Sioux,Couth,scouth3@bluehost.com,Vinegar - White Wine,$19.84,Paris,1,2021-01-09
4,Month 1,21,Almira,Rickards,arickards4@godaddy.com,Food Colouring - Pink,$20.15,Edinburgh,1,2021-01-21
...,...,...,...,...,...,...,...,...,...,...
9995,Month 10,25,Lock,Begbie,lbegbiern@springer.com,Wine - Redchard Merritt,$49.38,London,10,2021-10-25
9996,Month 10,3,Halsey,Joris,hjorisro@virginia.edu,Wine - Charddonnay Errazuriz,$12.34,Perth,10,2021-10-03
9997,Month 10,10,Selma,Benoy,sbenoyrp@hubpages.com,Mushroom - Crimini,$33.77,Paris,10,2021-10-10
9998,Month 10,13,Philippine,Gerriets,pgerrietsrq@foxnews.com,"C - Plus, Orange",$29.72,Edinburgh,10,2021-10-13


### Create 'New Trolley Inventory?' field to show whether the purchase was made on or after 1st June 2021 (the first date with the revised inventory after the project closed)

In [63]:
# Create the 'New Trolley Inventory?' column based on the cutoff date
df['New Trolley Inventory?'] = df['Date'] >= pd.to_datetime('2021-06-01')
df

Unnamed: 0,Month,Day of Month,first_name,last_name,email,Product,Price,Destination,Month_Num,Date,New Trolley Inventory?
0,Month 1,9,Daffie,Clemont,dclemont0@unc.edu,Emulsifier,$10.14,New York,1,2021-01-09,False
1,Month 1,19,Lucio,Muzzall,lmuzzall1@dell.com,Chambord Royal,$33.89,London,1,2021-01-19,False
2,Month 1,25,Corbie,Shrigley,cshrigley2@sourceforge.net,Apples - Sliced / Wedge,$1.64,Perth,1,2021-01-25,False
3,Month 1,9,Sioux,Couth,scouth3@bluehost.com,Vinegar - White Wine,$19.84,Paris,1,2021-01-09,False
4,Month 1,21,Almira,Rickards,arickards4@godaddy.com,Food Colouring - Pink,$20.15,Edinburgh,1,2021-01-21,False
...,...,...,...,...,...,...,...,...,...,...,...
9995,Month 10,25,Lock,Begbie,lbegbiern@springer.com,Wine - Redchard Merritt,$49.38,London,10,2021-10-25,True
9996,Month 10,3,Halsey,Joris,hjorisro@virginia.edu,Wine - Charddonnay Errazuriz,$12.34,Perth,10,2021-10-03,True
9997,Month 10,10,Selma,Benoy,sbenoyrp@hubpages.com,Mushroom - Crimini,$33.77,Paris,10,2021-10-10,True
9998,Month 10,13,Philippine,Gerriets,pgerrietsrq@foxnews.com,"C - Plus, Orange",$29.72,Edinburgh,10,2021-10-13,True


### Remove lots of the detail of the product name:
- Only return any names before the '-' (hyphen)
- If a product doesn't have a hyphen return the full product name

In [64]:
df['Product'] = df['Product'].apply(lambda x: x.split('-')[0].strip() if '-' in x else x)
df

Unnamed: 0,Month,Day of Month,first_name,last_name,email,Product,Price,Destination,Month_Num,Date,New Trolley Inventory?
0,Month 1,9,Daffie,Clemont,dclemont0@unc.edu,Emulsifier,$10.14,New York,1,2021-01-09,False
1,Month 1,19,Lucio,Muzzall,lmuzzall1@dell.com,Chambord Royal,$33.89,London,1,2021-01-19,False
2,Month 1,25,Corbie,Shrigley,cshrigley2@sourceforge.net,Apples,$1.64,Perth,1,2021-01-25,False
3,Month 1,9,Sioux,Couth,scouth3@bluehost.com,Vinegar,$19.84,Paris,1,2021-01-09,False
4,Month 1,21,Almira,Rickards,arickards4@godaddy.com,Food Colouring,$20.15,Edinburgh,1,2021-01-21,False
...,...,...,...,...,...,...,...,...,...,...,...
9995,Month 10,25,Lock,Begbie,lbegbiern@springer.com,Wine,$49.38,London,10,2021-10-25,True
9996,Month 10,3,Halsey,Joris,hjorisro@virginia.edu,Wine,$12.34,Perth,10,2021-10-03,True
9997,Month 10,10,Selma,Benoy,sbenoyrp@hubpages.com,Mushroom,$33.77,Paris,10,2021-10-10,True
9998,Month 10,13,Philippine,Gerriets,pgerrietsrq@foxnews.com,C,$29.72,Edinburgh,10,2021-10-13,True


### Make price a numeric field

In [65]:
df['Price'] = df['Price'].str.replace('$', '').astype(float)
df

Unnamed: 0,Month,Day of Month,first_name,last_name,email,Product,Price,Destination,Month_Num,Date,New Trolley Inventory?
0,Month 1,9,Daffie,Clemont,dclemont0@unc.edu,Emulsifier,10.14,New York,1,2021-01-09,False
1,Month 1,19,Lucio,Muzzall,lmuzzall1@dell.com,Chambord Royal,33.89,London,1,2021-01-19,False
2,Month 1,25,Corbie,Shrigley,cshrigley2@sourceforge.net,Apples,1.64,Perth,1,2021-01-25,False
3,Month 1,9,Sioux,Couth,scouth3@bluehost.com,Vinegar,19.84,Paris,1,2021-01-09,False
4,Month 1,21,Almira,Rickards,arickards4@godaddy.com,Food Colouring,20.15,Edinburgh,1,2021-01-21,False
...,...,...,...,...,...,...,...,...,...,...,...
9995,Month 10,25,Lock,Begbie,lbegbiern@springer.com,Wine,49.38,London,10,2021-10-25,True
9996,Month 10,3,Halsey,Joris,hjorisro@virginia.edu,Wine,12.34,Perth,10,2021-10-03,True
9997,Month 10,10,Selma,Benoy,sbenoyrp@hubpages.com,Mushroom,33.77,Paris,10,2021-10-10,True
9998,Month 10,13,Philippine,Gerriets,pgerrietsrq@foxnews.com,C,29.72,Edinburgh,10,2021-10-13,True


### Work out the average selling price per product

In [66]:
df['Avg Price per Product'] = df.groupby('Product')['Price'].transform('mean')
df

Unnamed: 0,Month,Day of Month,first_name,last_name,email,Product,Price,Destination,Month_Num,Date,New Trolley Inventory?,Avg Price per Product
0,Month 1,9,Daffie,Clemont,dclemont0@unc.edu,Emulsifier,10.14,New York,1,2021-01-09,False,21.600000
1,Month 1,19,Lucio,Muzzall,lmuzzall1@dell.com,Chambord Royal,33.89,London,1,2021-01-19,False,16.034000
2,Month 1,25,Corbie,Shrigley,cshrigley2@sourceforge.net,Apples,1.64,Perth,1,2021-01-25,False,19.782000
3,Month 1,9,Sioux,Couth,scouth3@bluehost.com,Vinegar,19.84,Paris,1,2021-01-09,False,23.054821
4,Month 1,21,Almira,Rickards,arickards4@godaddy.com,Food Colouring,20.15,Edinburgh,1,2021-01-21,False,26.033333
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,Month 10,25,Lock,Begbie,lbegbiern@springer.com,Wine,49.38,London,10,2021-10-25,True,23.845260
9996,Month 10,3,Halsey,Joris,hjorisro@virginia.edu,Wine,12.34,Perth,10,2021-10-03,True,23.845260
9997,Month 10,10,Selma,Benoy,sbenoyrp@hubpages.com,Mushroom,33.77,Paris,10,2021-10-10,True,28.431471
9998,Month 10,13,Philippine,Gerriets,pgerrietsrq@foxnews.com,C,29.72,Edinburgh,10,2021-10-13,True,14.250000


### Workout the Variance (difference) between the selling price and the average selling price

In [67]:
df['Variance'] = df['Price'] - df['Avg Price per Product']
df

Unnamed: 0,Month,Day of Month,first_name,last_name,email,Product,Price,Destination,Month_Num,Date,New Trolley Inventory?,Avg Price per Product,Variance
0,Month 1,9,Daffie,Clemont,dclemont0@unc.edu,Emulsifier,10.14,New York,1,2021-01-09,False,21.600000,-11.460000
1,Month 1,19,Lucio,Muzzall,lmuzzall1@dell.com,Chambord Royal,33.89,London,1,2021-01-19,False,16.034000,17.856000
2,Month 1,25,Corbie,Shrigley,cshrigley2@sourceforge.net,Apples,1.64,Perth,1,2021-01-25,False,19.782000,-18.142000
3,Month 1,9,Sioux,Couth,scouth3@bluehost.com,Vinegar,19.84,Paris,1,2021-01-09,False,23.054821,-3.214821
4,Month 1,21,Almira,Rickards,arickards4@godaddy.com,Food Colouring,20.15,Edinburgh,1,2021-01-21,False,26.033333,-5.883333
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,Month 10,25,Lock,Begbie,lbegbiern@springer.com,Wine,49.38,London,10,2021-10-25,True,23.845260,25.534740
9996,Month 10,3,Halsey,Joris,hjorisro@virginia.edu,Wine,12.34,Perth,10,2021-10-03,True,23.845260,-11.505260
9997,Month 10,10,Selma,Benoy,sbenoyrp@hubpages.com,Mushroom,33.77,Paris,10,2021-10-10,True,28.431471,5.338529
9998,Month 10,13,Philippine,Gerriets,pgerrietsrq@foxnews.com,C,29.72,Edinburgh,10,2021-10-13,True,14.250000,15.470000


### Rank the Variances (1 being the largest positive variance) per destination and whether the product was sold before or after the new trolley inventory project delivery
- Return only ranks 1-5 

In [68]:
df['Variance Rank by Destination'] = df.groupby('Destination')['Variance'].rank(ascending=False).astype(int)
df = df[df['Variance Rank by Destination']<=5]

In [70]:
# reorder column
df = df[['New Trolley Inventory?','Variance Rank by Destination','Variance','Avg Price per Product','Date','Product','first_name', 'last_name', 'email','Price','Destination']]
df.head()

Unnamed: 0,New Trolley Inventory?,Variance Rank by Destination,Variance,Avg Price per Product,Date,Product,first_name,last_name,email,Price,Destination
6140,True,4,35.939886,22.000114,2021-07-31,Pastry,Elwyn,Durdy,edurdy3w@mozilla.org,57.94,New York
6202,True,2,38.3562,19.9738,2021-07-15,Cake,Dione,Morales,dmorales5m@posterous.com,58.33,London
6223,True,5,34.73474,23.84526,2021-07-05,Wine,Haily,Raffles,hraffles67@guardian.co.uk,58.58,New York
6265,True,3,36.098271,22.301729,2021-07-27,Juice,Ingaborg,Neachell,ineachell7d@cnbc.com,58.4,New York
6409,True,2,37.673571,20.596429,2021-07-14,Grapes,Gualterio,Barks,gbarksbd@angelfire.com,58.27,Perth


### Output the data

In [71]:
df.to_csv(r'output/2021-week21-output.csv')