In [44]:
#https://preppindata.blogspot.com/2021/01/2021-week-3.html

import pandas as pd

### Input the data source by pulling together all the tables

In [45]:
# sheet_name=None makes read_excel return a dict of {sheet name: DataFrame},
# one entry for every tab in the workbook.
sheets = pd.read_excel(r'data/PD 2021 Wk 3 Input.xlsx', sheet_name=None)

# Concatenating the dict stacks the tabs and keeps each sheet name as the outer
# index level; reset_index then exposes it as 'level_0' (with the per-sheet row
# number as 'level_1').
df = pd.concat(sheets).reset_index()
df

Unnamed: 0,level_0,level_1,Date,New - Saddles,New - Mudguards,New - Wheels,New - Bags,Existing - Saddles,Existing - Mudguards,Existing - Wheels,Existing - Bags
0,Manchester,0,2021-01-21,13,42,19,38,17,48,19,13
1,Manchester,1,2021-02-21,1,9,14,6,2,4,19,24
2,Manchester,2,2021-03-21,8,22,6,35,0,48,17,16
3,Manchester,3,2021-04-21,3,9,8,16,18,50,18,25
4,Manchester,4,2021-05-21,2,8,5,34,17,3,12,19
5,Manchester,5,2021-06-21,11,2,6,8,2,8,3,1
6,Manchester,6,2021-07-21,16,5,15,37,19,1,7,28
7,Manchester,7,2021-08-21,10,7,18,27,10,4,8,9
8,Manchester,8,2021-09-21,15,25,1,38,18,9,0,23
9,Manchester,9,2021-10-21,9,11,11,0,18,10,17,7


### Create a Store column from the data & Remove any unnecessary data fields

In [46]:
# Label the sheet-name column (level_0) as 'store' and discard level_1,
# the per-sheet row counter left over from reset_index.
df = (
    df
    .rename(columns={'level_0': 'store'})
    .drop(columns=['level_1'])
)
df.head()

Unnamed: 0,store,Date,New - Saddles,New - Mudguards,New - Wheels,New - Bags,Existing - Saddles,Existing - Mudguards,Existing - Wheels,Existing - Bags
0,Manchester,2021-01-21,13,42,19,38,17,48,19,13
1,Manchester,2021-02-21,1,9,14,6,2,4,19,24
2,Manchester,2021-03-21,8,22,6,35,0,48,17,16
3,Manchester,2021-04-21,3,9,8,16,18,50,18,25
4,Manchester,2021-05-21,2,8,5,34,17,3,12,19


### Pivot 'New' columns and 'Existing' columns 

In [47]:
# Reshape wide -> long: store and Date stay as identifiers and every remaining
# column is stacked, so each row holds one (store, Date, original header) value.
df_pivot = pd.melt(
    df,
    id_vars=['store', 'Date'],
    var_name='type',       # former column header, e.g. 'New - Saddles'
    value_name='values',   # the cell value taken from that column
)

df_pivot

Unnamed: 0,store,Date,type,values
0,Manchester,2021-01-21,New - Saddles,13
1,Manchester,2021-02-21,New - Saddles,1
2,Manchester,2021-03-21,New - Saddles,8
3,Manchester,2021-04-21,New - Saddles,3
4,Manchester,2021-05-21,New - Saddles,2
...,...,...,...,...
475,Birmingham,2021-08-21,Existing - Bags,11
476,Birmingham,2021-09-21,Existing - Bags,24
477,Birmingham,2021-10-21,Existing - Bags,16
478,Birmingham,2021-11-21,Existing - Bags,12


### Split the former column headers to form:
- Customer Type
- Product

In [48]:
# Split each former header on its first ' - ' only (n=1), which always yields
# exactly two parts: Customer Type, then Product. Capping the split keeps the
# two-column assignment valid even if a product name itself contains ' - '.
df_pivot[['Customer Type', 'Product']] = df_pivot['type'].str.split(' - ', n=1, expand=True)
df_pivot.head(5)

Unnamed: 0,store,Date,type,values,Customer Type,Product
0,Manchester,2021-01-21,New - Saddles,13,New,Saddles
1,Manchester,2021-02-21,New - Saddles,1,New,Saddles
2,Manchester,2021-03-21,New - Saddles,8,New,Saddles
3,Manchester,2021-04-21,New - Saddles,3,New,Saddles
4,Manchester,2021-05-21,New - Saddles,2,New,Saddles


### Rename the measure created by the Pivot as 'Products Sold'

In [49]:
# Rename the melted measure column. Reassigning (rather than inplace=True) matches
# the `df = df.rename(...)` style used earlier in this notebook and keeps the step
# chainable.
df_pivot = df_pivot.rename(columns={'values': 'Products Sold'})
df_pivot.head(5)

Unnamed: 0,store,Date,type,Products Sold,Customer Type,Product
0,Manchester,2021-01-21,New - Saddles,13,New,Saddles
1,Manchester,2021-02-21,New - Saddles,1,New,Saddles
2,Manchester,2021-03-21,New - Saddles,8,New,Saddles
3,Manchester,2021-04-21,New - Saddles,3,New,Saddles
4,Manchester,2021-05-21,New - Saddles,2,New,Saddles


### Turn Date into Quarter

In [60]:
# Derive the calendar quarter of each row's Date through the datetime accessor
# and store it in a new 'Quarter' column.
df_pivot['Quarter'] = df_pivot.loc[:, 'Date'].dt.quarter
df_pivot.head()

Unnamed: 0,store,Date,type,Products Sold,Customer Type,Product,Quarter
0,Manchester,2021-01-21,New - Saddles,13,New,Saddles,1
1,Manchester,2021-02-21,New - Saddles,1,New,Saddles,1
2,Manchester,2021-03-21,New - Saddles,8,New,Saddles,1
3,Manchester,2021-04-21,New - Saddles,3,New,Saddles,2
4,Manchester,2021-05-21,New - Saddles,2,New,Saddles,2


### Aggregate to form two separate outputs of the number of products sold by:

1. Product, Quarter
2. Store, Customer Type, Product

In [69]:
# Output 1: total products sold per Product and Quarter.
output = (
    df_pivot
    .groupby(['Product', 'Quarter'])['Products Sold']
    .sum()
    .reset_index()
)

# Output 2: total products sold per store, Customer Type and Product.
output2 = (
    df_pivot
    .groupby(['store', 'Customer Type', 'Product'])['Products Sold']
    .sum()
    .reset_index()
)

### Output each data set as a csv file

In [70]:
# Write both aggregates to CSV. index=False leaves out the default 0..n row index
# (left behind by reset_index), which would otherwise be written as an extra
# unnamed first column in each file.
output.to_csv(r'output/2021-week3-output1.csv', index=False)
output2.to_csv(r'output/2021-week3-output2.csv', index=False)