### Preppin' Data Challenge: Departmental December - Sales (week 50)

### Requirements
- Fill in the Salesperson names for each row (the name appears at the bottom of each monthly grouping)
- Bring out the YTD information from the October tracker and use it to create YTD totals for November too
- Reshape the data so all the bike types are in a single column
- Output the data

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Input the data: read every sheet of the workbook, tag each row with the name of the sheet
# it came from, and stack the sheets into one frame.
with pd.ExcelFile(r"\Dataprep\2021\Sales Department Input.xlsx") as xl:
    sheet_frames = [pd.read_excel(xl, s).assign(sheet=s) for s in xl.sheet_names]

# ignore_index=True gives the stacked frame a unique 0..n-1 index. Without it every sheet keeps
# its own 0..k labels, so the combined frame has duplicate row labels and any later
# label-aligned operation becomes ambiguous.
# 'Unnamed: 7' is the name pandas gives a column with no header; it is renamed to 'YTD Total'.
df = pd.concat(sheet_frames, ignore_index=True).rename(columns={'Unnamed: 7': 'YTD Total'})

In [3]:
# Derive the month (as a month-start timestamp) and the calendar year each row belongs to.
# Rows that carry a Salesperson name take their month from the row directly above them;
# every other row uses its own Date.
# (Assumes the named rows are the undated summary rows at the bottom of each monthly
# grouping and that the row above is a dated row of the same grouping - TODO confirm.)
# NOTE: this relies on 'Salesperson' being populated only on those summary rows, so it must
# run before the names are back-filled onto the other rows.
is_named_row = df['Salesperson'].notna()
reference_date = np.where(is_named_row, df['Date'].shift(1), df['Date'])

# Truncating to month precision already yields the first day of the month. Do not also
# subtract pd.offsets.MonthBegin(1): for a date that already falls on the 1st it rolls back
# to the *previous* month, which would assign the named row to the wrong month.
df['Month'] = reference_date.astype('datetime64[M]')
df['Year'] = df['Month'].dt.year

In [4]:
# Preview the first ten rows, including the derived Month and Year columns.
df.head(n=10)

Unnamed: 0,RowID,Date,Salesperson,Road,Gravel,Mountain,Total,YTD Total,sheet,Month,Year
0,1,2021-10-01,,60.0,21.0,25.0,106,,October,2021-10-01,2021
1,2,2021-10-02,,48.0,84.0,63.0,195,,October,2021-10-01,2021
2,3,2021-10-03,,87.0,26.0,40.0,153,,October,2021-10-01,2021
3,4,2021-10-04,,4.0,30.0,55.0,89,,October,2021-10-01,2021
4,5,2021-10-05,,57.0,39.0,62.0,158,,October,2021-10-01,2021
5,6,2021-10-06,,31.0,45.0,27.0,103,,October,2021-10-01,2021
6,7,2021-10-07,,63.0,2.0,4.0,69,,October,2021-10-01,2021
7,8,2021-10-08,,49.0,16.0,51.0,116,,October,2021-10-01,2021
8,9,2021-10-09,,84.0,52.0,64.0,200,,October,2021-10-01,2021
9,10,2021-10-10,,85.0,2.0,51.0,138,,October,2021-10-01,2021


In [5]:
# Fill in the Salesperson name on every row. The name appears only at the bottom of each
# monthly grouping, so it is propagated upwards (back-filled) onto the rows above it.
salesperson_filled = df['Salesperson'].bfill()
df['Salesperson'] = salesperson_filled

In [6]:
# Reshape: keep only the rows that have a Date, drop the helper/total columns, and stack the
# remaining (bike type) columns into a single 'Bike Type' / 'Sales' pair.
dated_rows = df[df['Date'].notna()]
df2 = dated_rows.drop(columns=['RowID', 'Total', 'YTD Total', 'sheet']).melt(
    id_vars=['Salesperson', 'Date', 'Month', 'Year'],
    var_name='Bike Type',
    value_name='Sales',
)

# Bring out the YTD information: total the sales per month and salesperson, then attach the
# 'YTD Total' value from the rows of the original frame where one is present.
monthly_sales = df2.groupby(['Month', 'Year', 'Salesperson'])['Sales'].sum().reset_index()
ytd_rows = df.loc[df['YTD Total'].notna(), ['Month', 'Salesperson', 'YTD Total']]

# The outer join keeps months that have sales but no YTD value (and vice versa); the sort puts
# each salesperson's months in chronological order for the running total computed afterwards.
df3 = (
    monthly_sales
    .merge(ytd_rows, how='outer', on=['Month', 'Salesperson'])
    .sort_values(by=['Salesperson', 'Month'])
)

In [7]:
# Build the running YTD total per salesperson and year.
# Months that already have a YTD value start from it; months without one contribute
# their own monthly sales instead.
seeded = df3.assign(**{'YTD Total2': df3['YTD Total'].fillna(df3['Sales'])})

# Cumulative sum in the current row order (df3 is sorted by Salesperson, then Month).
running_total = seeded.groupby(['Salesperson', 'Year'])['YTD Total2'].cumsum()

# Replace 'YTD Total' with the running total and discard the working columns.
df3 = seeded.assign(**{'YTD Total': running_total}).drop(columns=['Sales', 'YTD Total2'])

In [8]:
# Attach the running YTD total for each (Month, Salesperson) to the reshaped daily rows.
# Only the join keys and 'YTD Total' are taken from df3: df3 also carries 'Year', which df2
# already has, and merging it as well would create 'Year_x' / 'Year_y' suffixed duplicates.
# Dropping any existing 'YTD Total' first keeps this cell re-runnable; otherwise a second run
# would produce 'YTD Total_x' / 'YTD Total_y' and break the column selection that follows.
df2 = df2.drop(columns=['YTD Total'], errors='ignore').merge(
    df3[['Month', 'Salesperson', 'YTD Total']],
    how='left',
    on=['Month', 'Salesperson'],
)

In [9]:
# Keep only the columns wanted in the final output, then remove fully identical rows.
output_columns = ['Salesperson', 'Date', 'Bike Type', 'Sales', 'YTD Total']
output = df2.loc[:, output_columns].drop_duplicates()

In [10]:
# Preview the first thirty rows of the final output.
output.head(n=30)

Unnamed: 0,Salesperson,Date,Bike Type,Sales,YTD Total
0,Carl,2021-10-01,Road,60.0,32768.0
1,Carl,2021-10-02,Road,48.0,32768.0
2,Carl,2021-10-03,Road,87.0,32768.0
3,Carl,2021-10-04,Road,4.0,32768.0
4,Carl,2021-10-05,Road,57.0,32768.0
5,Carl,2021-10-06,Road,31.0,32768.0
6,Carl,2021-10-07,Road,63.0,32768.0
7,Carl,2021-10-08,Road,49.0,32768.0
8,Carl,2021-10-09,Road,84.0,32768.0
9,Carl,2021-10-10,Road,85.0,32768.0


In [11]:
# Output the data: write the final frame to CSV, leaving out the DataFrame index.
output_path = 'wk50-output.csv'
output.to_csv(output_path, index=False)