### Prepping Data Challenge: C&BSCo - No Sales Today (Week 28)
 
### Requirements
- Input the file 
- Convert any data types required
- Create a new row for each day that doesn't have a sale
- Remove any date record where a sale occurred 
- Create a column for Day of the Week
- For each day of the week, count the number of dates where there were no sales
- Rename the count field as Number of Days
- Output the data

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the sales extract.
# 'Sale Date' is parsed as a date while reading; dayfirst=True makes the
# parser treat ambiguous values as day/month rather than month/day.
df = pd.read_csv(
    filepath_or_buffer='wk27-input.csv',
    dayfirst=True,
    parse_dates=['Sale Date'],
)

In [3]:
# Preview the first ten rows of the loaded data.
df.head(n=10)

Unnamed: 0,Sale Date,Order ID,Sale Value,Product Name,Store Name,Region,Scent Name
0,2022-12-12,937,109.84,Liquid - 25ml,Lewisham,East,Rose
1,2022-10-14,427,207.61,Liquid - 25ml,Lewisham,East,Rose
2,2022-09-09,135,111.96,Liquid - 25ml,Lewisham,East,Rose
3,2022-12-11,791,170.68,Liquid - 25ml,Wimbledon,West,Rose
4,2022-09-08,270,214.12,Liquid - 25ml,Wimbledon,West,Rose
5,2022-01-18,726,29.55,Liquid - 25ml,Dulwich,East,Rose
6,2022-05-29,692,194.32,Liquid - 25ml,Dulwich,East,Rose
7,2022-12-08,672,160.45,Liquid - 25ml,Dulwich,East,Rose
8,2022-01-14,551,125.41,Liquid - 25ml,Dulwich,East,Rose
9,2022-08-02,516,60.75,Liquid - 25ml,Dulwich,East,Rose


In [4]:
# Create a new row for each day that doesn't have a sale.
# Step 1: build a calendar with one row per day, running from the earliest
# to the latest 'Sale Date' present in the data (both ends inclusive).
start_date, end_date = df['Sale Date'].min(), df['Sale Date'].max()

df_dates = pd.DataFrame(
    {'Sale Date': pd.date_range(start_date, end_date, freq='D')}
)

In [5]:
# Step 2: left-join the sales onto the calendar so every day is kept.
# Days without a matching sale get missing values in the sales columns,
# which are then replaced by 0.
df = (
    pd.merge(df_dates, df, how='left', on='Sale Date')
    .fillna(0)
)

In [6]:
#Remove any date record where a sale occurred
# Calendar days with no matching sale had all of their sales columns filled
# with 0 by the preceding join, so a placeholder row has BOTH 'Sale Value'
# and 'Order ID' equal to 0. Checking both avoids mistaking a genuine sale
# whose value happens to be 0 for a no-sale day.
# NOTE(review): assumes real orders never carry an Order ID of 0 - confirm.
# .copy() yields an independent frame, so adding columns in later cells does
# not raise SettingWithCopyWarning.
df = df.loc[df['Sale Value'].eq(0) & df['Order ID'].eq(0)].copy()

In [7]:
#Create a column for Day of the Week
# assign() builds a new frame instead of writing a column into the filtered
# frame from the previous cell, which avoids SettingWithCopyWarning.
# pd.to_datetime is kept as a guard in case 'Sale Date' is not already a
# datetime column.
df = df.assign(
    **{'Day of week': pd.to_datetime(df['Sale Date']).dt.day_name()}
)

In [8]:
# Preview the first ten remaining rows together with their weekday name.
df.head(n=10)

Unnamed: 0,Sale Date,Order ID,Sale Value,Product Name,Store Name,Region,Scent Name,Day of week
69,2022-01-08,0.0,0.0,0,0,0,0,Saturday
163,2022-01-15,0.0,0.0,0,0,0,0,Saturday
314,2022-01-30,0.0,0.0,0,0,0,0,Sunday
332,2022-02-02,0.0,0.0,0,0,0,0,Wednesday
375,2022-02-08,0.0,0.0,0,0,0,0,Tuesday
524,2022-02-20,0.0,0.0,0,0,0,0,Sunday
609,2022-02-28,0.0,0.0,0,0,0,0,Monday
745,2022-03-13,0.0,0.0,0,0,0,0,Sunday
1223,2022-04-21,0.0,0.0,0,0,0,0,Thursday
1357,2022-05-01,0.0,0.0,0,0,0,0,Sunday


In [9]:
#For each day of the week, count the number of dates where there were no sales
#Rename the count field as Number of Days
# Aggregate straight to one row per weekday rather than broadcasting the
# count onto every row and de-duplicating in place.
# - sort=False keeps the weekdays in order of first appearance.
# - nunique counts each calendar date once, even if it appears on more than
#   one row.
# - The named aggregation creates the 'Number of Days' column directly.
df = (
    df.groupby('Day of week', sort=False, as_index=False)
      .agg(**{'Number of Days': ('Sale Date', 'nunique')})
)

In [10]:
# Keep only the two columns required in the final output.
output = df.loc[:, ['Day of week', 'Number of Days']]

In [11]:
# Preview the result (up to ten rows).
output.head(n=10)

Unnamed: 0,Day of week,Number of Days
69,Saturday,4
314,Sunday,5
332,Wednesday,1
375,Tuesday,2
609,Monday,3
1223,Thursday,6
3080,Friday,1


In [12]:
# Output the data: write the result to CSV without the row index.
output.to_csv(path_or_buf='wk28-output.csv', index=False)