### Prepping Data Challenge:  Charity Fundraising (week 42)

### Requirements
- Input the data
  - Create new rows for any date missing between the first and last date in the data set provided
- Calculate how many days of fundraising there have been by the date in each row (1st Jan would be 0)
- Calculate the amount raised per day of fundraising for each row
- Work out the weekday for each date
- Average the amount raised per day of fundraising for each weekday
- Output the data

In [1]:
import pandas as pd
import numpy as np
from pandas import date_range

In [2]:
# Load the fundraiser CSV, parsing the 'Date' column as day-first dates.
df = pd.read_csv(
    r"\Dataprep\2021\Prep Generate Rows datasets - Charity Fundraiser.csv",
    parse_dates=['Date'],
    dayfirst=True,
)

In [3]:
# Index the frame by date so missing days can be filled in by reindexing.
df = df.set_index('Date')

In [4]:
# Display the date-indexed input frame as the cell output.
df

Unnamed: 0_level_0,Total Raised to date
Date,Unnamed: 1_level_1
2021-01-01,0
2021-01-08,300
2021-01-13,500
2021-01-18,800
2021-01-24,1300
2021-01-30,1400


In [5]:
# Build one row per calendar day between the first and last date in the data.
df2 = pd.date_range(start=df.index.min(), end=df.index.max(), freq='D')

# Reindexing onto the daily range creates the missing rows (as NaN); the
# unnamed index comes back from reset_index() as 'index', so rename it.
df3 = (
    df.reindex(df2)
      .reset_index()
      .rename(columns={'index': 'Date'})
)

# A generated day carries forward the most recent known running total.
df3['Total Raised to date'] = df3['Total Raised to date'].ffill()

In [6]:
# Whole days elapsed since the earliest date in the frame (first day is 0).
df3['Days into fund raising'] = df3['Date'].sub(df3['Date'].min()).dt.days

In [7]:
# Calculate the amount raised per day of fundraising for each row.
# The first day has 0 elapsed days, so its divisor is masked to NaN: the rate
# for that row is then always null, instead of becoming inf whenever the
# opening total is non-zero (an inf would also distort that weekday's mean).
df3['Value raised per day'] = (
    df3['Total Raised to date']
    / df3['Days into fund raising'].replace(0, np.nan)
)

In [8]:
# Work out the weekday name for each date.
df3 = df3.assign(Weekday=df3['Date'].dt.day_name())

In [9]:
# Average the per-day rate within each weekday and broadcast that mean back
# onto every row of the same weekday, rounded to 9 decimal places.
df3['Avg raised per weekday'] = (
    df3.groupby('Weekday')['Value raised per day']
       .transform('mean')
       .round(9)
)

In [10]:
# Keep only the output fields; the weekday name is published under the
# column header 'Date'.
output = (
    df3[[
        'Weekday',
        'Total Raised to date',
        'Days into fund raising',
        'Value raised per day',
        'Avg raised per weekday',
    ]]
    .rename(columns={'Weekday': 'Date'})
)

In [11]:
# Display the final output frame as the cell output.
output

Unnamed: 0,Date,Total Raised to date,Days into fund raising,Value raised per day,Avg raised per weekday
0,Friday,0.0,0,,40.77381
1,Saturday,0.0,1,0.0,31.094566
2,Sunday,0.0,2,0.0,30.276268
3,Monday,0.0,3,0.0,32.806373
4,Tuesday,0.0,4,0.0,30.929293
5,Wednesday,0.0,5,0.0,33.442982
6,Thursday,0.0,6,0.0,31.652422
7,Friday,300.0,7,42.857143,40.77381
8,Saturday,300.0,8,37.5,31.094566
9,Sunday,300.0,9,33.333333,30.276268


In [12]:
# Write the result to CSV without the positional row index.
output.to_csv(path_or_buf='wk42-output.csv', index=False)