In [108]:
# https://preppindata.blogspot.com/2021/06/2021-week-24-c-co-absence-monitoring.html

import pandas as pd
import numpy as np
from datetime import date, timedelta

### Input data

In [109]:
# Load the absence records from the 'Reasons' sheet of the input workbook.
# A forward slash is used as the separator so the relative path resolves on
# Windows, macOS and Linux alike (a backslash only works on Windows).
df_reasons = pd.read_excel('data/PD 2021 Wk 24 Input.xlsx', sheet_name='Reasons')
df_reasons

Unnamed: 0,Name,Start Date,Days Off,Reason
0,Andy,2021-04-01,4,Illness
1,Carl,2021-04-04,5,Illness
2,Luke,2021-04-05,7,Accident
3,Tom,2021-04-07,2,Illness
4,Craig,2021-04-08,3,Accident
5,Lorna,2021-04-10,5,Accident
6,Pat,2021-05-11,10,Illness
7,Jenny,2021-05-14,3,Illness
8,Tom,2021-05-18,5,Accident


### Build a data set that lists every date from 1st April to 31st May 2021

In [110]:
# One row per calendar day from 1 April 2021 through 31 May 2021 (both ends included).
# 'D' is the documented calendar-day frequency alias; the lowercase spelling is
# not the canonical form and newer pandas releases warn about it.
range_start = date(2021, 4, 1)
range_end = date(2021, 5, 31)
df_date_range = pd.DataFrame({'Date': pd.date_range(range_start, range_end, freq='D')})

df_date_range

Unnamed: 0,Date
0,2021-04-01
1,2021-04-02
2,2021-04-03
3,2021-04-04
4,2021-04-05
...,...
56,2021-05-27
57,2021-05-28
58,2021-05-29
59,2021-05-30


### The following method differs from the guideline on the website; in Python we can do it in a simpler way

In [111]:
# Last day of each absence: the start date itself counts as the first day off,
# so only (Days Off - 1) further days are added to it.
extra_days = pd.to_timedelta(df_reasons['Days Off'] - 1, unit='D')
df_reasons['End Date'] = df_reasons['Start Date'] + extra_days
df_reasons

Unnamed: 0,Name,Start Date,Days Off,Reason,End Date
0,Andy,2021-04-01,4,Illness,2021-04-04
1,Carl,2021-04-04,5,Illness,2021-04-08
2,Luke,2021-04-05,7,Accident,2021-04-11
3,Tom,2021-04-07,2,Illness,2021-04-08
4,Craig,2021-04-08,3,Accident,2021-04-10
5,Lorna,2021-04-10,5,Accident,2021-04-14
6,Pat,2021-05-11,10,Illness,2021-05-20
7,Jenny,2021-05-14,3,Illness,2021-05-16
8,Tom,2021-05-18,5,Accident,2021-05-22


In [118]:
# Start every date at zero absences; the loop below adds one per absence covering it.
# Resetting the column here keeps the cell safe to re-run.
df_date_range['Sick Count'] = 0

# Walk the absences as (start, end) pairs; between() includes both end points,
# so the first and last day off are both counted.
absence_spans = zip(df_reasons['Start Date'], df_reasons['End Date'])
for first_day, last_day in absence_spans:
    covered = df_date_range['Date'].between(first_day, last_day)
    df_date_range.loc[covered, 'Sick Count'] += 1
df_date_range.head(30)

Unnamed: 0,Date,Sick Count
0,2021-04-01,1
1,2021-04-02,1
2,2021-04-03,1
3,2021-04-04,2
4,2021-04-05,2
5,2021-04-06,2
6,2021-04-07,3
7,2021-04-08,4
8,2021-04-09,2
9,2021-04-10,3


### Output the data

In [113]:
# Write the per-date counts to CSV.
# index=False: the frame's index is only the default 0..n row counter, so writing
# it would just add an unnamed, meaningless extra column to the output file.
df_date_range.to_csv(r'output/2021-week24-output.csv', index=False)