In [433]:
# https://preppindata.blogspot.com/2021/04/week-17-timesheet-checks.html

import pandas as pd
import numpy as np


### Input the data

In [434]:
df = pd.read_excel(r'data\PD 2021 Wk 17 input.xlsx', sheet_name='Sheet1')
df.head()

Unnamed: 0,"Name, Age, Area of Work",Project,2021-02-01 00:00:00,2021-02-02 00:00:00,2021-02-03 00:00:00,2021-02-04 00:00:00,2021-02-05 00:00:00,2021-02-08 00:00:00,2021-02-09 00:00:00,2021-02-10 00:00:00,2021-02-11 00:00:00,2021-02-12 00:00:00
0,"Dan, 28: Client",Client Meetings,,2.0,,1.0,,1.5,0.5,,,Annual Leave
1,"Dan, 28: Client",Client Issues,1.0,1.5,4.5,3.5,1.0,2.0,1.0,2.0,3.0,Annual Leave
2,"Dan, 28: Client",Monthly Reports,,,,,2.0,1.0,1.0,2.0,1.0,Annual Leave
3,"Dan, 28: Client",Client Emails,2.0,0.5,0.5,0.5,1.0,1.0,1.0,,,Annual Leave
4,"Dan, 28: Client",Client Communications,1.0,1.0,,,,0.5,,,,Annual Leave


### Remove the ‘Totals’ Rows

In [435]:
df = df[~df['Project'].str.contains('Total')]

### Pivot Dates to rows and rename fields 'Date' and 'Hours'

In [436]:
df = df.melt(id_vars=['Name, Age, Area of Work','Project'],
             var_name='Date',
             value_name='Hours')

df

Unnamed: 0,"Name, Age, Area of Work",Project,Date,Hours
0,"Dan, 28: Client",Client Meetings,2021-02-01,
1,"Dan, 28: Client",Client Issues,2021-02-01,1.0
2,"Dan, 28: Client",Monthly Reports,2021-02-01,
3,"Dan, 28: Client",Client Emails,2021-02-01,2.0
4,"Dan, 28: Client",Client Communications,2021-02-01,1.0
...,...,...,...,...
505,"Sam, 45: Chats",Team Meetings,2021-02-12,
506,"Sam, 45: Chats",Minutes,2021-02-12,
507,"Sam, 45: Chats",Coffee Catch Ups,2021-02-12,1
508,"Sam, 45: Chats",Personal development,2021-02-12,


### Split the ‘Name, Age, Area of Work’ field into 3 Fields and Rename

In [437]:
df[['Name','Age','Area of Work']] = df['Name, Age, Area of Work'].str.extract('(.*),(.*): (.*)',expand=True)
df


Unnamed: 0,"Name, Age, Area of Work",Project,Date,Hours,Name,Age,Area of Work
0,"Dan, 28: Client",Client Meetings,2021-02-01,,Dan,28,Client
1,"Dan, 28: Client",Client Issues,2021-02-01,1.0,Dan,28,Client
2,"Dan, 28: Client",Monthly Reports,2021-02-01,,Dan,28,Client
3,"Dan, 28: Client",Client Emails,2021-02-01,2.0,Dan,28,Client
4,"Dan, 28: Client",Client Communications,2021-02-01,1.0,Dan,28,Client
...,...,...,...,...,...,...,...
505,"Sam, 45: Chats",Team Meetings,2021-02-12,,Sam,45,Chats
506,"Sam, 45: Chats",Minutes,2021-02-12,,Sam,45,Chats
507,"Sam, 45: Chats",Coffee Catch Ups,2021-02-12,1,Sam,45,Chats
508,"Sam, 45: Chats",Personal development,2021-02-12,,Sam,45,Chats


### Remove unnecessary fields

In [438]:
df.drop('Name, Age, Area of Work',axis=1,inplace=True)
df

Unnamed: 0,Project,Date,Hours,Name,Age,Area of Work
0,Client Meetings,2021-02-01,,Dan,28,Client
1,Client Issues,2021-02-01,1.0,Dan,28,Client
2,Monthly Reports,2021-02-01,,Dan,28,Client
3,Client Emails,2021-02-01,2.0,Dan,28,Client
4,Client Communications,2021-02-01,1.0,Dan,28,Client
...,...,...,...,...,...,...
505,Team Meetings,2021-02-12,,Sam,45,Chats
506,Minutes,2021-02-12,,Sam,45,Chats
507,Coffee Catch Ups,2021-02-12,1,Sam,45,Chats
508,Personal development,2021-02-12,,Sam,45,Chats


### Remove the row where Dan was on Annual Leave and check the data type of the Hours Field.

In [439]:
df.loc[:,['Hours']] = df['Hours'].astype(str)
df = df.loc[~((df['Name'] == 'Dan') & (df['Hours'] == 'Annual Leave'))]
df.loc[:,['Hours']] = df['Hours'].astype(float)
df = df.loc[df['Hours'].notna()]
df

Unnamed: 0,Project,Date,Hours,Name,Age,Area of Work
1,Client Issues,2021-02-01,1.0,Dan,28,Client
3,Client Emails,2021-02-01,2.0,Dan,28,Client
4,Client Communications,2021-02-01,1.0,Dan,28,Client
8,Grad Scheme Organisation,2021-02-01,1.0,Dan,28,Special Projects
12,Team Meetings,2021-02-01,2.0,Dan,28,Chats
...,...,...,...,...,...,...
494,Client Issues,2021-02-12,2.0,Sam,45,Client
496,Client Emails,2021-02-12,1.5,Sam,45,Client
497,Client Communications,2021-02-12,1.0,Sam,45,Client
503,Social Committee,2021-02-12,1.0,Sam,45,Special Projects


### Total up the number of hours spent on each area of work for each date by each employee.

In [440]:
df_total_hours = df.groupby(['Name','Date','Area of Work'])['Hours'].sum().reset_index()
df_total_hours

Unnamed: 0,Name,Date,Area of Work,Hours
0,Dan,2021-02-01,Chats,3.0
1,Dan,2021-02-01,Client,4.0
2,Dan,2021-02-01,Special Projects,1.0
3,Dan,2021-02-02,Chats,1.5
4,Dan,2021-02-02,Client,5.0
...,...,...,...,...
75,Sam,2021-02-11,Chats,3.0
76,Sam,2021-02-11,Client,4.5
77,Sam,2021-02-12,Chats,1.0
78,Sam,2021-02-12,Client,6.5


### First we are going to work out the avg number of hours per day worked by each employee
- Calculate the total number of hours worked and days worked per person
- Calculate the avg hours and remove unnecessary fields.

In [441]:
# Calculate the total number of hours worked and days worked per person
df_per_person = df.groupby('Name').agg({'Hours':'sum', 'Date':'nunique'}).reset_index()
df_per_person
# Calculate the avg hours and remove unnecessary fields.
df_per_person['avg hours'] = df_per_person['Hours'] / df_per_person['Date'] 
df_per_person = df_per_person[['Name','avg hours']]
df_per_person

Unnamed: 0,Name,avg hours
0,Dan,8.027778
1,George,8.4
2,Sam,7.7


### Now we are going to work out what % of their day (not including Chats) was spend on Client work.
- Filter out Work related to Chats.
- Calculate total number of hours spent working on each area for each employee
- Calculate total number of hours spent working on both areas together for each employee
- Join these totals together
- Calculate the % of total and remove unnecessary fields
- Filter the data to just show Client work
- Join to the table with Avg hours to create your final output

In [442]:
# Filter out Work related to Chats.
df_No_chats = df_total_hours[(df_total_hours['Area of Work']!='Chats')]
df_No_chats

# Calculate total number of hours spent working on each area for each employee
df_No_chats_per_area_person = df_No_chats.groupby(['Name','Area of Work'])['Hours'].sum().reset_index()
df_No_chats_per_area_person


Unnamed: 0,Name,Area of Work,Hours
0,Dan,Client,40.5
1,Dan,Special Projects,13.5
2,George,Client,56.5
3,George,Special Projects,13.0
4,Sam,Client,53.0
5,Sam,Special Projects,8.0


In [443]:
# Calculate total number of hours spent working on both areas together for each employee
df_both_area_person = df_No_chats_per_area_person.groupby(['Name'])['Hours'].sum().reset_index()
df_both_area_person


Unnamed: 0,Name,Hours
0,Dan,54.0
1,George,69.5
2,Sam,61.0


In [444]:
# Join these totals together
df_clients_hours = df_No_chats_per_area_person.merge(df_both_area_person, on='Name')
df_clients_hours

Unnamed: 0,Name,Area of Work,Hours_x,Hours_y
0,Dan,Client,40.5,54.0
1,Dan,Special Projects,13.5,54.0
2,George,Client,56.5,69.5
3,George,Special Projects,13.0,69.5
4,Sam,Client,53.0,61.0
5,Sam,Special Projects,8.0,61.0


In [445]:
# Calculate the % of total and remove unnecessary fields
df_clients_hours['% of Total'] = df_clients_hours['Hours_x'] / df_clients_hours['Hours_y']*100
df_clients_hours['% of Total'] = df_clients_hours['% of Total'].astype(int).astype(str)+'%'
df_clients_hours.drop(['Hours_x','Hours_y'], axis=1,inplace=True)
df_clients_hours
# Filter the data to just show Client work
df_clients_hours = df_clients_hours[df_clients_hours['Area of Work']=='Client'] 
df_clients_hours

Unnamed: 0,Name,Area of Work,% of Total
0,Dan,Client,75%
2,George,Client,81%
4,Sam,Client,86%


In [446]:
# Join to the table with Avg hours to create your final output
df_output = df_clients_hours.merge(df_per_person, on='Name')
df_output

Unnamed: 0,Name,Area of Work,% of Total,avg hours
0,Dan,Client,75%,8.027778
1,George,Client,81%,8.4
2,Sam,Client,86%,7.7


### Output the data

In [447]:
df_output.to_csv(r'output/2021-week17-output.csv')