In [141]:
# https://preppindata.blogspot.com/2021/05/2021-week-18-prep-air-project-overruns.html

import pandas as pd
import numpy as np
import glob
from datetime import datetime, timedelta

### Input the data

In [142]:
# Load the 'Project Timelines' sheet of the challenge workbook.
# The relative path uses '/' rather than '\': a backslash is only a path
# separator on Windows, while a forward slash is accepted on every platform
# (and matches the separator already used for the output file).
df = pd.read_excel('data/PD 2021 Wk 18 input.xlsx', sheet_name='Project Timelines')
df.head()

Unnamed: 0,Project,Sub-project,Task,Owner,Scheduled Date,Completed In Days from Scheduled Date
0,New Loyalty Scheme,Marketing,Scope,Tom,2021-04-19,0
1,New Loyalty Scheme,Marketing,Build,Tom,2021-04-21,2
2,New Loyalty Scheme,Marketing,Deliver,Tom,2021-04-30,5
3,New Loyalty Scheme,Operations,Scope,Jenny,2021-04-15,0
4,New Loyalty Scheme,Operations,Build,Jenny,2021-04-23,3


### Work out the 'Completed Date' by adding the number of days each task took to complete to its Scheduled Date

In [143]:
# Convert the day counts into timedeltas, then shift each scheduled date by
# that amount to obtain the date the task was actually completed.
days_after_schedule = pd.to_timedelta(
    df['Completed In Days from Scheduled Date'],
    unit='d',
)
df['Completed Date'] = df['Scheduled Date'] + days_after_schedule
df.head()

Unnamed: 0,Project,Sub-project,Task,Owner,Scheduled Date,Completed In Days from Scheduled Date,Completed Date
0,New Loyalty Scheme,Marketing,Scope,Tom,2021-04-19,0,2021-04-19
1,New Loyalty Scheme,Marketing,Build,Tom,2021-04-21,2,2021-04-23
2,New Loyalty Scheme,Marketing,Deliver,Tom,2021-04-30,5,2021-05-05
3,New Loyalty Scheme,Operations,Scope,Jenny,2021-04-15,0,2021-04-15
4,New Loyalty Scheme,Operations,Build,Jenny,2021-04-23,3,2021-04-26


### Rename 'Completed In Days from Scheduled Date' to 'Days Difference to Schedule'

In [144]:
# Map the verbose input column name onto the shorter name used in the output.
column_renames = {
    'Completed In Days from Scheduled Date': 'Days Difference to Schedule',
}
df = df.rename(columns=column_renames)
df.head()

Unnamed: 0,Project,Sub-project,Task,Owner,Scheduled Date,Days Difference to Schedule,Completed Date
0,New Loyalty Scheme,Marketing,Scope,Tom,2021-04-19,0,2021-04-19
1,New Loyalty Scheme,Marketing,Build,Tom,2021-04-21,2,2021-04-23
2,New Loyalty Scheme,Marketing,Deliver,Tom,2021-04-30,5,2021-05-05
3,New Loyalty Scheme,Operations,Scope,Jenny,2021-04-15,0,2021-04-15
4,New Loyalty Scheme,Operations,Build,Jenny,2021-04-23,3,2021-04-26


### Pivot Task to become column headers with the Completed Date as the values in the column

In [145]:
# Drop the columns that are not needed for the pivot, then spread the Task
# values into columns holding each task's Completed Date, one row per
# Project / Sub-project / Owner combination.
df_prepivot = df.drop(columns=['Scheduled Date', 'Days Difference to Schedule'])
df_pivot = df_prepivot.pivot(
    index=['Project', 'Sub-project', 'Owner'],
    columns='Task',
    values='Completed Date',
)
df_pivot = df_pivot.reset_index()
df_pivot.head()

Task,Project,Sub-project,Owner,Build,Deliver,Scope
0,New Loyalty Scheme,Marketing,Tom,2021-04-23,2021-05-05,2021-04-19
1,New Loyalty Scheme,Operations,Jenny,2021-04-26,2021-05-02,2021-04-15
2,New Trolley Inventory,Marketing,Tom,2021-05-07,2021-05-17,2021-05-02
3,New Trolley Inventory,Operations,Jenny,2021-05-07,2021-05-17,2021-04-30
4,Spring Sale,Marketing,Carl,2021-05-05,2021-05-07,2021-04-22


### Calculate the difference between Scope to Build time & the difference between Build to Delivery time

In [146]:
# Elapsed time between the completion dates of consecutive tasks.
scope_done = df_pivot['Scope']
build_done = df_pivot['Build']
deliver_done = df_pivot['Deliver']

df_pivot['Scope to Build Time'] = build_done - scope_done
df_pivot['Build to Delivery Time'] = deliver_done - build_done
df_pivot

Task,Project,Sub-project,Owner,Build,Deliver,Scope,Scope to Build Time,Build to Delivery Time
0,New Loyalty Scheme,Marketing,Tom,2021-04-23,2021-05-05,2021-04-19,4 days,12 days
1,New Loyalty Scheme,Operations,Jenny,2021-04-26,2021-05-02,2021-04-15,11 days,6 days
2,New Trolley Inventory,Marketing,Tom,2021-05-07,2021-05-17,2021-05-02,5 days,10 days
3,New Trolley Inventory,Operations,Jenny,2021-05-07,2021-05-17,2021-04-30,7 days,10 days
4,Spring Sale,Marketing,Carl,2021-05-05,2021-05-07,2021-04-22,13 days,2 days
5,Spring Sale,Operations,Jonathan,2021-04-30,2021-05-06,2021-04-25,5 days,6 days


### Pivot the Build, Deliver and Scope columns back into rows to re-create the 'Completed Date' field and the Task field
- You will need to rename the two fields produced by the pivot

In [147]:
# Columns listed here are kept as identifiers; every remaining column is
# unpivoted into a 'Task' / 'Completed Date' pair of columns.
carried_columns = [
    'Project',
    'Sub-project',
    'Owner',
    'Scope to Build Time',
    'Build to Delivery Time',
]
df_completedDate = df_pivot.melt(
    id_vars=carried_columns,
    var_name='Task',
    value_name='Completed Date',
)
df_completedDate

Unnamed: 0,Project,Sub-project,Owner,Scope to Build Time,Build to Delivery Time,Task,Completed Date
0,New Loyalty Scheme,Marketing,Tom,4 days,12 days,Build,2021-04-23
1,New Loyalty Scheme,Operations,Jenny,11 days,6 days,Build,2021-04-26
2,New Trolley Inventory,Marketing,Tom,5 days,10 days,Build,2021-05-07
3,New Trolley Inventory,Operations,Jenny,7 days,10 days,Build,2021-05-07
4,Spring Sale,Marketing,Carl,13 days,2 days,Build,2021-05-05
5,Spring Sale,Operations,Jonathan,5 days,6 days,Build,2021-04-30
6,New Loyalty Scheme,Marketing,Tom,4 days,12 days,Deliver,2021-05-05
7,New Loyalty Scheme,Operations,Jenny,11 days,6 days,Deliver,2021-05-02
8,New Trolley Inventory,Marketing,Tom,5 days,10 days,Deliver,2021-05-17
9,New Trolley Inventory,Operations,Jenny,7 days,10 days,Deliver,2021-05-17


### Join Branch 1 (the original task-level data) and Branch 2 (the re-pivoted data with the two time differences) back together

In [148]:
# Work on an independent copy of the task-level frame for the join.
df2 = df.copy(deep=True)
df2.head()

Unnamed: 0,Project,Sub-project,Task,Owner,Scheduled Date,Days Difference to Schedule,Completed Date
0,New Loyalty Scheme,Marketing,Scope,Tom,2021-04-19,0,2021-04-19
1,New Loyalty Scheme,Marketing,Build,Tom,2021-04-21,2,2021-04-23
2,New Loyalty Scheme,Marketing,Deliver,Tom,2021-04-30,5,2021-05-05
3,New Loyalty Scheme,Operations,Scope,Jenny,2021-04-15,0,2021-04-15
4,New Loyalty Scheme,Operations,Build,Jenny,2021-04-23,3,2021-04-26


In [149]:
# Attach the two duration columns to every task row; only rows whose keys
# appear in both frames are kept (inner join).
join_keys = ['Project', 'Sub-project', 'Owner', 'Task', 'Completed Date']
df_output = df2.merge(df_completedDate, how='inner', on=join_keys)
df_output.head()

Unnamed: 0,Project,Sub-project,Task,Owner,Scheduled Date,Days Difference to Schedule,Completed Date,Scope to Build Time,Build to Delivery Time
0,New Loyalty Scheme,Marketing,Scope,Tom,2021-04-19,0,2021-04-19,4 days,12 days
1,New Loyalty Scheme,Marketing,Build,Tom,2021-04-21,2,2021-04-23,4 days,12 days
2,New Loyalty Scheme,Marketing,Deliver,Tom,2021-04-30,5,2021-05-05,4 days,12 days
3,New Loyalty Scheme,Operations,Scope,Jenny,2021-04-15,0,2021-04-15,11 days,6 days
4,New Loyalty Scheme,Operations,Build,Jenny,2021-04-23,3,2021-04-26,11 days,6 days


In [150]:
# Quick check of the numeric weekday of each completion date (0 = Monday ... 6 = Sunday).
df_output['Completed Date'].dt.dayofweek

0     0
1     4
2     2
3     3
4     0
5     6
6     3
7     2
8     4
9     6
10    4
11    3
12    6
13    4
14    0
15    4
16    4
17    0
Name: Completed Date, dtype: int32

### Calculate which weekday each task was completed on, as the dashboard needs to show whether tasks were completed during the weekend

In [151]:
# Name of the weekday (e.g. 'Monday') on which each task was completed.
completion_dates = df_output['Completed Date']
df_output['Completed Weekday'] = completion_dates.dt.day_name()
df_output.head()

Unnamed: 0,Project,Sub-project,Task,Owner,Scheduled Date,Days Difference to Schedule,Completed Date,Scope to Build Time,Build to Delivery Time,Completed Weekday
0,New Loyalty Scheme,Marketing,Scope,Tom,2021-04-19,0,2021-04-19,4 days,12 days,Monday
1,New Loyalty Scheme,Marketing,Build,Tom,2021-04-21,2,2021-04-23,4 days,12 days,Friday
2,New Loyalty Scheme,Marketing,Deliver,Tom,2021-04-30,5,2021-05-05,4 days,12 days,Wednesday
3,New Loyalty Scheme,Operations,Scope,Jenny,2021-04-15,0,2021-04-15,11 days,6 days,Thursday
4,New Loyalty Scheme,Operations,Build,Jenny,2021-04-23,3,2021-04-26,11 days,6 days,Monday


### Clean up the data set to remove any fields that are not required.

In [152]:
# Keep only the fields required for the output, in the required order.
output_columns = [
    'Completed Weekday',
    'Task',
    'Scope to Build Time',
    'Build to Delivery Time',
    'Days Difference to Schedule',
    'Project',
    'Sub-project',
    'Owner',
    'Scheduled Date',
    'Completed Date',
]
df_output = df_output[output_columns]
df_output


Unnamed: 0,Completed Weekday,Task,Scope to Build Time,Build to Delivery Time,Days Difference to Schedule,Project,Sub-project,Owner,Scheduled Date,Completed Date
0,Monday,Scope,4 days,12 days,0,New Loyalty Scheme,Marketing,Tom,2021-04-19,2021-04-19
1,Friday,Build,4 days,12 days,2,New Loyalty Scheme,Marketing,Tom,2021-04-21,2021-04-23
2,Wednesday,Deliver,4 days,12 days,5,New Loyalty Scheme,Marketing,Tom,2021-04-30,2021-05-05
3,Thursday,Scope,11 days,6 days,0,New Loyalty Scheme,Operations,Jenny,2021-04-15,2021-04-15
4,Monday,Build,11 days,6 days,3,New Loyalty Scheme,Operations,Jenny,2021-04-23,2021-04-26
5,Sunday,Deliver,11 days,6 days,4,New Loyalty Scheme,Operations,Jenny,2021-04-28,2021-05-02
6,Thursday,Scope,13 days,2 days,0,Spring Sale,Marketing,Carl,2021-04-22,2021-04-22
7,Wednesday,Build,13 days,2 days,6,Spring Sale,Marketing,Carl,2021-04-29,2021-05-05
8,Friday,Deliver,13 days,2 days,3,Spring Sale,Marketing,Carl,2021-05-04,2021-05-07
9,Sunday,Scope,5 days,6 days,0,Spring Sale,Operations,Jonathan,2021-04-25,2021-04-25


### Output as a csv file

In [153]:
# Write the final data set to CSV.
# index=False: the frame's index is just the positional row number left over
# from the merge, so it is not written out; otherwise the file would gain an
# extra, unnamed leading column that is not one of the required fields.
df_output.to_csv(r'output/2021-week18-output.csv', index=False)