In [310]:
# https://preppindata.blogspot.com/2021/05/2021-week-18-prep-air-project-overruns.html

import pandas as pd
import numpy as np


### Input the data

In [311]:
df_schedule = pd.read_excel(r'data\PD 2021 Wk 19 input.xlsx', sheet_name='Project Schedule Updates')
df_LT_project = pd.read_excel(r'data\PD 2021 Wk 19 input.xlsx', sheet_name='Project Lookup Table')
df_LT_subproject = pd.read_excel(r'data\PD 2021 Wk 19 input.xlsx', sheet_name='Sub-Project Lookup Table')
df_LT_task = pd.read_excel(r'data\PD 2021 Wk 19 input.xlsx', sheet_name='Task Lookup Table')
df_LT_owner = pd.read_excel(r'data\PD 2021 Wk 19 input.xlsx', sheet_name='Owner Lookup Table')
df_schedule

Unnamed: 0,Week,Commentary
0,16,[NLS/Op-Sc] Delivered scope for the project. R...
1,17,[NLS/Op-Bu] Build kickoff but long project. je...
2,18,[NLS/Op-De] Long delivery process has begun at...
3,19,[NTI/Mar-Bu] Project build commences. Will be ...
4,20,[NTI/Mar-De] Delivery next week around 8 days....


### There are lots of different ways you can do this challenge so rather than a step-by-step set of requirements, feel free to create each of these data fields in whatever order you like:
- 'Week' with the word week and week number together 'Week x' 
- 'Project' with the full project name
- 'Sub-Project' with the full sub-project name
- 'Task' with the full type of task
- 'Name' with the owner of the task's full name (Week 18's output can help you check these if needed) 
- 'Days Noted' some fields have comments that say how many days tasks might take. This field should note the number of days mentioned if said in the comment otherwise leave as a null. 
- 'Detail' the description from the system output with the project details in the [ ] 

In [312]:
#'Week' with the word week and week number together 'Week x' 
df = df_schedule.copy()
df['Week'] = 'Week ' + df['Week'].astype(str)
df


Unnamed: 0,Week,Commentary
0,Week 16,[NLS/Op-Sc] Delivered scope for the project. R...
1,Week 17,[NLS/Op-Bu] Build kickoff but long project. je...
2,Week 18,[NLS/Op-De] Long delivery process has begun at...
3,Week 19,[NTI/Mar-Bu] Project build commences. Will be ...
4,Week 20,[NTI/Mar-De] Delivery next week around 8 days....


In [313]:
# split the commentary, identify by []
df = df.assign(Commentary=df['Commentary'].str.split(' \[')).explode('Commentary')
df

Unnamed: 0,Week,Commentary
0,Week 16,[NLS/Op-Sc] Delivered scope for the project. R...
1,Week 17,[NLS/Op-Bu] Build kickoff but long project. jen.
1,Week 17,NLS/Mar-Sc] Scope completed. tom.
1,Week 17,NLS/Mar-Bu] Marketing Build complete. tom.
1,Week 17,SPS/Mar-Sc] Completed but late in the week due...
1,Week 17,SPS/Op-Sc] Scope complete but weekend delivery...
2,Week 18,[NLS/Op-De] Long delivery process has begun at...
2,Week 18,NLS/Mar-De] Similar to the operations team. 8 ...
2,Week 18,NTI/Op-Sc] New project scope delivered. Tough ...
2,Week 18,NTI/Mar-Sc] Need to balance resourcing careful...


In [314]:
# parse the project, subproject, task and command
df[['Project Code','Sub-Project Code','Task Code','Detail']] = df['Commentary'].str.extract('\[*([A-Z]{3})\/(\w*)-(\w{2})\]\s(.*)',expand=True)
# the Sub-Project Code is lower case in lookup table
df['Sub-Project Code'] = df['Sub-Project Code'].str.lower()
df['Detail'].to_list()

['Delivered scope for the project. Resourcing fine. jen.',
 'Build kickoff but long project. jen.',
 'Scope completed. tom.',
 'Marketing Build complete. tom.',
 'Completed but late in the week due (3 days needed) to being off sick. car.',
 'Scope complete but weekend delivery. jon.',
 'Long delivery process has begun at least 10 days. jen.',
 'Similar to the operations team. 8 days effort needed. Long process has begun. tom.',
 'New project scope delivered. Tough to juggle resourcing. I really need someone for 4 days of extra time. jen.',
 'Need to balance resourcing carefully with two projects ongoing. tom.',
 'Complete and awaiting marketing delivery. jon.',
 'Build is longer than expected with delay due to sick leave so require an extra 5 days. car.',
 'Delivery process started early. jon.',
 'Project build commences. Will be completed in 5 days. tom.',
 'Longer build than the easy marketing project and some sick leave. Give me an extra 2 days please. jen.',
 "Deliver delayed to ne

In [315]:
# Capture the name string. Lookback the ". ", get any string not including fullstop, end with . or . with space(s)
df['Abbreviation'] = df['Detail'].str.extract(r'(?<=\.\s)([^.]+)\.\s*$')
df['Abbreviation'] = df['Abbreviation'].str.title()

df

Unnamed: 0,Week,Commentary,Project Code,Sub-Project Code,Task Code,Detail,Abbreviation
0,Week 16,[NLS/Op-Sc] Delivered scope for the project. R...,NLS,op,Sc,Delivered scope for the project. Resourcing fi...,Jen
1,Week 17,[NLS/Op-Bu] Build kickoff but long project. jen.,NLS,op,Bu,Build kickoff but long project. jen.,Jen
1,Week 17,NLS/Mar-Sc] Scope completed. tom.,NLS,mar,Sc,Scope completed. tom.,Tom
1,Week 17,NLS/Mar-Bu] Marketing Build complete. tom.,NLS,mar,Bu,Marketing Build complete. tom.,Tom
1,Week 17,SPS/Mar-Sc] Completed but late in the week due...,SPS,mar,Sc,Completed but late in the week due (3 days nee...,Car
1,Week 17,SPS/Op-Sc] Scope complete but weekend delivery...,SPS,op,Sc,Scope complete but weekend delivery. jon.,Jon
2,Week 18,[NLS/Op-De] Long delivery process has begun at...,NLS,op,De,Long delivery process has begun at least 10 da...,Jen
2,Week 18,NLS/Mar-De] Similar to the operations team. 8 ...,NLS,mar,De,Similar to the operations team. 8 days effort ...,Tom
2,Week 18,NTI/Op-Sc] New project scope delivered. Tough ...,NTI,op,Sc,New project scope delivered. Tough to juggle r...,Jen
2,Week 18,NTI/Mar-Sc] Need to balance resourcing careful...,NTI,mar,Sc,Need to balance resourcing carefully with two ...,Tom


In [316]:
#join all the lookup tables
df = df.merge(df_LT_project, on='Project Code', how='left')
df = df.merge(df_LT_subproject, on='Sub-Project Code', how='left')
df = df.merge(df_LT_task, on='Task Code', how='left')
df = df.merge(df_LT_owner, on='Abbreviation', how='left')

df

Unnamed: 0,Week,Commentary,Project Code,Sub-Project Code,Task Code,Detail,Abbreviation,Project,Sub-Project,Task,Name
0,Week 16,[NLS/Op-Sc] Delivered scope for the project. R...,NLS,op,Sc,Delivered scope for the project. Resourcing fi...,Jen,New Loyalty Scheme,Operations,Scope,Jenny
1,Week 17,[NLS/Op-Bu] Build kickoff but long project. jen.,NLS,op,Bu,Build kickoff but long project. jen.,Jen,New Loyalty Scheme,Operations,Build,Jenny
2,Week 17,NLS/Mar-Sc] Scope completed. tom.,NLS,mar,Sc,Scope completed. tom.,Tom,New Loyalty Scheme,Marketing,Scope,Tom
3,Week 17,NLS/Mar-Bu] Marketing Build complete. tom.,NLS,mar,Bu,Marketing Build complete. tom.,Tom,New Loyalty Scheme,Marketing,Build,Tom
4,Week 17,SPS/Mar-Sc] Completed but late in the week due...,SPS,mar,Sc,Completed but late in the week due (3 days nee...,Car,Spring Sale,Marketing,Scope,Carl
5,Week 17,SPS/Op-Sc] Scope complete but weekend delivery...,SPS,op,Sc,Scope complete but weekend delivery. jon.,Jon,Spring Sale,Operations,Scope,Jonathan
6,Week 18,[NLS/Op-De] Long delivery process has begun at...,NLS,op,De,Long delivery process has begun at least 10 da...,Jen,New Loyalty Scheme,Operations,Deliver,Jenny
7,Week 18,NLS/Mar-De] Similar to the operations team. 8 ...,NLS,mar,De,Similar to the operations team. 8 days effort ...,Tom,New Loyalty Scheme,Marketing,Deliver,Tom
8,Week 18,NTI/Op-Sc] New project scope delivered. Tough ...,NTI,op,Sc,New project scope delivered. Tough to juggle r...,Jen,New Trolley Inventory,Operations,Scope,Jenny
9,Week 18,NTI/Mar-Sc] Need to balance resourcing careful...,NTI,mar,Sc,Need to balance resourcing carefully with two ...,Tom,New Trolley Inventory,Marketing,Scope,Tom


In [317]:
# capture any days noted
df['Days Noted'] = df['Detail'].str.extract(r'(\d+) day')

In [318]:
df_output = df[['Week','Project','Sub-Project', 'Task', 'Name','Days Noted','Detail']]
df_output

Unnamed: 0,Week,Project,Sub-Project,Task,Name,Days Noted,Detail
0,Week 16,New Loyalty Scheme,Operations,Scope,Jenny,,Delivered scope for the project. Resourcing fi...
1,Week 17,New Loyalty Scheme,Operations,Build,Jenny,,Build kickoff but long project. jen.
2,Week 17,New Loyalty Scheme,Marketing,Scope,Tom,,Scope completed. tom.
3,Week 17,New Loyalty Scheme,Marketing,Build,Tom,,Marketing Build complete. tom.
4,Week 17,Spring Sale,Marketing,Scope,Carl,3.0,Completed but late in the week due (3 days nee...
5,Week 17,Spring Sale,Operations,Scope,Jonathan,,Scope complete but weekend delivery. jon.
6,Week 18,New Loyalty Scheme,Operations,Deliver,Jenny,10.0,Long delivery process has begun at least 10 da...
7,Week 18,New Loyalty Scheme,Marketing,Deliver,Tom,8.0,Similar to the operations team. 8 days effort ...
8,Week 18,New Trolley Inventory,Operations,Scope,Jenny,4.0,New project scope delivered. Tough to juggle r...
9,Week 18,New Trolley Inventory,Marketing,Scope,Tom,,Need to balance resourcing carefully with two ...


### Output the file

In [319]:
df_output.to_csv(r'output/2021-week19-output.csv')