In [1]:
from setup import *
companies_df = pd.read_excel(os.path.join(data_dir, 'companies_data.xlsx'))

# Activity Types

In [2]:
activity_types = list(companies_df['activity_type'].unique())
print('activity_types:', activity_types)

activity_types: ['WBS', 'TT_Task', 'TT_FinMile', 'TT_Mile', nan]


**TT_Task**: time dependent activity - likely what you want to focus on for clustering  
**TT_Mile**: start milestone  
**TT_FinMile** finish milestone  
a start milestone would be something like 'begin ground works', and a finish milestone would be 'complete ground works', logistically they're equivalent but one of them denotes a start of something, and the other denotes an end of something.  
**WBS (Work Breakdown Structure)**: An organizational tool, and it is used to split the tasks into categorical sections of the project, so a WBS might be 'Station Foundations', and all tasks/milestones under this WBS will be related to that overall section. WBS's are hierarchical, so you might have:
'Station 1'  -> 'Station Foundations' -> 'East Station Foundations' -> 'East Station Digging'  
These would all be different WBS levels, and at each level there may be tasks and milestones, or there may only be tasks/milestones at the lowest level  

# Milestones

## Start 

In [3]:
start_ms = list(companies_df['activity_name'][companies_df['activity_type']=='TT_Mile'].unique())
print('start milestones:', start_ms)

start milestones: ['AS BUILT DOCUMENTATION [submittal to customer]', 'System in service', 'Demin Water available for Construction', 'Commence Weekly Winding Insulation Resistance Checks', 'First Del to pad of Modules (Heating Surfaces)', 'HRSG Structure Ready for Modules & Drums', 'Temporary Power Available for GT Lube Oil Flush & Restore', 'Ready for Pre-assembling Pipework MBA/MBP/ MBX/ EKG Piping', 'Condenser Ready for Preassembling Piping', 'Condenser Ready for PAB Piping', 'Hand back area South of Condenser to Civil Contractor', 'Equipment Delivery by Siemens - MV Switchgear BBA', 'Equipment Delivery by Siemens - LV/DC Switchgear BFA', 'Equipment Delivery by Siemens - Cabinets for I&C 10UBA Building', 'Equipment Delivery by Siemens - Econopac fire Protection System CYE/ SGJ', 'Equipment Delivery by Siemens - ST MA- (LP-ROTOR, INNERCASING)', 'Equipment Delivery by Siemens - STEAM TURBINE MA- (HP/IP) (incl. FCA)', 'Equipment Delivery by Siemens - STEAM TURBINE MA- (HP/IP) EX WORKS',

## Finish

In [4]:
finish_ms = list(companies_df['activity_name'][companies_df['activity_type']=='TT_FinMile'].unique())
print('finish milestones:', finish_ms)

finish milestones: ['Post TB2 P/L', 'MARK-UPS FROM SITE AVAILABLE', 'FINAL OPERATING AND MAINTENANCE DOCUMENTATION (O&M)', '(BOP6) COMPLETION AND SIGN OFF OF ALL CHECK SHEET TO CONFIRM COMPLETION OF HOT COMMISSIONING', '(6c) COMPLETION AND SIGN OFF OF ALL CHECK SHEET TO CONFIRM COMPLETION OF HOT COMMISSIONING', '(6e) COMPLETION OF AS-BUILT DRAWINGS AND OTHER COMPLETION DOCUMENTS', '(6d) COMPLETION OF O&M MANUALS', 'Handover of TOP 10MBP', 'Handover of TOP 10MBU-01', 'Handover of TOP 10BF-02', 'Handover of TOP 10CR-01', 'Handover of TOP 10CR-02', 'Handover of TOP-10CR-01', 'Handover of TOP-10CR-02', 'Handover of TOP 10SAM30', 'Back Engergization Achieved', 'Handover of TOP 10CKA', 'Handover of TOP 10MKC', 'Handover of TOP 10CJT', 'Handover of TOP 10BBT', 'Handover of TOP 10BA-02', 'Handover of TOP 10BA-01', 'Handover of TOP 10CHA', 'Handover of TOP 10BAT', 'Handover of TOP 10BF-01', 'Handover of TOP-10BF-01', 'LV Board ready for energisation via BMT01', 'BMT01 Energized', 'Handover of T

# WBS (Work Breakdown Structure) 

In [5]:
wbs_names = list(companies_df['activity_name'][companies_df['activity_type']=='WBS'].unique())
print('WBS:', wbs_names)

WBS: ['CCGT D1_Integrated Programme Schedule', 'EWC - Early Civil Works', 'Construction', 'Piling Works for Power Island (Turbine Hall and HRSG)', 'Warehouse partial demolition works and A&A Works', 'Site Preparation, Advance Works and Piling Works', 'Statutory Submissions', 'BD Review and Approval', 'Design / Submission Preparation', 'Warehouse partial demolition and A&A Works', 'Advance works and site formation', 'Excavation', '(OEM) Commissioning', 'POST COMMISSIONING DELIVERABLES', 'POST TB2 P/L', 'DOCUMENTATION', 'SDS - Software Deliverable Schedule', 'Software Deliverables Construction/ Commissioning', 'Operation & Maintenance Manuals', 'MILESTONE', 'BOP Option_Key Dates / Contract Schedule 3A (Programme Requirements & Constraints)', 'Key Dates / Contract Schedule 3A (Programme Requirements & Constraints)', 'COLD COMMISSIONING', 'CLP System', '10CHA', '10EK', '10ZTA', '10EGC', '10EG', '10QJA', '10GHD', '10SGA', '10BAT', 'All System', 'LCAL MEP System', 'Balance of Systems for Fir

# Projects Names

In [15]:
companies = list(companies_df['company'].unique())
print('Companies in dataset:', companies)
for company in companies:
    project_names = list(companies_df['project_name'][companies_df['company']==company].unique())
    project_names = [p for p in project_names if str(p)!='nan']
    print(90*'-')
    print(print('Company: {c}'.format(c=str(company))))
    print('Projects:', ','.join(project_names))

Companies in dataset: ['CCGT_D1', 'CCGT_D2', 'DGDA_DG2', 'DUKE_S2X1CC', 'DUKE_WAYNE_COUNTY', 'SIME_DARBY_TASKS']
------------------------------------------------------------------------------------------
Company: CCGT_D1
None
Projects: CCGTD1_IPS
------------------------------------------------------------------------------------------
Company: CCGT_D2
None
Projects: HK1003-Rev.0E-3
------------------------------------------------------------------------------------------
Company: DGDA_DG2
None
Projects: GMOQ-6,OPRA-1,SAMO-1,ICONH,KSU01,EW01,ARENA,INF01,KKR01,MP001-1,STAGE04,STAGE03,STAGE02,STAGE01
------------------------------------------------------------------------------------------
Company: DUKE_S2X1CC
None
Projects: SCC Restore Outage,SP-PGN SU_R0 2-2,SP-PGN Start-up_R0 2,SP-ENG-MASTER-15,SP-CONST-MASTER-15
------------------------------------------------------------------------------------------
Company: DUKE_WAYNE_COUNTY
None
Projects: WCCCP,WCCCP- PGN-SU,TOFRAG,STG,WCEP 10090