In [1]:
# Import numpy
import numpy as np
# Import Pandas
import pandas as pd
# import the requests library to send http requests to the NYT API
import requests
# import json library for parsing JSON and converting JSON to python dictionaries
import json as json

#set some pandas options controling output format
pd.set_option('display.notebook_repr_html',True) # output as flat text and not HTML
pd.set_option('display.max_rows', None) # this is the maximum number of rows we will display
pd.set_option('display.max_columns', None) # this is the maximum number of rows we will display

In [2]:
strStandardActivities = 'input/Import_Activities-Standard_Floors.csv'

strSourceActivities = 'input/Import_Activities-115_Nas_Specialty_Floors.csv'
strSourceLogic = 'input/Import_Logic-115_Nas_Specialty_Floors.csv'
strFloors = "input/Floor_Map.tsv"

strActivityFileOutput = 'output/Export_Activities_From_Pandas.csv'
strLogicFileOutput = 'output/Export_Logic_From_Pandas.csv'

# the string to cache the WBS code for activities we are adding
strWBSCode = 'BH.BL-4.115 Nassau1.Floors.4'

In [3]:
# this function takes a template of activities and duplicates them the specified number of times.
def setActivitiesFromTemplate(intStart, intFinish):
    dfNew = pd.read_csv(strStandardActivities,dtype={'ID': str})
    

    dfNewTasks = pd.DataFrame(columns=['id', 'status', 'wbs', 'act_name', 'duration', 'floor_id'])

    
    # for the number of repeats the user wants to make....
    for i in range(intStart, intFinish):
        # cache the new dataframe with the new string format
        dfCache = pd.DataFrame({
            'id' : 'NA.' + str(i).zfill(2) + dfNew['ID'].str.zfill(3), \
            'status' : 'Not Started', \
            'wbs' : strWBSCode, \
            'act_name' : dfNew['Name'] + ' @ Floor ' + str(i), \
            'duration' : dfNew['Duration'], \
            'floor_id' : str(i).zfill(2)})
        
        # combine the two new dataframes into one.
        dfNewTasks = pd.DataFrame(pd.concat([dfNewTasks, dfCache]))
                
    # reset the index of the new dataframe
    dfNewTasks.reset_index(inplace=True, drop=True) 
    
    # Reorder the columns to make import easier.
    dfNewTasks = dfNewTasks[['id', 'status', 'wbs', 'act_name', 'duration', 'floor_id']]
        
    return dfNewTasks

In [4]:
# import our activity source file
dfSource = pd.read_csv(strSourceActivities,dtype={'ID': str, 'FloorID' : str})
# format the ID string to be 3 characters long with leading zeroes
dfSource['ID'] = dfSource['ID'].str.zfill(3)
# format the FloorID string to be 2 characters long with leading zeroes
dfSource['FloorID'] = dfSource['FloorID'].str.zfill(2)
# preview the dataframe
dfSource.head(1)

Unnamed: 0,ID,Name,Duration,AS,AF,FloorID
0,10,Layout & Top Track @ 11th Floor Amenities,6,2-May-16,6-May-16,11M


In [5]:
# create a new dataframe that contains our activity data for export formatted correctly
# format the activity with the apropriate format. Activity IDs are: 
#        "NA." + dfSource['Floor'] + dfSource['ID']
#        ie:   NA. | 10 | 990    =>    NA.10990
dfActivities = pd.DataFrame({
    'id' : 'NA.' + dfSource['FloorID'] + dfSource['ID'], \
    'status' : 'Not Started', 'wbs' : strWBSCode, \
    'act_name' : dfSource['Name'], 'duration' : dfSource['Duration'], \
    'start' : dfSource['AS'], 'finish' : dfSource['AF'], \
    'floor_id' : dfSource['FloorID']})
# preview the dataframe
dfActivities.head(5)

Unnamed: 0,act_name,duration,finish,floor_id,id,start,status,wbs
0,Layout & Top Track @ 11th Floor Amenities,6,6-May-16,11M,NA.11M010,2-May-16,Not Started,BH.BL-4.115 Nassau1.Floors.4
1,Frame & Core Board Risers/Stairs/Elevators @ 1...,10,20-May-16,11M,NA.11M017,11-May-16,Not Started,BH.BL-4.115 Nassau1.Floors.4
2,Rough Duct Work @ 11th Floor Amenities,10,10-May-16,11M,NA.11M020,6-May-16,Not Started,BH.BL-4.115 Nassau1.Floors.4
3,Rough Mechanical Piping @ 11th Floor Amenities,10,10-May-16,11M,NA.11M030,6-May-16,Not Started,BH.BL-4.115 Nassau1.Floors.4
4,Rough Plumbing @ 11th Floor Amenities,10,10-May-16,11M,NA.11M040,6-May-16,Not Started,BH.BL-4.115 Nassau1.Floors.4


In [6]:
# Reorder the columns to make import easier.
dfActivities = dfActivities[['id', 'status', 'wbs', 'act_name', 'duration', 'start', 'finish', 'floor_id']]
# sort the dataframe by 'id'
dfActivities.sort_values('id', inplace=True)
# export the dataframe, without the index
dfActivities.to_csv(strActivityFileOutput, index=False)
# Review the dataframe we created.
dfActivities

Unnamed: 0,id,status,wbs,act_name,duration,start,finish,floor_id
59,NA.03010,Not Started,BH.BL-4.115 Nassau1.Floors.4,Layout & Top Track @ 3rd Floor,6,14-Jul-14,20-Aug-14,03
60,NA.03017,Not Started,BH.BL-4.115 Nassau1.Floors.4,Frame & Core Board Risers/Stairs/Elevators @ 3...,10,21-Aug-14,15-Sep-14,03
61,NA.03020,Not Started,BH.BL-4.115 Nassau1.Floors.4,Rough Duct Work @ 3rd Floor,10,22-Aug-14,26-Aug-14,03
62,NA.03030,Not Started,BH.BL-4.115 Nassau1.Floors.4,Rough Mechanical Piping @ 3rd Floor,10,22-Aug-14,26-Aug-14,03
63,NA.03040,Not Started,BH.BL-4.115 Nassau1.Floors.4,Rough Plumbing @ 3rd Floor,10,22-Aug-14,26-Aug-14,03
64,NA.03050,Not Started,BH.BL-4.115 Nassau1.Floors.4,Rough Sprinkler @ 3rd Floor,10,22-Aug-14,26-Aug-14,03
65,NA.03060,Not Started,BH.BL-4.115 Nassau1.Floors.4,Rough Electrical @ 3rd Floor,10,22-Aug-14,26-Aug-14,03
66,NA.03070,Not Started,BH.BL-4.115 Nassau1.Floors.4,Frame Walls & Ceilings @ 3rd Floor,10,,,03
67,NA.03080,Not Started,BH.BL-4.115 Nassau1.Floors.4,Install Plumbing Frames & Crotons @ 3rd Floor,5,,,03
68,NA.03090,Not Started,BH.BL-4.115 Nassau1.Floors.4,Pull Wire @ 3rd Floor,15,,,03


In [7]:
def setRelationshipsList (strFileName, lstPred, lstSucc, strDropTag):
    # Read in the scheudle logic
    dfBaseLogic = pd.read_csv(strFileName ,dtype={'PredID': str, 'SucID': str})

    # We need to make sure that the PredID and SucID strings are the correct length
    # and that they should have leading zeros '0'.
    dfBaseLogic['SucID'] = dfBaseLogic['SucID'].str.zfill(3)
    dfBaseLogic['PredID'] = dfBaseLogic['PredID'].str.zfill(3)

    # Define my dataframe
    dfLogic = pd.DataFrame(columns=['pred_id', 'succ_id', 'pred_type', 'pred_status', \
                                    'succ_status', 'lag', 'remove'])

    for i in range(len(lstPred)):
        dfTemp = pd.DataFrame(dfBaseLogic['Sequence'].replace({0 : lstPred[int(i)], 1 : lstSucc[int(i)]}))
        dfNew = pd.DataFrame({
                'pred_id' : 'NA.' + lstPred[int(i)] + dfBaseLogic['PredID'], \
                'succ_id' : 'NA.' + dfTemp['Sequence'] + dfBaseLogic['SucID'], \
                'pred_type' : dfBaseLogic['Type'], \
                'pred_status' : 'Not Started', \
                'succ_status' : 'Not Started', \
                'lag' : dfBaseLogic['Lag'], \
                'remove' : dfTemp['Sequence']})

        dfLogic = pd.DataFrame(pd.concat([dfLogic,dfNew]))   
    
    # reset the index of the dataframe
    dfLogic.reset_index(inplace=True, drop=True)
    # sort the dataframe by the 'pred_id' field
    dfLogic.sort_values('pred_id')

    # reset the index of the dataframe
    dfLogic.reset_index(inplace=True, drop=True)
    # drop relationships that will create loops when the schedule is calculated
    dfLogic.drop(dfLogic[dfLogic.succ_id == dfLogic.pred_id].index, inplace=True)
    # reset the index of the dataframe
    dfLogic.reset_index(inplace=True, drop=True)
    # drop relationships that are tagged to be removed
    dfLogic.drop(dfLogic[dfLogic.remove == 'DROP'].index, inplace=True)
    
    # reorder the dataframe and drop int floor because we only needed it for filtering the final product
    dfLogic = dfLogic[['pred_id', 'succ_id', 'pred_type', 'pred_status', 'succ_status', 'lag']]
    
    return dfLogic

In [8]:
# this function takes a template of relationships and duplicates them the specified number of times.
def setRelationshipsRange(strFileName, intStart, intFinish, intBreakStart = 0, intBreakEnd = 0):
    # Read in the scheudle logic
    dfBaseLogic = pd.read_csv(strFileName ,dtype={'PredID': str, 'SucID': str})

    # We need to make sure that the PredID and SucID strings are the correct length
    # and that they should have leading zeros '0'.
    dfBaseLogic['SucID'] = dfBaseLogic['SucID'].str.zfill(3)
    dfBaseLogic['PredID'] = dfBaseLogic['PredID'].str.zfill(3)

    # Define my dataframe
    dfLogic = pd.DataFrame(columns=['pred_id', 'succ_id', 'pred_type', 'pred_status', \
        'succ_status', 'lag', 'PredFloor', 'SuccFloor'])

    # For each floor we want to create...
    for i in range(intStart, intFinish + 1):
        # cache the new dataframe with the new string format
        dfNew = pd.DataFrame({
            'pred_id' : 'NA.' + str(i).zfill(2) + dfBaseLogic['PredID'], \
            'succ_id' : 'NA.' + (i + dfBaseLogic['Sequence']).astype(str).str.zfill(2) + dfBaseLogic['SucID'], \
            'pred_type' : dfBaseLogic['Type'], \
            'pred_status' : 'Not Started', \
            'succ_status' : 'Not Started', \
            'lag' : dfBaseLogic['Lag'], \
            'PredFloor' : i, \
            'SuccFloor' : i + dfBaseLogic['Sequence']})
        
        dfLogic = pd.DataFrame(pd.concat([dfLogic,dfNew]))    

    # sort the dataframe by the 'pred_id' field
    dfLogic.sort_values('pred_id')

    # reset the index of the dataframe
    dfLogic.reset_index(inplace=True, drop=True)
    
    # drop rows with links to activities that are linked to intFinish (meaning one floor past where we were going to go)
    # it is VERY IMPORTANT that this is done AFTER you reset the index.
    dfLogic.drop(dfLogic[dfLogic.SuccFloor > intFinish].index, inplace=True)
    
    # reset the index of the dataframe
    dfLogic.reset_index(inplace=True, drop=True)
    
    # if the user specified a break start
    if intBreakStart > 0 :
        # drop impropper predecessor relationships to activities within the break area
        dfLogic.drop(dfLogic[(dfLogic.PredFloor >= intBreakStart) & (dfLogic.PredFloor < intBreakEnd)].index, inplace=True)
        # reset the index of the dataframe
        dfLogic.reset_index(inplace=True, drop=True)

        # drop impropper successor relationships to activities within the break area
        dfLogic.drop(dfLogic[(dfLogic.SuccFloor >= intBreakStart) & (dfLogic.SuccFloor < intBreakEnd)].index, inplace=True)
        # reset the index of the dataframe
        dfLogic.reset_index(inplace=True, drop=True)
    
    
    # drop loop creating relationships relationships
    dfLogic.drop(dfLogic[dfLogic.succ_id == dfLogic.pred_id].index, inplace=True)
    
    # reorder the dataframe and drop PredFloor and SuccFloor because we only needed 
    # it for filtering the final product
    dfLogic = dfLogic[['pred_id', 'succ_id', 'pred_type', 'pred_status', 'succ_status', 'lag']]
    
    return dfLogic

In [9]:
# This function removes relationships that point to activities that do not exist
def setCleanRelationships(dfLogicList, dfActivitiesList):
    # remove predecessor activities (pred_id) that do not exist
    dfLogicList = dfLogicList[~dfLogicList.pred_id.isin(dfActivitiesList.id)]
    
    # remove successor activities (succ_id) that do not exist
    dfLogicList = dfLogicList[~dfLogicList.succ_id.isin(dfActivitiesList.id)]
    
    # retrun the cleaned dataframe
    return dfLogicList

In [10]:
# I need to create typical floors from the 13th floor to the 44th.
# I will do this by calling my floor creation function defined above. 
dfCreateLogic = setRelationshipsRange(strSourceLogic, 13, 44, 28, 30)

dfCreateLogic = setCleanRelationships(dfCreateLogic, dfActivities)

# export the dataframe, without the index
dfCreateLogic.to_csv(strLogicFileOutput, index=False)

# review the dataframe we just exported
dfCreateLogic

Unnamed: 0,pred_id,succ_id,pred_type,pred_status,succ_status,lag
0,NA.13010,NA.13020,FS,Not Started,Not Started,0
1,NA.13012,NA.13015,FS,Not Started,Not Started,0
2,NA.13015,NA.13110,FF,Not Started,Not Started,5
3,NA.13017,NA.13070,FS,Not Started,Not Started,0
4,NA.13020,NA.13030,SS,Not Started,Not Started,2
5,NA.13030,NA.13040,SS,Not Started,Not Started,2
6,NA.13040,NA.13050,SS,Not Started,Not Started,2
7,NA.13050,NA.13060,SS,Not Started,Not Started,2
8,NA.13060,NA.13070,FS,Not Started,Not Started,0
9,NA.13070,NA.13110,FS,Not Started,Not Started,0


In [11]:
# There are some specialty Floors in the building, I will add those by passing 2 lists
#     lstPredecessors : contains a list of the floors we need to make logic for
#     lstSuccessors : contains a list of the floors that are successors to the predecessor
#     floors. Not all predecessor floors will have successor floors.
lstPredecessors = ['SC', 'CL', '1L', '03', '11M']
lstSuccessors = ['CL', '1L', '03', 'DROP', 'DROP']

# call the function to create the successors
dfLogic = setRelationshipsList(strSourceLogic, lstPredecessors, lstSuccessors, 'DROP')

# export the dataframe, without the index
dfLogic.to_csv(strLogicFileOutput, index=False)

# review the dataframe we just exported
dfLogic

Unnamed: 0,pred_id,succ_id,pred_type,pred_status,succ_status,lag
0,NA.SC010,NA.SC020,FS,Not Started,Not Started,0
1,NA.SC012,NA.SC015,FS,Not Started,Not Started,0
2,NA.SC015,NA.SC110,FF,Not Started,Not Started,5
3,NA.SC017,NA.SC070,FS,Not Started,Not Started,0
4,NA.SC020,NA.SC030,SS,Not Started,Not Started,2
5,NA.SC030,NA.SC040,SS,Not Started,Not Started,2
6,NA.SC040,NA.SC050,SS,Not Started,Not Started,2
7,NA.SC050,NA.SC060,SS,Not Started,Not Started,2
8,NA.SC060,NA.SC070,FS,Not Started,Not Started,0
9,NA.SC070,NA.SC110,FS,Not Started,Not Started,0
