# Run Scenarios

Copyright 2020, Thabor WALBEEK

This notebook includes the detailed run, as described in the Master dissertation (from Chapter 4: Research Methodology). It processes all the data files, extracts the information and prepares the data for further processing in the Modeling Phase (see Notebook: xxxx)

## Set parameters for the scenarios

The master dissertation includes 10 different scenarios with settings as follows:

In [16]:
import pandas as pd

# One row per scenario (index 1-10): the 'min' and 'max' factors that
# GetRealDuration applies to a planned activity duration.
scen_settings = pd.DataFrame(
    [
        (0.2, 0.8),
        (0.4, 0.9),
        (0.5, 1.0),
        (0.6, 1.1),
        (0.8, 1.2),
        (0.9, 1.3),
        (1.0, 1.5),
        (1.1, 1.7),
        (1.2, 1.9),
        (0.2, 1.9),
    ],
    columns=['min', 'max'],
    index=list(range(1, 11)),
)

scen_settings

Unnamed: 0,min,max
1,0.2,0.8
2,0.4,0.9
3,0.5,1.0
4,0.6,1.1
5,0.8,1.2
6,0.9,1.3
7,1.0,1.5
8,1.1,1.7
9,1.2,1.9
10,0.2,1.9


## Create the project

In [None]:
# NOTE: this binds a second name to the same DataFrame (no copy is made), so
# the cost columns added below are also visible through `actDur`.
metricinput = actDur

# Create dummy values for 'Cost Value' and 'Avg Cost'
metricinput['Cost Value'] = round(Decimal(0), 5)
metricinput['Avg Cost'] = round(Decimal(0), 5)

# Inclusive bounds of the randomly drawn planned cost. They are plain ints:
# random.randint() rejects non-integer arguments such as Decimal on
# Python >= 3.12 (and deprecates them from 3.10).
minvalue = 50
maxvalue = 250

# Assign cost values as a new column to metrics for all activities except the dummy-variables
# (the first and the last row are skipped)
for i in range(1, nbAct - 1):
    # Draw the planned cost and store it as a 5-decimal Decimal
    randomNumb = round(Decimal(random.randint(minvalue, maxvalue)), 5)
    metricinput.loc[i, 'Cost Value'] = randomNumb
    costvalue = metricinput.loc[i, 'Cost Value']
    # Get the activity duration as a 5-decimal Decimal
    activityDur = round(Decimal(float(metricinput.loc[i, 'Activity Duration'])), 5)
    # Assign the 'Avg cost' value as the planned Cost value divided by the planned Activity duration
    metricinput.loc[i, 'Avg Cost'] = round(costvalue / activityDur, 5)

### Create Function to calculate the real duration

In [2]:
def GetRealDuration(duration, min_factor=None, max_factor=None):
    """Draw a simulated real duration for an activity with planned `duration`.

    The planned duration is scaled by a lower and an upper factor to get a
    [minimum, maximum] range. Five candidate durations (the two ends and three
    quarter steps, all truncated to int) are weighted by an empirical
    distribution built from 1000 random draws over the brackets
    10% / 10% / 30% / 25% / 25%, and one candidate is then selected with a
    final random draw against the cumulative percentages.

    Parameters
    ----------
    duration : number or numeric string
        Planned activity duration; converted with ``float()``.
    min_factor, max_factor : float, optional
        Scale factors for the lower and upper end of the range. When omitted
        they are read from ``scen_settings.loc[run, 'min' / 'max']`` using the
        globals ``scen_settings`` and ``run``.

    Returns
    -------
    int
        The selected duration; a result of 0 is replaced by 1.

    Side effects
    ------------
    Sets the globals ``RealDuration`` and ``cumdistr_step1`` ..
    ``cumdistr_step5`` (percentage of the 1000 draws that fell in each step).
    """
    global RealDuration, cumdistr_step1, cumdistr_step2, cumdistr_step3, cumdistr_step4, cumdistr_step5

    if min_factor is None:
        min_factor = scen_settings.loc[run, 'min']
    if max_factor is None:
        max_factor = scen_settings.loc[run, 'max']

    # Scale with the real-valued factors. Truncating the factors to int first
    # (0.2 -> 0, 1.9 -> 1) would collapse most scenarios to a zero range.
    planned = float(duration)
    minimum = planned * float(min_factor)
    maximum = planned * float(max_factor)
    quarter = (maximum - minimum) / 4

    step1 = int(minimum)
    step2 = int(minimum + quarter)
    step3 = int(step2 + quarter)
    step4 = int(step3 + quarter)
    step5 = int(maximum)
    steps = (step1, step2, step3, step4, step5)

    # Upper bounds of the first four probability brackets; the fifth bracket
    # takes everything from 0.75 upwards.
    brackets = (0.1, 0.2, 0.5, 0.75)
    samples = 1000

    # Count draws per bracket position (not per duration value) so that
    # coinciding steps for short activities are not counted twice.
    counts = [0] * len(steps)
    for _ in range(samples):
        draw = random.randint(0, 100) / 100
        counts[sum(draw >= bound for bound in brackets)] += 1

    percentages = [round((count / samples) * 100, 2) for count in counts]
    (cumdistr_step1, cumdistr_step2, cumdistr_step3,
     cumdistr_step4, cumdistr_step5) = percentages

    # Select a step by comparing one draw against the *cumulative* percentages.
    draw = random.randint(0, 100)
    RealDuration = step5  # fallback if rounding leaves the last threshold just below 100
    cumulative = 0.0
    for step, percentage in zip(steps, percentages):
        cumulative += percentage
        if draw <= cumulative:
            RealDuration = step
            break

    # An activity always takes at least one period
    if RealDuration == 0:
        RealDuration = 1

    return RealDuration

### Create the real durations

In [None]:
# Add the three "real" columns to metricinput, each starting at a Decimal zero
for real_column in ('Real Duration', 'Real Cost', 'Avg Real Cost'):
    metricinput[real_column] = Decimal(0.000)

# Fill the three columns for every activity except the first and last row
for i in range(1, nbAct - 1):
    # Planned duration is the input of the real-duration draw
    duration = metricinput.loc[i, 'Activity Duration']
    metricinput.loc[i, 'Real Duration'] = GetRealDuration(duration)

    # Planned cost and planned average cost as Decimals
    costvalue = Decimal(metricinput.loc[i, 'Cost Value'])
    avgcostvalue = Decimal(metricinput.loc[i, 'Avg Cost'])

    # Real duration plus a tiny offset so the division below can never hit zero
    realdurvalue = Decimal(metricinput.loc[i, 'Real Duration']) + Decimal(0.000000001)

    # Planned duration as a 5-decimal Decimal
    actdurvalue = round(Decimal(float(duration)), 5)

    # Real cost = planned cost + duration deviation * planned average cost
    deviation_cost = (realdurvalue - actdurvalue) * avgcostvalue
    metricinput.loc[i, 'Real Cost'] = round(costvalue + deviation_cost, 5)
    # Average real cost = planned cost spread over the real duration
    metricinput.loc[i, 'Avg Real Cost'] = round(costvalue / realdurvalue, 5)

metricinput

### Create new Gantt chart based on the real durations

In [None]:
# Global accumulator that calculateLongestPathReal reads and updates with the
# longest path length found so far; it starts at 0.
longest_path_real = 0

In [None]:
def calculateLongestPathReal(act_index_new, current_length_new):
    """Recursively walk the successors of an activity, summing real durations.

    Parameters
    ----------
    act_index_new : activity index into `metricinput`, `nbSuc` and `actSuc`.
    current_length_new : path length accumulated before this activity.

    Returns
    -------
    For an activity without successors: the accumulated length including its
    own 'Real Duration'. Otherwise: the global `longest_path_real`, which is
    raised to the longest length returned by any followed successor.
    """
    global longest_path_real

    length_so_far = current_length_new + metricinput.loc[act_index_new, 'Real Duration']
    successor_count = nbSuc.loc[act_index_new]['Number of Successors']

    # No successors: this path stops here
    if successor_count == 0:
        return length_so_far

    successor_row = actSuc.loc[act_index_new]
    for position in range(successor_count):
        successor = successor_row[position]
        # -1 entries are not followed
        if successor == -1:
            continue
        longest_path_real = max(
            calculateLongestPathReal(successor, length_so_far),
            longest_path_real,
        )

    return longest_path_real

In [None]:
# Walk the network once from activity 0. The call both returns the longest
# real path and stores it in the global `longest_path_real`, so a second
# traversal would only repeat the same work.
ev_longestpath = calculateLongestPathReal(0, 0)

### Create the corrected Gantt Chart

In [None]:
# Longest of the planned and the real project length, as an int
maxlongestpath = int(max(int(longest_path), int(longest_path_real)))
# Planned minus real length; negative when the real project runs longer
dev_longestpath = int(longest_path - longest_path_real)

# When the real project is longer, extend the planned chart with one zero
# column per extra period so both charts have the same width
if dev_longestpath < 0:
    dev_longestpath = abs(dev_longestpath)
    for extra_period in range(1, dev_longestpath + 1):
        columnName = int(longest_path + extra_period)
        ganttchart.insert(columnName, columnName, 0)

ganttchart = ganttchart.astype(float)
ganttchart

### Create Gantt based on Real Values

In [None]:
# Empty real-duration Gantt chart: one row per activity, one column per period
height_new = int(nbAct)
width_new = int(maxlongestpath+1)
gantt_real = pd.DataFrame(0, index=range(height_new), columns=range(width_new))

# Default schedule: every activity starts in period 1
metricinput['Real Start'] = 1
metricinput['Real End'] = 0 + metricinput['Real Duration']

collength_new = len(actPrePivot.columns)

# Derive real start/end from the predecessor entries in actPrePivot.
# NOTE(review): when an activity has several predecessor entries, the last
# column processed determines the start (not the latest-finishing
# predecessor), and predecessors must already have their 'Real End' set -
# confirm this matches how the planned chart was built.
for i in range(nbAct):
    for j in range(1, collength_new):
        value_new = actPrePivot.loc[i][j]  # = predecessor
        if value_new == 0:
            metricinput.loc[i, 'Real Start'] = 1
            metricinput.loc[i, 'Real End'] = 0 + metricinput.loc[i, 'Real Duration']
        elif value_new > 0:
            # Start the period after the predecessor ends
            endvalue_new = metricinput.loc[value_new]['Real End']
            metricinput.loc[i, 'Real Start'] = endvalue_new + 1
            metricinput.loc[i, 'Real End'] = endvalue_new + metricinput.loc[i, 'Real Duration']

# Mark every period in which an activity is active.
# A single .loc[row, col] assignment is used: the chained form
# gantt_real.loc[i][j] = 1 writes to an intermediate object and is not
# guaranteed to reach the frame (it never does under pandas copy-on-write).
for i in range(nbAct):
    startValue_new = metricinput.loc[i, 'Real Start']
    endValue_new = metricinput.loc[i, 'Real End']
    for j in range(width_new):
        if startValue_new <= j <= endValue_new:
            gantt_real.loc[i, j] = 1

In [None]:
# Cast the 0/1 activity flags to float and display the chart
gantt_real = gantt_real.astype(float)
gantt_real

### Create Planned Cost Gantt Chart

In [None]:
# Planned-value chart: cumulative planned cost per activity and period
height_planned = int(nbAct)
width_planned = int(maxlongestpath+1)
gantt_planned = pd.DataFrame(float(0), index=range(height_planned), columns=range(width_planned))

for i in range(nbAct):
    # Planned cost per period of this activity (constant along the row)
    avgcostvalue = round(float(metricinput.loc[i, 'Avg Cost']), 5)
    # Running cumulative value; column 0 stays 0
    calcValue = float(0)
    for j in range(1, width_planned):
        ganttchartvalue = round(float(ganttchart.loc[i, j]), 5)
        calcValue = round(float(calcValue + (avgcostvalue * ganttchartvalue)), 5)
        # Single .loc[row, col] assignment: the chained form .loc[i][j] = ...
        # is not guaranteed to write into the frame
        gantt_planned.loc[i, j] = float(calcValue)

sum_row = {col: gantt_planned[col].sum() for col in gantt_planned}
# Turn the sums into a DataFrame with one row with an index of 'Total PV':
sum_df = pd.DataFrame(sum_row, index=["Total PV"])
# Add the row (pd.concat replaces DataFrame.append, removed in pandas 2.0):
gantt_planned = pd.concat([gantt_planned, sum_df])

In [None]:
# Make sure every value (including the 'Total PV' row) is float, then display
gantt_planned = gantt_planned.astype(float)
gantt_planned

### Create Real Cost Gantt Chart

In [None]:
# Actual-cost chart: cumulative cost per activity over the real schedule
height_realcost = int(nbAct)
width_realcost = int(maxlongestpath+1)

gantt_realcost = pd.DataFrame(float(0), index=range(height_realcost), columns=range(width_realcost))

for i in range(nbAct):
    # Planned average cost per period, applied to every real active period
    avgcostvalue = round(float(metricinput.loc[i, 'Avg Cost']), 5)
    # Running cumulative value; column 0 stays 0
    calcValue = float(0)
    for j in range(1, width_realcost):
        ganttchartvalue = round(float(gantt_real.loc[i, j]), 5)
        calcValue = round(float(calcValue + (avgcostvalue * ganttchartvalue)), 5)
        # Single .loc[row, col] assignment: the chained form .loc[i][j] = ...
        # is not guaranteed to write into the frame
        gantt_realcost.loc[i, j] = float(calcValue)

sum_row = {col: gantt_realcost[col].sum() for col in gantt_realcost}
# Turn the sums into a DataFrame with one row with an index of 'Total AC':
sum_df = pd.DataFrame(sum_row, index=["Total AC"])
# Add the row (pd.concat replaces DataFrame.append, removed in pandas 2.0):
gantt_realcost = pd.concat([gantt_realcost, sum_df])

In [None]:
# Display the real-cost chart, including the 'Total AC' row
gantt_realcost

### Create EV Gantt Chart

In [None]:
# Earned-value chart: planned cost earned per activity over the real schedule
height_ev = int(nbAct)
width_ev = int(maxlongestpath+1)

gantt_ev = pd.DataFrame(float(0), index=range(height_ev), columns=range(width_ev))

for i in range(nbAct):
    costvalue = round(float(metricinput.loc[i, 'Cost Value']), 5)

    # Tiny offset keeps the division defined for rows with a zero real duration
    realdurvalue = round(float(metricinput.loc[i, 'Real Duration']), 5)
    realdurvalue = realdurvalue + 0.0000000001

    # Planned cost earned per real period of this activity
    avgcostvalue = float(costvalue) / float(realdurvalue)

    # Running cumulative value; column 0 stays 0
    calcValue = float(0)
    for j in range(1, width_ev):
        ganttchartvalue = round(float(gantt_real.loc[i, j]), 5)
        calcValue = round(float(calcValue + (avgcostvalue * ganttchartvalue)), 5)
        # Single .loc[row, col] assignment: the chained form .loc[i][j] = ...
        # is not guaranteed to write into the frame
        gantt_ev.loc[i, j] = float(calcValue)

sum_row = {col: gantt_ev[col].sum() for col in gantt_ev}
# Turn the sums into a DataFrame with one row with an index of 'Total EV':
sum_df = pd.DataFrame(sum_row, index=["Total EV"])
# Add the row (pd.concat replaces DataFrame.append, removed in pandas 2.0):
gantt_ev = pd.concat([gantt_ev, sum_df])

In [None]:
# Display the earned-value chart, including the 'Total EV' row
gantt_ev

### Create project metrics

In [None]:
# Collect the project-level totals. The rows are selected by their label
# rather than by a fixed position (32), so this also works when the project
# does not have exactly 32 activity rows.
PV = gantt_planned.loc[["Total PV"]]
EV = gantt_ev.loc[["Total EV"]]
AC = gantt_realcost.loc[["Total AC"]]

# Row order: PV, AC, EV (pd.concat replaces DataFrame.append, removed in pandas 2.0)
project_metric = pd.concat([PV, AC, EV])
project_metric

### Calculate Earned Schedule

In [None]:
# Single-row float frame ('Total ES') with one zero-filled column per period
width_es = int(maxlongestpath + 1)
ES = pd.DataFrame(0, index=["Total ES"], columns=range(width_es)).astype(float)
ES

In [None]:
# Earned Schedule per period: the (interpolated) period at which the planned
# value reaches the earned value of period i.
longest_spi = int(maxlongestpath+1)

# Planned value in the last column (loop-invariant)
PVvaluemax = project_metric.loc['Total PV', maxlongestpath]

for i in range(1, longest_spi):
    EVvalue = project_metric.loc['Total EV', i]

    # All planned value has been earned: ES equals the planned project length.
    # '>=' so that EV == final PV is not left at 0 by the search below.
    if EVvalue >= PVvaluemax:
        ES.loc['Total ES', i] = float(longest_path)
        continue

    # Find the first period whose PV exceeds EV and interpolate linearly
    # between it and the previous period.
    for j in range(1, longest_spi):
        PVvalue = project_metric.loc['Total PV', j]
        if EVvalue < PVvalue:
            # Before period 1 the planned value is taken as 0
            PVprev = project_metric.loc['Total PV', j-1] if j > 1 else float(0)
            ESvalue = float(j - 1) + ((EVvalue - PVprev) / (PVvalue - PVprev))
            # Single .loc[row, col] assignment: the chained form .loc[..][i] = ...
            # is not guaranteed to write into the frame
            ES.loc['Total ES', i] = ESvalue
            # Stop at the first match (also for j == 1), otherwise a later
            # period would overwrite the value
            break

# pd.concat replaces DataFrame.append, removed in pandas 2.0
project_metric = pd.concat([project_metric, ES])
project_metric

### Add SPI values

In [None]:
# SPI(t) per period: Earned Schedule divided by the period number
longest_spi = int(maxlongestpath+1)
SPI = pd.DataFrame(float(0), index=["SPI_t"], columns=range(longest_spi))
SPI = SPI.astype(float)

for j in range(1, longest_spi):
    if j == 26:
        # NOTE(review): period 26 is hard-coded to SPI(t) = 1 - presumably the
        # planned finish of the project this notebook was written for; confirm
        # before running other projects.
        SPIvalue = float(1)
    else:
        ESvalue = float(project_metric.loc['Total ES', j])
        if ESvalue == 0:
            # NOTE(review): kept as in the original - this yields a very large
            # number (j / 1e-14) rather than ES / j = 0; confirm it is intended.
            SPIvalue = float(j) / (ESvalue+0.00000000000001)
        else:
            SPIvalue = ESvalue / float(j)
    # Single .loc[row, col] assignment: the chained form .loc[..][j] = ...
    # is not guaranteed to write into the frame
    SPI.loc['SPI_t', j] = SPIvalue

# pd.concat replaces DataFrame.append, removed in pandas 2.0
project_metric = pd.concat([project_metric, SPI])
project_metric

In [None]:
# Keep only the first `cols` SPI columns (periods 0 .. real project length - 1).
# The length is cast to int because it is used as a slice bound, and the frame
# is referenced by the name it is created under in this notebook (`SPI`).
cols = int(ev_longestpath)
spi_finish = SPI[SPI.columns[:cols]]
spi_finish

In [None]:
# Length of one 5% interval: the real longest path divided into 20 equal parts
percinterval = ev_longestpath / 20
percinterval

### Calculate the 5% intervals

In [None]:
# Create a data frame where the final values can be added to:
# one column per 5% interval, i.e. 20 columns labelled 1..20
Final_SPI = pd.DataFrame(float(0), index=["SPI_t"], columns=range(1, 21))

# set the interval (longest_path for EV divided by 20, to create 5% intervals)
interval = float(percinterval)

for i in range(1, 21):
    # Point in time (in periods) at i * 5% of the real project length
    interval_step = round(interval * float(i), 3)

    # Period that contains this point, and the period before it
    RoundUp = math.ceil(interval_step)
    RoundUpprev = RoundUp - 1

    # Fraction of the way from the previous period to RoundUp
    fraction = 1 - (RoundUp - interval_step)

    if RoundUp == ev_longestpath:
        # spi_finish has no column for the final period itself, so the last
        # two available columns are used instead
        upper_col, lower_col = RoundUp - 1, RoundUp - 2
    else:
        upper_col, lower_col = RoundUp, RoundUpprev

    SPIvalue = spi_finish.loc['SPI_t', upper_col]
    OldSPIvalue = spi_finish.loc['SPI_t', lower_col]
    # Linear interpolation between the two neighbouring periods
    NewSPIvalue = OldSPIvalue + (SPIvalue - OldSPIvalue) * fraction

    Final_SPI.loc['SPI_t', i] = NewSPIvalue

Final_SPI