In [1]:
import pandas as pd
import numpy as np
import math
from random import choices

In [2]:
import devanalyst.simulation.GenerateTimecards as timecard
import devanalyst.simulation.statics as S_

importing Jupyter notebook from c:\alex\code\labs\devanalyst\devanalyst\simulation\GenerateTimecards.ipynb
importing Jupyter notebook from c:\alex\code\labs\devanalyst\devanalyst\simulation\statics.ipynb
importing Jupyter notebook from c:\alex\code\labs\devanalyst\devanalyst\simulation\simulationModels.ipynb


<h1>Stochastic Utilities</h1>

In [3]:
class Random():
    """Helper class to make random choices with one consistent generator instance.

    All calling sequences share the same generator, so that if required a deterministic
    output is produced system-wide from a single seed (see `reset`).
    """

    def __init__(self):
        # No seed is recorded until reset() is called; the generator starts unseeded.
        self.seed = None
        self.random = np.random.RandomState()

    def reset(self, seed):
        """Records `seed` and replaces the generator with a new one created from it."""
        self.seed = seed
        self.random = np.random.RandomState(self.seed)

    def pickOne(self, array):
        """Returns a random element from `array`. Returns None if `array` is empty."""
        if len(array) == 0:
            return None
        return array[self.random.randint(0, len(array))]

    def pickOneIdx(self, series):
        """Returns a random index label from a Pandas Series.

        NOTE: unlike pickOne there is no guard for an empty series; numpy's randint rejects
        an empty range, so an empty series raises instead of returning None.
        """
        return series.index[self.random.randint(0, len(series))]

    def pickHowLong(self, maxDuration):
        """Returns an integer for a random duration between 1 day and `maxDuration` (inclusive)."""
        # randint's upper bound is exclusive, hence the +1
        return self.random.randint(1, maxDuration + 1)

    def pickOneWithWeights(self, population, weights):
        """Returns a random member of the population, selected with a likelihood given by the weights.

        -population: a list of possible values over which to take a random selection
        -weights: a list of equal size to population. The probability of selecting a particular item
         population[x] is equal to weights[x] divided by the sum of all weights.

        If the sum of all weights is 0, or the length of weights differs from that of population,
        it returns None.
        """
        if len(population) != len(weights):
            return None
        total = sum(weights)
        if total == 0:
            return None

        # Scale the weights into a probability distribution, as required by numpy's choice
        probabilityDist = [w / total for w in weights]

        # Pick 1 selection only, without replacement
        return self.random.choice(population, 1, replace=False, p=probabilityDist)[0]

In [4]:
class ModelsConfig:
    """Groups the models and the shared helpers used when running a simulation.

    Constructor parameters:
    -costModels: array of CostModel instances, each representing an independent driver for how actual
     costs deviate from estimates. The real cost is obtained by applying all the models in succession
     to the estimate.
    -qualityModels: TBD
    -allocationModel: an instance of an AllocationModel

    Attributes created by the constructor itself:
    -random: a Random instance, used for all stochastic-related calculations.
    -context: a dictionary where the caller can put extra information about the call. Useful when
     debugging.
    """

    def __init__(self, costModels, qualityModels, allocationModel):
        # Shared helpers, created fresh for every configuration
        self.context = {}
        self.random = Random()

        # Models supplied by the caller
        self.allocationModel = allocationModel
        self.qualityModels = qualityModels
        self.costModels = costModels

In [25]:
class Distribution:
    """Helper class of class-static utilities for distributions.

    A distribution is represented as a dictionary, where the keys are the "x axis" (the possible values
    for the random variable behind the distribution) and the value for each key is the "y axis" of the
    distribution. Notice that distributions are not necessarily normalized, i.e., the sum of the 'y'
    values across all 'x's may not equal 1.0. So the 'y's are more like weights than probabilities.
    """

    @staticmethod
    def measureDistributionDistance(dist1, dist2):
        """Returns the L2 distance between two distributions.

        A key missing from one of the distributions counts as a 0 there. A distribution given as None
        is a boundary case and is treated as if it were all 0's. Neither argument is modified.
        """
        prob1 = {} if dist1 is None else dist1
        prob2 = {} if dist2 is None else dist2

        all_keys = set(prob1.keys()).union(prob2.keys())

        # Read missing keys as 0.0 instead of writing the padding into the caller's dictionaries
        sum_of_squares = 0
        for x in all_keys:
            diff = prob1.get(x, 0.0) - prob2.get(x, 0.0)
            sum_of_squares += diff * diff
        return math.sqrt(sum_of_squares)

    @staticmethod
    def calcAttractiveness(x, runningDist, targetDist):
        """Computes an attractiveness score for selecting x.

        The score reflects the impact that selecting x would have on runningDist in terms of whether it
        gets it closer or not to targetDist. Attractiveness is higher if the 'distance' between the
        running and target distributions is reduced. If selecting the item ends up increasing the
        distance then its attractiveness is negative.
        """
        initial_distance = Distribution.measureDistributionDistance(runningDist, targetDist)

        # Works on a copy, so runningDist itself is left as it was
        updatedDist = Distribution._appendToUnnormalizedDistribution(runningDist, x)
        final_distance = Distribution.measureDistributionDistance(updatedDist, targetDist)

        return initial_distance - final_distance

    @staticmethod
    def pickUsingProximityToTarget(runningDist, remainingDist, targetDist, modelsConfig):
        """Randomly selects a key of remainingDist, weighted by how much closer it brings runningDist
        towards targetDist using the L2 measure.

        The selection is delegated to modelsConfig.random.pickOneWithWeights.
        """
        candidates = list(remainingDist.keys())

        weights = []
        for estimate in candidates:
            attractiveness = Distribution.calcAttractiveness(estimate, runningDist, targetDist)

            # Must have non-negative weights, since they drive a probability distribution. Treat
            # negative attractiveness as a 0 weight
            weights.append(0 if attractiveness < 0 else attractiveness)

        return modelsConfig.random.pickOneWithWeights(candidates, weights)

    @staticmethod
    def pickUsingFrequency(remainingDist, modelsConfig):
        """Randomly selects a key of remainingDist, with the selection weighted by the weights in
        remainingDist.

        The distribution is expanded into a sample (see dist_to_sample) and one element of the sample
        is drawn with modelsConfig.random.pickOne.
        """
        sample = Distribution.dist_to_sample(remainingDist)
        return modelsConfig.random.pickOne(sample)

    @staticmethod
    def _appendToUnnormalizedDistribution(dist, estimate):
        """Returns a copy of dist in which the count for `estimate` is increased by 1."""
        result = dist.copy()
        if estimate in result:
            result[estimate] = result[estimate] + 1
        else:
            result[estimate] = 1
        return result

    @staticmethod
    def _normalizeDistribution(dist):
        """Returns a probability distribution (i.e., sum of "y's" is 1.0) by scaling down the y values
        of the given distribution. Returns None if the y values add up to 0.
        """
        area = 0
        for x in dist:
            area += dist[x]

        if area == 0:
            return None

        return {x: dist[x] / area for x in dist}

    @staticmethod
    def dist_to_sample(distribution):
        """Returns an array of the keys of distribution, duplicating each key as many times as its
        weight in the distribution (the weight is truncated to an integer).
        """
        sample = []
        for key in distribution:
            sample.extend([key] * int(distribution[key]))
        return sample

    @staticmethod
    def addToDist(pick, dist):
        """Mutates dist by adding to it, key by key, the values in pick."""
        for k, val in pick.items():
            if k in dist:
                dist[k] += val
            else:
                dist[k] = val

    @staticmethod
    def removeFromDist(pick, dist):
        """Mutates dist by subtracting from it, key by key, the values in pick.

        A key of pick that is absent from dist ends up in dist with the negated value.
        """
        for k, val in pick.items():
            if k in dist:
                dist[k] -= val
            else:
                dist[k] = -val

    @staticmethod
    def distIsEmpty(dist):
        """Returns True if every value in dist is 0 (which includes a dist with no keys at all)."""
        return not any(val != 0 for val in dist.values())

<h1>Cost Models</h1>

In [6]:
class CostModel:
    """Abstract class for models of how the actual cost of a WorkItem differs from its estimate."""

    def runModel(self, item):
        """Returns a cost multiplier for the WorkItem 'item', i.e., a number equal to the ratio between
        the 'actual cost' (in man-days) of developing the 'item' and the 'estimated cost'.

        -item: a WorkItem
        """
        # This is the abstract class, so this method should never be called: concrete classes
        # implement it. The abstract version returns nothing.
        return None

In [7]:
class DefaultCostModel (CostModel):
    """Default class for models that simulate how actual costs differ from estimates.

    Usually there might be different implementations, each trying to capture a different dynamic with
    a different driver. This default class just assumes everything takes longer than expected by a
    given delay.
    """

    def __init__(self, delay_pct = 0.25):
        """-delay_pct: the percentage by which estimates are off. For example, a delay_pct of 0.25
        means that a task estimated to take 10 man-days actually takes 12.5 man-days.
        """
        self.delay_pct = delay_pct

    def runModel(self, item):
        """Returns the same multiplier for every item: 1 plus the configured delay_pct."""
        multiplier = 1 + self.delay_pct
        return multiplier

In [8]:
def computeRealCost(item, costModels):
    """Computes the real cost of delivering a work item.

    The real cost starts out as the item's estimate and is then multiplied, in succession, by the
    multiplier returned by each cost model's runModel(item). Each model captures one 'factor' that
    drives costs to differ from estimates.

    -item: the work item; its 'estimate' attribute is the starting cost
    -costModels: the models to apply
    """
    realCost = item.estimate
    for model in costModels:
        realCost *= model.runModel(item)
    return realCost

<h1>Allocation Models</h1>

In [9]:
class _AvailabilityCriterion:
# Abstract parent class
    def _getPeopleWithBandwidth(self, timeRequired, sprintDuration):
        return

In [10]:
class _GreedyAvailabilityCriterion (_AvailabilityCriterion):
    """Availability criterion under which anybody with enough bandwidth left can take on the work."""

    def __init__(self, work, currentOrNext):
        """-work: object whose committedTime(sprintDuration) is queried for bandwidth; the result is
         indexed by the columns 'Developer', 'Bandwidth' and 'NEXT SPRINT Bandwidth' (presumably a
         Pandas DataFrame - confirm against committedTime)
        -currentOrNext: True to look at bandwidth in the current sprint, False for the next sprint
        """
        self.work = work
        self.currentOrNext = currentOrNext

    def _getPeopleWithBandwidth(self, timeRequired, sprintDuration):
        """Returns the list of developers whose bandwidth is at least timeRequired."""
        committed = self.work.committedTime(sprintDuration)

        if not self.currentOrNext:
            nextSprint = committed[['Developer', 'NEXT SPRINT Bandwidth']]
            # Filter to only developers who have 'carry over' bandwidth from this sprint into the next one
            carryOver = nextSprint[nextSprint['NEXT SPRINT Bandwidth'] > sprintDuration]
            enoughTime = carryOver['NEXT SPRINT Bandwidth'] >= timeRequired
            return list(carryOver[enoughTime]['Developer'])

        current = committed[['Developer', 'Bandwidth']]
        enoughTime = current['Bandwidth'] >= timeRequired
        return list(current[enoughTime]['Developer'])

In [23]:
class AllocationModel:
    """Abstract class for models that allocate unplanned WorkItems to developers.

    Concrete classes implement _allocate_helper and _pickItem, and are expected to set
    self.sprintDuration, which _getBin reads.
    """

    def _allocate_helper(self, work, workToPick, currentOrNext, modelsConfig):
        """Implemented by concrete classes.
        Mutates work by allocating WorkItems to developers, depleting the workToPick.
        """
        return None

    def _pickItem(self, runningDist, remainingDist, targetDist, workToPick, modelsConfig):
        """Implemented by concrete classes.
        Picks the next "preferred" WorkItem to work on.
        """
        return None

    def allocate(self, work, modelsConfig):
        """Allocates the unplanned work in two passes and returns the (mutated) work.

        The first pass allocates for the current sprint. The second pass allocates any unused time to
        deliverables for the next sprint, i.e., it uses time left over from the current sprint to get a
        head start on the work for the next sprint, borrowing next sprint's capacity since we only need
        to deliver then.
        """
        for currentOrNext in (True, False):
            # Gather the unplanned work afresh for each pass, since the previous pass changed it
            unplanned = work.allocations[S_.UNPLANNED][S_.OWNER_TBD]
            workToPick = (list(unplanned[S_.BUGS_ON_COMPLETED_STORIES])
                          + list(unplanned[S_.BUGS_ON_UNFINISHED_STORIES])
                          + list(unplanned[S_.UNFINISHED_STORIES]))

            self._allocate_helper(work, workToPick, currentOrNext, modelsConfig) # mutates 'work' and 'workToPick'

        return work

    def _getUnnormalizedDistribution(self, items):
        """Returns an unnormalized distribution of the estimated effort required for the WorkItems in
        items.

        The distribution is represented as a dictionary, where the keys are the "x axis" (the possible
        values for estimates) and the value for each key is the "y axis" of the distribution (the count
        of how many WorkItems have such an estimate).

        NOTE: WorkItems that were partially progressed in prior sprints may have a residual estimate
        that is not a nice integer number, but a number with lots of decimal places. To avoid a
        distribution that is too finely cut, each estimate is classified into a bin (see _getBin).
        That limits the number of possible values of 'x'.
        """
        counts = {}
        for item in items:
            binned = self._getBin(item.estimate)
            counts[binned] = counts.get(binned, 0) + 1
        return counts

    def _inferDistribution(self, work):
        """Returns the unnormalized distribution of the estimates of the work already planned for the
        current and next sprints, across all people.
        """
        categories = (S_.BUGS_ON_COMPLETED_STORIES, S_.BUGS_ON_UNFINISHED_STORIES, S_.UNFINISHED_STORIES)

        workAlreadyPlanned = []
        for bucket in (S_.CURRENT_SPRINT, S_.NEXT_SPRINT):
            perPerson = work.allocations[bucket]
            for person in perPerson.keys():
                assigned = perPerson[person]
                for category in categories:
                    workAlreadyPlanned.extend(assigned[category])

        return self._getUnnormalizedDistribution(workAlreadyPlanned)

    def _getBin(self, estimate):
        """Returns the bin into which the given 'estimate' falls.

        Bins are 1/self.sprintDuration wide: the estimate is rounded to the nearest multiple of
        1/self.sprintDuration.
        """
        steps = round(self.sprintDuration * estimate)
        return steps / self.sprintDuration



In [29]:
class GreedyAllocationModel (AllocationModel):
    """For a sprint, allocates work to developers by maximizing the planned tasks for each developer.

    So as long as the developer has some bandwidth left in the sprint, the algorithm will search for a
    user story that can be done in the remaining time.
    As a result, this algorithm has a tendency to plan short-duration user stories early in the release
    cycle, so that later sprints in the release cycle need to deal with comparatively coarser-sized user
    stories. This causes the release cycle to be unbalanced: later sprints have big-ticket items for the
    most part.
    """

    def __init__(self, sprintDuration):
        self.sprintDuration = sprintDuration

    def _allocate_helper(self, work, workToPick, currentOrNext, modelsConfig):
        """Mutates 'work' and 'workToPick' by allocating WorkItems to developers and in the process
        depleting the 'workToPick'. Items nobody has time for are dropped from 'workToPick' without
        being allocated.

        -workToPick: an array of WorkItem objects, corresponding to unplanned tasks that are candidate
         tasks to allocate to developers
        -work: a WorkAssignment instance reflecting a ScrumTeam's allocations and remaining bandwidth
         prior to the allocation this model will conduct. This model's allocation process will then
         mutate 'work' by reflecting in it tasks that are no longer unplanned but rather are now
         allocated to a particular developer.
        -currentOrNext: boolean to indicate if the allocation is being done for the current sprint or
         the next sprint. True for current, False for next.
        -modelsConfig: parameters for running the model (e.g., random generators)
        """
        availabilityCriterion = _GreedyAvailabilityCriterion(work, currentOrNext)
        targetDist = self._getUnnormalizedDistribution(workToPick)
        targetSprint = S_.CURRENT_SPRINT if currentOrNext else S_.NEXT_SPRINT

        while len(workToPick) > 0:
            # The distributions are not used by this class's _pickItem, but are handed to it so that
            # an overriding _pickItem can rely on them
            runningDist = self._inferDistribution(work) # Re-compute in each loop since work changes in each loop
            remainingDist = self._getUnnormalizedDistribution(workToPick) # Re-compute as workToPick changes

            item = self._pickItem(runningDist, remainingDist, targetDist, workToPick, modelsConfig)

            # In next cycle of loop don't want to encounter this item, as it would be processed by then
            workToPick.remove(item)
            timeRequired = item.estimate * (1-item.percentAchieved)

            peopleWithTimeToDoIt = availabilityCriterion._getPeopleWithBandwidth(timeRequired, self.sprintDuration)
            potentialOwner = modelsConfig.random.pickOne(peopleWithTimeToDoIt)

            if potentialOwner is None:
                # This WorkItem can't be done in this sprint, as nobody has enough time for the effort
                # it requires. Try with some other work item
                continue

            work.reAssign(item, potentialOwner, targetSprint)

    def _pickItem(self, runningDist, remainingDist, targetDist, workToPick, modelsConfig):
        """Picks the next WorkItem to work on: a random one out of workToPick. The distributions are
        ignored by this model.
        """
        return modelsConfig.random.pickOne(workToPick)


In [30]:
class BalancedAllocationModel (AllocationModel):
    """For a sprint, allocates work to developers by trying to keep a roughly average user story size
    across all sprints in the release.
    """

    def __init__(self, sprintDuration):
        self.sprintDuration = sprintDuration

    def _allocate_helper(self, work, workToPick, currentOrNext, modelsConfig):
        """Mutates 'work' and 'workToPick' by allocating WorkItems to developers and in the process
        depleting the 'workToPick'. Items that would not bring the distribution of planned work closer
        to the target, or that nobody has time for, are dropped from 'workToPick' without being
        allocated.

        -workToPick: an array of WorkItem objects, corresponding to unplanned tasks that are candidate
         tasks to allocate to developers
        -work: a WorkAssignment instance reflecting a ScrumTeam's allocations and remaining bandwidth
         prior to the allocation this model will conduct. This model's allocation process will then
         mutate 'work' by reflecting in it tasks that are no longer unplanned but rather are now
         allocated to a particular developer.
        -currentOrNext: boolean to indicate if the allocation is being done for the current sprint or
         the next sprint. True for current, False for next.
        -modelsConfig: parameters for running the model (e.g., random generators)

        Raises an AssertionError if work was available for the current sprint and none of it could be
        allocated.
        """
        if len(workToPick) == 0:
            return # Nothing to do, and if we don't return may get an assertion below as we assume there is work to pick

        availabilityCriterion = _GreedyAvailabilityCriterion(work, currentOrNext)
        targetSprint = S_.CURRENT_SPRINT if currentOrNext else S_.NEXT_SPRINT

        # For debugging - we want to make sure that after the while loop we did allocate at least
        # something, so keep track of when we allocate stuff
        something_was_allocated = False
        assertion_info = {}
        assertion_info['modelsConfig context'] = modelsConfig.context
        assertion_info['len(workToPick) at start'] = len(workToPick)

        targetDist = self._getUnnormalizedDistribution(workToPick)
        while len(workToPick) > 0:
            runningDist = self._inferDistribution(work) # Re-compute in each loop since work changes in each loop
            remainingDist = self._getUnnormalizedDistribution(workToPick)

            chosenItem = self._pickItem(runningDist, remainingDist, targetDist, workToPick, modelsConfig)

            # In next cycle of loop don't want to encounter this item, as it would be processed by then
            workToPick.remove(chosenItem)

            # The distributions are keyed by bin, so score the item by its bin and not by its raw
            # estimate (which may be a residual number that is not the key of any distribution)
            chosenBin = self._getBin(chosenItem.estimate)
            itemAttractiveness = Distribution.calcAttractiveness(chosenBin, runningDist, targetDist)
            if itemAttractiveness <= 0:
                continue

            timeRequired = chosenItem.estimate * (1-chosenItem.percentAchieved)

            peopleWithTimeToDoIt = availabilityCriterion._getPeopleWithBandwidth(timeRequired, self.sprintDuration)
            potentialOwner = modelsConfig.random.pickOne(peopleWithTimeToDoIt)

            if potentialOwner is None:
                # This WorkItem can't be done in this sprint, as nobody has enough time for the effort
                # it requires. Try with some other work item
                continue

            work.reAssign(chosenItem, potentialOwner, targetSprint)
            something_was_allocated = True

        # For debugging, confirm that we at least allocated something for the current sprint
        assertion_info['len(workToPick) at end'] = len(workToPick)
        if currentOrNext:
            assert something_was_allocated, assertion_info

    def _pickItem(self, runningDist, remainingDist, targetDist, workToPick, modelsConfig):
        """Picks the next "preferred" WorkItem to work on.

        An estimate bin is selected at random, weighted by how much closer it brings runningDist
        towards targetDist, and then a WorkItem falling in that bin is returned.

        Raises an AssertionError if no WorkItem in workToPick falls in the selected bin, which includes
        the case where no bin could be selected at all.
        """
        chosenEstimate = Distribution.pickUsingProximityToTarget(runningDist, remainingDist, targetDist, modelsConfig)

        # Find an item for such a chosenEstimate. The keys of remainingDist are bins, so compare the
        # bin of each item rather than its raw estimate. If several items match, the last one wins.
        chosenItem = None
        for item in workToPick:
            if self._getBin(item.estimate) == chosenEstimate:
                chosenItem = item

        # Should always have found an item by now
        assert chosenItem is not None, "No WorkItem found for the chosen estimate " + str(chosenEstimate)

        return chosenItem
    
    
    

