In [3]:
import pandas as pd
import numpy as np
import math
from random import choices
import statistics

In [4]:
import devanalyst.simulation.statics as S_
from devanalyst.simulation.businessObjects import Ticket

importing Jupyter notebook from c:\users\aleja\documents\code\chateauclaudia-labs\devanalyst\devanalyst\simulation\statics.ipynb
importing Jupyter notebook from c:\users\aleja\documents\code\chateauclaudia-labs\devanalyst\devanalyst\simulation\businessObjects.ipynb


<h1>Stochastic Utilities</h1>

In [6]:
class Random():
    """Helper class to make random choices through one generator instance shared by all calling sequences,
    so that, if required, a deterministic output is produced system-wide from a single seed.
    """

    def __init__(self):
        self.seed = None                       # No explicit seed until reset() is called
        self.random = np.random.RandomState()  # Generator used by every pick* method

    def reset(self, seed):
        """Replaces the generator with a fresh one built from 'seed' and remembers the seed.

        -seed: value handed to numpy.random.RandomState
        """
        self.seed = seed
        self.random = np.random.RandomState(self.seed)

    def pickOne(self, array):
        """Returns a random element from 'array'. Returns None if 'array' is empty."""
        if len(array) == 0:
            return None
        return array[self.random.randint(0, len(array))]

    def pickOneIdx(self, series):
        """Returns a random index label from a Pandas Series. Returns None if the series is empty."""
        if len(series) == 0:
            return None # randint(0, 0) would raise, so mirror pickOne's behaviour for empty input
        return series.index[self.random.randint(0, len(series))]

    def pickHowLong(self, maxDuration):
        """Returns an integer corresponding to a random duration between 1 day and maxDuration (inclusive)."""
        return self.random.randint(1, maxDuration + 1) # randint's upper bound is exclusive, hence the +1

    def pickOneWithWeights(self, population, weights):
        """Returns a random member of the population, selected with a likelihood given by the weights.

        -population: a list of possible values over which to take a random selection
        -weights: a list of equal size to population. The probability of selecting a particular item
        population[x] is equal to weights[x] divided by the sum of all weights.

        Returns None if the sum of all weights is 0, or if weights and population differ in length.
        """
        if len(population) != len(weights):
            return None
        total = sum(weights)
        if total == 0:
            return None

        probalityDist = [w/total for w in weights]
        return self.random.choice(population, 1, replace=False, p=probalityDist)[0] # Pick 1 selection only, without replacement

    def pickSubsetIndices(self, collection):
        """Returns a list of distinct integers: a random subset of the indices of the given collection.

        The returned list has a minimum size of 1 and a maximum size of len(collection) - 1, unless the
        collection has length less than 2, in which case an empty list is returned.

        -collection: a list of objects
        """
        if len(collection) < 2:
            return []

        subset_size    = self.random.randint(1, len(collection))
        # Sample from ALL the indices of the collection, not just from the first 'subset_size' of them
        subset_indices = self.random.choice(len(collection), subset_size, replace=False)
        return list(subset_indices)

    def pickSubset(self, collection):
        """Returns a list corresponding to a random subset of the given collection. It may be empty, be the
        whole collection, or anything in between.

        -collection: a list of objects
        """
        if len(collection) == 0:
            return [] # Nothing to choose from
        # Upper bound is exclusive, so +1 is needed for the whole collection to be a possible outcome
        subset_size    = self.random.randint(0, len(collection) + 1)
        subset         = self.random.choice(collection, subset_size, replace=False)
        return list(subset)
    

In [6]:
class ModelsConfig:
    """Holds the models that drive a simulation together with the state they share.

    Attributes assigned by the constructor:
    -costModels, qualityModels, allocationModel: stored exactly as given
    -random: a Random instance, used for all stochastic-related calculations
    -globalRepo: None at construction; meant to be set afterwards, once the repos have been built
    -context: None at construction; meant to be set in each cycle of the release, as it changes per cycle
    """

    def __init__(self, costModels, qualityModels, allocationModel):
        """
        -costModels: array of CostModel instances, each representing an independent driver for how actual
        costs deviate from estimates. The real cost is obtained by applying all the models in succession
        to the estimate.
        -qualityModels: TBD
        -allocationModel: an instance of an AllocationModel
        """
        # The models themselves
        self.costModels      = costModels
        self.qualityModels   = qualityModels
        self.allocationModel = allocationModel

        # Shared generator for every random choice
        self.random = Random()

        # Filled in later by the caller
        self.globalRepo = None
        self.context    = None

In [7]:
class Distribution:
    """Helper class of static utilities for distributions.

    A distribution is represented as a dictionary, where the keys are the "x axis" (the possible values
    for the random variable behind the distribution) and the value for each key is the "y axis" of the
    distribution. Distributions are not necessarily normalized, i.e., the sum of the 'y' values across all
    'x's may not equal 1.0. So the 'y's are more like weights than probabilities.
    """

    @staticmethod
    def measureDistributionDistance(dist1, dist2):
        """Returns the L2 distance between two distributions.

        A key present in only one of the distributions counts as having weight 0 in the other one.
        A distribution given as None is treated like it is all 0's. Neither argument is modified.
        """
        prob1 = {0:0} if dist1 is None else dist1 # Boundary case. Treat None like it is all 0's
        prob2 = {0:0} if dist2 is None else dist2

        all_keys = set(prob1.keys()).union(set(prob2.keys()))

        # Compute the L2 difference of the distributions. Missing keys are padded with 0 on the fly via
        # .get(), rather than by writing the padding into the callers' dictionaries.
        sum_of_squares = 0
        for x in all_keys:
            f1 = prob1.get(x, 0.0)
            f2 = prob2.get(x, 0.0)
            sum_of_squares += (f1-f2)*(f1-f2)
        return math.sqrt(sum_of_squares)

    @staticmethod
    def calcAttractiveness(x, runningDist, targetDist):
        """Computes an attractiveness score for selecting x, based on the impact that selecting it would
        have on runningDist in terms of whether it gets it closer or not to targetDist.

        The score is the distance between runningDist and targetDist before adding x, minus the distance
        after adding x. So it is positive if the distance is reduced and negative if it is increased.
        """
        initial_distance = Distribution.measureDistributionDistance(runningDist, targetDist)
        updatedDist      = Distribution._appendToUnnormalizedDistribution(runningDist, x)
        final_distance   = Distribution.measureDistributionDistance(updatedDist, targetDist)
        return initial_distance - final_distance

    @staticmethod
    def pickUsingProximityToTarget(runningDist,  remainingDist, targetDist, modelsConfig):
        """Randomly selects a key of remainingDist, with the random selection weighted by how much closer
        it brings runningDist towards targetDist using the L2 measure.

        The selection is delegated to modelsConfig.random.pickOneWithWeights, whose result is returned as is.
        """
        candidates = list(remainingDist.keys())
        weights = []
        for estimate in candidates:
            attractiveness = Distribution.calcAttractiveness(estimate, runningDist, targetDist)
            # Must have non-negative weights, since they trigger a probability distribution. Treat negative
            # attractiveness as a 0 weight
            weights.append(0 if attractiveness < 0 else attractiveness)

        return modelsConfig.random.pickOneWithWeights(candidates, weights)

    @staticmethod
    def pickUsingFrequency(remainingDist, modelsConfig):
        """Randomly selects a key of remainingDist, with the random selection weighted by the weights in
        remainingDist.

        Implemented by expanding the distribution into a sample and picking one element of it with
        modelsConfig.random.pickOne.
        """
        sample = Distribution.dist_to_sample(remainingDist)
        return modelsConfig.random.pickOne(sample)

    @staticmethod
    def _appendToUnnormalizedDistribution(dist, estimate):
        """Returns a copy of 'dist' in which the weight of 'estimate' is increased by 1 (or set to 1 if
        'estimate' was not a key). 'dist' itself is left untouched."""
        result = dist.copy()
        if estimate in result.keys():
            result[estimate] = result[estimate] + 1
        else:
            result[estimate] = 1
        return result

    @staticmethod
    def _normalizeDistribution(dist):
        """Returns a probability distribution (i.e., sum of "y's" is 1.0) by scaling down the y values of
        the given distribution. Returns None if the y values add up to 0."""
        area = 0
        for x in dist.keys():
            area += dist[x]

        if area == 0:
            return None

        return {x: dist[x]/area for x in dist.keys()}

    @staticmethod
    def dist_to_sample(distribution):
        """Returns an array of the keys of 'distribution', duplicating each key as many times as its
        weight in the distribution (the weight is truncated with int())."""
        sample = []
        for key in distribution:
            sample.extend([key] * int(distribution[key]))
        return sample

    @staticmethod
    def sample_to_dist(sample, number_of_bins):
        """Returns a distribution counting how many elements of 'sample' fall in each bin, where the bin
        of an element is given by getBin."""
        distribution = {}
        for e in sample:
            x = Distribution.getBin(e, number_of_bins)
            if x in distribution:
                distribution[x] = distribution[x] + 1
            else:
                distribution[x] = 1
        return distribution

    @staticmethod
    def getBin(estimate, number_of_bins):
        """Returns the bin (the 'x' value of a distribution) in which 'estimate' is classified."""
        # NOTE(review): as written, the multiplication and division by number_of_bins cancel out, so this
        # is in effect round(estimate). If bins of width 1/number_of_bins were intended, the expression
        # would be round(number_of_bins * estimate)/number_of_bins -- confirm before changing, since that
        # alters the keys of every distribution.
        return round((number_of_bins * estimate)/number_of_bins)

    @staticmethod
    def addToDist(pick, dist):
        """Mutates 'dist' by adding to it, key by key, the weights in 'pick'."""
        for k in pick.keys():
            val = pick[k]
            if k in dist.keys():
                dist[k] += val
            else:
                dist[k] = val

    @staticmethod
    def removeFromDist(pick, dist):
        """Mutates 'dist' by subtracting from it, key by key, the weights in 'pick'. A key of 'pick' that
        is missing in 'dist' ends up in 'dist' with the negated weight."""
        for k in pick.keys():
            val = pick[k]
            if k in dist.keys():
                dist[k] -= val
            else:
                dist[k] = -val

    @staticmethod
    def distIsEmpty(dist):
        """Returns True if every weight in 'dist' is 0 (in particular, if 'dist' has no keys)."""
        for k in dist.keys():
            if dist[k] != 0:
                return False
        return True

<h1>Cost Models</h1>

In [8]:
class CostModel:
    """Abstract class for models of how the actual cost of a WorkItem deviates from its estimate."""

    def runModel(self, item, modelsConfig):
        """Returns a cost multiplier for the WorkItem 'item', i.e., a number equal to the ratio between the
        'actual cost' (in man-days) of developing the 'item' and the 'estimated cost'.

        -item: a WorkItem

        This is the abstract class, so this method should never be called: concrete classes implement it.
        Here it simply returns None.
        """
        return None

In [9]:
class DefaultCostModel (CostModel):
    """Default model for how actual costs differ from estimates: everything takes longer than expected
    by one fixed percentage, regardless of the item.

    Usually there might be different implementations, each trying to capture a different dynamic with a
    different driver.
    """

    def __init__(self, delay_pct = 0.25):
        """
        -delay_pct: the percentage by which estimates are off. For example, a delay_pct of 0.25 means that
        a task estimated to take 10 man-days actually takes 12.5 man-days.
        """
        self.delay_pct = delay_pct

    def runModel(self, item, modelsConfig):
        """Returns the cost multiplier 1 + delay_pct. Neither 'item' nor 'modelsConfig' is consulted."""
        multiplier = 1 + self.delay_pct
        return multiplier

In [10]:
def computeRealCost(item, modelsConfig):
    """Computes the real cost of delivering a work item.

    Starts from item.estimate and multiplies it, in order, by the multiplier returned by the runModel
    method of each model in modelsConfig.costModels. Each model captures one driver of why costs differ
    from estimates. With no cost models the estimate is returned unchanged.
    """
    realCost = item.estimate
    for costModel in modelsConfig.costModels:
        realCost *= costModel.runModel(item, modelsConfig)
    return realCost

In [11]:
# Sets the cost it takes to complete a task based on the profile of the developer completing it, as a percentage
# of the original estimate of the task:
class MeritocraticCostModel (CostModel):
    """Cost model in which the multiplier depends only on the profile of the developer owning the item."""

    def __init__(self):
        pass

    def runModel(self, item, modelsConfig):
        """Returns the ratio of real cost to estimate according to the profile of the item's owner:

        S_.STAR     - 1.0 (100%)
        S_.SOLID    - 2.0 (200%)
        S_.MEDIOCRE - 4.0 (400%)
        otherwise   - 3.0 (300%), for new people and for everyone else

        -item: a WorkItem instance. Its 'owner' is looked up in modelsConfig.globalRepo.developersRepo
        """
        developer = modelsConfig.globalRepo.developersRepo.findDeveloper(item.owner)

        if developer.profile == S_.STAR:
            return 1.0
        if developer.profile == S_.SOLID:
            return 2.0
        if developer.profile == S_.MEDIOCRE:
            return 4.0
        return 3.0 # for new people and for everyone else

<h1>Allocation Models</h1>

In [12]:
class _AvailabilityCriterion:
# Abstract parent class
    def _getPeopleWithBandwidth(self, timeRequired, modelsConfig):
        return

In [13]:
class AllocationModel:
    """Abstract class for the models that allocate unplanned WorkItems to developers.

    Concrete classes implement _allocate_helper and _pickItem, and optionally _post_process. This class
    drives the allocation passes (see 'allocate') and keeps in self.log a record of how the allocation
    decisions were made, which buildLog_df turns into a DataFrame.
    """

    def __init__(self):
        self._resetLog()

    def _resetLog(self):
        """Discards any log information and starts again with an empty set of cycles."""
        self.log = {'Cycles': {}} # Log information of how allocation decisions are made.

    def _allocate_helper(self, work, workToPick, currentOrNext, modelsConfig):
        """Implemented by concrete classes.
        Mutates work by allocating WorkItem's to developers, depleting the workToPick."""
        return None

    def _post_process(self, work, modelsConfig, currentOrNext):
        """Does any post processing after the main allocation logic, if any."""
        return None

    def _pickItem(self, runningDist, remainingDist, targetDist, workToPick, modelsConfig):
        """Implemented by concrete classes.
        Picks the next "preferred" WorkItem to work on."""
        return None

    def allocate(self, work, modelsConfig):
        """Runs two allocation passes over 'work' and returns the (mutated) 'work'.

        The first pass allocates for the current sprint. The second pass tries again, but this time
        allocating any unused time to deliverables for the next sprint, i.e., it uses time left over from
        the current sprint to get a head start on the work for the next sprint, borrowing next sprint's
        capacity since we only need to deliver then.
        """
        self._resetLog() # Log reflects a single call to allocate, so must be cleared from one call to another

        for currentOrNext in (True, False): # True: current sprint. False: next sprint
            # Computed afresh for each pass, since the prior pass changed what is left to pick
            workToPick = self._getWorkToPick(work)
            self._allocate_helper(work, workToPick, currentOrNext, modelsConfig) # mutates 'work' and 'workToPick'
            self._post_process(work, modelsConfig, currentOrNext)

        return work

    def _getWorkToPick(self, work):
        """Returns a new list with the unplanned work that has no owner yet: production bugs first, then
        dev-time bugs, then unfinished stories."""
        unplanned = work.allocations[S_.UNPLANNED][S_.OWNER_TBD]
        workToPick = []
        for category in (S_.PRODUCTION_BUGS, S_.DEV_TIME_BUGS, S_.UNFINISHED_STORIES):
            workToPick.extend(unplanned[category])
        return workToPick

    def _getUnnormalizedDistribution(self, items, context):
        """Returns an unnormalized distribution of the estimated effort required for the WorkItems in items.

        The distribution is represented as a dictionary, where the keys are the "x axis" (the possible
        values for estimates) and the value for each key is the "y axis" of the distribution (the count of
        how many WorkItems have such an estimate).

        NOTE: WorkItems that were partially progressed in prior sprints may have a residual estimate that
        is not a nice integer number, but a number with lots of decimal places. To avoid a distribution
        that is too finely cut, each estimate is classified in a bin by Distribution.getBin (called with
        context.sprintDuration as the number of bins). That limits the number of possible values of 'x'.
        """
        distribution = {}
        for item in items:
            x = Distribution.getBin(item.estimate, context.sprintDuration)
            distribution[x] = distribution.get(x, 0) + 1
        return distribution

    def _inferDistribution(self, work, context):
        """Returns the unnormalized distribution of the estimates of all the work already planned, for any
        person, in the current sprint or in the next sprint."""
        workAlreadyPlanned = []
        for bucket in [S_.CURRENT_SPRINT, S_.NEXT_SPRINT]:
            subwork = work.allocations[bucket]
            for person in subwork.keys():
                personWork = subwork[person]
                workAlreadyPlanned.extend(personWork[S_.PRODUCTION_BUGS])
                workAlreadyPlanned.extend(personWork[S_.DEV_TIME_BUGS])
                workAlreadyPlanned.extend(personWork[S_.UNFINISHED_STORIES])

        return self._getUnnormalizedDistribution(workAlreadyPlanned, context)

    def _postLog(self, cycle, bucket, remainingDist, originalRunningDist, finalRunningDist):
        """Records in the log, under the given cycle and bucket, the three distributions given. Any prior
        entry for the same cycle and bucket is replaced."""
        cycleLog = self.log['Cycles'].setdefault(cycle, {})
        cycleLog[bucket] = {'Remaining Dist': remainingDist,
                            'Original Running Dist': originalRunningDist,
                            'Final Running Dist': finalRunningDist}

    def _buildLog_df_Helper(self, targetDist, cycle, bucket, distName):
        """Returns a triple (mean, distance, data) for the distribution logged as 'distName' under the
        given cycle and bucket:

        -data: the distribution expanded into a sample
        -mean: the mean of that sample, or 0 if the sample is empty
        -distance: the distance between targetDist and the logged distribution

        If nothing was logged for the bucket in that cycle, returns 0, 0, [].
        """
        cycleLog = self.log['Cycles'][cycle]
        if bucket not in cycleLog.keys():
            return 0,0,[]

        snapshotDist = cycleLog[bucket][distName]

        data     = Distribution.dist_to_sample(snapshotDist)
        distance = Distribution.measureDistributionDistance(targetDist, snapshotDist)
        mean     = statistics.mean(data) if len(data) != 0 else 0

        return mean, distance, data

    def buildLog_df(self, title, context):
        """Returns a DataFrame with one row per logged cycle.

        Besides 'Title' and 'Cycle', each row has, for each of the current-sprint and next-sprint buckets,
        the size, mean, distance to the target distribution and data of the initial, remaining and final
        distributions of the cycle, plus a 'Bins' column holding context.sprintDuration.

        Reads self.log['Target dist'], which this class does not set itself.
        """
        DEL = ' - '
        buckets = [S_.CURRENT_SPRINT, S_.NEXT_SPRINT]
        logical_cols = ['Initial Size', 'Initial Mean', 'Initial Distance', 'Initial Data',
                        'Remaining Size', 'Remaining Mean', 'Remaining Distance', 'Remaining Data',
                        'Final Size', 'Final Mean', 'Final Distance', 'Final Data',
                        'Bins']
        # Column prefix -> name under which the corresponding distribution was logged by _postLog
        snapshots = [('Initial', 'Original Running Dist'),
                     ('Remaining', 'Remaining Dist'),
                     ('Final', 'Final Running Dist')]

        actual_columns = ['Title', 'Cycle']
        log_dict = {'Title': [], 'Cycle': []}
        for bucket in buckets:
            for c in logical_cols:
                real_col = c + DEL + bucket
                actual_columns.append(real_col)
                log_dict[real_col] = []

        targetDist = self.log['Target dist']

        for cycle in self.log['Cycles'].keys():
            log_dict['Title'].append(title)
            log_dict['Cycle'].append(cycle)
            for bucket in buckets:
                suffix = DEL + bucket
                for prefix, distName in snapshots:
                    mean, distance, data = self._buildLog_df_Helper(targetDist, cycle, bucket, distName)
                    log_dict[prefix + ' Size' + suffix].append(len(data))
                    log_dict[prefix + ' Mean' + suffix].append(mean)
                    log_dict[prefix + ' Distance' + suffix].append(distance)
                    log_dict[prefix + ' Data' + suffix].append(data)

                log_dict['Bins' + suffix].append(context.sprintDuration)

        return pd.DataFrame(log_dict, columns=actual_columns)

In [14]:
class _GreedyAvailabilityCriterion (_AvailabilityCriterion):
    """Availability criterion under which anybody with enough bandwidth left qualifies for a task."""

    def __init__(self, work, currentOrNext):
        """
        -work: object whose committedTime(sprintDuration) reports the bandwidth of each developer
        -currentOrNext: True to look at bandwidth in the current sprint, False for the next sprint
        """
        self.work = work
        self.currentOrNext = currentOrNext

    def _getPeopleWithBandwidth(self, timeRequired, modelsConfig):
        """Returns the list of developers whose bandwidth is at least 'timeRequired'.

        For the current sprint the 'Bandwidth' column is used. For the next sprint the
        'NEXT SPRINT Bandwidth' column is used, and only for developers whose value in that column exceeds
        the sprint duration, i.e., those who have 'carry over' bandwidth from this sprint into the next.
        """
        ctx = modelsConfig.context
        committed = self.work.committedTime(ctx.sprintDuration)

        if self.currentOrNext:
            column = 'Bandwidth'
            candidates = committed[['Developer', column]]
        else:
            column = 'NEXT SPRINT Bandwidth'
            nextSprint = committed[['Developer', column]]
            # Filter to only developers who have 'carry over' bandwidth from this sprint into the next one
            candidates = nextSprint[nextSprint[column] > ctx.sprintDuration]

        return list(candidates[candidates[column] >= timeRequired]['Developer'])

In [15]:
class GreedyAllocationModel (AllocationModel):
    """For a sprint, allocates work to developers by maximizing the planned tasks for each developer.

    So as long as the developer has some bandwidth left in the sprint, the algorithm will search for a
    user story that can be done in the remaining time.
    As a result, this algorithm has a tendency to plan short-duration user stories early in the release
    cycle, so that later sprints in the release cycle need to deal with comparatively coarser-sized user
    stories. This causes the release cycle to be unbalanced: later sprints have big-ticket items for the
    most part.
    """

    def __init__(self):
        super().__init__()

    def _selectAvailabilityCriterion(self, work, currentOrNext):
        """Returns the criterion used to decide who has time for a task: a _GreedyAvailabilityCriterion."""
        return _GreedyAvailabilityCriterion(work, currentOrNext)

    def _allocate_helper(self, work, workToPick, currentOrNext, modelsConfig):
        """Mutates 'work' and 'workToPick' by allocating WorkItem's to developers and in the process
        depleting the 'workToPick'.

        -workToPick: an array of WorkItem objects, corresponding to unplanned tasks that are candidate
        tasks to allocate to developers. It is empty when this method returns: every item picked is removed
        from it, whether or not an owner was found for the item.
        -work: a WorkAssignment instance reflecting a ScrumTeam's allocations and remaining bandwidth prior
        to the allocation this model will conduct. This model's allocation process will then mutate 'work'
        by reflecting in it tasks that are no longer unplanned but rather are now allocated to a particular
        developer.
        -currentOrNext: boolean to indicate if the allocation is being done for the current sprint or the
        next sprint. True for current, False for next.
        -modelsConfig: parameters for running the model (e.g., random generators)
        """
        availabilityCriterion = self._selectAvailabilityCriterion(work, currentOrNext)
        ctx = modelsConfig.context

        # The target is the distribution of everything there is to pick before anything is allocated
        targetDist = self._getUnnormalizedDistribution(workToPick, ctx)
        self.log['Target dist'] = targetDist

        cycle = -1
        while len(workToPick) > 0:
            cycle += 1

            runningDist   = self._inferDistribution(work, ctx) # Re-compute in each loop since work changes in each loop
            remainingDist = self._getUnnormalizedDistribution(workToPick, ctx) # Re-compute as workToPick changes

            item = self._pickItem(runningDist, remainingDist, targetDist, workToPick, modelsConfig)
            # In next cycle of loop don't want to encounter this item, as it would be processed by then
            workToPick.remove(item)

            timeRequired         = item.estimate * (1-item.percentAchieved)
            peopleWithTimeToDoIt = availabilityCriterion._getPeopleWithBandwidth(timeRequired, modelsConfig)
            potentialOwner       = modelsConfig.random.pickOne(peopleWithTimeToDoIt)

            if potentialOwner == None:
                # This WorkItem can't be done in this sprint, as nobody has enough time for the effort it
                # requires. Try with some other work item
                continue

            bucket = S_.CURRENT_SPRINT if currentOrNext else S_.NEXT_SPRINT
            work.reAssign(item, potentialOwner, bucket)
            newRunningDist = self._inferDistribution(work, ctx)
            self._postLog(cycle, bucket, remainingDist, runningDist, newRunningDist)

    def _pickItem(self, runningDist, remainingDist, targetDist, workToPick, modelsConfig):
        """Picks the next "preferred" WorkItem to work on: any element of workToPick, chosen at random.
        The distributions given are not consulted."""
        return modelsConfig.random.pickOne(workToPick)

In [22]:
# Filters available developers to only those who don't have unfinished work from prior sprints
class _NoLaggardsAvailabilityCriterion (_AvailabilityCriterion):
    """Availability criterion that, for the current sprint, disqualifies the laggards identified by
    NoLaggardsAllocationModel._identify_laggards (called with a tolerance of 2)."""

    def __init__(self, work, currentOrNext):
        """
        -work: object whose committedTime(sprintDuration) reports the bandwidth of each developer
        -currentOrNext: True to look at bandwidth in the current sprint, False for the next sprint
        """
        self.work = work
        self.currentOrNext = currentOrNext

    def _getPeopleWithBandwidth(self, timeRequired, modelsConfig):
        """Returns the list of developers available to take on a task.

        -timeRequired: a double indicating how much time we need for a task being resourced
        -modelsConfig: its 'context' supplies the sprint duration for which we seek people available
        """
        ctx = modelsConfig.context

        # WARNING: Avoid the use of sets, even though it is tempting since set difference is more succinct
        # than list difference (i.e., to remove laggards it is easier with a set).
        # Reason: using sets introduces a bug whereby the return values from this method are not
        # deterministic, so regression tests fail.

        # NOTE(review): the result of this call is not used. The call is kept only because it can't be
        # told from here whether committedTasks() has side effects -- confirm and remove.
        self.work.committedTasks()

        laggards, _ = NoLaggardsAllocationModel._identify_laggards(self.work, modelsConfig, tolerance=2)

        committed = self.work.committedTime(ctx.sprintDuration)

        if self.currentOrNext:
            bandwidth = committed[['Developer', 'Bandwidth']]
            withTime  = list(bandwidth[bandwidth['Bandwidth'] >= timeRequired]['Developer'])
            return NoLaggardsAllocationModel._list_different(withTime, laggards)

        nextBandwidth = committed[['Developer', 'NEXT SPRINT Bandwidth']]
        # Filter to only developers who have 'carry over' bandwidth from this sprint into the next one
        carryOver = nextBandwidth[nextBandwidth['NEXT SPRINT Bandwidth'] > ctx.sprintDuration]

        # Unlike for the current sprint, in this case the laggards are not removed from the candidates.
        # It is OK for laggards to build a backlog for the next sprint - that way they stay fully utilized
        # if we did not give them enough work in the current release due to they being a laggard
        return list(carryOver[carryOver['NEXT SPRINT Bandwidth'] >= timeRequired]['Developer'])

In [53]:
class NoLaggardsAllocationModel (GreedyAllocationModel):
    
    def __init__(self):
        """Nothing to set up beyond what the parent class sets up."""
        super().__init__()
    
    def _selectAvailabilityCriterion(self, work, currentOrNext):
        """Returns the criterion used to decide who has time for a task: a _NoLaggardsAvailabilityCriterion
        built from the arguments given."""
        criterion = _NoLaggardsAvailabilityCriterion(work, currentOrNext)
        return criterion

    def _post_process(self, work, modelsConfig, currentOrNext): 
        """After allocating for the current sprint, balances the work and then steals work (with a
        tolerance of 2). Does nothing after allocating for the next sprint."""
        if not currentOrNext:
            return
        self._balanceWork(work, modelsConfig)
        self._stealWork(work, modelsConfig, tolerance=2) # Only steal work from the current sprint's tasks

    # Returns a subset of 'list1' corresponding to elements of 'list1' that are not in 'list2'
    # 
    # Used as a means to avoid using sets to make differences of collections, since sets have non-deterministic
    # orderings and that leads to regression tests failures.
    @staticmethod
    def _list_different(list1, list2):
        """Returns a new list with the elements of 'list1' that are not in 'list2', in the order (and with
        the repetitions) in which they appear in 'list1'.

        Takes no 'self': declared static so that it works whether called on the class or on an instance.
        """
        return [elt for elt in list1 if elt not in list2]
    
    # Returns two lists:
    #    - A list of 'laggards', i.e., developers who are behind in their work because they have some user stories that 
    #      are at most 50% done and which were allocated to them more than 'tolerance'-many sprints ago.
    #    - A list of 'stuckWork', i.e., the user stories that caused those developers to be considered laggards.
    @staticmethod
    def _identify_laggards(work, modelsConfig, tolerance):
        """Returns the pair (laggards, stuckWork) for the unfinished stories of the current sprint.

        -stuckWork: the items that were most recently assigned more than 'tolerance' sprints before the
        sprint in modelsConfig.context, and whose user story is at most 50% achieved.
        -laggards: the owner of each of those items. The two lists are built together, so laggards[i] is
        the owner of stuckWork[i], and a developer with several stuck items appears once per item.

        -work: object whose allocations are inspected. It is not modified.
        -modelsConfig: supplies the teamsRepo (to get the status of user stories) and the current sprint
        -tolerance: maximum number of sprints that an item can be behind without being considered stuck

        Takes no 'self': declared static so that it works whether called on the class or on an instance.
        """
        teamsRepo = modelsConfig.globalRepo.teamsRepo
        stuckWork = []
        laggards = [] # Developers from whom work might be stolen
        for person in work.allocations[S_.CURRENT_SPRINT].keys():
            items = work.allocations[S_.CURRENT_SPRINT][person][S_.UNFINISHED_STORIES]
            for item in items:
                # One lookup serves both checks below
                uss = teamsRepo.getUserStoryStatus(item.userStoryId)
                sprint_assigned = uss.retrieveMostRecentAssignment()
                if sprint_assigned + tolerance >= modelsConfig.context.sprint:
                    continue # item is not that far behind yet to be stolen, as it is within tolerance
                if uss.percentAchieved > 0.5:
                    continue # Current developer has done more than 50% of this old story. Allow the dude to finish it up

                # If we get this far, we have an old story that is not getting enough action. Think of
                # giving it to someone else
                stuckWork.append(item)
                laggards.append(person)
        return laggards, stuckWork

    # In the event that some developer has no user story to implement at all, this function will search for
    # other developers with multiple user stories to implement, and if so will take the one that is most incomplete
    # and try to re-assign it to the unutilized developers
    def _balanceWork(self, work, modelsConfig):
        """Re-assigns, within the current sprint, stories from developers who have several to developers
        who have none or who are under utilised.

        Receivers are the developers with 0 implementation items, followed by those whose bandwidth is more
        than one standard deviation above the mean bandwidth. From each developer with more than 1
        implementation item, the least achieved of the unfinished stories in the current sprint is taken.
        Stories are then handed out in order, at most one per receiver, until either list runs out.
        S_.OWNER_TBD is never a receiver nor a source.
        """
        def withoutOwnerTbd(developers):
            # Drops the placeholder owner, if present, from the given list (in place)
            if (S_.OWNER_TBD in developers):
                developers.remove(S_.OWNER_TBD)
            return developers

        resourcing = work.committedTime(modelsConfig.context.sprintDuration)

        unemployed = withoutOwnerTbd(list(resourcing[resourcing['Implementation (#)']==0]['Developer']))

        bandwidths         = resourcing['Bandwidth']
        utilisation_cutoff = bandwidths.mean() + bandwidths.std()
        under_utilized     = withoutOwnerTbd(list(resourcing[resourcing['Bandwidth'] > utilisation_cutoff]['Developer']))

        # Merge the unemployed and the underemployed, without duplicates
        receivers = unemployed
        for developer in under_utilized:
            if developer not in receivers:
                receivers.append(developer)

        sources = withoutOwnerTbd(list(resourcing[resourcing['Implementation (#)'] > 1]['Developer']))

        # From each source take the story with the lowest percentAchieved (the first one, in case of ties)
        items_to_steal = []
        for developer in sources:
            stories     = work.allocations[S_.CURRENT_SPRINT][developer][S_.UNFINISHED_STORIES]
            chosen      = stories[0]
            chosen_uss  = modelsConfig.globalRepo.teamsRepo.getUserStoryStatus(chosen.userStoryId)
            for story in stories:
                story_uss = modelsConfig.globalRepo.teamsRepo.getUserStoryStatus(story.userStoryId)
                if (story_uss.percentAchieved < chosen_uss.percentAchieved):
                    chosen, chosen_uss = story, story_uss
            items_to_steal.append(chosen)

        # Give 1 story to work on to each person who is unemployed or under utilised, while stories last
        for developer, story in zip(receivers, items_to_steal):
            work.reAssign(story, developer, S_.CURRENT_SPRINT)


    # Re-allocates work from laggard developers to other developers who have bandwidth to take it on
    #
    # -work: a WorkAssignment instance. Normally this method is called when 'work' is filled using the normal
    # greedy algorithm, i.e., any backlog work has been assigned to someone if bandwidth exists. If indeed bandwidth
    # has been totally used up then this method will do nothing. But if there is extra bandwidth, this method
    # will re-allocate work from laggard developers to those with the extra bandwidth.
    # -modelsConfig: overall configuration for release simulation
    # -tolerance: maximum number of sprints that a WorkItem can be behind before being a candidate for stealing
    def _stealWork(self, work, modelsConfig, tolerance):
        # Moves work away from laggards in four passes, each delegated to _stealFromList:
        #   1. laggards' next-sprint items  -> current sprint
        #   2. laggards' stuck items        -> current sprint
        #   3. what pass 1 left behind      -> next sprint
        #   4. what pass 2 left behind      -> next sprint
        slow_devs, stalled_items = NoLaggardsAllocationModel._identify_laggards(work, modelsConfig, tolerance)

        # Everything the laggards have lined up for the next sprint: if they struggle with the current
        # work they most likely won't get to it, so it is ripe for stealing.
        # NOTE(review): a developer listed more than once in slow_devs contributes their items more than
        # once here - confirm whether _identify_laggards can return duplicates.
        upcoming_items = [
            item
            for dev in slow_devs
            for item in work.allocations[S_.NEXT_SPRINT][dev][S_.UNFINISHED_STORIES]
        ]

        def steal(items, fromBucket, toBucket):
            # Same laggards / work / modelsConfig for every pass; only the list and the buckets vary
            return self._stealFromList(items, slow_devs, work, modelsConfig, fromBucket, toBucket)

        # First steal for completion in the current sprint ...
        upcoming_left = steal(upcoming_items, S_.NEXT_SPRINT, S_.CURRENT_SPRINT)
        stalled_left = steal(stalled_items, S_.CURRENT_SPRINT, S_.CURRENT_SPRINT)
        # ... and only then, with whatever could not be placed, for the next sprint
        steal(upcoming_left, S_.NEXT_SPRINT, S_.NEXT_SPRINT)
        steal(stalled_left, S_.CURRENT_SPRINT, S_.NEXT_SPRINT)
        
            
    def _stealFromList(self, workToSteal, laggards, work, modelsConfig, fromBucket, toBucket):
        # Tries to hand each WorkItem in 'workToSteal' to a randomly chosen developer who is not a laggard
        # and who has enough bandwidth for the item's remaining effort.
        #
        # -workToSteal: list of WorkItem objects to re-assign. It is mutated: every item that gets
        # re-assigned is removed from it.
        # -laggards: developers that must not be chosen as the new owner
        # -work: a WorkAssignment instance; mutated through work.reAssign
        # -modelsConfig: overall configuration; supplies the random generator used to pick the new owner
        # -fromBucket: passed through to work.reAssign, after toBucket
        # -toBucket: bucket the items are re-assigned to. Bandwidth is checked for the current sprint when
        # this equals S_.CURRENT_SPRINT, and for the next sprint otherwise.
        #
        # Returns 'workToSteal' itself (the same list object), now holding only the items nobody could take.
        remaining = workToSteal
        currentOrNext = (toBucket == S_.CURRENT_SPRINT)
        availabilityCriterion = self._selectAvailabilityCriterion(work, currentOrNext)

        # Walk a snapshot of the list. 'remaining' is the very list passed in, and removing from a list
        # while iterating over it makes the loop skip the element that follows each removal.
        for item in list(workToSteal):
            timeRequired = item.estimate * (1-item.percentAchieved)

            peopleWithTimeToDoIt = availabilityCriterion._getPeopleWithBandwidth(timeRequired, modelsConfig)
            candidates = NoLaggardsAllocationModel._list_different(peopleWithTimeToDoIt, laggards)
            potentialOwner = modelsConfig.random.pickOne(candidates)

            if potentialOwner is None:
                # This WorkItem can't be done in this sprint, as nobody has enough time for the effort it
                # requires. Try with some other work item
                continue

            work.reAssign(item, potentialOwner, toBucket, fromBucket)
            remaining.remove(item)
        return remaining


In [18]:
class BalancedAllocationModel (AllocationModel):
    # Allocation model that, for a sprint, hands work to developers while trying to keep a roughly average
    # user story size across all sprints in the release.

    def __init__(self):
        super().__init__()

    # Allocates WorkItem's from 'workToPick' to developers, mutating both 'work' and 'workToPick'.
    #
    # Every pass of the loop removes one item from 'workToPick', whether or not that item ends up being
    # allocated, so on normal return 'workToPick' is empty.
    #
    # -work: a WorkAssignment instance reflecting a ScrumTeam's allocations and remaining bandwidth before
    # this allocation. Items that get an owner are recorded in it through work.reAssign.
    # -workToPick: an array of WorkItem objects, the unplanned tasks that are candidates for allocation
    # -currentOrNext: True to allocate into the current sprint, False to allocate into the next sprint
    # -modelsConfig: parameters for running the model (e.g., random generators)
    #
    # Raises AssertionError when allocating for the current sprint and not a single item was allocated.
    def _allocate_helper(self, work, workToPick, currentOrNext, modelsConfig):
        if len(workToPick) == 0:
            # Nothing to do; returning here also avoids tripping the assertion at the bottom
            return

        ctx = modelsConfig.context
        availabilityCriterion = _GreedyAvailabilityCriterion(work, currentOrNext)

        # Debugging aid: remember whether anything was allocated, plus some context for the assertion message
        allocated_any = False
        assertion_info = {
            'modelsConfig context': modelsConfig.context,
            'len(workToPick) at start': len(workToPick),
        }

        targetDist = self._getUnnormalizedDistribution(workToPick, ctx)
        while len(workToPick) > 0:
            # Both distributions are recomputed on every pass since 'work' and 'workToPick' keep changing
            runningDist = self._inferDistribution(work, ctx)
            remainingDist = self._getUnnormalizedDistribution(workToPick, ctx)

            chosenItem = self._pickItem(runningDist, remainingDist, targetDist, workToPick, modelsConfig)

            # The item is considered processed from here on, so later passes must not see it again
            workToPick.remove(chosenItem)

            if Distribution.calcAttractiveness(chosenItem.estimate, runningDist, targetDist) <= 0:
                continue

            timeRequired = chosenItem.estimate * (1 - chosenItem.percentAchieved)
            owner = modelsConfig.random.pickOne(
                availabilityCriterion._getPeopleWithBandwidth(timeRequired, modelsConfig))
            if owner is None:
                # Nobody has enough time for the effort this item requires; move on to another item
                continue

            work.reAssign(chosenItem, owner, S_.CURRENT_SPRINT if currentOrNext else S_.NEXT_SPRINT)
            allocated_any = True

        # Debugging aid: for the current sprint, confirm that at least something was allocated
        assertion_info['len(workToPick) at end'] = len(workToPick)
        if currentOrNext:
            assert allocated_any, assertion_info

    # Picks the next "preferred" WorkItem to work on: asks Distribution for an estimate size and returns the
    # last item in 'workToPick' that has exactly that estimate.
    def _pickItem(self, runningDist, remainingDist, targetDist, workToPick, modelsConfig):
        chosenEstimate = Distribution.pickUsingProximityToTarget(runningDist, remainingDist, targetDist, modelsConfig)

        matches = [candidate for candidate in workToPick if candidate.estimate == chosenEstimate]
        chosenItem = matches[-1] if matches else None

        # An item with the chosen estimate is expected to exist
        assert chosenItem is not None

        return chosenItem

<h1>Quality Models</h1>

In [19]:
class QualityModel:
    # Abstract base class for quality models; on its own it finds nothing.

    def __init__(self):
        pass

    # To be implemented by concrete classes, which return a list of Ticket instances and persist them to
    # the TicketsRepo in modelsConfig.context. This base implementation does nothing and returns None.
    # -modelsConfig: a ModelsConfig instance
    def findBugs(self, modelsConfig):
        return None

In [20]:
class DistributedLagQualityModel(QualityModel):
    # Quality model in which a finished user story can yield at most one defect per call to findBugs, and
    # only while the story was delivered 1, 2 or 3 sprints before the current one.

    # Probability of finding a bug in a story, keyed by lag (sprints elapsed since the story was delivered).
    # For now hardcoded to a 50% total exposure spread over 3 sprints, with the surge in the middle one.
    # Any lag that is not a key here yields no bugs.
    _EXPOSURE_BY_LAG = {1: 0.125, 2: 0.25, 3: 0.125}

    # For now a simplistic cost to fix: each bug costs this fraction of the story's original estimate
    _COST_TO_FIX_RATIO = 0.20

    def __init__(self):
        super().__init__()

    # Returns a list of Ticket instances, one per defect found in the finished stories of the team given by
    # modelsConfig.context.teamId. Each Ticket is also added to modelsConfig.globalRepo.ticketsRepo.
    # -modelsConfig: a ModelsConfig instance
    def findBugs(self, modelsConfig):
        uss_list = DistributedLagQualityModel._findFinishedStories(modelsConfig.context, modelsConfig.globalRepo)

        bugs = []
        current_sprint = modelsConfig.context.sprint
        for uss in uss_list:
            lag = current_sprint - uss.sprintDelivered
            story = modelsConfig.globalRepo.storiesRepo.findStory(uss.userStoryId)
            bugs.extend(DistributedLagQualityModel._findDefectsInStory(lag, story, uss, modelsConfig))
        return bugs

    # Returns a list of UserStoryStatus instances, corresponding to all user stories owned by
    # context.teamId whose percentAchieved is exactly 1.0.
    @staticmethod
    def _findFinishedStories(context, globalRepo):
        uss_list = []
        for userStoryId in globalRepo.storiesRepo.findIds():
            if globalRepo.teamsRepo.getTeamId(userStoryId) != context.teamId:
                # Only the stories owned by context.teamId are wanted. Otherwise a team Y that closes its
                # sprint after another team X could find X's just-completed story and generate a bug for it
                # in the same sprint the story was delivered, which is an assertion violation (bugs should be
                # found after a story is delivered, not in the same sprint).
                continue
            uss = globalRepo.teamsRepo.getUserStoryStatus(userStoryId)
            if uss.percentAchieved == 1.0:
                uss_list.append(uss)
        return uss_list

    # Returns a list of Ticket instances (empty, or holding a single Ticket) for the given story, and adds
    # each Ticket to modelsConfig.globalRepo.ticketsRepo.
    # -lag: number of sprints between the story's delivery and the current sprint
    # -userStory: the story in which to look for defects
    # -uss: the story's UserStoryStatus (not used at present)
    # -modelsConfig: a ModelsConfig instance; supplies the random generator, the tickets repo and the sprint
    @staticmethod
    def _findDefectsInStory(lag, userStory, uss, modelsConfig):
        # No bugs for stories just finished or finished a while ago. A single table lookup also means a lag
        # with no entry can never leave 'exposure' undefined.
        exposure = DistributedLagQualityModel._EXPOSURE_BY_LAG.get(lag)
        if exposure is None:
            return []

        possible_defect_count  = [0, 1] # Possible values for number of bugs found
        likelihoods            = [1-exposure, exposure]
        defect_count           = modelsConfig.random.pickOneWithWeights(possible_defect_count, likelihoods)

        # Issue defect Tickets
        defects = []
        repo = modelsConfig.globalRepo.ticketsRepo
        for _ in range(defect_count):
            costToFix = DistributedLagQualityModel._estimateCostToFix(userStory, modelsConfig)
            ticketId = repo._nextTicketId()
            ticket = Ticket(ticketId, userStory.userStoryId, costToFix, modelsConfig.context.sprint)
            repo.addTicket(ticket)
            defects.append(ticket)
        return defects

    # Returns the estimated cost of fixing one bug in 'userStory': a fixed fraction of the story's
    # original estimate. Better would be a percentage of the actual cost to develop.
    # -modelsConfig: not used at present
    @staticmethod
    def _estimateCostToFix(userStory, modelsConfig):
        return DistributedLagQualityModel._COST_TO_FIX_RATIO * userStory.originalEstimate