In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, date, timedelta

ModuleNotFoundError: No module named 'pandas'

In [2]:
import devanalyst.simulation.statics as S_
from devanalyst.simulation.businessObjects import UserStory, UserStoriesRepo, Ticket, TicketsRepo, WorkItem, \
UserStoryStatus, Backlog, ScrumTeam, ScrumTeamsRepo, WorkAssignments, ReleaseCycleContext
import devanalyst.simulation.simulationModels as simm

importing Jupyter notebook from c:\alejandro\code\chateauclaudia-labs\devanalyst\devanalyst\simulation\statics.ipynb
importing Jupyter notebook from c:\alejandro\code\chateauclaudia-labs\devanalyst\devanalyst\simulation\businessObjects.ipynb
importing Jupyter notebook from c:\alejandro\code\chateauclaudia-labs\devanalyst\devanalyst\simulation\simulationModels.ipynb


<h1>User Stories</h1>
<p>Capacity planning for the release: we create a backlog of user stories. Initial resource allocations are made up front, but they may change during a sprint if needed.</p>

In [3]:
class IdCounter:
    """Hands out consecutive integer ids, starting at 1.

    'next_id' only peeks at the current value; callers must call 'increment' to move on to the following id.
    """

    def __init__(self):
        self.counter = 1

    def next_id(self):
        """Returns the current id without advancing the counter."""
        return self.counter

    def increment(self):
        """Advances the counter by one."""
        self.counter = self.counter + 1

In [4]:
def generateNextUserStory(nextId, team, bandwidth, sprintDuration, modelsConfig):
    """Builds one UserStory, or returns None if no developer has enough bandwidth left for it.

    All random choices are delegated to 'modelsConfig.random': how long the story takes (pickHowLong), which of the
    developers with enough remaining time owns it (pickOneIdx), and which product manager of the team wrote
    the spec (pickOne).

    -nextId: id used to label the story ('UserStory #<nextId>')
    -team: object exposing 'productManagers'
    -bandwidth: Series indexed by developer with the unallocated time of each. MUTATED: the estimate of the new
     story is subtracted from the chosen developer's entry.
    -sprintDuration: passed to pickHowLong (presumably so the estimate fits in a sprint - TODO confirm in the model)
    -modelsConfig: object exposing the 'random' generator described above
    """
    rng = modelsConfig.random
    effort = rng.pickHowLong(sprintDuration)

    # Only developers that can still absorb the whole story are candidates
    candidates = bandwidth[bandwidth >= effort]
    if len(candidates) == 0:
        return None

    owner = rng.pickOneIdx(candidates)
    bandwidth[owner] -= effort # this time is now earmarked for the new story

    specAuthor = rng.pickOne(team.productManagers)
    title = 'UserStory #' + str(nextId)

    return UserStory(title, effort, owner, specAuthor)

In [5]:
def canResourceMoreWork(bandwidth, sprintDuration):
    """Helper used in createUserStoryBacklog: tells whether the team can still take on one more user story.

    The answer is yes when either of these holds:
      * strictly more than half of the developers have strictly more than half a sprint unallocated, or
      * at least one developer has 150% of a sprint (or more) unallocated.

    -bandwidth: Pandas Series indexed on developers' names, with the values being the amount of unallocated days
     for that developer.
    -sprintDuration: integer representing duration of a sprint, in number of days
    """
    halfSprint = sprintDuration/2.0
    developersWithSlack = bandwidth[bandwidth > halfSprint].size
    majorityHasSlack = developersWithSlack > bandwidth.size / 2.0

    someoneHasLargeSlack = bandwidth.max() >= 1.5 * sprintDuration

    return majorityHasSlack or someoneHasLargeSlack

In [6]:
def createUserStoryBacklog(team, releaseDuration, sprintDuration, counter, modelsConfig):
    """Creates the user stories that a scrum team should work on for a release.

    Every developer starts with 'releaseDuration' days of bandwidth. Stories are generated one at a time with
    generateNextUserStory (which depletes the bandwidth of the developer it picks) for as long as
    canResourceMoreWork says the team can still absorb another story. We deliberately do not try to fill every
    single day, to avoid artificial situations.

    Two safety caps guarantee termination:
      * no story is created once the counter reaches MAX_USER_STORY_ID;
      * the loop gives up after MAX_CONSECUTIVE_FAILURES attempts in a row that produced no story (each attempt
        draws a new random estimate, so an isolated failure just means the estimate drawn was too large).

    -team: the ScrumTeam for which the backlog is being created (its 'developers' index the bandwidth Series)
    -releaseDuration: integer number of business days to the intended release date
    -sprintDuration: integer number of business days that a sprint lasts
    -counter: an IdCounter, to tell us what is the id for the next user story to create. This function increments
     the counter with each user story that is created.
    -modelsConfig: a ModelsConfig instance with a random generator used in the creation of user stories.

    Returns a tuple (stories, backlog): the list of stories created, and a Backlog whose pendingUserStories has
    one UserStoryStatus per story, in creation order.
    """
    MAX_USER_STORY_ID = 10000
    MAX_CONSECUTIVE_FAILURES = 1000

    bandwidth = pd.Series(releaseDuration, team.developers) # initialize bandwidth to all the time left in the release

    stories = []
    backlog = Backlog()

    consecutiveFailures = 0
    next_id = counter.next_id()
    while canResourceMoreWork(bandwidth, sprintDuration) and next_id < MAX_USER_STORY_ID:
        story = generateNextUserStory(next_id, team, bandwidth, sprintDuration, modelsConfig) # mutates bandwidth
        if story is None:
            # Maybe we drew a very large estimate and a smaller story can still be resourced, so try again...
            consecutiveFailures += 1
            if consecutiveFailures >= MAX_CONSECUTIVE_FAILURES:
                break # ...but not forever: next_id does not advance on failures, so it cannot bound this loop
            continue
        consecutiveFailures = 0

        stories.append(story)
        backlog.pendingUserStories.append(UserStoryStatus(story.userStoryId))

        counter.increment()
        next_id = counter.next_id()

    return stories, backlog

<h1>Do a sprint</h1>

In [7]:
def chooseWhatToDoInSprint(modelsConfig):
    """At the start of a sprint, elects what items a team will do in the sprint.

    A fresh WorkAssignments is created from 'modelsConfig.context' and 'modelsConfig.globalRepo' and handed to
    'modelsConfig.allocationModel.allocate', whose result is returned (the original note says allocate mutates
    the WorkAssignments it is given).

    -modelsConfig: a ModelsConfig instance with the context, repositories and allocation model used for making
     the choice of work to do in the sprint
    """
    allocator = modelsConfig.allocationModel
    sprintWork = WorkAssignments(modelsConfig.context, modelsConfig.globalRepo)

    return allocator.allocate(sprintWork, modelsConfig)

In [8]:
def inflowOfTickets(modelsConfig):
    """Returns a list with the newly generated tickets against the work done by the team.

    Each quality model in 'modelsConfig.qualityModels' is asked, in order, for the bugs it finds; the results
    are concatenated into a single list.
    """
    return [bug
            for qualityModel in modelsConfig.qualityModels
            for bug in qualityModel.findBugs(modelsConfig)]

In [9]:
def bookDeveloperEffort(items, budget, modelsConfig):
    """Helper function, used as part of the book keeping involved in delivering a sprint.

    Walks the developer's work items in order, spending the budget on each until it runs out. Items reached
    after the budget is exhausted are left untouched.

    -items: a list of WorkItem objects that record work that should be done in a sprint by a single developer.
     It is mutated by this function, which records on each item it reaches the actual number of man-days spent
     ('actual') and to what degree it was completed ('percentAchieved').
    -budget: number of man-days that the developer was given to try to complete the WorkItems
    -modelsConfig: a ModelsConfig instance to model how real costs differ from estimates

    Returns what portion of the budget, if any, was left over after booking the work done by the developer.
    """
    remaining = budget
    for workItem in items:
        if remaining <= 0:
            break # Nothing else progresses during this sprint

        cost = simm.computeRealCost(workItem, modelsConfig)
        if cost == 0:
            continue # Boundary case: a zero cost would mean dividing by 0 below, so skip the item

        if cost <= remaining:
            # Enough budget to finish the item
            workItem.percentAchieved = 1
            workItem.actual = cost
            remaining -= cost
            continue

        # Only part of the item fits in what is left of the budget
        workItem.percentAchieved = remaining/cost
        workItem.actual = remaining
        remaining = 0

    return remaining

In [10]:
def deliverSprint(work, modelsConfig):
    """Based on the team's productivity during the sprint, records how much of the planned work actually gets done.

    Every developer of the team identified by 'modelsConfig.context.teamId' gets a budget of
    'modelsConfig.context.sprintDuration' days. The budget is spent first on the developer's items for the
    current sprint; whatever is left over is then spent on items planned for the next sprint, to record that
    the developer got a head start on them.

    -work: a WorkAssignment as it was at the start of the sprint. It is mutated by this function (through
     bookDeveloperEffort) by recording, for each WorkItem reached, what percentage of it got accomplished.
    -modelsConfig: a ModelsConfig instance providing the context, the teams repository, and the models for how
     real costs differ from estimates.
    """
    context = modelsConfig.context
    team = modelsConfig.globalRepo.teamsRepo.findTeam(context.teamId)

    for person in team.developers:
        budget = context.sprintDuration # Developer has up to these many days to complete work in the sprint

        # Current sprint's work goes first; any budget left spills over into the next sprint's work
        for bucket in (S_.CURRENT_SPRINT, S_.NEXT_SPRINT):
            budget = bookDeveloperEffort(work.getWorkItems(person, bucket), budget, modelsConfig)

In [11]:
def updateBacklogAfterSprint(work, modelsConfig):
    """At the end of a sprint, updates the backlog of a team based on the work done in this sprint.

    Gathers, developer by developer, the work items of the current sprint followed by those of the next sprint,
    and passes the whole list to the team backlog's 'updateStatus' together with the context and global repo.

    -work: a WorkAssignment, which reflects what was accomplished during the sprint through the field
     percentAchieved in each of its WorkItems
    -modelsConfig: a ModelsConfig instance providing the context (team id) and the global repo
    """
    team = modelsConfig.globalRepo.teamsRepo.findTeam(modelsConfig.context.teamId)

    sprintItems = []
    for developer in team.developers:
        for bucket in (S_.CURRENT_SPRINT, S_.NEXT_SPRINT):
            sprintItems += work.getWorkItems(developer, bucket)

    team.backlog.updateStatus(sprintItems, modelsConfig.context, modelsConfig.globalRepo)

<h1>Functions to Generate Timecards</h1>

In [12]:
def addBusinessDays(start, duration):
    """Returns the date that lies 'duration' business days after 'start'.

    Saturdays and Sundays (weekday() of 5 and 6) are stepped over without being counted. When 'duration' is not
    positive, 'start' is returned as is, even if it falls on a weekend.
    """
    oneDay = timedelta(1)
    result = start
    pending = duration
    while pending > 0:
        result = result + oneDay
        if result.weekday() < 5: # Monday..Friday: this day counts
            pending -= 1
    return result

def subtractBusinessDays(start, duration):
    """Returns the date that lies 'duration' business days before 'start'.

    Saturdays and Sundays (weekday() of 5 and 6) are stepped over without being counted. When 'duration' is not
    positive, 'start' is returned as is, even if it falls on a weekend.
    """
    oneDay = timedelta(1)
    result = start
    pending = duration
    while pending > 0:
        result = result - oneDay
        if result.weekday() < 5: # Monday..Friday: this day counts
            pending -= 1
    return result

In [9]:
class ReleaseLog:
    """Records, for every team and sprint of a release cycle, snapshots of the backlog, the plan and the outcome.

    Snapshots are kept in the nested dictionary 'self.log', addressed as log[teamId][sprint][snapshotName], where
    snapshotName is one of the strings in SNAPSHOTS. The 'backlog' and 'planned_*' snapshots are DataFrames with
    one row per breakout label (see the constants below) and one column per category 0, 1, ..., sprintDuration,
    where the category of a user story is the integer part of its original estimate. The 'Resourcing' and
    'Outcome' snapshots are whatever work.committedTime(...) and work.committedTasks() return.
    """

    # Name of the first column of the breakout DataFrames, followed by the labels used as row identifiers in it
    BREAKOUT = 'Breakout'
    COMPLETED = 'Completed'
    REGRESSED = 'Regressed' # Used for user stories considered "completed" but which have open Tickets against them
    NEW_WORK = 'New Work'
    NOT_STARTED = 'Not Started'
    OVER_BUDGET = 'Over Budget'
    PRIOR_PROGRESSED = 'Prior Progressed'
    PRIOR_TO_FINISH = 'Prior to Finish'
    PROGRESSED = 'Progressed'
    TO_FINISH = 'To Finish'
    UNPLANNED = 'Unplanned'

    SNAPSHOTS = ['planned_Start_CURRENT_SPRINT', 'planned_End_CURRENT_SPRINT',
                      'planned_Start_NEXT_SPRINT', 'planned_End_NEXT_SPRINT',
                      'backlog', 'Resourcing', 'Outcome']

    def __init__(self):
        self.log = {}

    # ------------------------------------------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------------------------------------------

    def _sprintLog(self, context):
        """Returns the dict of snapshots for the team and sprint in 'context', creating it if it does not exist."""
        return self.log.setdefault(context.teamId, {}).setdefault(context.sprint, {})

    @staticmethod
    def _flattenTasks(tasksByType):
        """Returns one list with the production bugs, dev-time bugs and unfinished stories (in that order)."""
        tasks = []
        for taskType in (S_.PRODUCTION_BUGS, S_.DEV_TIME_BUGS, S_.UNFINISHED_STORIES):
            tasks.extend(tasksByType[taskType])
        return tasks

    def _getPlans(self, work):
        """Returns three lists of work items: planned for the current sprint, for the next sprint, and unplanned.

        -work: a WorkAssignments whose 'allocations' is indexed first by S_.UNPLANNED / S_.CURRENT_SPRINT /
         S_.NEXT_SPRINT, then by owner, then by task type.
        """
        unplanned = ReleaseLog._flattenTasks(work.allocations[S_.UNPLANNED][S_.OWNER_TBD])

        planned_CURRENT_SPRINT = []
        subwork = work.allocations[S_.CURRENT_SPRINT]
        for person in subwork.keys():
            planned_CURRENT_SPRINT.extend(ReleaseLog._flattenTasks(subwork[person]))

        planned_NEXT_SPRINT = []
        subwork = work.allocations[S_.NEXT_SPRINT]
        for person in subwork.keys():
            planned_NEXT_SPRINT.extend(ReleaseLog._flattenTasks(subwork[person]))

        return planned_CURRENT_SPRINT, planned_NEXT_SPRINT, unplanned

    @staticmethod
    def _getCategory(estimate):
        """Returns the category (column) of a user story: the integer part of its original estimate."""
        cat = int(estimate)
        return cat

    @staticmethod
    def _categories(sprintDuration):
        """Returns the list of categories 0, 1, ..., sprintDuration used as columns of the breakout DataFrames."""
        return list(range(sprintDuration + 1))

    @staticmethod
    def _newRow(label, categories):
        """Returns a row for a breakout DataFrame: the label followed by a 0.0 counter per category."""
        return [label] + [0.0] * len(categories)

    @staticmethod
    def _rowIndex(story, categories):
        """Returns the position, within a row built by _newRow, of the counter for the category of 'story'.

        Raises ValueError (from list.index) if the category is not in 'categories', e.g. when the original
        estimate of the story exceeds the sprint duration.
        """
        cat = ReleaseLog._getCategory(story.originalEstimate)
        return categories.index(cat) + 1 # offset by 1 since first element in a row is the label

    # ------------------------------------------------------------------------------------------------------
    # Taking snapshots
    # ------------------------------------------------------------------------------------------------------

    def snapshotSprintBacklog(self, modelsConfig):
        """Records the 'backlog' snapshot for the team and sprint in 'modelsConfig.context'."""
        sprintLog = self._sprintLog(modelsConfig.context)
        sprintLog['backlog'] = ReleaseLog._build_backlog_df(modelsConfig.context, modelsConfig.globalRepo)

    def snapshotSprintPlan(self, work, modelsConfig):
        """Records the start-of-sprint snapshots for the team and sprint in 'modelsConfig.context'.

        These are 'planned_Start_CURRENT_SPRINT', 'planned_Start_NEXT_SPRINT' (breakouts of the items in 'work')
        and 'Resourcing' (the result of work.committedTime for the sprint duration).
        """
        context = modelsConfig.context
        sprintLog = self._sprintLog(context)

        planned_CURRENT_SPRINT, planned_NEXT_SPRINT, _ = self._getPlans(work)

        sprintLog['planned_Start_CURRENT_SPRINT'] = ReleaseLog._build_plan_df(planned_CURRENT_SPRINT,
                                                                              context,
                                                                              modelsConfig.globalRepo)
        sprintLog['planned_Start_NEXT_SPRINT']    = ReleaseLog._build_plan_df(planned_NEXT_SPRINT,
                                                                              context,
                                                                              modelsConfig.globalRepo)
        sprintLog['Resourcing'] = work.committedTime(context.sprintDuration)

    def snapshotSprintOutcome(self, work, modelsConfig):
        """Records the end-of-sprint snapshots for the team and sprint in 'modelsConfig.context'.

        These are 'planned_End_CURRENT_SPRINT', 'planned_End_NEXT_SPRINT' (breakouts of the items in 'work')
        and 'Outcome' (the result of work.committedTasks()).
        """
        context = modelsConfig.context
        sprintLog = self._sprintLog(context)

        planned_CURRENT_SPRINT, planned_NEXT_SPRINT, _ = self._getPlans(work)

        sprintLog['planned_End_CURRENT_SPRINT'] = ReleaseLog._build_outcome_df(planned_CURRENT_SPRINT,
                                                                               context,
                                                                               modelsConfig.globalRepo)
        sprintLog['planned_End_NEXT_SPRINT']    = ReleaseLog._build_outcome_df(planned_NEXT_SPRINT,
                                                                               context,
                                                                               modelsConfig.globalRepo)
        sprintLog['Outcome'] = work.committedTasks()

    # ------------------------------------------------------------------------------------------------------
    # Building the breakout DataFrames
    # ------------------------------------------------------------------------------------------------------

    @staticmethod
    def _build_plan_df(allocations, context, globalRepo):
        """Returns the breakout, by category, of the work items planned at the start of a sprint.

        Rows: 'Prior Progressed', 'Prior to Finish', 'New Work', 'Regressed'.

        -allocations: list of work items (each with a userStoryId and, for bugs, a ticketId)
        -context: provides sprintDuration, which determines the categories
        -globalRepo: provides teamsRepo, storiesRepo and ticketsRepo
        """
        teamsRepo              = globalRepo.teamsRepo
        storiesRepo            = globalRepo.storiesRepo
        ticketsRepo            = globalRepo.ticketsRepo

        # Used categories go 1, 2, ..., sprintDuration, corresponding to the original estimates for user stories.
        # Spurious category 0 will be never used, but no harm in having it.
        categories = ReleaseLog._categories(context.sprintDuration)

        prior_progressed = ReleaseLog._newRow(ReleaseLog.PRIOR_PROGRESSED, categories)
        prior_remaining  = ReleaseLog._newRow(ReleaseLog.PRIOR_TO_FINISH, categories)
        new_work         = ReleaseLog._newRow(ReleaseLog.NEW_WORK, categories)
        regressed        = ReleaseLog._newRow(ReleaseLog.REGRESSED, categories)

        regressed_stories = set() # ids of completed user stories for which we already saw a bug in this loop
        for item in allocations:
            story = storiesRepo.findStory(item.userStoryId)
            uss = teamsRepo.getUserStoryStatus(item.userStoryId)
            idx = ReleaseLog._rowIndex(story, categories)

            if uss.percentAchieved == 0.0:
                new_work[idx] += 1
            elif uss.percentAchieved < 1.0:
                prior_progressed[idx] += (story.originalEstimate * uss.percentAchieved)/story.originalEstimate
                prior_remaining[idx] += (story.originalEstimate * (1-uss.percentAchieved))/story.originalEstimate
            else:
                # Must be a bug on a story already completed. We are counting stories, not bugs, so even if there
                # are multiple bugs on a user story the 'regressed' counter grows only for the first one we see.
                ticket = ticketsRepo.findTicket(item.ticketId)
                assert ticket is not None
                if item.userStoryId not in regressed_stories:
                    regressed[idx] += 1
                    regressed_stories.add(item.userStoryId)

        data = [prior_progressed, prior_remaining, new_work, regressed]
        return pd.DataFrame(data, columns=[ReleaseLog.BREAKOUT] + categories)

    @staticmethod
    def _build_outcome_df(allocations, context, globalRepo):
        """Returns the breakout, by category, of what happened to the work items planned for a sprint.

        Rows: 'Completed', 'Progressed', 'To Finish', 'Not Started', 'Over Budget', 'Regressed'.

        -allocations: list of work items (with userStoryId, taskType, percentAchieved, actual, estimate and, for
         bugs, ticketId)
        -context: provides sprintDuration, which determines the categories
        -globalRepo: provides teamsRepo, storiesRepo and ticketsRepo
        """
        teamsRepo              = globalRepo.teamsRepo
        storiesRepo            = globalRepo.storiesRepo
        ticketsRepo            = globalRepo.ticketsRepo

        # Include 0 as a category for tasks with less than 1 person-day to go
        categories = ReleaseLog._categories(context.sprintDuration)

        completed   = ReleaseLog._newRow(ReleaseLog.COMPLETED, categories)
        progressed  = ReleaseLog._newRow(ReleaseLog.PROGRESSED, categories)
        to_finish   = ReleaseLog._newRow(ReleaseLog.TO_FINISH, categories)
        not_started = ReleaseLog._newRow(ReleaseLog.NOT_STARTED, categories)
        over_budget = ReleaseLog._newRow(ReleaseLog.OVER_BUDGET, categories)
        regressed   = ReleaseLog._newRow(ReleaseLog.REGRESSED, categories)

        # Dictionary where keys are user story ids and values are dictionaries with two lists: one for the bugs
        # still open and one for the bugs closed in this sprint
        bugs_per_story = {}
        OPEN_BUGS    = 'OPEN_BUGS'
        CLOSED_BUGS  = 'CLOSED_BUGS'

        for item in allocations:
            story = storiesRepo.findStory(item.userStoryId)
            uss = teamsRepo.getUserStoryStatus(item.userStoryId)
            idx = ReleaseLog._rowIndex(story, categories)

            if uss.percentAchieved == 1.0:
                # Two possibilities: either we 1) completed the story in this sprint, or 2) completed it in a prior
                # sprint and this WorkItem is for a bug on that story. Case 1) counts as 'completed' right away.
                # In case 2) we only take note of the bug: counters are incremented after this loop, once per
                # story, no matter how many bugs the story has.
                if item.taskType == S_.UNFINISHED_STORIES:
                    completed[idx] += 1
                else: # Must be a bug
                    ticket = ticketsRepo.findTicket(item.ticketId)
                    assert ticket is not None
                    story_bugs = bugs_per_story.setdefault(item.userStoryId, {OPEN_BUGS: [], CLOSED_BUGS: []})
                    if item.percentAchieved == 1.0:
                        story_bugs[CLOSED_BUGS].append(ticket)
                    else:
                        story_bugs[OPEN_BUGS].append(ticket)
            elif uss.percentAchieved == 0.0:
                not_started[idx] += 1
            else:
                progressed[idx] += (story.originalEstimate * uss.percentAchieved)/story.originalEstimate
                to_finish[idx] += (story.originalEstimate * (1-uss.percentAchieved))/story.originalEstimate

            over_budget[idx] += (item.actual - item.estimate*item.percentAchieved)/story.originalEstimate

        # Second pass, over the stories with bugs rather than over the items. Since we are counting stories, each
        # story increments exactly one counter, once: 'regressed' if any of its bugs is still open, otherwise
        # 'completed' if we fixed all the bugs we set out to fix.
        for storyId in bugs_per_story.keys():
            story = storiesRepo.findStory(storyId)
            idx = ReleaseLog._rowIndex(story, categories)
            if len(bugs_per_story[storyId][OPEN_BUGS]) > 0:
                regressed[idx] += 1
            elif len(bugs_per_story[storyId][CLOSED_BUGS]) > 0:
                completed[idx] += 1

        data = [completed, progressed, to_finish, not_started, over_budget, regressed]
        return pd.DataFrame(data, columns=[ReleaseLog.BREAKOUT] + categories)

    @staticmethod
    def _build_backlog_df(context, globalRepo):
        """Returns the breakout, by category, of the pending user stories in the backlog of a team.

        Rows: 'Unplanned', 'Completed', 'Progressed', 'To Finish', 'Not Started', 'Regressed'.

        -context: provides teamId (whose backlog is broken out) and sprintDuration (which determines the categories)
        -globalRepo: provides teamsRepo, storiesRepo and ticketsRepo
        """
        teamsRepo              = globalRepo.teamsRepo
        storiesRepo            = globalRepo.storiesRepo
        ticketsRepo            = globalRepo.ticketsRepo

        team = teamsRepo.findTeam(context.teamId)
        backlog = team.backlog.pendingUserStories

        # Include 0 as a category for tasks with less than 1 person-day to go
        categories = ReleaseLog._categories(context.sprintDuration)

        unplanned          = ReleaseLog._newRow(ReleaseLog.UNPLANNED, categories)
        completed          = ReleaseLog._newRow(ReleaseLog.COMPLETED, categories)
        planned_progressed = ReleaseLog._newRow(ReleaseLog.PROGRESSED, categories)
        planned_remaining  = ReleaseLog._newRow(ReleaseLog.TO_FINISH, categories)
        not_started        = ReleaseLog._newRow(ReleaseLog.NOT_STARTED, categories)
        regressed          = ReleaseLog._newRow(ReleaseLog.REGRESSED, categories)

        for uss in backlog:
            story = storiesRepo.findStory(uss.userStoryId)
            idx = ReleaseLog._rowIndex(story, categories)

            if uss.percentAchieved == 0.0:
                if uss.planned == False:
                    unplanned[idx] += 1
                else:
                    not_started[idx] += 1
            elif uss.percentAchieved == 1.0:
                # A finished story with open tickets against it counts as regressed rather than completed
                bugs = ticketsRepo.getOpenTickets(uss.userStoryId)
                if len(bugs) > 0:
                    regressed[idx] += 1
                else:
                    completed[idx] += 1
            else:
                planned_progressed[idx] += (story.originalEstimate * uss.percentAchieved)/story.originalEstimate
                planned_remaining[idx] += (story.originalEstimate * (1-uss.percentAchieved))/story.originalEstimate

        data = [unplanned, completed, planned_progressed, planned_remaining, not_started, regressed]
        return pd.DataFrame(data, columns=[ReleaseLog.BREAKOUT] + categories)

    # ------------------------------------------------------------------------------------------------------
    # Reading the log
    # ------------------------------------------------------------------------------------------------------

    def mergeLogs(self, snapshot_name):
        """Returns a dataframe of merged dataframes across all sprints and teams for a given snapshot name.

        Each snapshot contributes its rows plus two extra columns, 'Team Id' and 'Sprint', telling where the rows
        came from. The snapshots stored in the log are not modified. The result has a fresh 0..n-1 index. If
        nothing has been logged yet, an empty DataFrame is returned.

        -snapshot_name: a string for the type of snapshot we will merge across all sprints and teams.
         Possible choices:
           'planned_Start_CURRENT_SPRINT'
           'planned_End_CURRENT_SPRINT'
           'planned_Start_NEXT_SPRINT'
           'planned_End_NEXT_SPRINT'
           'backlog'
           'Resourcing'
           'Outcome'

        Raises KeyError if some logged sprint has no snapshot called 'snapshot_name'.
        """
        frames = []
        for teamId, sprints in self.log.items():
            for sprint, snapshots in sprints.items():
                # assign() returns a new DataFrame, so the snapshot kept in the log stays as it was recorded
                tagged = snapshots[snapshot_name].assign(**{'Team Id': teamId, 'Sprint': sprint})
                frames.append(tagged)

        if len(frames) == 0:
            return pd.DataFrame()

        return pd.concat(frames).reset_index(drop=True)

In [10]:
def runReleaseCycle(startDate, sprintDuration, numberOfSprints, modelsConfig):
    """Runs the dynamics of a release cycle, iterating through all the sprints and, within each, all the teams.

    For each team and sprint: the context is set in 'modelsConfig', a WorkAssignment work sheet is chosen for
    what the team aims to do, the sprint is delivered (which updates the work sheet with what was actually
    accomplished), tickets are generated, and the team's backlog is updated. Snapshots are taken along the way.

    Returns two objects: a detailed timecards dataframe and a ReleaseLog instance. The timecards dataframe holds
    one row per row of work.committedTasks() for every team and sprint, dated at the end of the sprint. A team
    with no committed tasks in a sprint still gets one row, with no developer or story and 0 time spent.

    Input parameters:
    -startDate: a datetime object for when the release cycle starts
    -sprintDuration: an integer representing the number of business days that a sprint lasts.
    -numberOfSprints: an integer, representing for how many sprints are we going to run the release. This should
     be set high enough that all work (including delays and bug fixing) is completed within these numberOfSprints,
     even if the original estimates are for much less, since timecard entries stop after numberOfSprints.
    -modelsConfig: a ModelsConfig instance. Has the simulation models and parameters used to drive the simulated
     release cycle. Its 'context' attribute is overwritten for every team and sprint.
    """
    columns = ('Sprint', 'Date', 'Team', 'Developer',
               'User Story', 'Task Type', 'Task Description', 'Time Spent')
    timecards_dict = {column: [] for column in columns}

    def recordTimecard(*values):
        # 'values' must come in the same order as 'columns'
        for column, value in zip(columns, values):
            timecards_dict[column].append(value)

    releaseLog = ReleaseLog() # Used to record how decisions are made at each sprint

    endOfSprint = startDate
    for sprint in range(1, numberOfSprints + 1):
        endOfSprint = addBusinessDays(endOfSprint, sprintDuration)
        for team in modelsConfig.globalRepo.teamsRepo.teams:
            teamId = team.teamId

            # Set context to inform the environment in which decisions and events for this cycle occur
            modelsConfig.context = ReleaseCycleContext(teamId, sprint, sprintDuration)

            releaseLog.snapshotSprintBacklog(modelsConfig)

            # Do the sprint
            work = chooseWhatToDoInSprint(modelsConfig)
            releaseLog.snapshotSprintPlan(work, modelsConfig)

            deliverSprint(work, modelsConfig) # mutates 'work'
            # Bugs reported this sprint. The returned list is not used here
            # NOTE(review): presumably the quality models register the tickets themselves - confirm
            inflowOfTickets(modelsConfig)
            updateBacklogAfterSprint(work, modelsConfig) # Does not mutate 'work' but mutates the UserStoryStatus's

            releaseLog.snapshotSprintOutcome(work, modelsConfig)

            outcome = work.committedTasks()
            for _, task in outcome.iterrows():
                # Even if the effort is 0, record it to indicate the task was in scope
                recordTimecard(sprint, endOfSprint, teamId, task['Owner'], task['User Story Id'],
                               task['Task Type'], task['Task Description'], task['Effort Spent'])
            if outcome.index.size == 0:
                # Boundary case: if we finished all work before all sprints are through, enter 0 time spent
                recordTimecard(sprint, endOfSprint, teamId, None, None, None, None, 0)

    return pd.DataFrame(timecards_dict), releaseLog

In [13]:
# Number of user stories worked on in a Sprint, either to implement or fix bugs on them
def countUniques(seriesGroup):
    """Return the number of distinct non-null values in `seriesGroup`.

    Parameters
    ----------
    seriesGroup : pandas.Series or iterable
        Values to count, e.g. the 'User Story' entries of one sprint's group.

    Returns
    -------
    int
        Count of distinct values, ignoring nulls. Both None and NaN are treated as null:
        pandas stores None as NaN in numeric columns, so filtering only on None would
        count a placeholder entry as if it were a real value.
    """
    if isinstance(seriesGroup, pd.Series):
        values = seriesGroup
    else:
        # An explicit dtype avoids the default-dtype warning pandas raises for empty input
        values = pd.Series(list(seriesGroup), dtype=object)
    return int(values.nunique(dropna=True))

def releaseBurnout(timecard):
    """Summarize timecard entries into one row of burnout metrics per sprint.

    Parameters
    ----------
    timecard : pandas.DataFrame
        Must provide the columns 'Sprint', 'User Story', 'Task Type', 'Time Spent' and 'Developer'.
        Entries whose 'User Story' / 'Developer' is None (e.g. placeholder rows recorded for a
        sprint in which nothing was committed) are not counted as stories or developers.

    Returns
    -------
    pandas.DataFrame
        One row per sprint present in `timecard`, sorted by 'Sprint', with the columns:
        'Sprint', 'Stories Started', 'Stories Active', 'Implementing (# stories)',
        'Debugging (# stories)', 'Stories Completed', 'Effort', 'Implementation Effort',
        'Debugging Effort', 'Active Developers'.
        A story is 'Started' in the first sprint it appears in and 'Completed' in the last
        sprint it appears in.
    """

    def valueAt(series, sprint, default):
        # A sprint with no rows of a given kind is absent from that kind's per-sprint series
        return series[sprint] if sprint in series.index else default

    # First and last sprint of each story; groupby leaves out rows whose 'User Story' is null
    bystory             = timecard.groupby('User Story')
    completed           = bystory['Sprint'].max().value_counts() # sprint -> number of stories ending there
    started             = bystory['Sprint'].min().value_counts() # sprint -> number of stories starting there

    impl                = timecard[timecard['Task Type'] == S_.UNFINISHED_STORIES]
    bugs                = timecard[timecard['Task Type'] == S_.DEV_TIME_BUGS]

    bysprint_all        = timecard.groupby('Sprint')
    bysprint_impl       = impl.groupby('Sprint')
    bysprint_bugs       = bugs.groupby('Sprint')

    counts_all          = bysprint_all['User Story'].apply(countUniques)
    counts_impl         = bysprint_impl['User Story'].apply(countUniques)
    counts_bugs         = bysprint_bugs['User Story'].apply(countUniques)

    efforts_all         = bysprint_all['Time Spent'].sum()
    efforts_impl        = bysprint_impl['Time Spent'].sum()
    efforts_bugs        = bysprint_bugs['Time Spent'].sum()

    # Distinct developers per sprint. Going through countUniques drops the None placeholder even
    # when other developers are active in the same sprint, so an idle team never adds a phantom
    # developer to the count.
    developers          = bysprint_all['Developer'].apply(countUniques)

    d = {'Sprint': [], 'Stories Started': [], 'Stories Active': [], 'Implementing (# stories)': [], \
         'Debugging (# stories)': [], 'Stories Completed': [], \
         'Effort': [], 'Implementation Effort': [], 'Debugging Effort': [], 'Active Developers': []}

    for sprint in counts_all.index:
        d['Sprint']                                 .append(sprint)
        d['Stories Started']                        .append(valueAt(started, sprint, 0))
        d['Stories Active']                         .append(counts_all[sprint])
        d['Implementing (# stories)']               .append(valueAt(counts_impl, sprint, 0.0))
        d['Debugging (# stories)']                  .append(valueAt(counts_bugs, sprint, 0.0))
        d['Stories Completed']                      .append(valueAt(completed, sprint, 0))
        d['Effort']                                 .append(efforts_all[sprint])
        d['Implementation Effort']                  .append(valueAt(efforts_impl, sprint, 0.0))
        d['Debugging Effort']                       .append(valueAt(efforts_bugs, sprint, 0.0))
        d['Active Developers']                      .append(developers[sprint])

    df = pd.DataFrame(d)
    return df.sort_values(by='Sprint')