In [1]:
from datetime import datetime
import pandas as pd

In [2]:
import devanalyst
import devanalyst.simulation.statics as S_
from devanalyst.simulation.businessObjects import UserStory, Ticket
from devanalyst.simulation.simulationModels import ModelsConfig
import devanalyst.simulation.generateTimecards as timecard

importing Jupyter notebook from c:\users\aleja\documents\code\chateauclaudia-labs\devanalyst\devanalyst\simulation\statics.ipynb
importing Jupyter notebook from c:\users\aleja\documents\code\chateauclaudia-labs\devanalyst\devanalyst\simulation\businessObjects.ipynb
importing Jupyter notebook from c:\users\aleja\documents\code\chateauclaudia-labs\devanalyst\devanalyst\simulation\simulationModels.ipynb
importing Jupyter notebook from c:\users\aleja\documents\code\chateauclaudia-labs\devanalyst\devanalyst\simulation\generateTimecards.ipynb


In [3]:
class ReleaseSimulationStats:
    """Summary statistics for one simulated release cycle.

    Every attribute starts at a neutral default (0 / None) and is populated by `computeStats`.
    """

    # Number of working days in a year, used to turn an annual salary into a cost per day
    WORKING_DAYS_PER_YEAR = 240

    def __init__(self):
        self.headcount                       = 0.0
        self.entries_df                      = None
        self.commits_impl                    = None
        self.commits_bugs                    = None
        self.commits_all                     = None
        self.burnout                         = None
        self.implementation_effort           = 0.0
        self.debugging_effort                = 0.0
        self.total_effort                    = 0.0
        self.cost                            = 0.0
        self.number_of_stories_completed     = 0
        self.sprint_implementation_completed = None
        self.sprint_bulk_of_work_completed   = None #'bulk of work' depends on a cutoff level, e.g. 90%

    def computeStats(self, entries_df, modelsConfig, cutoff):
        """Populates this object's attributes from the results of a completed release cycle.

        -entries_df: Pandas dataframe of timecard entries. This method reads its 'Developer' and 'Time Spent'
         columns, and passes the whole dataframe to timecard.releaseBurnout.
        -modelsConfig: simulation configuration of the release cycle. Used to tabulate commits and to look up
         each developer's salary through modelsConfig.globalRepo.developersRepo.
        -cutoff: the 'Cum % Completion' level at or above which the 'bulk of work' is considered completed.
         Presumably a double between 0 and 1.0 (e.g. 0.9 for 90%) -- TODO confirm against the scale used by
         timecard.releaseBurnout.
        """
        self.entries_df = entries_df
        self.commits_impl, self.commits_bugs, self.commits_all = CommitsAnalysis.tabulateCommits(modelsConfig)

        # Entries without a developer are not counted towards headcount
        self.headcount             = entries_df['Developer'].dropna().unique().size
        self.burnout               = timecard.releaseBurnout(self.entries_df)
        self.implementation_effort = self.burnout['Implementation Effort'].sum()
        self.debugging_effort      = self.burnout['Debugging Effort'].sum()
        self.total_effort          = self.implementation_effort + self.debugging_effort

        # Cost = days worked by each developer, priced at that developer's daily rate (salary is annual)
        days_per_developer = entries_df.groupby('Developer')['Time Spent'].sum()
        repo               = modelsConfig.globalRepo.developersRepo
        total_cost         = sum(days * repo.findDeveloper(developer).salary / self.WORKING_DAYS_PER_YEAR
                                 for developer, days in days_per_developer.items())
        self.cost          = round(total_cost, 0)

        self.number_of_stories_completed = self.burnout['Stories Completed'].sum()

        completion = self.burnout['Cum % Completion']
        sprints    = self.burnout['Sprint']
        # Earliest sprint at which everything is completed. NOTE(review): this is an exact floating point
        # comparison, so it relies on releaseBurnout reporting exactly 1.0 on completion -- confirm.
        self.sprint_implementation_completed = sprints[completion == 1.0].min()
        # Earliest sprint at which the completion reaches the caller-supplied cutoff
        self.sprint_bulk_of_work_completed   = sprints[completion >= cutoff].min()

In [4]:
class DragAnalyzer:
    """Compares a release cycle run with the full team against the same cycle re-run without the 'drag'.

    The 'drag' is the list of developers returned by `_identifyDrag` after the first simulation run.
    """

    # -engine: a ReleaseSimulationEngine instance
    # -number_of_sprints, sprint_duration, start_date: passed as-is to engine.run for both simulations
    # -cutoff: a double between 0 and 1.0. Used both to identify the drag and as the 'bulk of work' level
    #  when computing the stats of each simulation.
    def __init__(self, engine, number_of_sprints, sprint_duration, start_date, cutoff):
        self.engine                = engine
        self.number_of_sprints     = number_of_sprints
        self.sprint_duration       = sprint_duration
        self.start_date            = start_date
        self.cutoff                = cutoff

        # Stats are set when the engine is run
        self.stats = {'With_drag': ReleaseSimulationStats(), 'Without_drag': ReleaseSimulationStats()}

    # Runs the release cycle twice on the same engine: first with the full team, then with the drag removed.
    # The results are stored in self.stats. Note that removing the drag mutates the engine's modelsConfig
    # (team membership and story ownership), and that change is not undone afterwards.
    def runSimulations(self):
        # First simulation: With_drag
        self.engine.run(self.number_of_sprints, self.sprint_duration, self.start_date)
        self.stats['With_drag'].computeStats(self.engine.entries_df, self.engine.modelsConfig, self.cutoff)

        drag = self._identifyDrag(self.engine.modelsConfig, self.cutoff)
        self._removeDrag(self.engine.modelsConfig, drag)

        self.engine.resetEngine() # Clear progress made so that we can start release cycle from the start again

        # Second simulation: re-run release cycle, but Without_drag this time
        self.engine.run(self.number_of_sprints, self.sprint_duration, self.start_date)
        self.stats['Without_drag'].computeStats(self.engine.entries_df, self.engine.modelsConfig, self.cutoff)

    # Identifies the 'drag' from a team that has completed a release cycle. Developers are ranked by descending
    # number of user story implementation commits (bug fixing commits are not considered), and the drag is the
    # list of developers at which the cumulative share of commits is strictly above the 'cutoff'.
    # -cutoff: a double between 0 and 1.0, reflecting the percentage of commits after which the drag starts.
    #  For example, a cutoff of 0.9 returns the least productive developers, starting from the first developer
    #  whose commits take the cumulative share above 90%.
    # -modelsConfig: the configuration that was used for the already completed release cycle against whose commits the
    #  drag will be identified.
    # Returns the list of index labels (developers) of the drag, ordered from most to least commits.
    def _identifyDrag(self, modelsConfig, cutoff):
        commits_impl, _, _ = CommitsAnalysis.tabulateCommits(modelsConfig)
        cumulative         = commits_impl.sort_values(by='User Story Id', ascending=False).cumsum()
        total              = cumulative['User Story Id'].max() # Last value of the running sum, i.e. all commits
        cumulative_share   = cumulative / total
        drag_df            = cumulative_share[cumulative_share['User Story Id'] > cutoff]
        return list(drag_df.index)

    # -drag: a list of developer names corresponding to developers that should be removed from the release generation
    #  engine driven by 'modelsConfig'
    # Removes them from every team, and re-assigns their user stories to S_.OWNER_TBD.
    # NOTE(review): membership is tested with 'in drag', so this assumes that team.developers and story.developer
    # hold the same kind of value as the entries in 'drag' -- confirm.
    def _removeDrag(self, modelsConfig, drag):
        teamsRepo = modelsConfig.globalRepo.teamsRepo
        storiesRepo = modelsConfig.globalRepo.storiesRepo
        for team in teamsRepo.teams:
            developers = team.developers
            for person in developers.copy(): #Important to do a copy since we'll mutate developers within the loop
                if person in drag:
                    developers.remove(person)

        for story in storiesRepo.stories:
            if story.developer in drag:
                story.developer = S_.OWNER_TBD

    # Returns a dataframe listing key metrics for two release cycles: one with the full R&D Team, one without
    # the drag, plus the percent difference between the two. Expects runSimulations to have been called.
    def tabulateDragImpact(self):

        cutoff_percent = str(round(self.cutoff * 100, 0)) + '%'
        index_cols = ['Headcount', 'Implementation Effort (days)', 'Debugging Effort (days)', 'Total Effort (days)', \
                     'Cost ($)', 'Stories completed (#)', 'When implementation is completed (sprint #)',
                     'When implementation is ' + cutoff_percent + ' completed (sprint #)']

        with_drag_cols    = self._statsAsList(self.stats['With_drag'])
        without_drag_cols = self._statsAsList(self.stats['Without_drag'])

        # Differences are computed before any rounding is done
        diff_cols         = self._compute_diff(with_drag_cols, without_drag_cols)
        diff_cols         = [i * 100.0 for i in diff_cols] # Make it a percent, out of 100%

        # For readability, eliminate excessive number of decimal places
        self._roundList(with_drag_cols, 0)      # Values are large, don't need decimals
        self._roundList(without_drag_cols, 0)
        self._roundList(diff_cols, 2)           # Diffs are percentages, so want 2 decimal places

        impact_dict = {'Metric': index_cols, 'Full R&D Team': with_drag_cols, 'Without drag': without_drag_cols, \
                      '% difference': diff_cols}

        return pd.DataFrame(impact_dict)

    # Returns the metrics of 'stats' as a list, in the same order as the metrics listed by tabulateDragImpact.
    def _statsAsList(self, stats):
        return [stats.headcount, stats.implementation_effort, stats.debugging_effort, stats.total_effort, \
                stats.cost, stats.number_of_stories_completed, stats.sprint_implementation_completed, \
                stats.sprint_bulk_of_work_completed]

    # Takes two lists of numbers, of equal size, and returns a list of the relative change from each element of
    # list1 to the matching element of list2, as a fraction (e.g. 0.5 for a 50% increase). The change is reported
    # as 0.0 when the element of list1 is 0. Returns None if the lists differ in size.
    def _compute_diff(self, list1, list2):
        if len(list1) != len(list2):
            return None
        return [(after / before - 1) if before != 0 else 0.0 for before, after in zip(list1, list2)]

    # Rounds, in place, every element of 'a_list' to the given number of decimal places.
    def _roundList(self, a_list, decimal_places):
        a_list[:] = [round(value, decimal_places) for value in a_list]

In [5]:
class CommitsAnalysis:
    """Stateless helpers to tabulate and chart the number of commits per developer."""

    # Returns several Pandas dataframes containing the number of commits per developer:
    # 1) A dataframe for commits related to user story implementation (column 'User Story Id')
    # 2) A dataframe for commits related to bug fixing (column 'Ticket Id')
    # 3) A dataframe for the combined commits (story implementation and bug fixing)
    # All three are indexed by the developers found in the user stories dataframe.
    #
    # -modelsConfig: simulation configuration for the release cycle in question. It is assumed that the release
    # cycle is completed prior to calling this function, so that the repositories referenced by 'modelsConfig'
    # contain repos that indicate final state of user story completion after the release cycle.
    @staticmethod
    def tabulateCommits(modelsConfig):
        globalRepo = modelsConfig.globalRepo
        stories_df = UserStory.build_stories_df(globalRepo)
        bugs_df = Ticket.build_bugs_df(globalRepo.ticketsRepo.tickets)

        commits_impl = stories_df.groupby(['Developer'])['User Story Id'].count().to_frame()
        commits_bugs = bugs_df.groupby(['Fixed By'])['Ticket Id'].count().to_frame()
        # Align bug fixing commits to the developers that have user stories: developers with no bug fixes get 0.
        # NOTE(review): this also drops bug fixes made by anyone who has no user story -- confirm that is intended.
        commits_bugs = commits_bugs.reindex(commits_impl.index, fill_value=0.0)
        commits_all = (commits_impl['User Story Id'] + commits_bugs['Ticket Id']).to_frame()

        return commits_impl, commits_bugs, commits_all

    # Renders a bar chart of the commits per developer, with developers sorted in the order of descending number of
    # commits.
    # -commits_df: Pandas dataframe whose first column is the number of commits per developer that should be
    # charted. Only the first column is used.
    # -cumulative: if False, the chart represents each developer's commits (a decaying curve). If True, it is the
    # accumulated commits up to that developer, as a fraction of all commits (a concave growing curve)
    @staticmethod
    def chartCommits(commits_df, cumulative = False):
        col = list(commits_df.columns)[0]
        sorted_series = commits_df[col].sort_values(ascending = False)
        if not cumulative:
            sorted_series.plot(kind = 'bar')
        else:
            cumulative_share = sorted_series.cumsum()/sorted_series.sum()
            cumulative_share.plot(kind = 'bar')
            
 