In [1]:
from random import *
import math
import csv
import numpy as np

This simulation takes historical data and future data, each as a list of lists.
For this sprint, the unit of measurement of the input must always match the unit of the expected output (days, months, points, sprints, tasks, projects).

Historical -> a list of lists, where each inner list contains:
Task/Project Id,
Original Estimate (months, days, points, sprints),
Actual Estimate, i.e. what the work actually took (months, days, points, sprints)

Future -> a list of lists, where each inner list contains:
Task/Project Id,
Predicted Estimation (months, days, points, sprints)


In [2]:
def runSingleSimulation(historical,future):
    '''
    Runs a single simulation of future data based on historical data.

    For every future task one historical record is drawn uniformly at random
    (with replacement). Its actual/estimated ratio (the "velocity") is
    multiplied by the task's estimate, and the scaled estimates are summed.

    Input: historical = list of lists in the format: [[TaskName, estimated, actual], ...]
                (estimated and actual may be numbers or numeric strings)
            future = list of lists in the format: [[TaskName, estimated], ...]
    Output: the simulated total, rounded to 2 decimal places
    Raises: ValueError if future contains tasks but historical is empty
            ZeroDivisionError if the drawn historical estimate is 0
    '''
    if future and not historical:
        raise ValueError("historical must not be empty when future contains tasks")

    estimatedTotal=0
    for task in future:
        selectedEvent = historical[randint(0,len(historical)-1)]
        # Use the full float values: truncating to int would distort the ratio
        # for fractional data and turn any estimate below 1 into a division by zero.
        velocity = float(selectedEvent[2])/float(selectedEvent[1])
        # The inputs are intentionally left untouched. Appending the per-task
        # result to `task` would grow every future row by one element per
        # simulation run.
        estimatedTotal+=velocity*task[1]
    return round(estimatedTotal,2)

In [3]:
def _parseEstimate(text):
    '''Converts a CSV cell to a number: an int when possible, otherwise a float.'''
    try:
        return int(text)
    except ValueError:
        return float(text)


def _readCsvRows(filename):
    '''Returns the non-blank rows of a CSV file, without the header row if one is detected.'''
    # The csv module documentation asks for files to be opened with newline=''
    # so that newlines embedded in quoted fields are handled by the reader.
    with open(filename, newline='') as csvfile:
        sample = csvfile.read(1024)
        sniffer = csv.Sniffer()
        hasHeaders = sniffer.has_header(sample)
        dialect = sniffer.sniff(sample)
        csvfile.seek(0)
        # csv.reader yields [] for blank lines; drop them so callers can index
        # into every row safely.
        rows = [row for row in csv.reader(csvfile, dialect) if row]
    return rows[1:] if hasHeaders else rows


def loadFile(historicalFilename, futureFilename, verbose=False, historicalType='task'):
    '''Loads the historical and future data sets from two CSV files.

        The delimiter and the presence of a header row are auto-detected with
        csv.Sniffer from the first 1024 characters of each file. Blank lines
        are skipped.

        Input: historicalFilename, futureFilename
            Optional: Verbose, historicalType("task" or "sprint")
                (historicalType is accepted but not used by this function)
        Output: historical, future
            historical = the data rows exactly as read (lists of strings)
            future = [[first column, second column as a number], ...]
        Raises: csv.Error if the dialect of a file cannot be determined
                ValueError if a future estimate is not numeric
                IndexError if a future row has fewer than two columns'''

    historical = _readCsvRows(historicalFilename)
    if (verbose):
        print("Loaded {0} historical items".format(len(historical)))

    future = [[row[0], _parseEstimate(row[1])] for row in _readCsvRows(futureFilename)]
    if (verbose):
        print("Loaded {0} future items".format(len(future)))
    return(historical, future)

To print the (unsorted) result of each of the n simulations as it runs, set `verbose` to `True`.

In [4]:
def runSimulations(historical,future,n=10**5,verbose=False):
    '''
            Runs n simulations of future data based on historical data.

            Prints the sum of the future estimates, optionally one line per
            trial, and finally the smallest and largest simulated totals.

            Input: historical = list of lists in the format: [[ProjectName, estimated, actual], ...]
            future = list of lists in the format: [[ProjectName, estimated], ...]
            n = number of simulations to run
            verbose = when True, print the result of every single trial
            Output: the n simulated totals, sorted ascending
                    (an empty list when no simulation was run)
    '''
    estTotalMult = sum(task[1] for task in future)
    print("Estimated Total: "+str(estTotalMult))

    def percentOfEstimated(value):
        # A percentage of a zero total is undefined; report nan instead of
        # raising ZeroDivisionError in the middle of the report.
        return 100*value/estTotalMult if estTotalMult else float('nan')

    estimations=[]
    for i in range(0,n):
        estimatedTotal = runSingleSimulation(historical,future)
        estimations.append(estimatedTotal)
        if (verbose):
            print ("Trial {0:2} prediction: {1:.2f} ({2:.2f}% of estimated)".format(i,estimatedTotal,percentOfEstimated(estimatedTotal)))

    # With n <= 0 there is nothing to take a min/max of.
    if not estimations:
        return []

    lowest = min(estimations)
    highest = max(estimations)
    print("Min:{0:.2f} ({1:.2f}% of estimated)\nMax:{2:.2f} ({3:.2f}% of estimated)".format(lowest,percentOfEstimated(lowest),highest,percentOfEstimated(highest)))
    return(sorted(estimations))


In [5]:
def summary(data,verbose=False):
    '''
    Counts how often each distinct estimation occurs.

    Input: prediction data after running runSimulations()
           (verbose is accepted but not used by this function)
    Output: List of lists estimations and their counts: [[estimation, count], [estimation, count]...]
            in order of first appearance in data
    '''
    try:
        # Single pass; dicts keep insertion order, so the distinct values come
        # out in the order they were first seen.
        counts = {}
        for p in data:
            counts[p] = counts.get(p, 0) + 1
    except TypeError:
        # Unhashable items cannot be dict keys: fall back to equality-based
        # (quadratic) counting.
        points = []
        for p in data:
            if (p not in points):
                points.append(p)
        return [[p, data.count(p)] for p in points]
    return [[p, c] for p, c in counts.items()]

The format of each project input is ['Project_name', estimated, actual], where the last project in the list should be the one you actually need to simulate. For the simulated project, the input format is ['Project_name', estimated], for example ['Project_name', 3].

The unit of the input is the same as the unit of the output, and depends on how your team plans its velocity (engineer-months, weeks, sprints, etc.).

In [6]:
# Read both CSV files; loadFile returns the pair (historical, future).
data = loadFile('projects_historic.csv', 'projects_future.csv')
historical, future = data

# Run the simulations with the default settings; the returned totals are sorted.
estimation = runSimulations(historical, future)
summary(estimation)

# Report the 95th percentile of the simulated totals, rounded to 2 decimals.
percentile95 = np.percentile(estimation, 95)
print(round(percentile95, 2))

Estimated Total: 76
Min:25.33 (33.33% of estimated)
Max:418.00 (550.00% of estimated)
215.33
