# Loads workitems from a query
A notebook to load data from devops, merge and clean the data before storing it to a file for analyzis

In [1]:
import os
import datetime
from pytz import timezone
import pandas as pd
from azure.devops.credentials import BasicAuthentication
from azure.devops.connection import Connection
from azure.devops.v7_1.work_item_tracking.models import Wiql

## Configuration 
auth_token is a personal access token (PAT) stored in a Windows environment variable called AZURE_DEVOPS_PAT

In [2]:
__VERSION__ = "1.0.0"
auth_token = os.environ['AZURE_DEVOPS_PAT']
url ="https://dev.azure.com/skanskanordic/"
increment = "Skanska Sverige IT\\2023\Höst 2023-3"
areas = ["Skanska Sverige IT\\Development and Operations\\Global Services\\Global Digital Communication Services",
         "Skanska Sverige IT\\Development and Operations\\Global Services\\Global Digital Communication Services\\Brand Hub Web",
         "Skanska Sverige IT\\Development and Operations\\Global Services\\Global Digital Communication Services\\Corporate Web",
         "Skanska Sverige IT\\Development and Operations\\Global Services\\Global Digital Communication Services\\Enterprise Search Global",
         "Skanska Sverige IT\\Development and Operations\\Global Services\\Global Digital Communication Services\\Microsites Web",
         "Skanska Sverige IT\\Development and Operations\\Global Services\\Global Digital Communication Services\\OneSkanska",
         "Skanska Sverige IT\\Development and Operations\\Global Services\\Global Digital Communication Services\\Web Development Environment"]
#areas = ["Skanska Sverige IT\\Development and Operations\\Global Services\\Global Digital Communication Services\\Corporate Web" ]
increment = "Skanska Sverige IT\\2023\Höst 2023-3"
# Three exact times UTC time (Greenwich time without daylight saving time), 
#   1st sometime after the sprint planning meeting, 
#   2nd before sprint demo
#   3rd same as 1st for next sprint...
sprint = ("Sprint 1", datetime.datetime(2023,9,  6,10,0, tzinfo=timezone('UTC')), datetime.datetime(2023, 9,15,0,0, tzinfo=timezone('UTC')), datetime.datetime(2023, 9,18,12,0, tzinfo=timezone('UTC')) )
sprint = ("Sprint 2", datetime.datetime(2023,9, 18,12,0, tzinfo=timezone('UTC')), datetime.datetime(2023, 9,29,0,0, tzinfo=timezone('UTC')), datetime.datetime(2023,10, 2, 9,0, tzinfo=timezone('UTC')) )
sprint = ("Sprint 3", datetime.datetime(2023,10, 2, 9,0, tzinfo=timezone('UTC')), datetime.datetime(2023,10,13,0,0, tzinfo=timezone('UTC')), datetime.datetime(2023,10,16, 8,0, tzinfo=timezone('UTC')) )
sprint = ("Sprint 4", datetime.datetime(2023,10, 16,8,0, tzinfo=timezone('UTC')), datetime.datetime(2023,10,27,0,0, tzinfo=timezone('UTC')), datetime.datetime(2023,10,30, 8,0, tzinfo=timezone('UTC')) )
sprint = ("Sprint 5", datetime.datetime(2023,10, 30,8,0, tzinfo=timezone('UTC')), datetime.datetime(2023,11,10,0,0, tzinfo=timezone('UTC')), datetime.datetime(2023,11,13, 8,0, tzinfo=timezone('UTC')) )


In [3]:
from types import SimpleNamespace
context = SimpleNamespace()
context.runner_cache = SimpleNamespace()
context.connection = Connection(base_url=url,creds=BasicAuthentication('PAT', auth_token), user_agent='azure-devops-python-samples/' + __VERSION__)


## Workitems is loaded from devops using a query

In [18]:
# List all tasks that at the current moment is within the increment and areapaths for the team.
desired_ids = list()
all_tasks = pd.DataFrame()
wit_client = context.connection.clients.get_work_item_tracking_client()
for area in areas:
    wiql = Wiql(query="SELECT [System.Id] from WorkItems WHERE [System.AreaPath] = '"+area+"' AND [System.IterationPath] under '"+increment+"' AND [System.WorkItemType] = 'Task'")
    wiql_results = wit_client.query_by_wiql(wiql, top=1000).work_items
    if wiql_results:       
        for item in wiql_results: desired_ids.append(int(item.id))
        pd.concat([all_tasks, pd.DataFrame(wiql_results) ])

all_tasks

In [20]:
workitems= list()
start = 0 
stop = 0
max = len(desired_ids)-1
while stop<max:
    if (start+100)<=max: stop = start+99  
    else:                stop = stop+(len(desired_ids) - start)
    workitems += wit_client.get_work_items(ids=desired_ids[start:stop],  error_policy="omit" )
    start+=100

workitems

[<azure.devops.v7_0.work_item_tracking.models.WorkItem at 0x29ff3c2e210>,
 <azure.devops.v7_0.work_item_tracking.models.WorkItem at 0x29fefe21190>,
 <azure.devops.v7_0.work_item_tracking.models.WorkItem at 0x29ff360c690>,
 <azure.devops.v7_0.work_item_tracking.models.WorkItem at 0x29ff360c550>,
 <azure.devops.v7_0.work_item_tracking.models.WorkItem at 0x29ff360c4d0>,
 <azure.devops.v7_0.work_item_tracking.models.WorkItem at 0x29ff360c5d0>,
 <azure.devops.v7_0.work_item_tracking.models.WorkItem at 0x29ff360f950>,
 <azure.devops.v7_0.work_item_tracking.models.WorkItem at 0x29ff360f0d0>,
 <azure.devops.v7_0.work_item_tracking.models.WorkItem at 0x29ff360e710>,
 <azure.devops.v7_0.work_item_tracking.models.WorkItem at 0x29ff11bcf10>,
 <azure.devops.v7_0.work_item_tracking.models.WorkItem at 0x29ff360f850>,
 <azure.devops.v7_0.work_item_tracking.models.WorkItem at 0x29ff360ff90>,
 <azure.devops.v7_0.work_item_tracking.models.WorkItem at 0x29ff3acbd90>,
 <azure.devops.v7_0.work_item_tracking

In [22]:
def getTasks(workitems):
    result = list()
    for item in workitems:
        if (item is None): continue
        result.append(
            {
                "id": item.id,
                "area": item.fields["System.AreaPath"].split('\\')[-1],
                "iteration": item.fields["System.IterationPath"].split('\\')[-1],
                "title": item.fields["System.Title"],
                "state.start": item.fields["System.State"],
                "state.end": item.fields["System.State"],
                "resource": item.fields["System.AssignedTo"]["displayName"] if "System.AssignedTo" in item.fields else "", 
                "estimate": item.fields["Microsoft.VSTS.Scheduling.OriginalEstimate"] if "Microsoft.VSTS.Scheduling.OriginalEstimate" in item.fields else "", 
                "completed": item.fields["Microsoft.VSTS.Scheduling.CompletedWork"] if "Microsoft.VSTS.Scheduling.CompletedWork" in item.fields else "",
                "remaining" : item.fields["Microsoft.VSTS.Scheduling.RemainingWork"] if "Microsoft.VSTS.Scheduling.RemainingWork" in item.fields else "",
                "activated" : item.fields["Microsoft.VSTS.Common.ActivatedDate"] if "Microsoft.VSTS.Common.ActivatedDate" in item.fields else "",
                "resolved": item.fields["Microsoft.VSTS.Common.ResolvedDate"] if "Microsoft.VSTS.Common.ResolvedDate" in item.fields else ""
            })  
    return result

In [24]:
tasks = pd.DataFrame(getTasks(workitems))
tasks[tasks['iteration']=="Sprint 5"]


Unnamed: 0,id,area,iteration,title,state.start,state.end,resource,estimate,completed,remaining,activated,resolved
37,91219,Global Digital Communication Services,Sprint 5,QA,Closed,Closed,Max Haase,8.0,21.0,0.0,2023-10-26T06:55:44.643Z,2023-11-08T07:57:12.92Z
162,93997,Global Digital Communication Services,Sprint 5,Itrash - setup Bostad,Closed,Closed,Itrash Aslam,8.0,8.0,0.0,2023-11-15T12:44:21.553Z,2023-11-15T14:52:45.377Z
163,93998,Global Digital Communication Services,Sprint 5,Itrash - setup Microsites,Active,Active,Itrash Aslam,,,,2023-11-15T14:53:19.99Z,
178,94025,Global Digital Communication Services,Sprint 5,Ebba - setup OneSkanska,Closed,Closed,Ebba Enhörning,8.0,8.0,0.0,2023-11-07T07:37:05.253Z,2023-11-09T07:38:12.48Z
181,94211,Global Digital Communication Services,Sprint 5,"Meeting, support and other stuff",Closed,Closed,brychan.toda,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
590,94915,Web Development Environment,Sprint 5,QA möte,Closed,Closed,Martin Nilsson,1.0,1.0,0.0,2023-11-08T07:13:03.4Z,2023-11-09T06:15:09.133Z
591,94925,Web Development Environment,Sprint 5,Build React component,Active,Active,brychan.toda,12.0,15.0,1.0,2023-11-13T06:39:15.107Z,
592,94957,Web Development Environment,Sprint 5,Test scenario,Active,Active,Martin Nilsson,3.0,3.0,0.0,2023-11-13T13:51:28.793Z,
593,94964,Web Development Environment,Sprint 5,Set up pages for elements according to diskuss...,Resolved,Resolved,"Leo Lindeberg, Dan",,,,2023-11-13T08:58:40.613Z,2023-11-13T08:58:42.09Z


In [17]:

workitems_after_planning = list()
workitems_before_demo= list()
workitems_after_demo= list()
start = 0 
stop = 0
max = len(desired_ids)-1
while stop<max:
    if (start+100)<=max: stop = start+99  
    else:                stop = stop+(len(desired_ids) - start)
    workitems_after_planning += wit_client.get_work_items(ids=desired_ids[start:stop], as_of=sprint[1], error_policy="omit" )
    workitems_before_demo    += wit_client.get_work_items(ids=desired_ids[start:stop], as_of=sprint[2], error_policy="omit" ) #2:00
    workitems_after_demo     += wit_client.get_work_items(ids=desired_ids[start:stop], as_of=sprint[3], error_policy="omit" ) #
    start+=100

664

# These items is then loaded from different time 

In [18]:
workitems_after_planning = list()
workitems_before_demo= list()
workitems_after_demo= list()
start = 0 
stop = 0
start
max = len(desired_ids)-1
print(str(max))
while stop<max:
    if (start+100)<=max:
        stop = start+99  
    else:
        stop = stop+(len(desired_ids) - start)

    workitems_after_planning += wit_client.get_work_items(ids=desired_ids[start:stop], as_of=datetime.datetime(2023,10, 2,8,0, tzinfo=timezone('UTC')), error_policy="omit" )
    workitems_before_demo    += wit_client.get_work_items(ids=desired_ids[start:stop], as_of=datetime.datetime(2023,10,13,0,0, tzinfo=timezone('UTC')), error_policy="omit" ) #2:00
    workitems_after_demo     += wit_client.get_work_items(ids=desired_ids[start:stop], as_of=datetime.datetime(2023,10,16,7,0, tzinfo=timezone('UTC')), error_policy="omit" ) #
    start+=100

663


In [19]:
len(workitems_after_planning)

657

In [20]:
def getTasks(workitems):
    result = list()
    for item in workitems:
        if (item is None): continue
        result.append(
            {
                "id": item.id,
                "area": item.fields["System.AreaPath"].split('\\')[-1],
                "iteration": item.fields["System.IterationPath"].split('\\')[-1],
                "title": item.fields["System.Title"],
                "state.start": item.fields["System.State"],
                "state.end": item.fields["System.State"],
                "resource": item.fields["System.AssignedTo"]["displayName"] if "System.AssignedTo" in item.fields else "", 
                "estimate": item.fields["Microsoft.VSTS.Scheduling.OriginalEstimate"] if "Microsoft.VSTS.Scheduling.OriginalEstimate" in item.fields else "", 
                "completed": item.fields["Microsoft.VSTS.Scheduling.CompletedWork"] if "Microsoft.VSTS.Scheduling.CompletedWork" in item.fields else "",
                "remaining" : item.fields["Microsoft.VSTS.Scheduling.RemainingWork"] if "Microsoft.VSTS.Scheduling.RemainingWork" in item.fields else "",
                "activated" : item.fields["Microsoft.VSTS.Common.ActivatedDate"] if "Microsoft.VSTS.Common.ActivatedDate" in item.fields else "",
                "resolved": item.fields["Microsoft.VSTS.Common.ResolvedDate"] if "Microsoft.VSTS.Common.ResolvedDate" in item.fields else ""
            })  
    return result

In [21]:
tasks_after_planning = pd.DataFrame(getTasks(workitems_after_planning))
tasks_before_demo = pd.DataFrame(getTasks(workitems_before_demo))
tasks_after_demo = pd.DataFrame(getTasks(workitems_after_demo))

In [22]:
#tasks_after_planning.info()
tasks_after_planning

Unnamed: 0,id,area,iteration,title,state.start,state.end,resource,estimate,completed,remaining,activated,resolved
0,85797,Corporate Web,Sprint 2,Frontend: Add design and columnlogic,Closed,Closed,Sara Winter,16.0,3.0,0.0,2023-09-19T07:03:56.413Z,2023-09-19T14:05:22.377Z
1,85798,Corporate Web,Sprint 2,QA block in various col-widths,Closed,Closed,Max Haase,8.0,5.0,0.0,2023-09-22T06:50:54.54Z,2023-09-26T06:48:09.433Z
2,85800,Corporate Web,Sprint 4,Remove blocktypes,Proposed,Proposed,,6.0,,6.0,,
3,85801,Corporate Web,Sprint 4,QA,Proposed,Proposed,,4.0,,4.0,,
4,85803,Corporate Web,Sprint 4,Remove admin tools,Proposed,Proposed,,6.0,,6.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...
407,92554,Brand Hub Web,Sprint 3,Sussanne kollar,Active,Active,Susanne Holmberg,,,,2023-10-02T07:08:59.45Z,
408,92563,Enterprise Search Global,Sprint 3,Ayefi working,Active,Active,"Lorentzon, Hanna",,,,2023-10-02T07:36:27.447Z,
409,92567,OneSkanska,Sprint 3,Support,Proposed,Proposed,"Leo Lindeberg, Dan",,,,,
410,92568,OneSkanska,Sprint 3,Support,Proposed,Proposed,Morgan Jönsson,12.0,,12.0,,


# The data is merged into one dataframe

In [23]:
df = pd.merge(tasks_after_planning,tasks_after_demo , on="id", how="right") 
df = pd.merge(df,tasks_before_demo , on="id", how="right") 
df['estimate_z'] = df['estimate_x'].where(df['estimate_x'].notnull(), df['estimate'])

df = df[['id', 'area_y', 'iteration_y', 'title_y','state.start_x', 'state.end_y', 'resource_y','estimate_z','completed_y', 'remaining_y', 'activated_y', 'resolved_y'  ]]
df = df.rename(columns={'area_y': 'area', 'iteration_y': 'iteration', 'title_y': 'title', 'state.start_x': 'state.start', 'state.end_y': 'state.end', 'resource_y': 'resource', 'estimate_z': 'estimate', 'completed_y': 'completed', 'remaining_y': 'remaining', 'activated_y': 'activated', 'resolved_y': 'resolved'})
df

Unnamed: 0,id,area,iteration,title,state.start,state.end,resource,estimate,completed,remaining,activated,resolved
0,85797,Corporate Web,Sprint 2,Frontend: Add design and columnlogic,Closed,Closed,Sara Winter,16.0,3.0,0.0,2023-09-19T07:03:56.413Z,2023-09-19T14:05:22.377Z
1,85798,Corporate Web,Sprint 2,QA block in various col-widths,Closed,Closed,Max Haase,8.0,5.0,0.0,2023-09-22T06:50:54.54Z,2023-09-26T06:48:09.433Z
2,85800,Corporate Web,Sprint 5,Remove blocktypes,Proposed,Proposed,,6.0,,6.0,,
3,85801,Corporate Web,Sprint 5,QA,Proposed,Proposed,,4.0,,4.0,,
4,85803,Corporate Web,Sprint 5,Remove admin tools,Proposed,Proposed,,6.0,,6.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...
491,93563,Bostad Web,Sprint 4,Show/hide on Project card,,Proposed,,6.0,,6.0,,
492,93564,Bostad Web,Sprint 4,Review,,Proposed,,1.0,,1.0,,
493,93565,Bostad Web,Sprint 4,QA,,Proposed,SE-ITN Quality Assurance,6.0,,6.0,,
494,93566,Bostad Web,Sprint 4,Backend updates,,Proposed,,1.0,,1.0,,


In [24]:
df = df.replace("NaN", 0)
df['estimate'].replace('',0)
df['completed'].replace('',0)
df['remaining'].replace('',0)
df['estimate'] = pd.to_numeric(df['estimate'])
df['completed'] = pd.to_numeric(df['completed'])
df['remaining'] = pd.to_numeric(df['remaining'])
#df.info()
df

Unnamed: 0,id,area,iteration,title,state.start,state.end,resource,estimate,completed,remaining,activated,resolved
0,85797,Corporate Web,Sprint 2,Frontend: Add design and columnlogic,Closed,Closed,Sara Winter,16.0,3.0,0.0,2023-09-19T07:03:56.413Z,2023-09-19T14:05:22.377Z
1,85798,Corporate Web,Sprint 2,QA block in various col-widths,Closed,Closed,Max Haase,8.0,5.0,0.0,2023-09-22T06:50:54.54Z,2023-09-26T06:48:09.433Z
2,85800,Corporate Web,Sprint 5,Remove blocktypes,Proposed,Proposed,,6.0,,6.0,,
3,85801,Corporate Web,Sprint 5,QA,Proposed,Proposed,,4.0,,4.0,,
4,85803,Corporate Web,Sprint 5,Remove admin tools,Proposed,Proposed,,6.0,,6.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...
491,93563,Bostad Web,Sprint 4,Show/hide on Project card,,Proposed,,6.0,,6.0,,
492,93564,Bostad Web,Sprint 4,Review,,Proposed,,1.0,,1.0,,
493,93565,Bostad Web,Sprint 4,QA,,Proposed,SE-ITN Quality Assurance,6.0,,6.0,,
494,93566,Bostad Web,Sprint 4,Backend updates,,Proposed,,1.0,,1.0,,


# The dataframe is stored as csv into a file

In [25]:
df.to_csv("sprinttasks.csv", sep=";", decimal=',')