# Loads workitems from a query
A notebook to load data from devops, merge and clean the data before storing it to a file for analyzis

In [12]:
import os
import datetime
from pytz import timezone
import pandas as pd
from azure.devops.credentials import BasicAuthentication
from azure.devops.connection import Connection
from azure.devops.v7_1.work_item_tracking.models import Wiql
from enum import Enum

## Configuration 
auth_token is a personal access token (PAT) stored in a Windows environment variable called AZURE_DEVOPS_PAT

In [13]:
__VERSION__ = "1.0.0"
auth_token = os.environ['AZURE_DEVOPS_PAT']
url ="https://dev.azure.com/skanskanordic/"
area = "Skanska Sverige IT\\Development and Operations\\Global Services\\Global Digital Communication Services"
increment = "Skanska Sverige IT\\2023\Höst 2023-3"
sprint = "Sprint 3"

In [14]:
from types import SimpleNamespace
context = SimpleNamespace()
context.runner_cache = SimpleNamespace()
context.connection = Connection(base_url=url,creds=BasicAuthentication('PAT', auth_token), user_agent='azure-devops-python-samples/' + __VERSION__)

In [15]:
wit_client = context.connection.clients.get_work_item_tracking_client()

## Workitems is loaded from devops using a query

In [16]:
wiql = Wiql(query="SELECT [System.Id] from WorkItems WHERE [System.AreaPath] under '"+area+"' AND [System.IterationPath] under '"+increment+'\\'+"' AND [System.WorkItemType] = 'Task'")
wiql_results = wit_client.query_by_wiql(wiql, top=1000).work_items

In [17]:
desired_ids = list()
for item in wiql_results: desired_ids.append(int(item.id))
len(desired_ids)

664

# These items is then loaded from different time 

In [18]:
workitems_after_planning = list()
workitems_before_demo= list()
workitems_after_demo= list()
start = 0 
stop = 0
start
max = len(desired_ids)-1
print(str(max))
while stop<max:
    if (start+100)<=max:
        stop = start+99  
    else:
        stop = stop+(len(desired_ids) - start)

    workitems_after_planning += wit_client.get_work_items(ids=desired_ids[start:stop], as_of=datetime.datetime(2023,10, 2,8,0, tzinfo=timezone('UTC')), error_policy="omit" )
    workitems_before_demo    += wit_client.get_work_items(ids=desired_ids[start:stop], as_of=datetime.datetime(2023,10,13,0,0, tzinfo=timezone('UTC')), error_policy="omit" ) #2:00
    workitems_after_demo     += wit_client.get_work_items(ids=desired_ids[start:stop], as_of=datetime.datetime(2023,10,16,7,0, tzinfo=timezone('UTC')), error_policy="omit" ) #
    start+=100

663


In [19]:
len(workitems_after_planning)

657

In [20]:
def getTasks(workitems):
    result = list()
    for item in workitems:
        if (item is None): continue
        result.append(
            {
                "id": item.id,
                "area": item.fields["System.AreaPath"].split('\\')[-1],
                "iteration": item.fields["System.IterationPath"].split('\\')[-1],
                "title": item.fields["System.Title"],
                "state.start": item.fields["System.State"],
                "state.end": item.fields["System.State"],
                "resource": item.fields["System.AssignedTo"]["displayName"] if "System.AssignedTo" in item.fields else "", 
                "estimate": item.fields["Microsoft.VSTS.Scheduling.OriginalEstimate"] if "Microsoft.VSTS.Scheduling.OriginalEstimate" in item.fields else "", 
                "completed": item.fields["Microsoft.VSTS.Scheduling.CompletedWork"] if "Microsoft.VSTS.Scheduling.CompletedWork" in item.fields else "",
                "remaining" : item.fields["Microsoft.VSTS.Scheduling.RemainingWork"] if "Microsoft.VSTS.Scheduling.RemainingWork" in item.fields else "",
                "activated" : item.fields["Microsoft.VSTS.Common.ActivatedDate"] if "Microsoft.VSTS.Common.ActivatedDate" in item.fields else "",
                "resolved": item.fields["Microsoft.VSTS.Common.ResolvedDate"] if "Microsoft.VSTS.Common.ResolvedDate" in item.fields else ""
            })  
    return result

In [21]:
tasks_after_planning = pd.DataFrame(getTasks(workitems_after_planning))
tasks_before_demo = pd.DataFrame(getTasks(workitems_before_demo))
tasks_after_demo = pd.DataFrame(getTasks(workitems_after_demo))

In [22]:
#tasks_after_planning.info()
tasks_after_planning

Unnamed: 0,id,area,iteration,title,state.start,state.end,resource,estimate,completed,remaining,activated,resolved
0,85797,Corporate Web,Sprint 2,Frontend: Add design and columnlogic,Closed,Closed,Sara Winter,16.0,3.0,0.0,2023-09-19T07:03:56.413Z,2023-09-19T14:05:22.377Z
1,85798,Corporate Web,Sprint 2,QA block in various col-widths,Closed,Closed,Max Haase,8.0,5.0,0.0,2023-09-22T06:50:54.54Z,2023-09-26T06:48:09.433Z
2,85800,Corporate Web,Sprint 4,Remove blocktypes,Proposed,Proposed,,6.0,,6.0,,
3,85801,Corporate Web,Sprint 4,QA,Proposed,Proposed,,4.0,,4.0,,
4,85803,Corporate Web,Sprint 4,Remove admin tools,Proposed,Proposed,,6.0,,6.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...
407,92554,Brand Hub Web,Sprint 3,Sussanne kollar,Active,Active,Susanne Holmberg,,,,2023-10-02T07:08:59.45Z,
408,92563,Enterprise Search Global,Sprint 3,Ayefi working,Active,Active,"Lorentzon, Hanna",,,,2023-10-02T07:36:27.447Z,
409,92567,OneSkanska,Sprint 3,Support,Proposed,Proposed,"Leo Lindeberg, Dan",,,,,
410,92568,OneSkanska,Sprint 3,Support,Proposed,Proposed,Morgan Jönsson,12.0,,12.0,,


# The data is merged into one dataframe

In [23]:
df = pd.merge(tasks_after_planning,tasks_after_demo , on="id", how="right") 
df = pd.merge(df,tasks_before_demo , on="id", how="right") 
df['estimate_z'] = df['estimate_x'].where(df['estimate_x'].notnull(), df['estimate'])

df = df[['id', 'area_y', 'iteration_y', 'title_y','state.start_x', 'state.end_y', 'resource_y','estimate_z','completed_y', 'remaining_y', 'activated_y', 'resolved_y'  ]]
df = df.rename(columns={'area_y': 'area', 'iteration_y': 'iteration', 'title_y': 'title', 'state.start_x': 'state.start', 'state.end_y': 'state.end', 'resource_y': 'resource', 'estimate_z': 'estimate', 'completed_y': 'completed', 'remaining_y': 'remaining', 'activated_y': 'activated', 'resolved_y': 'resolved'})
df

Unnamed: 0,id,area,iteration,title,state.start,state.end,resource,estimate,completed,remaining,activated,resolved
0,85797,Corporate Web,Sprint 2,Frontend: Add design and columnlogic,Closed,Closed,Sara Winter,16.0,3.0,0.0,2023-09-19T07:03:56.413Z,2023-09-19T14:05:22.377Z
1,85798,Corporate Web,Sprint 2,QA block in various col-widths,Closed,Closed,Max Haase,8.0,5.0,0.0,2023-09-22T06:50:54.54Z,2023-09-26T06:48:09.433Z
2,85800,Corporate Web,Sprint 5,Remove blocktypes,Proposed,Proposed,,6.0,,6.0,,
3,85801,Corporate Web,Sprint 5,QA,Proposed,Proposed,,4.0,,4.0,,
4,85803,Corporate Web,Sprint 5,Remove admin tools,Proposed,Proposed,,6.0,,6.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...
491,93563,Bostad Web,Sprint 4,Show/hide on Project card,,Proposed,,6.0,,6.0,,
492,93564,Bostad Web,Sprint 4,Review,,Proposed,,1.0,,1.0,,
493,93565,Bostad Web,Sprint 4,QA,,Proposed,SE-ITN Quality Assurance,6.0,,6.0,,
494,93566,Bostad Web,Sprint 4,Backend updates,,Proposed,,1.0,,1.0,,


In [24]:
df = df.replace("NaN", 0)
df['estimate'].replace('',0)
df['completed'].replace('',0)
df['remaining'].replace('',0)
df['estimate'] = pd.to_numeric(df['estimate'])
df['completed'] = pd.to_numeric(df['completed'])
df['remaining'] = pd.to_numeric(df['remaining'])
#df.info()
df

Unnamed: 0,id,area,iteration,title,state.start,state.end,resource,estimate,completed,remaining,activated,resolved
0,85797,Corporate Web,Sprint 2,Frontend: Add design and columnlogic,Closed,Closed,Sara Winter,16.0,3.0,0.0,2023-09-19T07:03:56.413Z,2023-09-19T14:05:22.377Z
1,85798,Corporate Web,Sprint 2,QA block in various col-widths,Closed,Closed,Max Haase,8.0,5.0,0.0,2023-09-22T06:50:54.54Z,2023-09-26T06:48:09.433Z
2,85800,Corporate Web,Sprint 5,Remove blocktypes,Proposed,Proposed,,6.0,,6.0,,
3,85801,Corporate Web,Sprint 5,QA,Proposed,Proposed,,4.0,,4.0,,
4,85803,Corporate Web,Sprint 5,Remove admin tools,Proposed,Proposed,,6.0,,6.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...
491,93563,Bostad Web,Sprint 4,Show/hide on Project card,,Proposed,,6.0,,6.0,,
492,93564,Bostad Web,Sprint 4,Review,,Proposed,,1.0,,1.0,,
493,93565,Bostad Web,Sprint 4,QA,,Proposed,SE-ITN Quality Assurance,6.0,,6.0,,
494,93566,Bostad Web,Sprint 4,Backend updates,,Proposed,,1.0,,1.0,,


# The dataframe is stored as csv into a file

In [25]:
df.to_csv("sprinttasks.csv", sep=";", decimal=',')