Use a notebook so debugging is quicker.

In [1]:
import sys 
import settings
project_root = str(settings.get_project_root())
sys.path.append(project_root)

In [2]:
from pystackql import StackQL
import json 
import os
import pandas as pd
import dotenv
import base64
from IPython.display import display
from stackqueries import GitHubQueries

In [3]:
# Load variables from a .env file into the process environment; presumably this
# supplies GITHUB_CREDS, which the StackQL auth config names -- confirm.
dotenv.load_dotenv()

True

In [4]:
# Provider auth handed to StackQL as a JSON string: the github provider uses
# the "basic" type and names GITHUB_CREDS as its credentials env var.
auth = {
    "github": {
        "type": "basic",
        "credentialsenvvar": "GITHUB_CREDS",
    },
}
iql = StackQL(auth=json.dumps(auth))

In [5]:
def get_dataframe_from_query(query: str) -> pd.DataFrame:
    """Run a StackQL query through ``iql`` and return the result as a DataFrame.

    Args:
        query: The StackQL statement to execute.

    Returns:
        A DataFrame built from the JSON-decoded response.

    Raises:
        TypeError: If the raw response or its decoded form is empty (falsy).
            The raw response is the exception's only argument.
        AttributeError: If the decoded response contains an ``"error"`` entry.
        Exception: Any other failure while decoding the response or building
            the frame. Every non-``TypeError`` exception (including the
            ``AttributeError`` above) is re-raised with a diagnostic message
            prepended to its ``args``.
    """
    res = iql.execute(query)
    try:
        # A falsy raw response is never decoded; it ends in the same
        # TypeError as an empty decoded payload.
        payload = json.loads(res) if res else None
        if not payload:
            raise TypeError(res)
        if "error" in payload:
            raise AttributeError(res)
        return pd.DataFrame(payload)
    except Exception as error:
        # TypeErrors pass through untouched; everything else gets the raw
        # response and the types involved prepended to its args.
        if isinstance(error, TypeError):
            raise error
        context = 'StackQL execute error with error: %s, res: %s, error type: %s, res type: %s' % (error, res, type(error), type(res))
        error.args = (context, *error.args)
        raise error


In [6]:
## pull github registry
# List the registry entries, pick the first one whose provider is 'github',
# and pull that provider at the version the entry reports.
registry_res = iql.execute('REGISTRY LIST;')
registry_list = json.loads(registry_res)
github_registry = [reg for reg in registry_list if reg["provider"] == 'github'][0]
github_version = github_registry['version']

pull_provider_query = """
REGISTRY PULL github %s;
""" % github_version

res = iql.execute(pull_provider_query)


## Team Analytics

### Backlog efficiency

#### Developer throughput and work in progress
Team Leads can ensure that developers:
- Have an active branch.
  - Developers who do not have an active branch might not have picked up a new story, or might be stuck.
- Do NOT have too many active branches.


#### Open Pull Requests

In [7]:
# GitHub owner and repository passed to the pull request queries in later cells.
owner = 'daos-stack'
repo = 'daos'

In [8]:
def list_pull_request_numbers(owner, repo):
    """List the pull requests of a repository.

    Fills the ``GitHubQueries.get_pull_request_numbers`` template with
    ``owner`` and ``repo`` (in that order), prints the resulting query for
    debugging, and runs it.

    Args:
        owner: Value substituted for the owner placeholder of the template.
        repo: Value substituted for the repo placeholder of the template.

    Returns:
        The DataFrame produced by ``get_dataframe_from_query`` for the query.
    """
    sql = GitHubQueries.get_pull_request_numbers % (owner, repo)
    print(sql)
    return get_dataframe_from_query(sql)

In [9]:
# Fetch the pull request listing for the configured owner/repo.
pull_requests_numbers = list_pull_request_numbers(owner=owner, repo=repo)


    SELECT 
    id,
    number
    from github.pulls.pull_requests   
    WHERE
    owner = 'daos-stack' AND repo = 'daos';
    


In [10]:
# Preview the first rows of the pull request listing.
display(pull_requests_numbers.head())

Unnamed: 0,id,number
0,1009626152,9821
1,1009625043,9820
2,1009438018,9819
3,1009434480,9818
4,1009418814,9817


In [11]:
def get_open_pull_requests_by_number(owner, repo, pull_number):
    """Query a single pull request by its number.

    Fills the ``GitHubQueries.get_open_request_with_cycle_time`` template
    with ``owner``, ``repo`` and ``pull_number`` (in that order) and runs it.

    Args:
        owner: Value substituted for the first placeholder of the template.
        repo: Value substituted for the second placeholder of the template.
        pull_number: Value substituted for the third placeholder.

    Returns:
        The DataFrame produced by ``get_dataframe_from_query`` for the query.
    """
    sql = GitHubQueries.get_open_request_with_cycle_time % (owner, repo, pull_number)
    return get_dataframe_from_query(sql)

def get_open_pull_requests(owner, repo):
    """Collect the per-pull-request details for every listed pull request.

    Looks up the pull request numbers with ``list_pull_request_numbers`` and
    queries each number through ``get_open_pull_requests_by_number``.

    Args:
        owner: Repository owner passed to both underlying queries.
        repo: Repository name passed to both underlying queries.

    Returns:
        A DataFrame with the rows of every per-request result stacked in
        listing order (each result keeps its own index labels), or an empty
        DataFrame when no pull request numbers are listed.

    Raises:
        Whatever the underlying queries raise; a failing query aborts the
        whole collection.
    """
    numbers = list_pull_request_numbers(owner, repo)['number'].to_list()
    # Gather the per-request frames first and concatenate once: calling
    # pd.concat inside the loop re-copies all accumulated rows on every
    # iteration.
    frames = [
        get_open_pull_requests_by_number(owner, repo, number)
        for number in numbers
    ]
    if not frames:
        # pd.concat rejects an empty list; keep returning an empty frame.
        return pd.DataFrame()
    return pd.concat(frames)

In [15]:
# Spot-check a single pull request; 9821 is the first number in the listing
# preview shown earlier in this notebook.
pull_request = get_open_pull_requests_by_number(owner=owner, repo=repo, pull_number=9821)
display(pull_request)

Unnamed: 0,changed_files,created_at,issued_days,merge_commit_sha,merged,merged_at,number,state,title,updated_at
0,1,2022-07-27T12:51:48Z,0,e9c6cc2c433bab5adf80ab7ebcad195f472aecd9,False,,9821,open,DAOS-10967 vos: coverity issue fix (#9681),2022-07-27T12:53:17Z


In [None]:
# Build the combined table for every listed pull request. This issues one
# query per pull request number, so runtime grows with the size of the listing.
open_pull_requests = get_open_pull_requests(owner, repo)
display(open_pull_requests.head())

Active branch:
- branches that have been updated within 5 days
- branches that are not merged in

We might need to get the commits of the branch and check the latest one.


In [45]:
# Activity window in days: branches whose last update is fewer than
# UPDATE_THRESHOLD days old are treated as active.
# NOTE(review): the previous comment here said 5 days while the value is 7 --
# confirm which one is intended.
UPDATE_THRESHOLD = 7
def list_branches_with_update_date(org_name, repo):
    """List branches with the age, in days, of the commit each one points at.

    The query joins ``github.repos.branches`` to ``github.repos.commits`` on
    the branch's commit sha and reports, per branch name, the rounded number
    of days between now and that commit's author date as
    ``days_after_last_updated``. The query text is printed for debugging
    before it is run.

    Args:
        org_name: Value substituted for ``owner`` in the query.
        repo: Value substituted for ``repo`` in the query.

    Returns:
        The DataFrame produced by ``get_dataframe_from_query`` for the query.
    """
    sql = """
    select 
    b.name, 
    round(julianday('now') - julianday(JSON_EXTRACT(c.commit, '$.author.date'))) as days_after_last_updated
    from github.repos.branches b 
    JOIN github.repos.commits c
    ON JSON_EXTRACT(b.commit, '$.sha') = c.sha
    WHERE
    owner = '%s'
    AND
    repo = '%s'
    ;
    """ % (org_name, repo)
    print(sql)
    return get_dataframe_from_query(sql)
branches = list_branches_with_update_date(org_name='daos-stack', repo='daos')


    select 
    b.name, 
    round(julianday('now') - julianday(JSON_EXTRACT(c.commit, '$.author.date'))) as days_after_last_updated
    from github.repos.branches b 
    JOIN github.repos.commits c
    ON JSON_EXTRACT(b.commit, '$.sha') = c.sha
    WHERE
    owner = 'daos-stack'
    AND
    repo = 'daos'
    AND 
    ref = '85c774f6e1bbc9dd59ed81d2137e2d7d002d9d54'
    ;
    


In [46]:
# Show the raw branch listing before the age column is converted.
display(branches)
# Coerce the age column to numbers; values that cannot be parsed become NaN
# and therefore never satisfy the threshold comparison below.
branches['days_after_last_updated'] = pd.to_numeric(
    branches['days_after_last_updated'],
    errors='coerce',
)
# Keep only branches updated fewer than UPDATE_THRESHOLD days ago.
active_branches = branches.loc[
    branches['days_after_last_updated'] < UPDATE_THRESHOLD
]
display(active_branches)


Unnamed: 0,days_after_last_updated,name
0,0,pahender/DAOS-11194


Unnamed: 0,days_after_last_updated,name
0,0,pahender/DAOS-11194


Check whether a branch is merged in:
- use the github.repos.commit_pull_requests resource
- use the branch's commit_sha to join against commit_pull_requests
- the branch is merged in when commit_pull_requests.merged_at is not null
- so an active branch = a branch that is among the recently updated branches but not among the merged-in branches

TODO: improve this by using an outer join once a new version of StackQL is released

Get commits of a branch:

Use the [commit_branches](https://registry.stackql.io/providers/github/repos/commit_branches/) resource to get the commits of the branch.