In [2]:
import requests
import datetime
import os

GH_API_TOKEN = os.environ['GITHUB_PAT']

DEBUG = False

def parse_gh_date(date):
    return datetime.datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ")

def get_all_prs():
    """ Get all open PRs from the Apache Arrow repo"""
    prs = []
    page_num=1
    while True:
            open_prs = requests.get("https://api.github.com/repos/apache/arrow/pulls?per_page=100&page=%s" % page_num, 
    headers={"Accept": "application/vnd.github.v3+json", "Authorization": f"token {GH_API_TOKEN}"})
            prs.extend(open_prs.json())

            # the response is paginated, so we need to check for a link to the next page
            if 'rel="next"' in open_prs.headers["Link"]:
                page_num += 1
            else:
                break
    return prs  

def extract_prs(prs, component):
    """ Extract PRs labelled with a particular component from a list of PRs 
    
    Parameters
    ------------
        prs: list
            JSON list of PRs returned from GitHub API call
        
    Return
    -----------
        r_prs : list
            JSON list of PRs returned from GitHub API call that have the specified `component` label
    
    
    """
    relevant_prs = []
    for pr in prs:
        for label in pr['labels']:
            if component in label['name']:
                pr["created_at"] = parse_gh_date(pr["created_at"])
                relevant_prs.append(pr)
                break
    return relevant_prs

def get_all_issues(component_label):
    """ Get all open issues from the Apache Arrow repo that are labelled with a particular component"""
    issues = []
    page_num=1
    while True:
            open_issues = requests.get("https://api.github.com/repos/apache/arrow/issues?labels=%s&per_page=100&page=%s" % (component_label, page_num), 
    headers={"Accept": "application/vnd.github.v3+json", "Authorization": f"token {GH_API_TOKEN}"})
            issues.extend(open_issues.json())

            # the response is paginated, so we need to check for a link to the next page
            if 'rel="next"' in open_issues.headers["Link"]:
                page_num += 1
            else:
                break
                
    # GitHub consider both issues and PRs as issues, so we should filter out the PRs            
    not_prs = []
    for i in issues:
        if 'pull_request' not in i:
            not_prs.append(i)
    
    return not_prs

def extract_bugs(issues):
    
    """ Extract PRs labelled 'Type: bug' and their triage status from a list of issues 
    
    Parameters
    ------------
        r_issues: list
            JSON list of issues returned from GitHub API call
        
    Return
    -----------
        triage_status : dictionary
            Dictionary containing items "triaged" and "untriaged" based 
             on whether bugs have a "Priority" label assigned or not
    
    
    """
    
    # get all bugs
    bugs = []
    for i in issues:
        for label in i['labels']:
            if label['name'] == 'Type: bug':
                bugs.append(i)
                break
    
    # get triage status
    triage_status = {"triaged": {"blocker": [], "critical": [], "other": []}, "untriaged": []}
    for bug in bugs:
        assigned = False
        bug["created_at"] = parse_gh_date(bug['created_at'])
        for label in bug['labels']:
            if label['name'].startswith('Priority: Blocker'):
                triage_status["triaged"]["blocker"].append(bug)
                assigned = True
                break
            elif label['name'].startswith('Priority: Critical'):
                triage_status["triaged"]["critical"].append(bug)
                assigned = True
                break
            elif label['name'].startswith('Priority'):
                triage_status["triaged"]["other"].append(bug)
                assigned = True
                break
        if assigned == False:
            triage_status["untriaged"].append(bug)
            
    return triage_status

def summarise_issue(issue):
    msg = "• <%s|GH-%s: %s>" %  (issue["html_url"], issue["number"], issue["title"]) + "\n"
    return msg

def generate_message(bugs, prs):
    
    one_month = (datetime.datetime.now() - datetime.timedelta(days = 31))
    two_months = (datetime.datetime.now() - datetime.timedelta(days = 62))
    past_month = [bug for bug in bugs["untriaged"] if bug['created_at'] > one_month]
    older_untriaged_bugs = [bug for bug in bugs["untriaged"] if bug['created_at'] <= one_month]

    past_month_message = "\n"
    if(len(past_month) > 0):
        for bug in past_month:
            # need backticks around the title for lazy escaping of special characters
            past_month_message += summarise_issue(bug)
    else:
        past_month_message += "\n* _No bugs to triage!_"

    blockers_message = ""
    blockers = [bug for bug in bugs["triaged"]["blocker"] if bug['created_at'] > two_months]
    for bug in blockers:
        blockers_message += summarise_issue(bug)

    critical_message = ""
    critical = [bug for bug in bugs["triaged"]["critical"] if bug['created_at'] > two_months]

    for bug in critical:
        critical_message += summarise_issue(bug)

    others_message = ""
    others = [bug for bug in bugs["triaged"]["other"] if bug['created_at'] > two_months]
    for bug in others:
        others_message += summarise_issue(bug)

    older = []
    older.extend([bug for bug in bugs["triaged"]["blocker"] if bug['created_at'] < two_months])
    older.extend([bug for bug in bugs["triaged"]["critical"] if bug['created_at'] < two_months])
    older.extend([bug for bug in bugs["triaged"]["other"] if bug['created_at'] < two_months])
    
    
    #### start of section to replace
    prs_message = ""
    for pr in prs:
        prs_message += summarise_issue(pr)
    
    message = {"to_triage": "", "to_fix": "", "prs": ""}
    message["to_triage"] += ":beetle: *Need triage (%s)* :beetle:" % len(past_month) + "\n"
    message["to_triage"] += past_month_message + "\n"
    message["to_triage"] += ("*Untriaged bugs older than 1 month: <https://github.com/apache/arrow/issues?q=is%3Aissue+is%3Aopen+label%3A%22Component%3A+R%22+label%3A%22Type%3A+bug%22+-label%3A%22Priority%3A+Critical%22+-label%3A%22Priority%3A+Medium%22+-label%3A%22Priority%3A+Blocker%22|[" + str(len(older_untriaged_bugs)) + "]>*" + "\n\n")
    message["to_fix"] += "🐜 *Need fix (%s)* 🐜" % (len(blockers) + len(critical) + len(others)) + "\n"
    message["to_fix"] += "*Blockers (%s)*" % len(blockers) + "\n"
    message["to_fix"] += blockers_message + "\n"
    message["to_fix"] += "*Critical (%s)*" % len(critical) + "\n"
    message["to_fix"] += critical_message + "\n"
    message["to_fix"] += "*Others (%s)*" % len(others) + "\n"
    message["to_fix"] += others_message + "\n"
    message["to_fix"] += "*Triaged bugs older than 2 months: " 
    message["to_fix"] += "<https://github.com/apache/arrow/issues?q=is%3Aissue+is%3Aopen+label%3A%22Component%3A+R%22+label%3A%22Type%3A+bug%22+label%3A%22Priority%3A+Critical%22%2C%22Priority%3A+Medium%22%2C%22Priority%3A+Blocker%22|[" + str(len(older)) + "]>*" + "\n\n"
    message["prs"] += "*🤝 Open PRs (%s)*" % len(prs) + " 🤝 \n"
    message["prs"] += prs_message + "\n"
    
    
    ### end of section to replace
    return message

def send_message(message, webhook):
    
    for component in ["to_triage", "to_fix", "prs"]:    
        resp = requests.post(webhook, json={            
            "blocks": [
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": message[component]
                    }
                },
                {
                        "type": "divider"
                }
            ]        
        })

        print("\tSLACK ANSWER", resp.content)    




In [None]:
if __name__ == "__main__":
    
    component = "Component: Python"
    
    # Get PRs
    prs = get_all_prs()
    open_r_prs = extract_prs(prs, component)
    
    # Get issues
    r_issues = get_all_issues(component)
    bugs = extract_bugs(r_issues)
    
    # Generate and send message
    message = generate_message(bugs, open_r_prs)

In [12]:
# get most recent 10 untriaged bugs
[(issue["html_url"], issue["number"], issue["title"]) for issue in bugs["untriaged"][:10]]

[('https://github.com/apache/arrow/issues/40886',
  40886,
  '[R] Cryptic error when creating Arrow array from POSIXct with invalid time zones'),
 ('https://github.com/apache/arrow/issues/40742',
  40742,
  '[R] fix max_rows_per_group must be a positive number'),
 ('https://github.com/apache/arrow/issues/40723',
  40723,
  '[R] write_dataset returns nothing'),
 ('https://github.com/apache/arrow/issues/40711',
  40711,
  '[R] Error when saving attached data.frame containing nested data.frame (character versus all NA logical column)'),
 ('https://github.com/apache/arrow/issues/40678',
  40678,
  '[R] s3_bucket crashes RStudio in 15.0.1'),
 ('https://github.com/apache/arrow/issues/40632',
  40632,
  "[R] I can't update from arrow 13 to 14, and 15."),
 ('https://github.com/apache/arrow/issues/40627',
  40627,
  '[R] In R/MacOS writing a parquet to a non existant path crashes'),
 ('https://github.com/apache/arrow/issues/40624',
  40624,
  '[R] Collect crashes on R when partioning col is in 

In [15]:
# get oldest 10 untriaged bugs
n_issues = len(bugs["untriaged"])


[('https://github.com/apache/arrow/issues/30245',
  30245,
  "[C++] unify_schemas can't handle int64 + double, affects CSV dataset"),
 ('https://github.com/apache/arrow/issues/30096',
  30096,
  '[R] [CI] Consider installing release from our repo + RSPM'),
 ('https://github.com/apache/arrow/issues/29005',
  29005,
  '[R][Python] DLL ImportError loading pyarrow with reticulate'),
 ('https://github.com/apache/arrow/issues/28749',
  28749,
  "[Dev] r_valgrind image doesn't use full parallelism"),
 ('https://github.com/apache/arrow/issues/28343',
  28343,
  '[R][C++][Packaging] Illegal opcode error on aggregate Array/ChunkedArray of integer'),
 ('https://github.com/apache/arrow/issues/28293',
  28293,
  '[R] Writing to Parquet from tibble Consumes Large Amount of Memory'),
 ('https://github.com/apache/arrow/issues/27825',
  27825,
  '[R] Build fails if dataset enabled but parquet is not'),
 ('https://github.com/apache/arrow/issues/26378',
  26378,
  "[R] If pkg-config finds arrow on defaul

In [18]:
# What PRs need merging/reviewing
[(pr["html_url"], pr["number"], pr["title"]) for pr in open_r_prs]

[('https://github.com/apache/arrow/pull/40982',
  40982,
  'GH-40547: [R][Docs] Add a non-technical introductory R vignette to the functioning of arrow'),
 ('https://github.com/apache/arrow/pull/40356',
  40356,
  ' GH-40342: [C++] move LocalFileSystem to the registry '),
 ('https://github.com/apache/arrow/pull/40226',
  40226,
  'GH-34535: [C++] Move `ChunkResolver` to the public API'),
 ('https://github.com/apache/arrow/pull/39443',
  39443,
  'GH-39273: [R] Add character to dictionary conversion path'),
 ('https://github.com/apache/arrow/pull/39438',
  39438,
  'GH-39645: [Python] Fix read_table for encrypted parquet'),
 ('https://github.com/apache/arrow/pull/38720',
  38720,
  'Update red-arrow.gemspec to add xsimd msys2 dependency'),
 ('https://github.com/apache/arrow/pull/38584',
  38584,
  'GH-38558: [C++] Add support for null sort option per sort key'),
 ('https://github.com/apache/arrow/pull/37811',
  37811,
  'GH-37804: [R] Fix with_language test helper')]