### Download a full Apache project list

In [None]:
import requests

PROJECTS_URL = 'https://issues.apache.org/jira/secure/BrowseProjects.jspa'
payload = {'selectedCategory': 'all'}

# Download the project-browser page for all categories.  The timeout keeps a
# stalled connection from hanging the notebook forever, and raise_for_status()
# makes an HTTP error fail here instead of surfacing later as a confusing
# parse failure on an error page.
resp = requests.get(PROJECTS_URL, params=payload, timeout=60)
resp.raise_for_status()

### Parse the list and show project names and keys

In [None]:
import json
import re

# The page embeds the project list as an escaped JSON array inside a
# JavaScript string assignment; DATA_HEADER is the text that precedes it.
DATA_HEADER = r'WRM._unparsedData["com.atlassian.jira.project.browse:projects"]="'

header_escaped = re.escape(DATA_HEADER)
match = re.search(header_escaped + r'(\[.*\])', resp.text)
if match is None:
    # Fail with a clear message instead of AttributeError on None.group().
    raise ValueError('Project data not found in the response from ' + resp.url)
json_escaped = match.group(1)

# Undo the JavaScript string escaping.  'unicode-escape' decodes bytes as
# Latin-1, so the text must be encoded as Latin-1 too (UTF-8 would turn every
# non-ASCII character into mojibake); characters outside Latin-1 are emitted
# as \uXXXX escapes by 'backslashreplace' and restored by the decode step.
projects = json.loads(
    json_escaped.encode('latin-1', 'backslashreplace').decode('unicode-escape'))

# Show the first and last ten projects as a sanity check.
for project in projects[:10]:
    print(project['name'], project['key'])
print('...')
for project in projects[-10:]:
    print(project['name'], project['key'])
print()
print('In total:', len(projects))

### Query issue stats and output to a file

In [None]:
import time

# Matches one quoted statistics entry of the form "<label>: <N> issues (<P>%)".
# Group 1 is the label (word characters, hyphens and whitespace), group 2 the
# integer issue count, and group 3 the percentage as a decimal number.
pattern = re.compile(r'"([\w\-\s]+): (\d+) issues \(([\d\.]+)\%\)"')

def retrieve(url, payload):
    """GET ``url`` with ``payload`` as query parameters, retrying forever.

    Whenever the request raises, the payload and the exception are printed,
    the function sleeps for 10 seconds, and the request is issued again.
    Returns the response object of the first request that does not raise.
    """
    while True:
        try:
            return requests.get(url, params=payload)
        except Exception as err:
            print('Querying ', payload, type(err), err)
            time.sleep(10)

def parse(text, labeler):
    """Aggregate the statistics entries found in ``text``.

    Every match of the module-level ``pattern`` yields a label, an issue
    count and a percentage.  A label seen again with the same count is
    skipped; a label seen again with a different count raises LookupError.

    Returns a dict in which ``'count'`` is the sum of the counts of all
    distinct labels, and each standard label (``labeler[label]``) maps to the
    summed ratio (percentage / 100) of the labels translated to it.  Labels
    missing from ``labeler`` are reported on stdout; they contribute to
    ``'count'`` but to no ratio.
    """
    seen_counts = {}
    stats = {'count': 0}
    for label, count_text, percent_text in pattern.findall(text):
        count = int(count_text)
        ratio = float(percent_text) / 100

        # Duplicate entries are tolerated only when they agree on the count.
        if label in seen_counts:
            if seen_counts[label] != count:
                raise LookupError(label + ' has conflicting counts!')
            continue
        seen_counts[label] = count

        stats['count'] += count
        if label not in labeler:
            print('New label found: %s (%d)' % (label, count))
            continue

        std_label = labeler[label]
        stats[std_label] = stats.get(std_label, 0) + ratio
    return stats

In [None]:
import sys
sys.path.append('../../lib')
from labeler import apache_type, apache_priority

file_name = 'project_issue_stats.csv'

STATS_URL = 'https://issues.apache.org/jira/secure/ConfigureReport.jspa'

# Number of times a project is queried before giving up on getting matching
# totals from the issue-type and priority reports.
MAX_ATTEMPTS = 3

# Resume support: collect the ids (third CSV column) already present in the
# output file so those projects are skipped below.  Best effort: any failure
# while reading is reported and the ids gathered so far are kept.
queried = set()
try:
    with open(file_name, 'r') as in_file:
        for line in in_file:
            queried.add(line.split(',')[2])
except Exception:
    print('Info: There seem no previous stats.')

with open(file_name, 'a') as out:
    # Write the header only when nothing was read from a previous run.
    if not queried:
        print('name', 'key', 'id', 'count',
              'feature', 'bug', 'improvement', 'maintenance',
              'high', 'mid', 'low',
              sep=',', file=out)

    payload = {'reportKey': r'com.atlassian.jira.plugin.system.reports:pie-report'}

    for project in projects:
        if str(project['id']) in queried:
            continue

        payload['projectOrFilterId'] = 'project-' + str(project['id'])

        for attempt in range(MAX_ATTEMPTS):
            payload['statistictype'] = 'issuetype'
            resp = retrieve(STATS_URL, payload)
            type_stats = parse(resp.text, apache_type)

            payload['statistictype'] = 'priorities'
            resp = retrieve(STATS_URL, payload)
            priority_stats = parse(resp.text, apache_priority)

            print('Queried ' + project['name'])
            # The two reports cover the same issues, so their totals must
            # agree; a mismatch triggers another attempt.
            if type_stats['count'] == priority_stats['count']:
                if priority_stats['count'] <= 1:
                    print('Warning: Too few issues - ' + resp.url, resp.status_code)
                break
        else:
            # Runs only when no attempt produced matching totals.  (The
            # original tested ``i == 4`` inside ``range(3)``, so this message
            # could never be printed.)
            print("[Error] Failed in retry: " + project['name'], type_stats, priority_stats)

        # The row is written even after a failed retry, using the stats of
        # the last attempt.
        print(project['name'], project['key'], project['id'],
              type_stats['count'],
              type_stats.get('feature', 0), type_stats.get('bug', 0),
              type_stats.get('improvement', 0), type_stats.get('maintenance', 0),
              priority_stats.get('high', 0), priority_stats.get('mid', 0), priority_stats.get('low', 0),
              sep=',', file=out)


**Note: A very small number of projects cannot be queried in this way. Manually check warnings by clicking their links.**

Known inaccurate queries for:

https://issues.apache.org/jira/browse/MYNEWT

https://issues.apache.org/jira/browse/YOKO