In [1]:
import json
import os

import pandas as pd
import requests
from requests.structures import CaseInsensitiveDict

# Reporting-window boundaries: midnight at the start of 2020 and of 2021.
# Both are timezone-naive timestamps.
JAN_1_2020 = pd.Timestamp(year=2020, month=1, day=1)
JAN_1_2021 = pd.Timestamp(year=2021, month=1, day=1)

# GitHub personal access token (PAT) with at least repo scope. It is read from
# the GITHUB_TOKEN environment variable so the secret never has to be typed
# into (and saved with) the notebook. When the variable is unset TOKEN is "",
# exactly as before, and authenticated requests will be rejected.
TOKEN = os.environ.get("GITHUB_TOKEN", "")

# Headers sent with every GraphQL request in this notebook.
headers = CaseInsensitiveDict()
headers["Accept"] = "application/json"
headers["Authorization"] = f"Bearer {TOKEN}"

# GitHub GraphQL endpoint that all queries are POSTed to.
URL = "https://api.github.com/graphql"


In [13]:
# GraphQL template that lists a repository's issues, newest first, 100 per
# request, together with the paging information needed to fetch the next page.
#
# The text is passed through str.format() before being sent, so every literal
# GraphQL brace is doubled and two placeholders are filled in per request:
#   {repository} - name of the repository under the "Cantera" owner
#   {after}      - "" for the first page, otherwise ', after:"<cursor>"' built
#                  from the previous page's pageInfo.endCursor
query = """\
query cantera_1{{
  repository(name: "{repository}", owner: "Cantera") {{
    issues(orderBy: {{field: CREATED_AT, direction: DESC}}, first: 100{after}) {{
      nodes {{
        createdAt
        closedAt
        closed
        authorAssociation
        number
      }}
      pageInfo {{
        endCursor
        hasNextPage
      }}
    }}
  }}
}}
"""


In [29]:
# Download every issue of the "cantera" repository, one page per request, and
# collect the raw issue records in `nodes`.
nodes = []
after = ""  # the first page is requested without a cursor argument
while True:
    r = requests.post(
        URL,
        json={"query": query.format(repository="cantera", after=after)},
        headers=headers,
    )
    r.raise_for_status()
    payload = r.json()
    # GraphQL reports query-level failures (e.g. an unknown repository) with
    # HTTP 200 and an "errors" list, which raise_for_status() cannot see.
    if payload.get("errors"):
        raise RuntimeError(f"GitHub GraphQL query failed: {payload['errors']}")
    data = payload["data"]["repository"]["issues"]
    nodes.extend(data["nodes"])
    has_next_page = data["pageInfo"]["hasNextPage"]
    if not has_next_page:
        break
    # Ask for the page that starts right after the last record received.
    end_cursor = data["pageInfo"]["endCursor"]
    after = f', after:"{end_cursor}"'

# One row per issue; columns are the fields requested in the query.
cantera = pd.DataFrame(nodes)


In [30]:
# Convert the timestamp columns to timezone-naive datetimes so they can be
# compared with the naive JAN_1_2020 / JAN_1_2021 boundaries. Values are parsed
# as UTC and the timezone is then dropped; missing values (issues that are
# still open have no closedAt) become NaT.
# The unit-less .astype("datetime64") used previously is rejected by
# pandas >= 2.0; this form also stays a no-op when the cell is re-run.
for column in ("createdAt", "closedAt"):
    cantera[column] = pd.to_datetime(cantera[column], utc=True).dt.tz_localize(None)

# authorAssociation has a handful of repeated labels, so store it as a category.
cantera["authorAssociation"] = cantera["authorAssociation"].astype("category")


In [31]:
# Issues opened after the start of 2021.
new_issues = cantera["createdAt"].gt(JAN_1_2021)
print(cantera[new_issues].shape)

# Of those, the ones that have also been closed after the start of 2021.
closed_after_2021_start = cantera["closedAt"].gt(JAN_1_2021)
new_closed_issues = new_issues & closed_after_2021_start
print(cantera[new_closed_issues].shape)


(77, 5)
(62, 5)


In [32]:
# Count the issues opened after the start of 2021 per author association.
# authorAssociation is categorical; observed=False is spelled out so that every
# category is reported (with 0 when empty) regardless of the pandas version's
# default, and so that no FutureWarning is emitted on pandas >= 2.1.
for g, v in cantera.loc[new_issues].groupby("authorAssociation", observed=False):
    print(g, len(v))


CONTRIBUTOR 14
MEMBER 34
NONE 29


In [33]:
# Issues opened during 2020 (strictly between the two boundaries) ...
opened_in_2020 = cantera["createdAt"].gt(JAN_1_2020) & cantera["createdAt"].lt(
    JAN_1_2021
)
# ... that were closed after the start of 2021.
closed_issues_from_2020 = opened_in_2020 & cantera["closedAt"].gt(JAN_1_2021)
print(cantera[closed_issues_from_2020].shape)


(16, 5)


In [34]:
# Issues opened before 2020 that were closed after the start of 2021.
opened_before_2020 = cantera["createdAt"].lt(JAN_1_2020)
closed_after_2021_start = cantera["closedAt"].gt(JAN_1_2021)
older_closed_issues = opened_before_2020 & closed_after_2021_start
print(cantera[older_closed_issues].shape)


(9, 5)


In [36]:
# Download every issue of the "enhancements" repository, one page per request,
# and collect the raw issue records in `nodes`.
nodes = []
after = ""  # the first page is requested without a cursor argument
while True:
    r = requests.post(
        URL,
        json={"query": query.format(repository="enhancements", after=after)},
        headers=headers,
    )
    r.raise_for_status()
    payload = r.json()
    # GraphQL reports query-level failures (e.g. an unknown repository) with
    # HTTP 200 and an "errors" list, which raise_for_status() cannot see.
    if payload.get("errors"):
        raise RuntimeError(f"GitHub GraphQL query failed: {payload['errors']}")
    data = payload["data"]["repository"]["issues"]
    nodes.extend(data["nodes"])
    has_next_page = data["pageInfo"]["hasNextPage"]
    if not has_next_page:
        break
    # Ask for the page that starts right after the last record received.
    end_cursor = data["pageInfo"]["endCursor"]
    after = f', after:"{end_cursor}"'

# One row per issue; columns are the fields requested in the query.
enhancements = pd.DataFrame(nodes)


In [37]:
# Convert the timestamp columns to timezone-naive datetimes so they can be
# compared with the naive JAN_1_2020 / JAN_1_2021 boundaries. Values are parsed
# as UTC and the timezone is then dropped; missing values (issues that are
# still open have no closedAt) become NaT.
# The unit-less .astype("datetime64") used previously is rejected by
# pandas >= 2.0; this form also stays a no-op when the cell is re-run.
for column in ("createdAt", "closedAt"):
    enhancements[column] = pd.to_datetime(
        enhancements[column], utc=True
    ).dt.tz_localize(None)

# authorAssociation has a handful of repeated labels, so store it as a category.
enhancements["authorAssociation"] = enhancements["authorAssociation"].astype("category")


In [39]:
# Enhancement requests opened after the start of 2021.
new_issues = enhancements["createdAt"].gt(JAN_1_2021)
print(enhancements[new_issues].shape)

# Of those, the ones that have also been closed after the start of 2021.
closed_after_2021_start = enhancements["closedAt"].gt(JAN_1_2021)
new_closed_issues = new_issues & closed_after_2021_start
print(enhancements[new_closed_issues].shape)


(33, 5)
(9, 5)


In [40]:
# Enhancement requests opened during 2020 (strictly between the boundaries) ...
opened_in_2020 = enhancements["createdAt"].gt(JAN_1_2020) & enhancements[
    "createdAt"
].lt(JAN_1_2021)
# ... that were closed after the start of 2021.
closed_issues_from_2020 = opened_in_2020 & enhancements["closedAt"].gt(JAN_1_2021)
print(enhancements[closed_issues_from_2020].shape)


(11, 5)


In [44]:
# --- Fetch -------------------------------------------------------------------
# Download every issue of the "cantera-jupyter" repository, one page per
# request, and collect the raw issue records in `nodes`.
nodes = []
after = ""  # the first page is requested without a cursor argument
while True:
    r = requests.post(
        URL,
        json={"query": query.format(repository="cantera-jupyter", after=after)},
        headers=headers,
    )
    r.raise_for_status()
    payload = r.json()
    # GraphQL reports query-level failures (e.g. an unknown repository) with
    # HTTP 200 and an "errors" list, which raise_for_status() cannot see.
    if payload.get("errors"):
        raise RuntimeError(f"GitHub GraphQL query failed: {payload['errors']}")
    data = payload["data"]["repository"]["issues"]
    nodes.extend(data["nodes"])
    has_next_page = data["pageInfo"]["hasNextPage"]
    if not has_next_page:
        break
    # Ask for the page that starts right after the last record received.
    end_cursor = data["pageInfo"]["endCursor"]
    after = f', after:"{end_cursor}"'

# --- Tidy --------------------------------------------------------------------
jupyter = pd.DataFrame(nodes)
# Parse timestamps as UTC, then drop the timezone so they compare against the
# naive JAN_1_* boundaries; missing closedAt values become NaT. The unit-less
# .astype("datetime64") used previously is rejected by pandas >= 2.0.
for column in ("createdAt", "closedAt"):
    jupyter[column] = pd.to_datetime(jupyter[column], utc=True).dt.tz_localize(None)
jupyter["authorAssociation"] = jupyter["authorAssociation"].astype("category")

# --- Summarize ---------------------------------------------------------------
# Issues opened after the start of 2021.
new_issues = jupyter["createdAt"] > JAN_1_2021
print(jupyter.loc[new_issues].shape)
# ... of which closed after the start of 2021.
new_closed_issues = (jupyter["createdAt"] > JAN_1_2021) & (
    jupyter["closedAt"] > JAN_1_2021
)
print(jupyter.loc[new_closed_issues].shape)
# Issues opened during 2020 and closed after the start of 2021.
closed_issues_from_2020 = (
    (jupyter["createdAt"] > JAN_1_2020)
    & (jupyter["createdAt"] < JAN_1_2021)
    & (jupyter["closedAt"] > JAN_1_2021)
)
print(jupyter.loc[closed_issues_from_2020].shape)
# Issues opened before 2020 and closed after the start of 2021.
older_closed_issues = (jupyter["createdAt"] < JAN_1_2020) & (
    jupyter["closedAt"] > JAN_1_2021
)
print(jupyter.loc[older_closed_issues].shape)


(0, 5)
(0, 5)
(1, 5)
(0, 5)


In [45]:
# --- Fetch -------------------------------------------------------------------
# Download every issue of the "cantera-website" repository, one page per
# request, and collect the raw issue records in `nodes`.
nodes = []
after = ""  # the first page is requested without a cursor argument
while True:
    r = requests.post(
        URL,
        json={"query": query.format(repository="cantera-website", after=after)},
        headers=headers,
    )
    r.raise_for_status()
    payload = r.json()
    # GraphQL reports query-level failures (e.g. an unknown repository) with
    # HTTP 200 and an "errors" list, which raise_for_status() cannot see.
    if payload.get("errors"):
        raise RuntimeError(f"GitHub GraphQL query failed: {payload['errors']}")
    data = payload["data"]["repository"]["issues"]
    nodes.extend(data["nodes"])
    has_next_page = data["pageInfo"]["hasNextPage"]
    if not has_next_page:
        break
    # Ask for the page that starts right after the last record received.
    end_cursor = data["pageInfo"]["endCursor"]
    after = f', after:"{end_cursor}"'

# --- Tidy --------------------------------------------------------------------
website = pd.DataFrame(nodes)
# Parse timestamps as UTC, then drop the timezone so they compare against the
# naive JAN_1_* boundaries; missing closedAt values become NaT. The unit-less
# .astype("datetime64") used previously is rejected by pandas >= 2.0.
for column in ("createdAt", "closedAt"):
    website[column] = pd.to_datetime(website[column], utc=True).dt.tz_localize(None)
website["authorAssociation"] = website["authorAssociation"].astype("category")

# --- Summarize ---------------------------------------------------------------
# Issues opened after the start of 2021.
new_issues = website["createdAt"] > JAN_1_2021
print(website.loc[new_issues].shape)
# ... of which closed after the start of 2021.
new_closed_issues = (website["createdAt"] > JAN_1_2021) & (
    website["closedAt"] > JAN_1_2021
)
print(website.loc[new_closed_issues].shape)
# Issues opened during 2020 and closed after the start of 2021.
closed_issues_from_2020 = (
    (website["createdAt"] > JAN_1_2020)
    & (website["createdAt"] < JAN_1_2021)
    & (website["closedAt"] > JAN_1_2021)
)
print(website.loc[closed_issues_from_2020].shape)
# Issues opened before 2020 and closed after the start of 2021.
older_closed_issues = (website["createdAt"] < JAN_1_2020) & (
    website["closedAt"] > JAN_1_2021
)
print(website.loc[older_closed_issues].shape)


(16, 5)
(9, 5)
(0, 5)
(3, 5)


In [46]:
# GraphQL template that lists a repository's pull requests, newest first, 100
# per request, together with the paging information for the next page.
#
# The text is passed through str.format() before being sent, so every literal
# GraphQL brace is doubled and two placeholders are filled in per request:
#   {repository} - name of the repository under the "cantera" owner
#   {after}      - "" for the first page, otherwise ', after:"<cursor>"' built
#                  from the previous page's pageInfo.endCursor
#
# NOTE: this rebinds `query`, replacing the issues template assigned earlier in
# the notebook. Re-running one of the issue cells after this cell would send
# this pull-request query and then fail when looking up the "issues" key.
query = """\
query cantera_pull_requests {{
  repository(name: "{repository}", owner: "cantera") {{
    pullRequests(first: 100, orderBy: {{field: CREATED_AT, direction: DESC}}{after}) {{
      nodes {{
        author {{
          login
        }}
        authorAssociation
        createdAt
        number
        merged
        mergedAt
        mergedBy {{
          login
        }}
        closed
        closedAt
        state
        title
      }}
      pageInfo {{
        endCursor
        hasNextPage
      }}
    }}
  }}
}}
"""


In [56]:
# --- Fetch -------------------------------------------------------------------
# Download every pull request of the "cantera" repository, one page per
# request, and collect the raw records in `nodes`.
nodes = []
after = ""  # the first page is requested without a cursor argument
while True:
    r = requests.post(
        URL,
        json={"query": query.format(repository="cantera", after=after)},
        headers=headers,
    )
    r.raise_for_status()
    payload = r.json()
    # GraphQL reports query-level failures (e.g. an unknown repository) with
    # HTTP 200 and an "errors" list, which raise_for_status() cannot see.
    if payload.get("errors"):
        raise RuntimeError(f"GitHub GraphQL query failed: {payload['errors']}")
    data = payload["data"]["repository"]["pullRequests"]
    nodes.extend(data["nodes"])
    has_next_page = data["pageInfo"]["hasNextPage"]
    if not has_next_page:
        break
    # Ask for the page that starts right after the last record received.
    end_cursor = data["pageInfo"]["endCursor"]
    after = f', after:"{end_cursor}"'

# --- Tidy --------------------------------------------------------------------
# Flatten the nested {"login": ...} objects to plain login strings. mergedBy is
# null for unmerged pull requests; author is nullable in GitHub's schema too
# (e.g. the account was deleted), so a missing author is kept as None instead
# of raising TypeError. Rows with a missing key are skipped by groupby below.
for n in nodes:
    author = n.get("author")
    n["author"] = author["login"] if author else None
    if n.get("mergedBy", False):
        n["mergedBy"] = n["mergedBy"]["login"]

cantera_pr = pd.DataFrame(nodes)
# Parse timestamps as UTC, then drop the timezone so they compare against the
# naive JAN_1_* boundaries; missing values become NaT. The unit-less
# .astype("datetime64") used previously is rejected by pandas >= 2.0.
for column in ("createdAt", "closedAt", "mergedAt"):
    cantera_pr[column] = pd.to_datetime(
        cantera_pr[column], utc=True
    ).dt.tz_localize(None)
cantera_pr[["authorAssociation", "state"]] = cantera_pr[
    ["authorAssociation", "state"]
].astype("category")

# --- Summarize ---------------------------------------------------------------
# Pull requests opened after the start of 2021.
new_prs = cantera_pr["createdAt"] > JAN_1_2021
print(cantera_pr.loc[new_prs].shape)
# ... of which closed after the start of 2021.
new_closed_prs = (cantera_pr["createdAt"] > JAN_1_2021) & (
    cantera_pr["closedAt"] > JAN_1_2021
)
print(cantera_pr.loc[new_closed_prs].shape)
# Pull requests opened during 2020 and closed after the start of 2021.
closed_prs_from_2020 = (
    (cantera_pr["createdAt"] > JAN_1_2020)
    & (cantera_pr["createdAt"] < JAN_1_2021)
    & (cantera_pr["closedAt"] > JAN_1_2021)
)
print(cantera_pr.loc[closed_prs_from_2020].shape)
# Pull requests opened before 2020 and closed after the start of 2021.
older_closed_prs = (cantera_pr["createdAt"] < JAN_1_2020) & (
    cantera_pr["closedAt"] > JAN_1_2021
)
print(cantera_pr.loc[older_closed_prs].shape)

# Breakdown of the new pull requests. authorAssociation is categorical;
# observed=False is explicit so empty categories are still listed with 0 on
# every pandas version and no FutureWarning is emitted on pandas >= 2.1.
for g, v in cantera_pr.loc[new_prs].groupby("authorAssociation", observed=False):
    print(g, len(v))
for g, v in cantera_pr.loc[new_prs].groupby("author"):
    print(g, len(v))
for g, v in cantera_pr.loc[new_prs].groupby("mergedBy"):
    print(g, len(v))



(127, 11)
(111, 11)
(10, 11)
(3, 11)
CONTRIBUTOR 23
FIRST_TIME_CONTRIBUTOR 6
MEMBER 98
NONE 0
12Chao 2
BangShiuh 1
ChrisBNEU 1
DavidAkinpelu 1
anthony-walker 2
band-a-prend 1
bryanwweber 14
chinahg 2
d-e-t 1
decaluwe 2
dschmider-HSOG 1
gkogekar 2
hallaali 1
ischoegl 37
jongyoonbae 2
lavdwall 3
lavrenyukiv 1
leesharma 1
mazeau 1
mefuller 6
paulblum 2
speth 41
stijn76 1
tpg2114 1
bryanwweber 23
decaluwe 5
ischoegl 26
speth 50


In [54]:
# --- Fetch -------------------------------------------------------------------
# Download every pull request of the "cantera-jupyter" repository, one page per
# request, and collect the raw records in `nodes`.
nodes = []
after = ""  # the first page is requested without a cursor argument
while True:
    r = requests.post(
        URL,
        json={"query": query.format(repository="cantera-jupyter", after=after)},
        headers=headers,
    )
    r.raise_for_status()
    payload = r.json()
    # GraphQL reports query-level failures (e.g. an unknown repository) with
    # HTTP 200 and an "errors" list, which raise_for_status() cannot see.
    if payload.get("errors"):
        raise RuntimeError(f"GitHub GraphQL query failed: {payload['errors']}")
    data = payload["data"]["repository"]["pullRequests"]
    nodes.extend(data["nodes"])
    has_next_page = data["pageInfo"]["hasNextPage"]
    if not has_next_page:
        break
    # Ask for the page that starts right after the last record received.
    end_cursor = data["pageInfo"]["endCursor"]
    after = f', after:"{end_cursor}"'

# --- Tidy --------------------------------------------------------------------
# Flatten the nested {"login": ...} objects to plain login strings. mergedBy is
# null for unmerged pull requests; author is nullable in GitHub's schema too
# (e.g. the account was deleted), so a missing author is kept as None instead
# of raising TypeError. Rows with a missing key are skipped by groupby below.
for n in nodes:
    author = n.get("author")
    n["author"] = author["login"] if author else None
    if n.get("mergedBy", False):
        n["mergedBy"] = n["mergedBy"]["login"]

jupyter_pr = pd.DataFrame(nodes)
# Parse timestamps as UTC, then drop the timezone so they compare against the
# naive JAN_1_* boundaries; missing values become NaT. The unit-less
# .astype("datetime64") used previously is rejected by pandas >= 2.0.
for column in ("createdAt", "closedAt", "mergedAt"):
    jupyter_pr[column] = pd.to_datetime(
        jupyter_pr[column], utc=True
    ).dt.tz_localize(None)
jupyter_pr[["authorAssociation", "state"]] = jupyter_pr[
    ["authorAssociation", "state"]
].astype("category")

# --- Summarize ---------------------------------------------------------------
# Pull requests opened after the start of 2021.
new_prs = jupyter_pr["createdAt"] > JAN_1_2021
print(jupyter_pr.loc[new_prs].shape)
# ... of which closed after the start of 2021.
new_closed_prs = (jupyter_pr["createdAt"] > JAN_1_2021) & (
    jupyter_pr["closedAt"] > JAN_1_2021
)
print(jupyter_pr.loc[new_closed_prs].shape)
# Pull requests opened during 2020 and closed after the start of 2021.
closed_prs_from_2020 = (
    (jupyter_pr["createdAt"] > JAN_1_2020)
    & (jupyter_pr["createdAt"] < JAN_1_2021)
    & (jupyter_pr["closedAt"] > JAN_1_2021)
)
print(jupyter_pr.loc[closed_prs_from_2020].shape)
# Pull requests opened before 2020 and closed after the start of 2021.
older_closed_prs = (jupyter_pr["createdAt"] < JAN_1_2020) & (
    jupyter_pr["closedAt"] > JAN_1_2021
)
print(jupyter_pr.loc[older_closed_prs].shape)

# Breakdown of the new pull requests. authorAssociation is categorical;
# observed=False is explicit so empty categories are still listed with 0 on
# every pandas version and no FutureWarning is emitted on pandas >= 2.1.
for g, v in jupyter_pr.loc[new_prs].groupby("authorAssociation", observed=False):
    print(g, len(v))
for g, v in jupyter_pr.loc[new_prs].groupby("author"):
    print(g, len(v))
for g, v in jupyter_pr.loc[new_prs].groupby("mergedBy"):
    print(g, len(v))


(3, 11)
(1, 11)
(1, 11)
(0, 11)
CONTRIBUTOR 0
FIRST_TIME_CONTRIBUTOR 0
MEMBER 3
NONE 0
bryanwweber 2
gkogekar 1
decaluwe 1


In [55]:
# --- Fetch -------------------------------------------------------------------
# Download every pull request of the "cantera-website" repository, one page per
# request, and collect the raw records in `nodes`.
nodes = []
after = ""  # the first page is requested without a cursor argument
while True:
    r = requests.post(
        URL,
        json={"query": query.format(repository="cantera-website", after=after)},
        headers=headers,
    )
    r.raise_for_status()
    payload = r.json()
    # GraphQL reports query-level failures (e.g. an unknown repository) with
    # HTTP 200 and an "errors" list, which raise_for_status() cannot see.
    if payload.get("errors"):
        raise RuntimeError(f"GitHub GraphQL query failed: {payload['errors']}")
    data = payload["data"]["repository"]["pullRequests"]
    nodes.extend(data["nodes"])
    has_next_page = data["pageInfo"]["hasNextPage"]
    if not has_next_page:
        break
    # Ask for the page that starts right after the last record received.
    end_cursor = data["pageInfo"]["endCursor"]
    after = f', after:"{end_cursor}"'

# --- Tidy --------------------------------------------------------------------
# Flatten the nested {"login": ...} objects to plain login strings. mergedBy is
# null for unmerged pull requests; author is nullable in GitHub's schema too
# (e.g. the account was deleted), so a missing author is kept as None instead
# of raising TypeError. Rows with a missing key are skipped by groupby below.
for n in nodes:
    author = n.get("author")
    n["author"] = author["login"] if author else None
    if n.get("mergedBy", False):
        n["mergedBy"] = n["mergedBy"]["login"]

website_pr = pd.DataFrame(nodes)
# Parse timestamps as UTC, then drop the timezone so they compare against the
# naive JAN_1_* boundaries; missing values become NaT. The unit-less
# .astype("datetime64") used previously is rejected by pandas >= 2.0.
for column in ("createdAt", "closedAt", "mergedAt"):
    website_pr[column] = pd.to_datetime(
        website_pr[column], utc=True
    ).dt.tz_localize(None)
website_pr[["authorAssociation", "state"]] = website_pr[
    ["authorAssociation", "state"]
].astype("category")

# --- Summarize ---------------------------------------------------------------
# Pull requests opened after the start of 2021.
new_prs = website_pr["createdAt"] > JAN_1_2021
print(website_pr.loc[new_prs].shape)
# ... of which closed after the start of 2021.
new_closed_prs = (website_pr["createdAt"] > JAN_1_2021) & (
    website_pr["closedAt"] > JAN_1_2021
)
print(website_pr.loc[new_closed_prs].shape)
# Pull requests opened during 2020 and closed after the start of 2021.
closed_prs_from_2020 = (
    (website_pr["createdAt"] > JAN_1_2020)
    & (website_pr["createdAt"] < JAN_1_2021)
    & (website_pr["closedAt"] > JAN_1_2021)
)
print(website_pr.loc[closed_prs_from_2020].shape)
# Pull requests opened before 2020 and closed after the start of 2021.
older_closed_prs = (website_pr["createdAt"] < JAN_1_2020) & (
    website_pr["closedAt"] > JAN_1_2021
)
print(website_pr.loc[older_closed_prs].shape)

# Breakdown of the new pull requests. authorAssociation is categorical;
# observed=False is explicit so empty categories are still listed with 0 on
# every pandas version and no FutureWarning is emitted on pandas >= 2.1.
for g, v in website_pr.loc[new_prs].groupby("authorAssociation", observed=False):
    print(g, len(v))
for g, v in website_pr.loc[new_prs].groupby("author"):
    print(g, len(v))
for g, v in website_pr.loc[new_prs].groupby("mergedBy"):
    print(g, len(v))


(23, 11)
(20, 11)
(4, 11)
(1, 11)
CONTRIBUTOR 7
FIRST_TIME_CONTRIBUTOR 0
MEMBER 16
NONE 0
12Chao 1
band-a-prend 1
bryanwweber 5
chinahg 1
ischoegl 3
jiweiqi 2
jsantner 1
kyleniemeyer 1
mefuller 2
speth 6
bryanwweber 12
decaluwe 2
speth 5


In [58]:
# Stack the pull requests of the three repositories into one frame with a
# fresh 0..n-1 index.
all_prs = pd.concat([cantera_pr, jupyter_pr, website_pr], ignore_index=True)

# Number of pull requests opened after the start of 2021, keyed by author login.
recent_prs = all_prs[all_prs["createdAt"].gt(JAN_1_2021)]
all_authors = {}
for login, rows in recent_prs.groupby("author"):
    all_authors[login] = len(rows)

print(all_authors)
# Number of distinct authors in that period.
print(len(all_authors))

{'12Chao': 3, 'BangShiuh': 1, 'ChrisBNEU': 1, 'DavidAkinpelu': 1, 'anthony-walker': 2, 'band-a-prend': 2, 'bryanwweber': 21, 'chinahg': 3, 'd-e-t': 1, 'decaluwe': 2, 'dschmider-HSOG': 1, 'gkogekar': 3, 'hallaali': 1, 'ischoegl': 40, 'jiweiqi': 2, 'jongyoonbae': 2, 'jsantner': 1, 'kyleniemeyer': 1, 'lavdwall': 3, 'lavrenyukiv': 1, 'leesharma': 1, 'mazeau': 1, 'mefuller': 8, 'paulblum': 2, 'speth': 47, 'stijn76': 1, 'tpg2114': 1}
27
