In [1]:
import pandas as pd
import sys
sys.path.append('../..')

from utils.dataloader import get_issues

# Load the issue table once; every cell below reads from `issues` and filters it
# by column (e.g. 'prop:state', 'prop:resolution', 'prop:comments').
# NOTE(review): the cells below treat the result as a pandas DataFrame — the
# loader itself lives in utils.dataloader and is not visible here.
issues = get_issues()

In [2]:
# Share of still-open issues whose 'prop:resolution' value is at least 365.
# NOTE(review): this reads 'prop:resolution' as an age in days for open issues —
# confirm against the dataloader.
is_open = issues['prop:state'] == 'open'
open_issues = issues.loc[is_open]

# Keep only the open issues at or above the one-year threshold.
is_long_running = open_issues['prop:resolution'] >= 365
open_issues_one_year = open_issues.loc[is_long_running]

# Ratio first, then scale to a percentage.
percent_open_issues_one_year = (len(open_issues_one_year) / len(open_issues)) * 100
print(f"Percent of open issues that are open for at least one year: {percent_open_issues_one_year:.2f}%")

Percent of open issues that are open for at least one year: 79.84%


In [3]:
# Resolution-time summary for closed issues: maximum, median, and the
# 20th / 80th percentiles.
# NOTE(review): the labels below report 'prop:resolution' in days — confirm the
# unit against the dataloader.
closed_issues = issues[issues['prop:state'] == 'closed']
resolution_times = closed_issues['prop:resolution']

# idxmax() yields the index label of the largest resolution value; .loc then
# fetches that row so its URL can be printed next to the duration.
max_resolution_issue = closed_issues.loc[resolution_times.idxmax()]
# The unit is attached to the duration (previously it trailed the URL, which
# rendered as "<url> days").
print(f"Max resolution time: {max_resolution_issue['prop:resolution']} days, {max_resolution_issue['html_url']}")

# median
print(f"Median resolution time: {resolution_times.median()} days")

# 20th percentile
print(f"20th percentile resolution time: {resolution_times.quantile(0.2)} days")

# 80th percentile
print(f"80th percentile resolution time: {resolution_times.quantile(0.8)} days")

Max resolution time: 3350.830972222222, https://github.com/facebook/infer/issues/8 days
Median resolution time: 5.370891203703704 days
20th percentile resolution time: 0.07823148148148155 days
80th percentile resolution time: 161.9828217592594 days


In [4]:
# Distribution of the per-issue comment count ('prop:comments') over all issues.
comment_counts = issues['prop:comments']

# Median, followed by the quartiles (the 0.5 quantile repeats the median).
median_comments = comment_counts.median()
print(f"Median number of comments: {median_comments}")
quantiles = comment_counts.quantile([0.25, 0.5, 0.75])
print(f"Quantiles of number of comments:\n{quantiles}")

# Share of issues with fewer than 3 comments: count the True entries of the
# mask and divide by the total number of rows.
total_issues = len(issues)
less_than_3_comments = int((comment_counts < 3).sum())
percent_less_than_3_comments = (less_than_3_comments / total_issues) * 100
print(f"Percent of issues with less than 3 comments: {percent_less_than_3_comments:.2f}%")

# Ten issues with the highest comment counts, one line each.
most_commented_issues = issues.nlargest(n=10, columns='prop:comments')
print("Most commented issues:")
for _, top_issue in most_commented_issues.iterrows():
    url = top_issue['html_url']
    n_comments = top_issue['prop:comments']
    category = top_issue['prop:category']
    print(f"{url} - {n_comments} comments - {category}")

Median number of comments: 2.0
Quantiles of number of comments:
0.25    1.0
0.50    2.0
0.75    5.0
Name: prop:comments, dtype: float64
Percent of issues with less than 3 comments: 50.55%
Most commented issues:
https://github.com/pmd/pmd/issues/2868 - 86 comments - enhancement
https://github.com/facebook/infer/issues/34 - 77 comments - question
https://github.com/spotbugs/spotbugs/issues/8 - 75 comments - enhancement
https://github.com/phpstan/phpstan/issues/1010 - 61 comments - enhancement
https://github.com/phpstan/phpstan/issues/67 - 59 comments - bug
https://github.com/phpstan/phpstan/issues/3931 - 58 comments - bug
https://github.com/phpstan/phpstan/issues/786 - 54 comments - bug
https://github.com/pmd/pmd/issues/419 - 53 comments - enhancement
https://github.com/phpstan/phpstan/issues/4072 - 52 comments - question
https://github.com/wala/WALA/issues/99 - 52 comments - bug


In [5]:
# unique users
unique_users = issues['prop:users'].nunique()
# percent less than 2 users
less_than_2_users = issues[issues['prop:users'] < 2].shape[0]
percent_less_than_2_users = (less_than_2_users / total_issues) * 100
print(f"Percent of issues with less than 2 unique users: {percent_less_than_2_users:.2f}%")

Percent of issues with less than 2 unique users: 42.86%


In [6]:
# Summary of the 'prop:files' column: the share of issues where it is 0, and the
# median / 75th percentile among issues where it is positive.
files_changed = issues['prop:files']

# Count of issues with no files changed, reported as a percentage of all issues.
# The label says "Percent" because the printed value is a percentage, not a count.
no_files_changed = int((files_changed == 0).sum())
print(f"Percent of issues with no files changed: {no_files_changed/issues.shape[0] * 100:.2f}%")

# Restrict to issues with at least one file changed; computed once and reused
# for both statistics below.
files_changed_nonzero = files_changed[files_changed > 0]

# median of number of files changed (only those with at least one file changed)
median_files_changed = files_changed_nonzero.median()
print(f"Median number of files changed: {median_files_changed}")
# 75th percentile of number of files changed
quantile_files_changed = files_changed_nonzero.quantile(0.75)
print(f"75th percentile of number of files changed: {quantile_files_changed}")

Number of issues with no files changed: 84.01%
Median number of files changed: 5.0
75th percentile of number of files changed: 18.0
