In [None]:
import pandas as pd
from github import Github, Auth
import os
from dotenv import dotenv_values
import sys
sys.path.append('../../')
from utils.dataloader import get_issues, get_pull_requests

In [2]:
# Read secrets from the repository-level .env file. dotenv_values() only parses
# the file into a mapping; it does not modify os.environ.
config = dotenv_values("../../.env")

# Prefer the token from .env, but fall back to an exported environment variable
# so the notebook also runs where no .env file is checked out (e.g. CI).
github_token = config.get('GITHUB_TOKEN') or os.environ.get('GITHUB_TOKEN')
if not github_token:
    # Same exception type as the plain dict lookup would raise, but actionable.
    raise KeyError(
        "GITHUB_TOKEN is not set: add it to ../../.env or export it as an environment variable"
    )

# Authenticated client used by the later cells to query repository metadata.
auth = Auth.Token(github_token)
gh = Github(auth=auth)

In [None]:
def _count_files_per_repo(filenames, repo_names):
    """Count how many of ``filenames`` belong to each name in ``repo_names``.

    A file belongs to a repository when the repository's short name occurs in
    the filename. When several short names occur in the same filename (e.g.
    ``phpstan`` and ``phpstan-src``, or ``doop`` and ``doop-deprecated``), the
    file is attributed only to the longest one, so a repository whose name is a
    prefix/substring of another is not credited with the other's files.

    Args:
        filenames: iterable of file names (not paths).
        repo_names: iterable of repository short names.

    Returns:
        dict mapping every distinct short name to its file count (0 if none).
    """
    counts = dict.fromkeys(repo_names, 0)
    for filename in filenames:
        matches = [name for name in counts if name in filename]
        if not matches:
            continue
        longest = max(len(name) for name in matches)
        for name in matches:
            if len(name) == longest:
                counts[name] += 1
    return counts


data = []
repos = pd.read_csv('../../results/csv/general/repositories.csv')
issues = pd.DataFrame(get_issues())
pull_requests = pd.DataFrame(get_pull_requests())

# The directory listings do not depend on the repository being processed, so
# read each directory once instead of once per repository.
repo_short_names = [full_name.split('/')[-1] for full_name in repos['Repository']]
issue_file_counts = _count_files_per_repo(os.listdir('../../data/issues'), repo_short_names)
pull_request_file_counts = _count_files_per_repo(os.listdir('../../data/pull_requests'), repo_short_names)
commit_file_counts = _count_files_per_repo(os.listdir('../../data/commits'), repo_short_names)

for index, _repo in repos.iterrows():
    print(f"Processing repository: {_repo['Repository']}")

    # Star counts come from the GitHub API; repositories on any other host are
    # recorded with 0 stars.
    if _repo['Host'] == 'Github':
        repo = gh.get_repo(_repo['Repository'])
        stars_count = repo.stargazers_count
    else:
        stars_count = 0

    short_name = _repo['Repository'].split('/')[-1]
    issues_count = issue_file_counts[short_name]
    pull_requests_count = pull_request_file_counts[short_name]
    commits_count = commit_file_counts[short_name]

    # many issue and pr users are the same, so remove duplicates
    issue_usernames = issues[(issues['repo'] == _repo['Repository']) & (issues['user'].notnull())]['user'].unique()
    pull_request_usernames = pull_requests[(pull_requests['repo'] == _repo['Repository']) & (pull_requests['user'].notnull())]['user'].unique()

    unique_users = len(set(issue_usernames) | set(pull_request_usernames))

    data.append({
        'Repository': _repo['Repository'],
        'Stars': stars_count,
        'Issues': issues_count,
        'Pull Requests': pull_requests_count,
        'Commits': commits_count,
        'Unique Users': unique_users,
        'Host': _repo['Host']
    })

# Sum only the numeric columns; summing the whole frame would also concatenate
# the 'Repository' and 'Host' strings, and would fail on an empty repository list.
count_columns = ['Stars', 'Issues', 'Pull Requests', 'Commits', 'Unique Users']
totals = pd.DataFrame(data, columns=count_columns).sum()

# NOTE(review): the 'Unique Users' total is the sum of the per-repository counts,
# so a user active in several repositories is counted once per repository.
data.append({
    'Repository': 'Total',
    'Stars': totals['Stars'],
    'Issues': totals['Issues'],
    'Pull Requests': totals['Pull Requests'],
    'Commits': totals['Commits'],
    'Unique Users': totals['Unique Users'],
    'Host': 'Github'
})

Processing repository: soot-oss/soot
Processing repository: soot-oss/SootUp
Processing repository: wala/WALA
Processing repository: secure-software-engineering/FlowDroid
Processing repository: secure-software-engineering/phasar
Processing repository: opalj/opal
Processing repository: facebook/infer
Processing repository: arguslab/Argus-SAF
Processing repository: MIT-PAC/droidsafe-src
Processing repository: vitsalis/PyCG
Processing repository: scottrogowski/code2flow
Processing repository: cs-au-dk/TAJS
Processing repository: diffblue/cbmc
Processing repository: plast-lab/doop
Processing repository: phpstan/phpstan
Processing repository: phpstan/phpstan-src
Processing repository: security-code-scan/security-code-scan
Processing repository: pmd/pmd
Processing repository: spotbugs/spotbugs
Processing repository: yanniss/doop-deprecated


In [None]:
# Turn the collected per-repository records (including the 'Total' row) into a
# table ordered from most to least starred, then export it without the index.
data = pd.DataFrame(data).sort_values(by='Stars', ascending=False)
data.to_csv('../../results/figures/general/subject_tools.csv', index=False)