In [1]:
import pandas
import matplotlib
import requests
import warnings

import sys
sys.path.append('..')

from helper import load_data , load_repo , GIT_API
warnings.filterwarnings('ignore')

%matplotlib inline

import os
import json
import time

In [2]:
# Root folder of the on-disk cache of raw JSON API responses.
json_store_dir = os.path.join('..', 'data', 'json')

In [3]:
# Repository used to smoke-test the scraping helpers.
testrepo = 'https://github.com/rust-lang/cargo'

# Pause (in seconds) inserted before every HTTP request.
time_sleep = 0.1

# Kind of item whose comments are fetched.
ISSUE, PULL = 'issue', 'pull'

# State values used in the `is:<state>` search qualifier.
OPEN, CLOSED = 'open', 'closed'

In [28]:
def get_open_closed_issues(repo):
    """Scrape the open and closed issue counts from a repository's issues page.

    Parameters
    ----------
    repo : str
        Repository URL, e.g. ``'https://github.com/rust-lang/cargo'``.

    Returns
    -------
    tuple of (int, int)
        ``(open_issues, closed_issues)``. Both values are 0 when the
        expected markers cannot be located in the returned HTML.
    """
    time.sleep(time_sleep)  # throttle consecutive requests
    page = requests.get(repo + '/issues').text

    def _count(marker, occurrence, label):
        # The number sits between the icon's closing </svg> tag and the label.
        section = page.split(marker)[occurrence]
        return int(section.split(label)[0].split('</svg>')[1].strip().replace(",", ""))

    try:
        open_issues = _count('<svg class="octicon octicon-issue-opened"', 2, 'Open')
        closed_issues = _count('<svg class="octicon octicon-check"', 1, 'Closed')
    except (IndexError, ValueError):
        # IndexError: a marker is missing from the page; ValueError: the text
        # found is not a number. Only these are caught, so KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        return 0, 0

    return open_issues, closed_issues

In [163]:
def get_comments(repo,issue_id,is_pull_issue,is_open):
    """Fetch the conversation comments of one issue or pull request.

    The raw JSON response is cached under ``json_store_dir``; when a valid
    cached file exists no request is made.

    Parameters
    ----------
    repo : str
        Repository URL, e.g. ``'https://github.com/rust-lang/cargo'``.
    issue_id : int or str
        Number of the issue or pull request.
    is_pull_issue : str
        ``ISSUE`` or ``PULL``; selects the cache sub-folder.
    is_open : bool or str
        Either a boolean, or one of the state strings ``OPEN`` / ``CLOSED``.

    Returns
    -------
    pandas.DataFrame
        One row per comment. Column dtypes are inferred by pandas.

    Raises
    ------
    ValueError
        If ``is_pull_issue`` is neither ``ISSUE`` nor ``PULL``, or if the API
        answers with something other than a list of comments.
    requests.HTTPError
        If the API answers with an error status.
    """
    columns = ["project_name","issue_number","comment_id","user_login",
               "created_at","updated_at","author_assoc","is_open"]

    # A trailing slash is tolerated so cache path and API URL always agree.
    owner, name = repo.rstrip('/').split('/')[-2:]

    if is_pull_issue == ISSUE:
        kind_folder = 'issue_comments'
    elif is_pull_issue == PULL:
        kind_folder = 'pull_comments'
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError('is_pull_issue must be {!r} or {!r}, got {!r}'.format(
            ISSUE, PULL, is_pull_issue))

    par_folder = os.path.join(json_store_dir, kind_folder, owner, name)
    json_store_location = os.path.join(par_folder, str(issue_id) + '.json')

    comments = None
    if os.path.exists(json_store_location):
        with open(json_store_location) as f:
            comments = json.load(f)

    # A cached non-list is an error payload stored by an earlier run; refetch.
    if not isinstance(comments, list):
        issue_comment = 'https://api.github.com/repos/{}/{}/issues/{}/comments'.format(
                owner, name, issue_id)
        # Credentials go through HTTP basic auth instead of the query string,
        # so the secret is never printed or logged together with the URL.
        client_id, client_secret = GIT_API[11].split(':')[:2]

        print(issue_comment)
        time.sleep(time_sleep)
        req = requests.get(issue_comment, auth=(client_id, client_secret))
        req.raise_for_status()
        comments = req.json()
        if not isinstance(comments, list):
            raise ValueError('Unexpected response for {}: {!r}'.format(
                issue_comment, comments))
        # NOTE(review): only the first page of the response is read; items with
        # more comments than the API's page size are presumably truncated.

        # Cache only validated responses so errors never poison the store.
        os.makedirs(par_folder, exist_ok=True)
        with open(json_store_location, 'w') as outfile:
            json.dump(comments, outfile)

    # Callers pass the state string; 'closed' is truthy, so a plain truth test
    # would mark every comment as open.
    if isinstance(is_open, str):
        is_open_flag = 1 if is_open == OPEN else 0
    else:
        is_open_flag = 1 if is_open else 0

    # Build all rows first: DataFrame.append in a loop is quadratic and no
    # longer exists in pandas 2.x.
    records = [{
        "project_name": repo,
        "issue_number": issue_id,
        "comment_id": comment['id'],
        "user_login": comment['user']['login'] if comment['user'] else 'null',
        "created_at": comment['created_at'],
        "updated_at": comment['updated_at'],
        "author_assoc": comment['author_association'],
        "is_open": is_open_flag} for comment in comments]
    return pandas.DataFrame(records, columns=columns)

In [164]:
def get_open_closed_pulls(repo):
    """Scrape the open and closed pull request counts from a repository's pulls page.

    Parameters
    ----------
    repo : str
        Repository URL, e.g. ``'https://github.com/rust-lang/cargo'``.

    Returns
    -------
    tuple of (int, int)
        ``(open_pulls, closed_pulls)``. Both values are 0 when the expected
        markers cannot be located in the returned HTML.
    """
    time.sleep(time_sleep)  # throttle consecutive requests
    page = requests.get(repo + '/pulls').text

    def _count(marker, occurrence, label):
        # The number sits between the icon's closing </svg> tag and the label.
        section = page.split(marker)[occurrence]
        return int(section.split(label)[0].split('</svg>')[1].strip().replace(",", ""))

    try:
        open_pulls = _count('<svg class="octicon octicon-git-pull-request"', 2, 'Open')
        closed_pulls = _count('<svg class="octicon octicon-check"', 1, 'Closed')
    except (IndexError, ValueError):
        # IndexError: a marker is missing from the page; ValueError: the text
        # found is not a number. Only these are caught, so KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        return 0, 0

    return open_pulls, closed_pulls

In [165]:
def get_pull_comments(repo,pr_id,is_open):
    """Fetch the code-review comments of one pull request.

    The raw JSON response is cached under
    ``json_store_dir/code_review_comments``; when a valid cached file exists
    no request is made.

    Parameters
    ----------
    repo : str
        Repository URL, e.g. ``'https://github.com/rust-lang/cargo'``.
    pr_id : int or str
        Number of the pull request.
    is_open : bool or str
        Either a boolean, or one of the state strings ``OPEN`` / ``CLOSED``.

    Returns
    -------
    pandas.DataFrame
        One row per review comment. Column dtypes are inferred by pandas.

    Raises
    ------
    ValueError
        If the API answers with something other than a list of comments.
    requests.HTTPError
        If the API answers with an error status.
    """
    columns = ["project_name",
               "pr_id",
               "pr_review_id",
               "comment_id",
               "commit_id",
               "original_commit_id",
               "user_login",
               "created_at",
               "updated_at",
               "author_assoc",
               "is_open"]

    # A trailing slash is tolerated so cache path and API URL always agree.
    owner, name = repo.rstrip('/').split('/')[-2:]

    par_folder = os.path.join(json_store_dir, 'code_review_comments', owner, name)
    json_store_location = os.path.join(par_folder, str(pr_id) + '.json')

    comments = None
    if os.path.exists(json_store_location):
        with open(json_store_location) as f:
            comments = json.load(f)

    # A cached non-list is an error payload stored by an earlier run; refetch.
    if not isinstance(comments, list):
        pr_comment = 'https://api.github.com/repos/{}/{}/pulls/{}/comments'.format(
                owner, name, pr_id)
        # Credentials go through HTTP basic auth instead of the query string,
        # so the secret is never printed or logged together with the URL.
        client_id, client_secret = GIT_API[11].split(':')[:2]

        time.sleep(time_sleep)
        req = requests.get(pr_comment, auth=(client_id, client_secret))
        print(pr_comment)
        req.raise_for_status()
        comments = req.json()
        if not isinstance(comments, list):
            raise ValueError('Unexpected response for {}: {!r}'.format(
                pr_comment, comments))
        # NOTE(review): only the first page of the response is read; pull
        # requests with more review comments than the API's page size are
        # presumably truncated.

        # Cache only validated responses so errors never poison the store.
        os.makedirs(par_folder, exist_ok=True)
        with open(json_store_location, 'w') as outfile:
            json.dump(comments, outfile)

    # Callers pass the state string; 'closed' is truthy, so a plain truth test
    # would mark every comment as open.
    if isinstance(is_open, str):
        is_open_flag = 1 if is_open == OPEN else 0
    else:
        is_open_flag = 1 if is_open else 0

    # Build all rows first: DataFrame.append in a loop is quadratic and no
    # longer exists in pandas 2.x.
    records = [{
        "project_name": repo,
        "pr_id": pr_id,
        "pr_review_id" : comment['pull_request_review_id'],
        "comment_id": comment['id'],
        "commit_id" : comment['commit_id'],
        "original_commit_id" : comment['original_commit_id'],
        "user_login": comment['user']['login'] if comment['user'] else 'null',
        "created_at": comment['created_at'],
        "updated_at": comment['updated_at'],
        "author_assoc": comment['author_association'],
        "is_open": is_open_flag} for comment in comments]
    return pandas.DataFrame(records, columns=columns)

# Dataframes

In [166]:
# Smoke test: read the issue and pull request counters of the sample repository.
open_issue, closed_issue = get_open_closed_issues(testrepo)
open_pr, closed_pr = get_open_closed_pulls(testrepo)

# Open/Closed Issues Comments

In [167]:
def get_issues_comments(repo,is_open_closed,issues_number):
    """Collect the comments of every commented issue in the given state.

    Walks the repository's HTML issue listing page by page, extracts the
    issue numbers from the ``id="issue_<n>"`` attributes and fetches the
    comments of each issue that shows a comment icon.

    Parameters
    ----------
    repo : str
        Repository URL, e.g. ``'https://github.com/rust-lang/cargo'``.
    is_open_closed : str
        ``OPEN`` or ``CLOSED``; inserted into the ``is:<state>`` qualifier.
    issues_number : int
        Number of issues in that state; determines how many pages are read.

    Returns
    -------
    pandas.DataFrame
        All fetched comments, or an empty frame when none were found.
    """
    frames = []
    # The page count assumes 25 items per listing page.
    for i in range(1,(issues_number//25)+2):
        issues_page_url = repo + ('/issues?page={}&q=is%3Aissue+is%3A'+is_open_closed).format(i)
        print(str(i)+ " -- "+ issues_page_url)
        time.sleep(time_sleep)
        data = requests.get(issues_page_url).text
        for issue in data.split('id="issue_'):
            # The chunk before the first issue is the page preamble.
            if 'DOCTYPE' in issue:
                continue
            # Only issues showing a comment icon are fetched.
            if 'octicon octicon-comment' in issue:
                issue_id = issue.split('"')[0]
                # Pass a real boolean: the state string itself is always
                # truthy, which marked closed issues as open.
                frames.append(get_comments(repo,issue_id,ISSUE,is_open_closed == OPEN))

    if not frames:
        return pandas.DataFrame()
    # One concat instead of repeated DataFrame.append (removed in pandas 2.x).
    return pandas.concat(frames, ignore_index=True)

# Open/Closed Pull Request Comments

In [168]:
def get_pulls_comments(repo,is_open_closed,pulls_number):
    """Collect conversation and review comments of every commented pull request.

    Walks the repository's HTML pull request listing page by page, extracts
    the pull request numbers from the ``id="issue_<n>"`` attributes and
    fetches both kinds of comments for each entry that shows a comment icon.

    Parameters
    ----------
    repo : str
        Repository URL, e.g. ``'https://github.com/rust-lang/cargo'``.
    is_open_closed : str
        ``OPEN`` or ``CLOSED``; inserted into the ``is:<state>`` qualifier.
    pulls_number : int
        Number of pull requests in that state; determines how many pages are read.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(pulls_comments, pulls_review_comments)``; each is an empty frame
        when nothing was found.
    """
    comment_frames = []
    review_frames = []
    # Pass a real boolean downstream: the state string itself is always
    # truthy, which marked closed pull requests as open.
    is_open = is_open_closed == OPEN

    # The page count assumes 25 items per listing page.
    for i in range(1,(pulls_number//25)+2):
        pull_page_url = repo + ('/pulls?page={}&q=is%3Apr+is%3A'+is_open_closed).format(i)
        print(str(i) + " -- " +pull_page_url)
        time.sleep(time_sleep)
        data = requests.get(pull_page_url).text
        for issue in data.split('id="issue_'):
            # The chunk before the first entry is the page preamble.
            if 'DOCTYPE' in issue:
                continue
            # Only entries showing a comment icon are fetched.
            if 'octicon octicon-comment' in issue:
                pr_id = issue.split('"')[0]
                comment_frames.append(get_comments(repo,pr_id,PULL,is_open))
                review_frames.append(get_pull_comments(repo,pr_id,is_open))

    # One concat per result instead of repeated DataFrame.append (removed in
    # pandas 2.x); concat rejects an empty list, hence the fallbacks.
    pulls_comments = pandas.concat(comment_frames, ignore_index=True) if comment_frames else pandas.DataFrame()
    pulls_review_comments = pandas.concat(review_frames, ignore_index=True) if review_frames else pandas.DataFrame()
    return pulls_comments, pulls_review_comments

In [161]:
# Empty accumulators that the driver loop fills, one independent frame each.
issue_comments, pulls_comments, pulls_review_comments = (
    pandas.DataFrame() for _ in range(3)
)

In [162]:
# Driver: scrape every selected repository and checkpoint the accumulated
# comments to gzip-compressed CSV files after each repository.
# NOTE: issue_comments, pulls_comments and pulls_review_comments must have been
# initialised (as empty frames) before this cell runs; re-running this cell
# without resetting them accumulates duplicate rows.
selected_repos = pandas.read_csv('../data/selected_origins.csv')

for repo in selected_repos.Repository_URL.unique():
    # extracting number of issues and prs for proposed repo
    open_issue, closed_issue = get_open_closed_issues(repo)
    open_pr, closed_pr = get_open_closed_pulls(repo)

    print(repo )
    print("open issues : " + str(open_issue))
    print("closed issues : " + str(closed_issue))
    print("open pulls : " + str(open_pr))
    print("closed pulls : " + str(closed_pr))

    # pandas.concat replaces DataFrame.append, which was removed in pandas 2.x.
    issue_comments = pandas.concat(
        [issue_comments,
         get_issues_comments(repo,OPEN,open_issue),
         get_issues_comments(repo,CLOSED,closed_issue)],
        ignore_index=True)
    issue_comments.to_csv('../data/issue_comments.csv.gz',compression='gzip')

    pc, prc = get_pulls_comments(repo,OPEN,open_pr)
    pulls_comments = pandas.concat([pulls_comments, pc], ignore_index=True)
    pulls_review_comments = pandas.concat([pulls_review_comments, prc], ignore_index=True)
    pc, prc = get_pulls_comments(repo,CLOSED,closed_pr)
    pulls_comments = pandas.concat([pulls_comments, pc], ignore_index=True)
    pulls_review_comments = pandas.concat([pulls_review_comments, prc], ignore_index=True)
    pulls_comments.to_csv('../data/pulls_comments.csv.gz',compression='gzip')
    pulls_review_comments.to_csv('../data/pulls_review_comments.csv.gz',compression='gzip')


https://github.com/rust-lang/cargo
open issues : 705
closed issues : 2679
open pulls : 12
closed pulls : 2825
1 -- https://github.com/rust-lang/cargo/issues?page=1&q=is%3Aissue+is%3Aopen
2 -- https://github.com/rust-lang/cargo/issues?page=2&q=is%3Aissue+is%3Aopen
3 -- https://github.com/rust-lang/cargo/issues?page=3&q=is%3Aissue+is%3Aopen
4 -- https://github.com/rust-lang/cargo/issues?page=4&q=is%3Aissue+is%3Aopen
5 -- https://github.com/rust-lang/cargo/issues?page=5&q=is%3Aissue+is%3Aopen
6 -- https://github.com/rust-lang/cargo/issues?page=6&q=is%3Aissue+is%3Aopen
7 -- https://github.com/rust-lang/cargo/issues?page=7&q=is%3Aissue+is%3Aopen
8 -- https://github.com/rust-lang/cargo/issues?page=8&q=is%3Aissue+is%3Aopen
9 -- https://github.com/rust-lang/cargo/issues?page=9&q=is%3Aissue+is%3Aopen
10 -- https://github.com/rust-lang/cargo/issues?page=10&q=is%3Aissue+is%3Aopen
11 -- https://github.com/rust-lang/cargo/issues?page=11&q=is%3Aissue+is%3Aopen
12 -- https://github.com/rust-lang/carg

73 -- https://github.com/rust-lang/cargo/issues?page=73&q=is%3Aissue+is%3Aclosed
74 -- https://github.com/rust-lang/cargo/issues?page=74&q=is%3Aissue+is%3Aclosed
75 -- https://github.com/rust-lang/cargo/issues?page=75&q=is%3Aissue+is%3Aclosed
76 -- https://github.com/rust-lang/cargo/issues?page=76&q=is%3Aissue+is%3Aclosed
77 -- https://github.com/rust-lang/cargo/issues?page=77&q=is%3Aissue+is%3Aclosed
78 -- https://github.com/rust-lang/cargo/issues?page=78&q=is%3Aissue+is%3Aclosed
79 -- https://github.com/rust-lang/cargo/issues?page=79&q=is%3Aissue+is%3Aclosed
80 -- https://github.com/rust-lang/cargo/issues?page=80&q=is%3Aissue+is%3Aclosed
81 -- https://github.com/rust-lang/cargo/issues?page=81&q=is%3Aissue+is%3Aclosed
82 -- https://github.com/rust-lang/cargo/issues?page=82&q=is%3Aissue+is%3Aclosed
83 -- https://github.com/rust-lang/cargo/issues?page=83&q=is%3Aissue+is%3Aclosed
84 -- https://github.com/rust-lang/cargo/issues?page=84&q=is%3Aissue+is%3Aclosed
85 -- https://github.com/rus

69 -- https://github.com/rust-lang/cargo/pulls?page=69&q=is%3Apr+is%3Aclosed
70 -- https://github.com/rust-lang/cargo/pulls?page=70&q=is%3Apr+is%3Aclosed
71 -- https://github.com/rust-lang/cargo/pulls?page=71&q=is%3Apr+is%3Aclosed
72 -- https://github.com/rust-lang/cargo/pulls?page=72&q=is%3Apr+is%3Aclosed
73 -- https://github.com/rust-lang/cargo/pulls?page=73&q=is%3Apr+is%3Aclosed
74 -- https://github.com/rust-lang/cargo/pulls?page=74&q=is%3Apr+is%3Aclosed
75 -- https://github.com/rust-lang/cargo/pulls?page=75&q=is%3Apr+is%3Aclosed
76 -- https://github.com/rust-lang/cargo/pulls?page=76&q=is%3Apr+is%3Aclosed
77 -- https://github.com/rust-lang/cargo/pulls?page=77&q=is%3Apr+is%3Aclosed
78 -- https://github.com/rust-lang/cargo/pulls?page=78&q=is%3Apr+is%3Aclosed
79 -- https://github.com/rust-lang/cargo/pulls?page=79&q=is%3Apr+is%3Aclosed
80 -- https://github.com/rust-lang/cargo/pulls?page=80&q=is%3Apr+is%3Aclosed
81 -- https://github.com/rust-lang/cargo/pulls?page=81&q=is%3Apr+is%3Aclosed

43 -- https://github.com/rust-lang-nursery/rustfmt/issues?page=43&q=is%3Aissue+is%3Aclosed
44 -- https://github.com/rust-lang-nursery/rustfmt/issues?page=44&q=is%3Aissue+is%3Aclosed
45 -- https://github.com/rust-lang-nursery/rustfmt/issues?page=45&q=is%3Aissue+is%3Aclosed
46 -- https://github.com/rust-lang-nursery/rustfmt/issues?page=46&q=is%3Aissue+is%3Aclosed
47 -- https://github.com/rust-lang-nursery/rustfmt/issues?page=47&q=is%3Aissue+is%3Aclosed
48 -- https://github.com/rust-lang-nursery/rustfmt/issues?page=48&q=is%3Aissue+is%3Aclosed
49 -- https://github.com/rust-lang-nursery/rustfmt/issues?page=49&q=is%3Aissue+is%3Aclosed
50 -- https://github.com/rust-lang-nursery/rustfmt/issues?page=50&q=is%3Aissue+is%3Aclosed
51 -- https://github.com/rust-lang-nursery/rustfmt/issues?page=51&q=is%3Aissue+is%3Aclosed
52 -- https://github.com/rust-lang-nursery/rustfmt/issues?page=52&q=is%3Aissue+is%3Aclosed
53 -- https://github.com/rust-lang-nursery/rustfmt/issues?page=53&q=is%3Aissue+is%3Aclosed

8 -- https://github.com/rust-lang-nursery/mdBook/issues?page=8&q=is%3Aissue+is%3Aclosed
9 -- https://github.com/rust-lang-nursery/mdBook/issues?page=9&q=is%3Aissue+is%3Aclosed
10 -- https://github.com/rust-lang-nursery/mdBook/issues?page=10&q=is%3Aissue+is%3Aclosed
11 -- https://github.com/rust-lang-nursery/mdBook/issues?page=11&q=is%3Aissue+is%3Aclosed
12 -- https://github.com/rust-lang-nursery/mdBook/issues?page=12&q=is%3Aissue+is%3Aclosed
1 -- https://github.com/rust-lang-nursery/mdBook/pulls?page=1&q=is%3Apr+is%3Aopen
1 -- https://github.com/rust-lang-nursery/mdBook/pulls?page=1&q=is%3Apr+is%3Aclosed
2 -- https://github.com/rust-lang-nursery/mdBook/pulls?page=2&q=is%3Apr+is%3Aclosed
3 -- https://github.com/rust-lang-nursery/mdBook/pulls?page=3&q=is%3Apr+is%3Aclosed
4 -- https://github.com/rust-lang-nursery/mdBook/pulls?page=4&q=is%3Apr+is%3Aclosed
5 -- https://github.com/rust-lang-nursery/mdBook/pulls?page=5&q=is%3Apr+is%3Aclosed
6 -- https://github.com/rust-lang-nursery/mdBook/pul

3 -- https://github.com/actix/actix-web/issues?page=3&q=is%3Aissue+is%3Aclosed
4 -- https://github.com/actix/actix-web/issues?page=4&q=is%3Aissue+is%3Aclosed
5 -- https://github.com/actix/actix-web/issues?page=5&q=is%3Aissue+is%3Aclosed
6 -- https://github.com/actix/actix-web/issues?page=6&q=is%3Aissue+is%3Aclosed
7 -- https://github.com/actix/actix-web/issues?page=7&q=is%3Aissue+is%3Aclosed
8 -- https://github.com/actix/actix-web/issues?page=8&q=is%3Aissue+is%3Aclosed
9 -- https://github.com/actix/actix-web/issues?page=9&q=is%3Aissue+is%3Aclosed
10 -- https://github.com/actix/actix-web/issues?page=10&q=is%3Aissue+is%3Aclosed
11 -- https://github.com/actix/actix-web/issues?page=11&q=is%3Aissue+is%3Aclosed
12 -- https://github.com/actix/actix-web/issues?page=12&q=is%3Aissue+is%3Aclosed
13 -- https://github.com/actix/actix-web/issues?page=13&q=is%3Aissue+is%3Aclosed
1 -- https://github.com/actix/actix-web/pulls?page=1&q=is%3Apr+is%3Aopen
1 -- https://github.com/actix/actix-web/pulls?page

6 -- https://github.com/sozu-proxy/sozu/issues?page=6&q=is%3Aissue+is%3Aclosed
7 -- https://github.com/sozu-proxy/sozu/issues?page=7&q=is%3Aissue+is%3Aclosed
8 -- https://github.com/sozu-proxy/sozu/issues?page=8&q=is%3Aissue+is%3Aclosed
9 -- https://github.com/sozu-proxy/sozu/issues?page=9&q=is%3Aissue+is%3Aclosed
10 -- https://github.com/sozu-proxy/sozu/issues?page=10&q=is%3Aissue+is%3Aclosed
11 -- https://github.com/sozu-proxy/sozu/issues?page=11&q=is%3Aissue+is%3Aclosed
12 -- https://github.com/sozu-proxy/sozu/issues?page=12&q=is%3Aissue+is%3Aclosed
1 -- https://github.com/sozu-proxy/sozu/pulls?page=1&q=is%3Apr+is%3Aopen
1 -- https://github.com/sozu-proxy/sozu/pulls?page=1&q=is%3Apr+is%3Aclosed
2 -- https://github.com/sozu-proxy/sozu/pulls?page=2&q=is%3Apr+is%3Aclosed
3 -- https://github.com/sozu-proxy/sozu/pulls?page=3&q=is%3Apr+is%3Aclosed
4 -- https://github.com/sozu-proxy/sozu/pulls?page=4&q=is%3Apr+is%3Aclosed
5 -- https://github.com/sozu-proxy/sozu/pulls?page=5&q=is%3Apr+is%3A

1 -- https://github.com/google/tarpc/pulls?page=1&q=is%3Apr+is%3Aopen
1 -- https://github.com/google/tarpc/pulls?page=1&q=is%3Apr+is%3Aclosed
2 -- https://github.com/google/tarpc/pulls?page=2&q=is%3Apr+is%3Aclosed
3 -- https://github.com/google/tarpc/pulls?page=3&q=is%3Apr+is%3Aclosed
4 -- https://github.com/google/tarpc/pulls?page=4&q=is%3Apr+is%3Aclosed
5 -- https://github.com/google/tarpc/pulls?page=5&q=is%3Apr+is%3Aclosed
https://github.com/exonum/exonum-btc-anchoring
open issues : 6
closed issues : 19
open pulls : 1
closed pulls : 110
1 -- https://github.com/exonum/exonum-btc-anchoring/issues?page=1&q=is%3Aissue+is%3Aopen
1 -- https://github.com/exonum/exonum-btc-anchoring/issues?page=1&q=is%3Aissue+is%3Aclosed
1 -- https://github.com/exonum/exonum-btc-anchoring/pulls?page=1&q=is%3Apr+is%3Aopen
1 -- https://github.com/exonum/exonum-btc-anchoring/pulls?page=1&q=is%3Apr+is%3Aclosed
2 -- https://github.com/exonum/exonum-btc-anchoring/pulls?page=2&q=is%3Apr+is%3Aclosed
3 -- https://gi

2 -- https://github.com/trezm/fanta/pulls?page=2&q=is%3Apr+is%3Aclosed
3 -- https://github.com/trezm/fanta/pulls?page=3&q=is%3Apr+is%3Aclosed
https://github.com/lawliet89/rowdy
open issues : 12
closed issues : 16
open pulls : 0
closed pulls : 55
1 -- https://github.com/lawliet89/rowdy/issues?page=1&q=is%3Aissue+is%3Aopen
1 -- https://github.com/lawliet89/rowdy/issues?page=1&q=is%3Aissue+is%3Aclosed
1 -- https://github.com/lawliet89/rowdy/pulls?page=1&q=is%3Apr+is%3Aopen
1 -- https://github.com/lawliet89/rowdy/pulls?page=1&q=is%3Apr+is%3Aclosed
2 -- https://github.com/lawliet89/rowdy/pulls?page=2&q=is%3Apr+is%3Aclosed
3 -- https://github.com/lawliet89/rowdy/pulls?page=3&q=is%3Apr+is%3Aclosed
https://github.com/polachok/libvirt-rpc
open issues : 4
closed issues : 4
open pulls : 0
closed pulls : 54
1 -- https://github.com/polachok/libvirt-rpc/issues?page=1&q=is%3Aissue+is%3Aopen
1 -- https://github.com/polachok/libvirt-rpc/issues?page=1&q=is%3Aissue+is%3Aclosed
1 -- https://github.com/pol

1 -- https://github.com/lukaspustina/rs-collector/issues?page=1&q=is%3Aissue+is%3Aclosed
1 -- https://github.com/lukaspustina/rs-collector/pulls?page=1&q=is%3Apr+is%3Aopen
1 -- https://github.com/lukaspustina/rs-collector/pulls?page=1&q=is%3Apr+is%3Aclosed
2 -- https://github.com/lukaspustina/rs-collector/pulls?page=2&q=is%3Apr+is%3Aclosed
https://github.com/indradb/indradb
open issues : 1
closed issues : 6
open pulls : 1
closed pulls : 29
1 -- https://github.com/indradb/indradb/issues?page=1&q=is%3Aissue+is%3Aopen
1 -- https://github.com/indradb/indradb/issues?page=1&q=is%3Aissue+is%3Aclosed
1 -- https://github.com/indradb/indradb/pulls?page=1&q=is%3Apr+is%3Aopen
1 -- https://github.com/indradb/indradb/pulls?page=1&q=is%3Apr+is%3Aclosed
2 -- https://github.com/indradb/indradb/pulls?page=2&q=is%3Apr+is%3Aclosed
https://github.com/kpcyrd/boxxy-rs
open issues : 9
closed issues : 7
open pulls : 0
closed pulls : 27
1 -- https://github.com/kpcyrd/boxxy-rs/issues?page=1&q=is%3Aissue+is%3Aope