In [None]:
import pandas
import matplotlib
import requests
import warnings

import sys
sys.path.append('..')

from helper import load_data , load_repo , GIT_API
warnings.filterwarnings('ignore')

%matplotlib inline

In [None]:
# Repository URL that the cells below pass to the scraping/fetching functions.
testrepo = 'https://github.com/rust-lang/cargo'

In [None]:
# Scrapes the repository's issues page and reports how many issues are open and closed.
def get_open_closed_issues(repo):
    """Return ``(open_count, closed_count)`` scraped from ``<repo>/issues``.

    Each count is the text found between an octicon ``</svg>`` tag and the
    following ``Open`` / ``Closed`` label in the page HTML, with thousands
    separators removed before conversion to ``int``.
    """
    page_html = requests.get(repo + '/issues').text

    def _count(icon_marker, occurrence, label):
        # Keep the text after the chosen occurrence of the icon, up to the label.
        after_icon = page_html.split(icon_marker)[occurrence]
        before_label = after_icon.split(label)[0]
        digits = before_label.split('</svg>')[1].strip()
        return int(digits.replace(",", ""))

    n_open = _count('<svg class="octicon octicon-issue-opened"', 2, 'Open')
    n_closed = _count('<svg class="octicon octicon-check"', 1, 'Closed')

    return n_open, n_closed

In [None]:
# Fetches the comments of one issue and returns them as a dataframe.
def get_comments(repo,issue_id):
    """Fetch all comments of one issue (or pull request) via the GitHub REST API.

    Parameters
    ----------
    repo : str
        Repository URL of the form ``https://github.com/<owner>/<name>``;
        the owner and name are taken from path segments 3 and 4.
    issue_id : int or str
        Issue (or pull request) number within the repository.

    Returns
    -------
    pandas.DataFrame
        One row per comment with the columns ``project_name``,
        ``issue_number``, ``comment_id``, ``user_login``, ``created_at``,
        ``updated_at`` and ``author_assoc``. Empty (but with those columns)
        when the issue has no comments.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (e.g. rate limiting), instead
        of failing later while iterating over the error payload.
    """
    columns = ["project_name","issue_number","comment_id","user_login","created_at","updated_at","author_assoc"]

    repo_parts = repo.split('/')
    credentials = GIT_API[11].split(':')
    # Credentials go through HTTP basic auth rather than the query string, so
    # the secret never shows up in the URL that is printed below.
    auth = (credentials[0], credentials[1])

    url = 'https://api.github.com/repos/{}/{}/issues/{}/comments?per_page=100'.format(
            repo_parts[3] ,
            repo_parts[4] ,
            issue_id)
    print(url)

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and was quadratic when used row by row.
    records = []
    while url:
        response = requests.get(url, auth=auth)
        response.raise_for_status()
        for comment in response.json():
            # The payload's ``user`` may be null; record the login as None then.
            user = comment.get('user') or {}
            records.append({
                "project_name": repo,
                "issue_number": issue_id,
                "comment_id": comment['id'],
                "user_login": user.get('login'),
                "created_at": comment['created_at'],
                "updated_at": comment['updated_at'],
                "author_assoc": comment['author_association'] })
        # Follow the Link header so issues with more than one page of comments
        # are fetched completely.
        url = response.links.get('next', {}).get('url')

    return pandas.DataFrame(records, columns=columns)

In [None]:
# Scrapes the repository's pulls page and reports how many pull requests are open and closed.
def get_open_closed_pulls(repo):
    """Return ``(open_count, closed_count)`` scraped from ``<repo>/pulls``.

    Each count is the text found between an octicon ``</svg>`` tag and the
    following ``Open`` / ``Closed`` label in the page HTML, with thousands
    separators removed before conversion to ``int``.
    """
    page_html = requests.get(repo + '/pulls').text

    def _count(icon_marker, occurrence, label):
        # Keep the text after the chosen occurrence of the icon, up to the label.
        after_icon = page_html.split(icon_marker)[occurrence]
        before_label = after_icon.split(label)[0]
        digits = before_label.split('</svg>')[1].strip()
        return int(digits.replace(",", ""))

    open_pulls = _count('<svg class="octicon octicon-git-pull-request"', 2, 'Open')
    closed_pulls = _count('<svg class="octicon octicon-check"', 1, 'Closed')

    return open_pulls, closed_pulls

In [None]:
# Fetches the review comments of one pull request and returns them as a dataframe.
def get_pull_comments(repo,pr_id):
    """Fetch all review comments of one pull request via the GitHub REST API.

    Parameters
    ----------
    repo : str
        Repository URL of the form ``https://github.com/<owner>/<name>``;
        the owner and name are taken from path segments 3 and 4.
    pr_id : int or str
        Pull request number within the repository.

    Returns
    -------
    pandas.DataFrame
        One row per review comment with the columns ``project_name``,
        ``pr_id``, ``pr_review_id``, ``comment_id``, ``commit_id``,
        ``original_commit_id``, ``user_login``, ``created_at``,
        ``updated_at`` and ``author_assoc``. Empty (but with those columns)
        when the pull request has no review comments.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (e.g. rate limiting), instead
        of failing later while iterating over the error payload.
    """
    columns = ["project_name",
               "pr_id",
               "pr_review_id",
               "comment_id",
               "commit_id",
               "original_commit_id",
               "user_login",
               "created_at",
               "updated_at",
               "author_assoc"]

    repo_parts = repo.split('/')
    credentials = GIT_API[11].split(':')
    # Credentials go through HTTP basic auth rather than the query string, so
    # the secret is never part of the request URL.
    auth = (credentials[0], credentials[1])

    url = 'https://api.github.com/repos/{}/{}/pulls/{}/comments?per_page=100'.format(
            repo_parts[3] ,
            repo_parts[4] ,
            pr_id)

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and was quadratic when used row by row.
    records = []
    while url:
        response = requests.get(url, auth=auth)
        response.raise_for_status()
        for comment in response.json():
            # The payload's ``user`` may be null; record the login as None then.
            user = comment.get('user') or {}
            records.append({
                "project_name": repo,
                "pr_id": pr_id,
                "pr_review_id" : comment['pull_request_review_id'],
                "comment_id": comment['id'],
                "commit_id" : comment['commit_id'],
                "original_commit_id" : comment['original_commit_id'],
                "user_login": user.get('login'),
                "created_at": comment['created_at'],
                "updated_at": comment['updated_at'],
                "author_assoc": comment['author_association'] })
        # Follow the Link header so pull requests with more than one page of
        # review comments are fetched completely.
        url = response.links.get('next', {}).get('url')

    return pandas.DataFrame(records, columns=columns)

# Dataframes

In [None]:
# Empty module-level frames; each one only declares the column layout of its table.
issue_comments = pandas.DataFrame(
    columns=[
        "project_name",
        "issue_number",
        "comment_id",
        "user_login",
        "created_at",
        "updated_at",
        "author_assoc",
    ]
)
pulls_comments = pandas.DataFrame(
    columns=[
        "project_name",
        "issue_number",
        "comment_id",
        "user_login",
        "created_at",
        "updated_at",
        "author_assoc",
    ]
)
pulls_review_comments = pandas.DataFrame(
    columns=[
        "project_name",
        "pr_id",
        "pr_review_id",
        "comment_id",
        "commit_id",
        "original_commit_id",
        "user_login",
        "created_at",
        "updated_at",
        "author_assoc",
    ]
)


In [None]:
# Scrape the open/closed issue and pull-request counts for the test repository.
# The resulting module-level names (open_issue, closed_issue, open_pr, closed_pr)
# are read by the get_*_comments functions below to decide how many listing
# pages to request, so this cell must run before any of them is called.
open_issue , closed_issue = get_open_closed_issues(testrepo)  
open_pr , closed_pr = get_open_closed_pulls(testrepo)  

# Open Issues Comments

In [None]:
def get_open_issues_comments(repo):
    """Collect the comments of every open issue of ``repo`` that has comments.

    Walks the HTML issue listing page by page, picks out the issues whose
    entry contains a comment icon, and fetches their comments with
    ``get_comments``.

    The number of pages is derived from the module-level ``open_issue``
    count, which therefore has to be set before this function is called.
    The divisor 25 is presumably the number of issues per listing page.

    Parameters
    ----------
    repo : str
        Repository URL, e.g. ``https://github.com/<owner>/<name>``.

    Returns
    -------
    pandas.DataFrame
        All fetched comments in one frame (same columns as ``get_comments``);
        empty with those columns when nothing was found.
    """
    columns = ["project_name","issue_number","comment_id","user_login","created_at","updated_at","author_assoc"]

    # Gather per-issue frames and concatenate once at the end. The previous
    # code assigned to ``issue_comments`` inside the function, which made the
    # name local and raised UnboundLocalError on the first append; it also
    # relied on DataFrame.append, which pandas 2.0 removed.
    frames = []
    for page in range(1,(open_issue//25)+2):
        issues_page_url = repo + '/issues?page={}&q=is%3Aissue+is%3Aopen'.format(page)
        listing_html = requests.get(issues_page_url).text
        for chunk in listing_html.split('id="issue_'):
            # The chunk before the first issue entry holds the page header.
            if 'DOCTYPE' in chunk:
                continue

            if 'octicon octicon-comment' in chunk:
                # Each chunk starts with the issue number, ended by the closing quote.
                issue_id = chunk.split('"')[0]
                frames.append(get_comments(repo,issue_id))

    # Skip empty frames so they cannot influence the dtypes of the result.
    frames = [frame for frame in frames if not frame.empty]
    if not frames:
        return pandas.DataFrame(columns=columns)
    return pandas.concat(frames, ignore_index=True)
             

# Closed Issues Comments

In [None]:
def get_closed_issues_comments(repo):
    """Collect the comments of every closed issue of ``repo`` that has comments.

    Walks the HTML issue listing page by page, picks out the issues whose
    entry contains a comment icon, and fetches their comments with
    ``get_comments``.

    The number of pages is derived from the module-level ``closed_issue``
    count, which therefore has to be set before this function is called.
    The divisor 25 is presumably the number of issues per listing page.

    Parameters
    ----------
    repo : str
        Repository URL, e.g. ``https://github.com/<owner>/<name>``.

    Returns
    -------
    pandas.DataFrame
        All fetched comments in one frame (same columns as ``get_comments``);
        empty with those columns when nothing was found.
    """
    columns = ["project_name","issue_number","comment_id","user_login","created_at","updated_at","author_assoc"]

    # Gather per-issue frames and concatenate once at the end. The previous
    # code assigned to ``issue_comments`` inside the function, which made the
    # name local and raised UnboundLocalError on the first append; it also
    # relied on DataFrame.append, which pandas 2.0 removed.
    frames = []
    for page in range(1,(closed_issue//25)+2):
        issues_page_url = repo + '/issues?page={}&q=is%3Aissue+is%3Aclosed'.format(page)
        listing_html = requests.get(issues_page_url).text
        for chunk in listing_html.split('id="issue_'):
            # The chunk before the first issue entry holds the page header.
            if 'DOCTYPE' in chunk:
                continue
            if 'octicon octicon-comment' in chunk:
                # Each chunk starts with the issue number, ended by the closing quote.
                issue_id = chunk.split('"')[0]
                frames.append(get_comments(repo,issue_id))

    # Skip empty frames so they cannot influence the dtypes of the result.
    frames = [frame for frame in frames if not frame.empty]
    if not frames:
        return pandas.DataFrame(columns=columns)
    return pandas.concat(frames, ignore_index=True)

# Open Pull Request Comments

In [None]:
def get_open_pulls_comments(repo):
    """Collect comments and review comments of every open pull request with comments.

    Walks the HTML pull-request listing page by page, picks out the entries
    that contain a comment icon, and fetches their conversation comments with
    ``get_comments`` and their review comments with ``get_pull_comments``.

    The number of pages is derived from the module-level ``open_pr`` count,
    which therefore has to be set before this function is called. The divisor
    25 is presumably the number of pull requests per listing page.

    Parameters
    ----------
    repo : str
        Repository URL, e.g. ``https://github.com/<owner>/<name>``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(pulls_comments, pulls_review_comments)``; each is empty with its
        expected columns when nothing was found.
    """
    comment_columns = ["project_name","issue_number","comment_id","user_login","created_at","updated_at","author_assoc"]
    review_columns = ["project_name","pr_id","pr_review_id","comment_id","commit_id","original_commit_id","user_login","created_at","updated_at","author_assoc"]

    def _combine(frames, columns):
        # One concat at the end replaces repeated DataFrame.append calls
        # (removed in pandas 2.0, and quadratic when used in a loop). Empty
        # frames are skipped so they cannot influence the result's dtypes.
        non_empty = [frame for frame in frames if not frame.empty]
        if not non_empty:
            return pandas.DataFrame(columns=columns)
        return pandas.concat(non_empty, ignore_index=True)

    comment_frames = []
    review_frames = []
    for page in range(1,(open_pr//25)+2):
        pull_page_url = repo + '/issues?page={}&q=is%3Apr+is%3Aopen'.format(page)
        listing_html = requests.get(pull_page_url).text
        for chunk in listing_html.split('id="issue_'):
            # The chunk before the first entry holds the page header.
            if 'DOCTYPE' in chunk:
                continue
            if 'octicon octicon-comment' in chunk:
                # Each chunk starts with the PR number, ended by the closing quote.
                pr_id = chunk.split('"')[0]
                comment_frames.append(get_comments(repo,pr_id))
                review_frames.append(get_pull_comments(repo,pr_id))

    pulls_comments = _combine(comment_frames, comment_columns)
    pulls_review_comments = _combine(review_frames, review_columns)
    return pulls_comments, pulls_review_comments
                
# Run the scrape for open pull requests of the test repository.
# NOTE(review): the returned (pulls_comments, pulls_review_comments) tuple is
# not bound to a name here, so it is only shown as this cell's output.
get_open_pulls_comments(testrepo)

# Closed Pull Request Comments

In [None]:
def get_closed_pulls_comments(repo):
    """Collect comments and review comments of every closed pull request with comments.

    Walks the HTML pull-request listing page by page, picks out the entries
    that contain a comment icon, and fetches their conversation comments with
    ``get_comments`` and their review comments with ``get_pull_comments``.

    The number of pages is derived from the module-level ``closed_pr`` count,
    which therefore has to be set before this function is called. The divisor
    25 is presumably the number of pull requests per listing page.

    Parameters
    ----------
    repo : str
        Repository URL, e.g. ``https://github.com/<owner>/<name>``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(pulls_comments, pulls_review_comments)``; each is empty with its
        expected columns when nothing was found.
    """
    comment_columns = ["project_name","issue_number","comment_id","user_login","created_at","updated_at","author_assoc"]
    review_columns = ["project_name","pr_id","pr_review_id","comment_id","commit_id","original_commit_id","user_login","created_at","updated_at","author_assoc"]

    def _combine(frames, columns):
        # One concat at the end replaces repeated DataFrame.append calls
        # (removed in pandas 2.0, and quadratic when used in a loop). Empty
        # frames are skipped so they cannot influence the result's dtypes.
        non_empty = [frame for frame in frames if not frame.empty]
        if not non_empty:
            return pandas.DataFrame(columns=columns)
        return pandas.concat(non_empty, ignore_index=True)

    comment_frames = []
    review_frames = []
    for page in range(1,(closed_pr//25)+2):
        pull_page_url = repo + '/issues?page={}&q=is%3Apr+is%3Aclosed'.format(page)
        listing_html = requests.get(pull_page_url).text
        for chunk in listing_html.split('id="issue_'):
            # The chunk before the first entry holds the page header.
            if 'DOCTYPE' in chunk:
                continue
            if 'octicon octicon-comment' in chunk:
                # Each chunk starts with the PR number, ended by the closing quote.
                pr_id = chunk.split('"')[0]
                comment_frames.append(get_comments(repo,pr_id))
                review_frames.append(get_pull_comments(repo,pr_id))

    pulls_comments = _combine(comment_frames, comment_columns)
    pulls_review_comments = _combine(review_frames, review_columns)
    return pulls_comments, pulls_review_comments
                
# Run the scrape for closed pull requests of the test repository.
# NOTE(review): the returned (pulls_comments, pulls_review_comments) tuple is
# not bound to a name here, so it is only shown as this cell's output.
get_closed_pulls_comments(testrepo)