## GitHub Repo Metrics Dashboard Using the GraphQL API ##

In [1]:
%matplotlib inline

import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
import datetime as dt
from IPython.display import display
from IPython.display import clear_output
import seaborn as sns
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from string import Template
from matplotlib.ticker import MaxNLocator
import time
import pylab as pl

In [2]:
# Shared VADER analyzer; the sentiment helpers below call its polarity_scores().
analyzer = SentimentIntensityAnalyzer()

# GraphQL query template listing the repositories of one owner.
# `$owner` is filled in with string.Template.substitute(). Only the first 100
# repositories are requested (no pagination); for each one the query selects
# name, diskUsage and forkCount, plus the connection's totalCount.
ownerQuery = Template(''' 
{ 
    repositoryOwner(login : \"$owner\") {
        login repositories (first : 100) {
            edges {
                node {
                    name
                    diskUsage
                    forkCount 
                }
            }
            totalCount
        } 
    } 
} 
''')

# GraphQL query template fetching the metrics of a single repository.
# `$owner` and `$repo` are filled in with string.Template.substitute().
# Every connection is capped at its first 100 entries (no pagination), so the
# edge lists may be shorter than the accompanying totalCount values.
# Selected data: vulnerability alerts, dependency manifests, fork/watcher/
# stargazer counts, commit comments (with reactions), pull requests (with
# their comments and open/close/merge timestamps) and issues.
repoQuery = Template(''' 
{ 
    repository(owner: \"$owner\", name: \"$repo\") { 
        vulnerabilityAlerts(first: 100) {
            totalCount
            edges {
                node {
                    packageName
                }
            }
        }
        dependencyGraphManifests(first: 100) {
            totalCount
            edges {
                node {
                    dependenciesCount
                }
            }
        }
        forkCount
        watchers(first: 100) {
            totalCount
        }
        stargazers(first: 100) {
            totalCount
        }
        
        commitComments(first: 100) {
            edges {
                node {
                    bodyText
                    reactions(first: 100) {
                        edges {
                            node {
                                content
                            }
                        }
                    }
                }
            }
        }
        pullRequests(first: 100) { 
            totalCount
            edges { 
                node { 
                    createdAt
                    closedAt
                    closed
                    merged
                    mergedAt
                    comments(first: 100) {
                        edges {
                            node {
                                bodyText
                            }
                        }
                    } 
                } 
            } 
        } 
        issues(first: 100) {
            totalCount
            edges {
                node {
                    bodyText
                    createdAt
                    closedAt
                    closed
                }
            }
        }
    }
}
''')



In [3]:
def getSentimentAPI(phrase, timeout=30):
    """Classify *phrase* with the text-processing.com sentiment web service.

    Args:
        phrase: Text to classify; sent form-encoded as the ``text`` field.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.Timeout: If no response arrives within *timeout* seconds.
    """
    endpointSentAPI = 'http://text-processing.com/api/sentiment/'
    # Without a timeout a stalled service would block the kernel forever.
    r = requests.post(endpointSentAPI, data={"text": phrase}, timeout=timeout)
    # Check the status first so an error page is reported as an HTTP error
    # instead of as a confusing JSON decoding failure.
    r.raise_for_status()
    return r.json()

def acquireOwnerRepos(ownerName, token, timeout=30):
    """Return the names of the repositories listed for *ownerName*.

    Args:
        ownerName: GitHub login substituted into ``ownerQuery``.
        token: GitHub API token sent as a bearer credential.
        timeout: Seconds to wait for GitHub before giving up.

    Returns:
        A list of repository names, in the order GitHub returned them.
        The query asks for the first 100 repositories only.

    Raises:
        requests.HTTPError: If GitHub answers with an error status.
        requests.Timeout: If no response arrives within *timeout* seconds.
        RuntimeError: If the response carries no ``repositoryOwner`` data
            (for example because the login does not exist).
    """
    endpointGitHub = 'https://api.github.com/graphql'
    headersGitHub = {'Authorization': 'bearer ' + token}
    query = ownerQuery.substitute(owner=ownerName)
    r = requests.post(endpointGitHub, json={"query": query},
                      headers=headersGitHub, timeout=timeout)
    r.raise_for_status()
    payload = r.json()

    # GraphQL reports lookup failures inside a successful HTTP response, so
    # inspect the payload instead of indexing blindly into it.
    owner = (payload.get('data') or {}).get('repositoryOwner')
    if owner is None:
        raise RuntimeError(
            'GitHub returned no data for owner {!r}: {}'.format(
                ownerName, payload.get('errors')))

    return [edge['node']['name'] for edge in owner['repositories']['edges']]

def _sentimentPercentages(phrases):
    """Score every phrase with VADER and return (neg, neu, pos) percentages.

    The three component scores are summed over all phrases and then rescaled
    so that they add up to 100. Dividing by the number of phrases first is
    unnecessary because that factor cancels out in the rescaling.

    Returns:
        ``(neg, neu, pos)`` as floats. All three are 0.0 when there is
        nothing to normalise: no phrases at all, or every phrase scoring
        zero on all components (NOTE(review): VADER appears to do this for
        empty text - confirm).
    """
    neg = neu = pos = 0.0
    for phrase in phrases:
        vaderScores = analyzer.polarity_scores(phrase)
        neg += vaderScores['neg']
        neu += vaderScores['neu']
        pos += vaderScores['pos']

    total = neg + neu + pos
    if total <= 0:
        # Guard against dividing by zero when nothing carried any score.
        return 0.0, 0.0, 0.0
    return neg * 100.0 / total, neu * 100.0 / total, pos * 100.0 / total

def computeSentiment(edgeData):
    """Return (neg, neu, pos) sentiment percentages for a list of edges.

    Args:
        edgeData: Iterable of GraphQL edges whose ``node`` has a ``bodyText``
            entry (commit comments or issues in this notebook).

    Returns:
        Tuple ``(neg, neu, pos)`` of percentages summing to 100, or all
        zeros when there is no scorable text.
    """
    return _sentimentPercentages(edge['node']['bodyText'] for edge in edgeData)

def computePRSentiment(edgeData):
    """Return (neg, neu, pos) sentiment percentages for pull-request comments.

    Args:
        edgeData: Iterable of pull-request edges; each ``node`` holds a
            ``comments`` connection whose edges carry ``bodyText``.

    Returns:
        Tuple ``(neg, neu, pos)`` of percentages summing to 100, or all
        zeros when there is no scorable text.
    """
    return _sentimentPercentages(
        comment['node']['bodyText']
        for pr in edgeData
        for comment in pr['node']['comments']['edges'])

def createSentimentDict(sent, msgType):
    """Turn a (neg, neu, pos) triple into three plot-ready records.

    Args:
        sent: Indexable holding negative, neutral and positive values at
            positions 0, 1 and 2.
        msgType: Label stored under ``MessageType`` in every record.

    Returns:
        A list of three dicts ordered Positive, Neutral, Negative, each with
        the keys ``MessageType``, ``SentimentType`` and ``Value``.
    """
    # (label, index into `sent`) in the order the records are emitted.
    layout = (('Positive', 2), ('Neutral', 1), ('Negative', 0))
    return [
        {'MessageType': msgType, 'SentimentType': label, 'Value': sent[position]}
        for label, position in layout
    ]
        
def parseGitHubUTCTimeStamp(ts):
    """Parse a timestamp such as ``'2020-01-31T12:00:00Z'``.

    Returns:
        A naive ``datetime`` (the trailing ``Z`` is matched literally and no
        tzinfo is attached).

    Raises:
        ValueError: If *ts* does not match the expected format.
    """
    return dt.datetime.strptime(ts, '%Y-%m-%dT%H:%M:%SZ')

def determineResolutionTime(opened, closed):
    """Return the number of whole days between *opened* and *closed*.

    The absolute value is taken on the timedelta itself, not on its ``days``
    attribute: a negative timedelta stores ``days`` rounded towards minus
    infinity, so ``abs(td.days)`` would report one day too many whenever the
    arguments arrive in reverse order.
    """
    return abs(closed - opened).days

def computeResponseTimes(edgeData):
    """Collect the time-to-close, in whole days, of every closed item.

    Args:
        edgeData: Iterable of GraphQL edges whose ``node`` has ``closed``,
            ``createdAt`` and ``closedAt`` entries.

    Returns:
        A list with one integer per closed item, in input order. Items that
        are still open are skipped.
    """
    times = []
    for edge in edgeData:
        item = edge['node']
        if not item['closed']:
            continue
        opened = parseGitHubUTCTimeStamp(item['createdAt'])
        closed = parseGitHubUTCTimeStamp(item['closedAt'])
        times.append(determineResolutionTime(opened, closed))
    return times

def acquireRepoData(ownerName, repoName, token, timeout=30):
    """Fetch the metrics described by ``repoQuery`` for one repository.

    Args:
        ownerName: Repository owner substituted into the query.
        repoName: Repository name substituted into the query.
        token: GitHub API token sent as a bearer credential.
        timeout: Seconds to wait for GitHub before giving up.

    Returns:
        The ``repository`` object of the GraphQL response as a dict.

    Raises:
        requests.HTTPError: If GitHub answers with an error status.
        requests.Timeout: If no response arrives within *timeout* seconds.
        RuntimeError: If the response carries no ``repository`` data (for
            example because the repository does not exist).
    """
    endpointGitHub = 'https://api.github.com/graphql'
    headersGitHub = {
        'Authorization': 'bearer ' + token,
        # Preview media types; presumably required for the dependency-graph
        # and vulnerability-alert fields - confirm against current API docs.
        'Accept': 'application/vnd.github.hawkgirl-preview+json,application/vnd.github.vixen-preview+json',
    }

    query = repoQuery.substitute(owner=ownerName, repo=repoName)
    r = requests.post(endpointGitHub, json={"query": query},
                      headers=headersGitHub, timeout=timeout)
    r.raise_for_status()
    payload = r.json()

    # GraphQL reports lookup failures inside a successful HTTP response.
    # Fail here with the server's explanation rather than handing None to
    # the plotting code.
    repository = (payload.get('data') or {}).get('repository')
    if repository is None:
        raise RuntimeError(
            'GitHub returned no data for {}/{}: {}'.format(
                ownerName, repoName, payload.get('errors')))
    return repository

In [4]:
def handleSentiments(repo_data):
    """Draw one bar chart per message type showing sentiment percentages.

    Args:
        repo_data: Repository dict holding ``pullRequests``,
            ``commitComments`` and ``issues`` connections with ``edges``.

    The figure title reads the owner and repository names from the
    module-level ``ownerText`` and ``repoText`` widgets. Returns None.
    """
    prSentiments = computePRSentiment(repo_data['pullRequests']['edges'])
    commitSentiments = computeSentiment(repo_data['commitComments']['edges'])
    issueSentiments = computeSentiment(repo_data['issues']['edges'])

    # Build the frame in one step: DataFrame.append was removed in pandas 2.0
    # and copied the whole frame on every call before that.
    rows = (createSentimentDict(prSentiments, 'Pull Request')
            + createSentimentDict(issueSentiments, 'Issues')
            + createSentimentDict(commitSentiments, 'Commit'))
    sdf = pd.DataFrame(rows)

    sns.set_style("whitegrid")
    pal = dict(Positive="Green", Neutral="Gray", Negative="Red")

    # seaborn 0.9 renamed factorplot to catplot and the old alias has since
    # been dropped; prefer the new name but keep working on old installs.
    facetPlot = getattr(sns, 'catplot', None) or sns.factorplot
    g = facetPlot(x="SentimentType", y="Value", col="MessageType",
                  data=sdf, kind="bar", ci=None,
                  palette=pal, saturation=0.5, aspect=0.8)

    (g.set_axis_labels("", "Percentage")
      .set(ylim=(0, 100))
      .set_titles("{col_name}")
      .despine(left=True))

    # Leave headroom above the facets for the figure-level title.
    plt.subplots_adjust(top=0.8)
    g.fig.suptitle("Sentiments by Repo Message Type for " + ownerText.value + '/' + repoText.value)

In [5]:
def handlePRs(repo_data):
    """Plot a histogram of pull-request time-to-close, in days.

    Args:
        repo_data: Repository dict with a ``pullRequests`` connection that
            provides ``edges`` and ``totalCount``.

    NOTE(review): the "opened" number in the title is the connection's
    ``totalCount`` while the "closed" number only counts the edges present
    in ``repo_data`` - presumably at most the first 100; confirm.
    Returns None.
    """
    prTimes = computeResponseTimes(repo_data['pullRequests']['edges'])
    openedPRs = repo_data['pullRequests']['totalCount']
    sns.set()
    cx = plt.figure().gca()

    # Plain Axes.hist replaces sns.distplot(kde=False), which is deprecated
    # since seaborn 0.11; alpha=0.4 matches distplot's default bar opacity.
    cx.hist(prTimes, bins=70, color='red', alpha=0.4)

    titleStr = 'Pull Request Time to Close: ({} opened, {} closed)'.format(
        openedPRs, len(prTimes))
    cx.set(ylabel="", xlabel="Days")
    cx.set_title(titleStr, fontsize=15)
    # Counts are integers, so keep the y ticks on whole numbers.
    cx.yaxis.set_major_locator(MaxNLocator(integer=True))

In [6]:
def handleIssues(repo_data):
    """Plot a histogram of issue time-to-close, in days.

    Args:
        repo_data: Repository dict with an ``issues`` connection that
            provides ``edges`` and ``totalCount``.

    NOTE(review): the "opened" number in the title is the connection's
    ``totalCount`` while the "closed" number only counts the edges present
    in ``repo_data`` - presumably at most the first 100; confirm.
    Returns None.
    """
    issueTimes = computeResponseTimes(repo_data['issues']['edges'])
    openedIssues = repo_data['issues']['totalCount']
    sns.set()
    bx = plt.figure().gca()

    # Plain Axes.hist replaces sns.distplot(kde=False), which is deprecated
    # since seaborn 0.11; alpha=0.4 matches distplot's default bar opacity.
    bx.hist(issueTimes, bins=70, color='red', alpha=0.4)

    titleStr = 'Issue Time to Close: ({} opened, {} closed)'.format(
        openedIssues, len(issueTimes))
    bx.set(ylabel="", xlabel="Days")
    bx.set_title(titleStr, fontsize=15)
    # Counts are integers, so keep the y ticks on whole numbers.
    bx.yaxis.set_major_locator(MaxNLocator(integer=True))

In [7]:
def handleMisc(repo_data):
    """Summarise alert, fork, watcher and stargazer counts as text.

    Args:
        repo_data: Repository dict providing ``vulnerabilityAlerts``,
            ``watchers`` and ``stargazers`` (each with ``totalCount``) and
            ``forkCount``.

    Returns:
        A four-line string, every line terminated by a newline.
    """
    alertCount = repo_data['vulnerabilityAlerts']['totalCount']
    if alertCount == 0:
        alertLine = 'You may want to enable vulnerability alerts\n'
    else:
        alertLine = str(alertCount) + ' resolved vulnerability alerts\n'

    pieces = [
        alertLine,
        str(repo_data['forkCount']) + ' forks\n',
        str(repo_data['watchers']['totalCount']) + ' watchers\n',
        str(repo_data['stargazers']['totalCount']) + ' stargazers\n',
    ]
    return ''.join(pieces)

In [9]:
# Output area that receives the plots and the text summary.
out = widgets.Output()

def on_compute_clicked(b):
    """Button handler: fetch the repository data and render every metric.

    Args:
        b: The clicked button, as passed by ipywidgets (unused).

    Reads the owner, repository and token from the module-level text
    widgets and draws into ``out``.
    """
    # Import locally: a later cell rebinds the global name `display` to the
    # IPython.display module, which would make a bare display(...) call fail.
    from IPython.display import clear_output, display

    with out:
        clear_output(wait=True)
        # Fetch inside the Output context so that a failed request shows its
        # traceback in the widget instead of vanishing into the kernel log.
        repo_data = acquireRepoData(ownerText.value, repoText.value, tokenText.value)
        handleSentiments(repo_data)
        handlePRs(repo_data)
        handleIssues(repo_data)
        # NOTE(review): `visible` may not be a recognised trait on current
        # ipywidgets releases - confirm whether this assignment has any effect.
        outputText.visible = True
        outputText.value = handleMisc(repo_data)
        display(outputText)
        plt.show()

# Input controls read by on_compute_clicked, plus the summary text area.
repoText = widgets.Text(description='Repo')
ownerText = widgets.Text(description='Repo Owner')
tokenText = widgets.Text(description='API Token')
outputText = widgets.Textarea(visible=False)

# Trigger for the metric computation.
computeButton = widgets.Button(description='Compute Metrics')
computeButton.on_click(on_compute_clicked)

# Show the controls in the same order as before, one after another.
display(repoText, ownerText, tokenText, computeButton)

# Last expression of the cell: renders the output area below the controls.
out

Text(value='', description='Repo')

Text(value='', description='Repo Owner')

Text(value='', description='API Token')

Button(description='Compute Metrics', style=ButtonStyle())

Output()

In [None]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objs as go
from dash.dependencies import Input, Output

In [None]:
app = dash.Dash()

# Page layout: a row of inputs with the trigger button, followed by one graph
# per metric.
app.layout = html.Div([
    html.Div([

        html.Div([
            # The prompts are placeholders, not values: as values they would
            # be submitted as the real owner/repo/token unless the user
            # cleared each field by hand.
            dcc.Input(id='owner-id', value='', placeholder='Enter Github repo owner name', type='text'),
            dcc.Input(id='repo-id', value='', placeholder='Enter Github repo name', type='text'),
            # Mask the token so it is not readable on screen.
            dcc.Input(id='token-id', value='', placeholder='Enter Github API token', type='password'),
            html.Button('Compute Metrics', id='button')
        ],
        style={'width': '48%', 'display': 'inline-block'}),
    ]),

    dcc.Graph(id='sentiment-graphic'),
    dcc.Graph(id='PR-graphic'),
    dcc.Graph(id='Issue-graphic')
])

@app.callback(
    dash.dependencies.Output('sentiment-graphic', 'figure'),
    # TODO: the PR and issue graphs are not wired to this callback yet.
    #dash.dependencies.Output('PR-graphic', 'figure'),
    #dash.dependencies.Output('Issue-graphic', 'figure'),
    [dash.dependencies.Input('button', 'n_clicks')],
    [dash.dependencies.State('owner-id', 'value'),
    dash.dependencies.State('repo-id', 'value'),
    dash.dependencies.State('token-id', 'value')])
def update_output(n_clicks, ownerValue=None, repoValue=None, tokenValue=None):
    """Placeholder callback: echo the submitted form values in the figure.

    Dash passes one positional argument per declared Input and State, in
    declaration order, so the signature needs the click count followed by
    the three field values.

    Args:
        n_clicks: Click count of the button; None before the first click.
        ownerValue: Current value of the ``owner-id`` input.
        repoValue: Current value of the ``repo-id`` input.
        tokenValue: Current value of the ``token-id`` input.

    Returns:
        A figure dict, since the output property is ``figure``. It has no
        traces; the echoed values appear as the title.
    """
    if not n_clicks:
        # The callback also fires when the page loads; show an empty figure
        # until the button has actually been pressed.
        return {'data': [], 'layout': {}}

    # NOTE(review): this echoes the API token onto the page. Acceptable only
    # while this is a wiring check - remove before sharing the dashboard.
    message = 'Repo {} Owner {} Token {}'.format(
        repoValue,
        ownerValue,
        tokenValue
    )
    return {'data': [], 'layout': {'title': message}}


from  IPython import display
import os
def show_app(app, port=8055, width=700, height=350, offline=False,
             in_binder=None):
    """Embed the Dash app in the notebook via an iframe and start its server.

    Args:
        app: Dash application to serve.
        port: Local port used both in the iframe URL and for the server.
        width: Value written to the iframe ``width`` attribute.
        height: Value written to the iframe ``height`` attribute.
        offline: When true, switch the app's css and scripts configuration
            to ``serve_locally``.
        in_binder: Accepted but not used by this function.

    Returns:
        Whatever ``app.run_server`` returns.

    Relies on the module-level name ``display`` being the ``IPython.display``
    module (imported just above this function).
    """
    url = 'http://localhost:%d' % port
    iframeTemplate = '<a href="{url}" target="_new">Open in new window</a><hr><iframe src="{url}" width={width} height={height}></iframe>'
    markup = iframeTemplate.format(url=url, width=width, height=height)
    display.display_html(markup, raw=True)

    if offline:
        for assets in (app.css, app.scripts):
            assets.config.serve_locally = True

    # debug must stay off when the server is started from inside Jupyter.
    return app.run_server(debug=False, port=port)


# Embed the dashboard in the notebook and start serving it with show_app's
# defaults (port 8055, 700x350 iframe).
# NOTE(review): run_server presumably blocks this cell until the server stops.
show_app(app)