In [52]:
import requests
from graphqlclient import GraphQLClient
import json
import csv
from datetime import datetime
import time
from dotenv import DotEnv
import os

In [53]:
client = GraphQLClient('https://api.github.com/graphql')
client.inject_token(f'bearer {env.get("GITHUB_TOKEN")}')
code_gov_api_url = 'https://api.code.gov'
env = DotEnv()
code_gov_headers={
    "X-Api-Key": env.get('CODE_GOV_API_TOKEN')
}
request_size = 4000
request_from = 0
code_gov_params = {
    'permissions.usageType': 'opensource',
    'size': request_size,
    'from': request_from
}
github_repos = []

In [58]:
def get_code_gov_repos():
    response = requests.get(f'{code_gov_api_url}/repos', headers=code_gov_headers, params=code_gov_params)
    return response.json()

In [59]:
def get_github_query(owner, repo_name):
    return '''query {
        repository(owner: "%s", name: "%s") {
            nameWithOwner
            issues {
                totalCount
            }
            forkCount
            watchers {
                totalCount
            }
            stargazers {
                totalCount
            }
            pullRequests {
                totalCount
            }
        }
        rateLimit {
            limit
            cost
            remaining
            resetAt
        }
    }
    ''' % (owner, repo_name)

In [None]:
def get_github_data(owner, repo)
    data = {
        'owner': owner,
        'github_repo': repo,
        'nameWithOwner': None,
        'total_issues': None,
        'total_pull_requests': None,
        'total_forks': None,
        'total_watchers': None,
        'total_stars': None,
    }
    
    if owner and repo:
        response = client.execute(query)
        json_response = json.loads(response)
        if json_response['data']:
            data['nameWithOwner'] = json_response['data']['nameWithOwner']
            data['total_issues'] = json_response['data']['issues']['totalCount']
            data['total_pull_requests'] = json_response['data']['pullRequests']['totalCount']
            data['total_forks'] = json_response['data']['forkCount']
            data['total_watchers'] = json_response['data']['watchers']['totalCount']
            data['total_stars'] = json_response['data']['stargazers']['totalCount']

        remaining = json_response['data']['rateLimit']['remaining']
        limit = json_response['data']['rateLimit']['limit']
        percent_remaining = remaining / limit
        reset_at = json_response['data']['rateLimit']['resetAt']
        if percent_remaining < 0.15:
            reset_at = datetime.strptime(reset_at, '%Y-%m-%dT%H:%M:%SZ')
            current_time = datetime.now()
            time_diff = current_time - reset_at
            seconds = time_diff.total_seconds()
            time.sleep(seconds)
        else:
            time.sleep(2)

    return data

In [61]:
code_gov_json = get_code_gov_repos()

total = code_gov_json['total']

print(f"Fetched {total}")

repos = code_gov_json['repos']

github_repos = [(repo['repositoryURL'], repo['name']) for repo in repos if 'github' in repo['repositoryURL']]
github_data = []
for repo in github_repos:
    url_breakup = repo[0].split('/')
    if len(url_breakup) == 4:
        print(f'{repo[1]} - does not have a correct github url')
    else:
        owner, repo_name = url_breakup[-2:]
        response = client.execute(get_github_query(owner, repo_name))
        github_data.append(json.loads(response))
        
# while total_left > 0:
#     code_gov_params['from'] = request_from
#     response = requests.get(f'{code_gov_api_url}/repos', headers=code_gov_headers, params=code_gov_params)


Fetched 3815
usepa-harmonization - does not have a correct github url
FDTool - does not have a correct github url
Phytoplankton data analysis project - does not have a correct github url
BMDS Model Averaging - does not have a correct github url
GREENSCOPE Tool - does not have a correct github url
Montana Data Search Tools - Demo Code - does not have a correct github url
Water Network Tool for Resilience - does not have a correct github url
US EPA Region 7 Introduction to R Workshop - does not have a correct github url
Drupal WebCMS GitHub Repository - does not have a correct github url
CityWaterBalance - does not have a correct github url
modelling_hab_indices - does not have a correct github url
dev_1_fish_advisories - does not have a correct github url
Create public APEX Page - does not have a correct github url
Software Design, Development and Implementation QA Tracking Database Model - does not have a correct github url
Envite - 2.0 - does not have a correct github url
EPAs Avoided

HTTPError: HTTP Error 502: Bad Gateway

In [64]:
print(github_data)

[{'data': {'repository': {'nameWithOwner': 'nsacyber/Windows-Secure-Host-Baseline', 'issues': {'totalCount': 58}, 'forkCount': 160, 'watchers': {'totalCount': 171}, 'stargazers': {'totalCount': 902}, 'pullRequests': {'totalCount': 6}}}}, {'data': {'repository': {'nameWithOwner': 'nsacyber/WALKOFF-Apps', 'issues': {'totalCount': 1}, 'forkCount': 19, 'watchers': {'totalCount': 20}, 'stargazers': {'totalCount': 53}, 'pullRequests': {'totalCount': 2}}}}, {'data': {'repository': {'nameWithOwner': 'usnationalarchives/Twitterbot', 'issues': {'totalCount': 1}, 'forkCount': 4, 'watchers': {'totalCount': 6}, 'stargazers': {'totalCount': 3}, 'pullRequests': {'totalCount': 0}}}}, {'data': {'repository': {'nameWithOwner': 'fecgov/fec-proxy', 'issues': {'totalCount': 22}, 'forkCount': 3, 'watchers': {'totalCount': 24}, 'stargazers': {'totalCount': 3}, 'pullRequests': {'totalCount': 74}}}}, {'data': {'repository': {'nameWithOwner': 'fecgov/fec-pattern-library', 'issues': {'totalCount': 63}, 'forkCoun