In [9]:
# imports
import requests
import json
import csv
import time
import datetime
from string import Template
from Config import *

In [10]:
# Variables
headers = {"Authorization": API_TOKEN}
min_stars = 0
max_stars = 1000
last_activity = 90 # within the last __ days
created = 364 * 4 # within the last __ days
min_pull_num = 0 # amount of pull requests a repository needs
watchers = 0 # amount of watchers a repository needs

In [11]:
# Builds the query filter string compatible to github
def query_filter( min_stars, max_stars, last_activity, created ):
    date_last_act = datetime.datetime.now() - datetime.timedelta( days=last_activity )
    date_created = datetime.datetime.now() - datetime.timedelta( days=created )
    stars = f'{min_stars}..{max_stars}'

    return f'is:public archived:false fork:false stars:{stars} pushed:20{date_last_act:%y-%m-%d}..* created:20{date_created:%y-%m-%d}'

In [12]:
# Funtion that uses requests.post to make the API call
def run_query(query, variables):
    request = requests.post('https://api.github.com/graphql', 
                            json={'query': query, 'variables' : variables}, headers=headers)
    if request.status_code == 200:
        return request.json()
    else:
        raise Exception(f'ERROR [{request.status_code}]: Query failed to execute...\nRESPONSE: {request.text}')

In [13]:
def setup_query( end_cursor ) :
    query_string = query_filter( min_stars, max_stars, last_activity, created )
    variables = {
        "query_string" : query_filter( min_stars, max_stars, last_activity, created),
        "end_cursor" : end_cursor
    }
    query = '''
    query($query_string : String!) {
        rateLimit{
            cost
            remaining
            resetAt
        }
        search(query: $query_string, type: REPOSITORY, first:50) {
        pageInfo {
            hasNextPage
            endCursor
        }
            repositoryCount
            nodes {
                ... on Repository {
                    id
                    name
                    owner {
                        login
                    }
                    createdAt
                    isMirror
                    isFork
                    diskUsage
                    primaryLanguage {
                        name
                    }
                    languages(first:100) {
                        totalCount
                        nodes {
                            name
                        }
                    }
                    constributors : mentionableUsers {
                        totalCount
                    }
                    watchers {
                        totalCount
                    }
                    stargazers {
                        totalCount
                    }
                    forkCount
                    issues {
                        totalCount
                    }
                    commits : defaultBranchRef {
                        target {
                            ... on Commit {
                                history {
                                    totalCount
                                }
                            }
                        }
                    }
                    pullRequests {
                        totalCount
                    }
                    releases {
                        totalCount
                    }
                    licenseInfo {
                        id
                        name
                        pseudoLicense
                    }
                    url
                }
            }
        }
    }'''
    query2 = '''
    query($query_string : String!, $end_cursor : String!) {
        rateLimit{
            cost
            remaining
            resetAt
        }
        search(query: $query_string, type: REPOSITORY, first:50, after: $end_cursor) {
        pageInfo {
            hasNextPage
            endCursor
        }
            repositoryCount
            nodes {
                ... on Repository {
                    owner {
                        login
                    }
                    id
                    name
                    description
                    createdAt
                    pushedAt
                    isDisabled
                    isMirror
                    isFork
                    isLocked
                    diskUsage
                    primaryLanguage {
                        name
                    }
                    languages(first:100) {
                        totalCount
                        nodes {
                            name
                        }
                    }
                    constributors : mentionableUsers {
                        totalCount
                    }
                    watchers {
                        totalCount
                    }
                    stargazers {
                        totalCount
                    }
                    forkCount
                    issues {
                        totalCount
                    }
                    commits : defaultBranchRef {
                        target {
                            ... on Commit {
                                history {
                                    totalCount
                                }
                            }
                        }
                    }
                    pullRequests {
                        totalCount
                    }
                    releases {
                        totalCount
                    }
                    licenseInfo {
                        id
                        name
                        pseudoLicense
                    }
                    url
                }
            }
        }
    }'''
    if(end_cursor == "") : return (query, variables)
    else : return (query2, variables)

In [14]:
# Runs the query and iterates through all pages of repositories
def find_repos():

    end_cursor = ""
    end_cursor_string = ""
    hasNextPage = True
    index = 0
    
    print("[WORKING] Running script to collect all repositories. ")
    while( hasNextPage ):
        query = setup_query( end_cursor_string )
        result = run_query( query[0], query[1] )
        print(json.dumps(result))
        #repo_checker( result, db_collection, total_pull_num )

        try:
            # if there is a next page, update the endcursor string and continue loop
            if( result["data"]["search"]["pageInfo"]["hasNextPage"] ):
                end_cursor = result["data"]["search"]["pageInfo"]["endCursor"]
                end_cursor_string = f', after:"{end_cursor}"'
            else:
                hasNextPage = False
        except KeyError:
            hasNextPage = False
            print(json.dumps(result))

        index += 1
        time.sleep(1)

In [15]:
find_repos()

[WORKING] Running script to collect all repositories. 
{"data": {"rateLimit": {"cost": 1, "remaining": 4887, "resetAt": "2020-02-10T21:48:46Z"}, "search": {"pageInfo": {"hasNextPage": true, "endCursor": "Y3Vyc29yOjUw"}, "repositoryCount": 279, "nodes": [{"owner": {"login": "bol-van"}, "id": "MDEwOlJlcG9zaXRvcnk1MTc1NzQ1OA==", "name": "zapret", "description": "\u041e\u0431\u0445\u043e\u0434 DPI \u0432 linux", "createdAt": "2016-02-15T13:30:50Z", "pushedAt": "2020-02-05T13:57:49Z", "isDisabled": false, "isMirror": false, "isFork": false, "isLocked": false, "diskUsage": 10107, "primaryLanguage": {"name": "C"}, "languages": {"totalCount": 5, "nodes": [{"name": "Makefile"}, {"name": "Shell"}, {"name": "C"}, {"name": "C++"}, {"name": "Objective-C"}]}, "constributors": {"totalCount": 3}, "watchers": {"totalCount": 83}, "stargazers": {"totalCount": 915}, "forkCount": 139, "issues": {"totalCount": 43}, "commits": {"target": {"history": {"totalCount": 50}}}, "pullRequests": {"totalCount": 4}, "r

{"errors": [{"type": "INVALID_CURSOR_ARGUMENTS", "path": ["search", "pageInfo", "hasNextPage"], "locations": [{"line": 10, "column": 13}], "message": "`, after:\"Y3Vyc29yOjUw\"` does not appear to be a valid cursor."}]}
{"errors": [{"type": "INVALID_CURSOR_ARGUMENTS", "path": ["search", "pageInfo", "hasNextPage"], "locations": [{"line": 10, "column": 13}], "message": "`, after:\"Y3Vyc29yOjUw\"` does not appear to be a valid cursor."}]}
