In [2]:
import logging
import os

import pandas as pd
import requests
from github import Github

g = Github(os.environ['GITHUBTOKEN'])

# Repository Query (get request)

The following request fetches all repositories with more than 500 stars, sorted by number of stars in descending order.

In [91]:
# Search for repositories with more than 500 stars, most-starred first.
# The query goes through `params` so requests URL-encodes `stars:>500` itself.
request = requests.get(
    'https://api.github.com/search/repositories',
    params={'q': 'stars:>500', 'sort': 'stars', 'order': 'desc'},
)
# Fail here on an HTTP error (e.g. rate limiting) instead of hitting a
# confusing KeyError on 'items' in a later cell.
request.raise_for_status()
results = request.json()
results

{'total_count': 52449,
 'incomplete_results': False,
 'items': [{'id': 28457823,
   'node_id': 'MDEwOlJlcG9zaXRvcnkyODQ1NzgyMw==',
   'name': 'freeCodeCamp',
   'full_name': 'freeCodeCamp/freeCodeCamp',
   'private': False,
   'owner': {'login': 'freeCodeCamp',
    'id': 9892522,
    'node_id': 'MDEyOk9yZ2FuaXphdGlvbjk4OTI1MjI=',
    'avatar_url': 'https://avatars0.githubusercontent.com/u/9892522?v=4',
    'gravatar_id': '',
    'url': 'https://api.github.com/users/freeCodeCamp',
    'html_url': 'https://github.com/freeCodeCamp',
    'followers_url': 'https://api.github.com/users/freeCodeCamp/followers',
    'following_url': 'https://api.github.com/users/freeCodeCamp/following{/other_user}',
    'gists_url': 'https://api.github.com/users/freeCodeCamp/gists{/gist_id}',
    'starred_url': 'https://api.github.com/users/freeCodeCamp/starred{/owner}{/repo}',
    'subscriptions_url': 'https://api.github.com/users/freeCodeCamp/subscriptions',
    'organizations_url': 'https://api.github.com/u

# Request results

Each response is limited to 30 results, so a further request with `page=2` is needed to get the next batch.

The response includes a `Link` header that provides the URL of the next page.

So to iterate over all projects we have to do the following steps:

1. make first request
2. process results
3. parse `request.headers['link']` and get the next link
4. continue with 1 but with the next link

In [87]:
# Number of entries in the 'items' list of the current search response.
len(results['items'])

30

In [80]:
# First comma-separated entry of the response's Link header.
request.headers['link'].split(',')[0]

'<https://api.github.com/search/repositories?q=stars%3A%3E500&sort=stars&order=desc&page=2>; rel="next"'

In [90]:
# requests parses the Link response header into `request.links`, keyed by the
# rel value. Using it avoids a KeyError when the header is absent and leaves
# `next_url` as None (rather than undefined or stale) when there is no next page.
next_url = request.links.get('next', {}).get('url')
if next_url is not None:
    print(f'found url: {next_url}')

found url: https://api.github.com/search/repositories?q=stars%3A%3E500&sort=stars&order=desc&page=2


# Check if repo contains language 'Gherkin'

1. make request for languages
2. find 'Gherkin' in list of languages

In [93]:
# Collect every repository on the current results page whose language
# breakdown includes Gherkin.
wanted = []

for repo in results['items']:
    # Query the languages of *this* repository; indexing results['items'][0]
    # here would test only the first repository over and over.
    languages_response = requests.get(repo['languages_url'])
    # An error payload (e.g. rate limiting) has no language keys and would
    # otherwise be silently counted as "no Gherkin".
    languages_response.raise_for_status()
    if 'Gherkin' in languages_response.json():
        wanted.append(repo)
wanted

[]

# Other stuff

In [46]:
# Build a DataFrame from the 'items' list of the search response and preview
# its first five rows.
df = pd.DataFrame(data=results['items'])
df.head(n=5)

Unnamed: 0,id,node_id,name,full_name,private,owner,html_url,description,fork,url,...,mirror_url,archived,disabled,open_issues_count,license,forks,open_issues,watchers,default_branch,score
0,28457823,MDEwOlJlcG9zaXRvcnkyODQ1NzgyMw==,freeCodeCamp,freeCodeCamp/freeCodeCamp,False,"{'login': 'freeCodeCamp', 'id': 9892522, 'node...",https://github.com/freeCodeCamp/freeCodeCamp,freeCodeCamp.org's open source codebase and cu...,False,https://api.github.com/repos/freeCodeCamp/free...,...,,False,False,292,"{'key': 'bsd-3-clause', 'name': 'BSD 3-Clause ...",25073,292,316958,master,1.0
1,177736533,MDEwOlJlcG9zaXRvcnkxNzc3MzY1MzM=,996.ICU,996icu/996.ICU,False,"{'login': '996icu', 'id': 48942249, 'node_id':...",https://github.com/996icu/996.ICU,Repo for counting stars and contributing. Pres...,False,https://api.github.com/repos/996icu/996.ICU,...,,False,False,16784,"{'key': 'other', 'name': 'Other', 'spdx_id': '...",21116,16784,252571,master,1.0
2,11730342,MDEwOlJlcG9zaXRvcnkxMTczMDM0Mg==,vue,vuejs/vue,False,"{'login': 'vuejs', 'id': 6128107, 'node_id': '...",https://github.com/vuejs/vue,"🖖 Vue.js is a progressive, incrementally-adopt...",False,https://api.github.com/repos/vuejs/vue,...,,False,False,542,"{'key': 'mit', 'name': 'MIT License', 'spdx_id...",27271,542,175598,dev,1.0
3,13491895,MDEwOlJlcG9zaXRvcnkxMzQ5MTg5NQ==,free-programming-books,EbookFoundation/free-programming-books,False,"{'login': 'EbookFoundation', 'id': 14127308, '...",https://github.com/EbookFoundation/free-progra...,:books: Freely available programming books,False,https://api.github.com/repos/EbookFoundation/f...,...,,False,False,56,"{'key': 'other', 'name': 'Other', 'spdx_id': '...",39220,56,166055,master,1.0
4,10270250,MDEwOlJlcG9zaXRvcnkxMDI3MDI1MA==,react,facebook/react,False,"{'login': 'facebook', 'id': 69631, 'node_id': ...",https://github.com/facebook/react,"A declarative, efficient, and flexible JavaScr...",False,https://api.github.com/repos/facebook/react,...,,False,False,651,"{'key': 'mit', 'name': 'MIT License', 'spdx_id...",31668,651,159484,master,1.0


In [47]:
# Show only the repository name and star-count columns.
df.loc[:, ['full_name', 'stargazers_count']]

Unnamed: 0,full_name,stargazers_count
0,freeCodeCamp/freeCodeCamp,316958
1,996icu/996.ICU,252571
2,vuejs/vue,175598
3,EbookFoundation/free-programming-books,166055
4,facebook/react,159484
5,tensorflow/tensorflow,150749
6,sindresorhus/awesome,146483
7,twbs/bootstrap,145379
8,jwasham/coding-interview-university,142716
9,kamranahmedse/developer-roadmap,136065


In [48]:
# List the column labels of the DataFrame built from the search results.
df.columns

Index(['id', 'node_id', 'name', 'full_name', 'private', 'owner', 'html_url',
       'description', 'fork', 'url', 'forks_url', 'keys_url',
       'collaborators_url', 'teams_url', 'hooks_url', 'issue_events_url',
       'events_url', 'assignees_url', 'branches_url', 'tags_url', 'blobs_url',
       'git_tags_url', 'git_refs_url', 'trees_url', 'statuses_url',
       'languages_url', 'stargazers_url', 'contributors_url',
       'subscribers_url', 'subscription_url', 'commits_url', 'git_commits_url',
       'comments_url', 'issue_comment_url', 'contents_url', 'compare_url',
       'merges_url', 'archive_url', 'downloads_url', 'issues_url', 'pulls_url',
       'milestones_url', 'notifications_url', 'labels_url', 'releases_url',
       'deployments_url', 'created_at', 'updated_at', 'pushed_at', 'git_url',
       'ssh_url', 'clone_url', 'svn_url', 'homepage', 'size',
       'stargazers_count', 'watchers_count', 'language', 'has_issues',
       'has_projects', 'has_downloads', 'has_wiki', 'has

In [49]:
# Search for repositories matching `language:Gherkin`, most-starred first.
gherkin_response = requests.get(
    'https://api.github.com/search/repositories',
    params={'q': 'language:Gherkin', 'sort': 'stars', 'order': 'desc'},
)
# Surface HTTP errors (e.g. rate limiting) here rather than storing an
# error payload in `gherkin_repos`.
gherkin_response.raise_for_status()
gherkin_repos = gherkin_response.json()

In [50]:
# Display the parsed JSON returned by the language:Gherkin repository search.
gherkin_repos

{'total_count': 4385,
 'incomplete_results': False,
 'items': [{'id': 5573275,
   'node_id': 'MDEwOlJlcG9zaXRvcnk1NTczMjc1',
   'name': 'sdkman-cli',
   'full_name': 'sdkman/sdkman-cli',
   'private': False,
   'owner': {'login': 'sdkman',
    'id': 2574205,
    'node_id': 'MDEyOk9yZ2FuaXphdGlvbjI1NzQyMDU=',
    'avatar_url': 'https://avatars0.githubusercontent.com/u/2574205?v=4',
    'gravatar_id': '',
    'url': 'https://api.github.com/users/sdkman',
    'html_url': 'https://github.com/sdkman',
    'followers_url': 'https://api.github.com/users/sdkman/followers',
    'following_url': 'https://api.github.com/users/sdkman/following{/other_user}',
    'gists_url': 'https://api.github.com/users/sdkman/gists{/gist_id}',
    'starred_url': 'https://api.github.com/users/sdkman/starred{/owner}{/repo}',
    'subscriptions_url': 'https://api.github.com/users/sdkman/subscriptions',
    'organizations_url': 'https://api.github.com/users/sdkman/orgs',
    'repos_url': 'https://api.github.com/user

In [84]:
# Re-issue the "more than 500 stars" search so `request` holds the first
# results page; the query goes through `params` so requests URL-encodes it.
request = requests.get(
    'https://api.github.com/search/repositories',
    params={'q': 'stars:>500', 'sort': 'stars', 'order': 'desc'},
)
# Fail fast on HTTP errors such as rate limiting.
request.raise_for_status()
request.json()

{'total_count': 52448,
 'incomplete_results': False,
 'items': [{'id': 28457823,
   'node_id': 'MDEwOlJlcG9zaXRvcnkyODQ1NzgyMw==',
   'name': 'freeCodeCamp',
   'full_name': 'freeCodeCamp/freeCodeCamp',
   'private': False,
   'owner': {'login': 'freeCodeCamp',
    'id': 9892522,
    'node_id': 'MDEyOk9yZ2FuaXphdGlvbjk4OTI1MjI=',
    'avatar_url': 'https://avatars0.githubusercontent.com/u/9892522?v=4',
    'gravatar_id': '',
    'url': 'https://api.github.com/users/freeCodeCamp',
    'html_url': 'https://github.com/freeCodeCamp',
    'followers_url': 'https://api.github.com/users/freeCodeCamp/followers',
    'following_url': 'https://api.github.com/users/freeCodeCamp/following{/other_user}',
    'gists_url': 'https://api.github.com/users/freeCodeCamp/gists{/gist_id}',
    'starred_url': 'https://api.github.com/users/freeCodeCamp/starred{/owner}{/repo}',
    'subscriptions_url': 'https://api.github.com/users/freeCodeCamp/subscriptions',
    'organizations_url': 'https://api.github.com/u

In [None]:
# Walk through every page of the search held in `request` and collect the
# repositories whose language breakdown includes Gherkin.
logger = logging.getLogger(__name__)

# Start from an empty list so re-running this cell (or having run the
# single-page check earlier) does not produce duplicate entries.
wanted = []

# NOTE(review): these calls are unauthenticated, so GitHub's rate limits are
# likely to be hit on a full crawl — consider sending the token; confirm.
while True:
    # Stop on an HTTP error instead of failing on a missing 'items' key.
    request.raise_for_status()
    results = request.json()
    for repo in results['items']:
        # Check the languages of the repository being iterated, not items[0].
        languages_response = requests.get(repo['languages_url'])
        languages_response.raise_for_status()
        if 'Gherkin' in languages_response.json():
            # `repo` is a plain dict, so the name is read by key.
            logger.info('found repo: %s', repo['full_name'])
            wanted.append(repo)

    logger.info(wanted)

    # Check for a next page only after processing the current one, so the
    # final page (which has no rel="next" link) is not skipped.
    next_url = request.links.get('next', {}).get('url')
    if next_url is None:
        break
    request = requests.get(next_url)