In [1]:
import requests
import pandas as pd

In [3]:
# Function to fetch all pages of data
def fetch_all_pages(url, params, headers, timeout=30):
    """Collect the JSON items from every page of a paginated GET endpoint.

    Pages are discovered by following the ``next`` relation exposed through
    ``response.links`` until no further page is advertised.

    Args:
        url: URL of the first page to request.
        params: Query parameters for the first request. They are sent only
            once; follow-up requests use the ``next`` URL as returned by the
            server.
        headers: HTTP headers sent with every request.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        list: The items of all pages that were fetched successfully. When a
        page answers with a status other than 200, an error line is printed
        and the items gathered up to that point are returned.
    """
    all_data = []
    while url:
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        if response.status_code != 200:
            print(f"Error code {response.status_code}: Failed to fetch data from {url}")
            break
        all_data.extend(response.json())
        # GitHub's pagination links already embed the query string of the
        # original request, so re-sending `params` would duplicate every key
        # (and a caller-supplied `page` would fight the link's own `page`).
        params = None
        url = response.links.get('next', {}).get('url')  # None ends the loop
    return all_data

def fetch_data(owner, repo, params=None, wanted_data="issues", per_page=100):
    """Fetch every page of one resource collection of a GitHub repository.

    Builds ``https://api.github.com/repos/{owner}/{repo}/{wanted_data}`` with
    a ``per_page`` query parameter and delegates paging to
    ``fetch_all_pages``.

    Args:
        owner: Repository owner, interpolated into the URL path.
        repo: Repository name, interpolated into the URL path.
        params: Optional dict of extra query parameters. ``None`` (the
            default) means no extra parameters.
        wanted_data: Path segment after the repository, e.g. ``"issues"``,
            ``"milestones"``, ``"releases"`` or ``"branches"``.
        per_page: Value of the ``per_page`` query parameter placed in the URL.

    Returns:
        list: Whatever ``fetch_all_pages`` returns for the built URL.
    """
    # GitHub API endpoints
    base_url = f"https://api.github.com/repos/{owner}/{repo}"
    url = f"{base_url}/{wanted_data}?per_page={per_page}"
    headers = {'User-Agent': 'request'}
    # Work on a private copy: a `{}` default would be shared between calls,
    # and the caller's own dict should never be exposed to downstream changes.
    query = dict(params) if params else {}
    return fetch_all_pages(url, query, headers)

In [4]:
# Repository to query; both values are interpolated into the API URL by
# fetch_data and reused by every fetch cell in this notebook.
owner = "rails"
repo = "rails"

In [5]:
# Fetch all pages of the repository's "milestones" collection and load the
# returned records into a DataFrame.
wanted_data = "milestones"
milestones_data = fetch_data(owner=owner, repo=repo, wanted_data=wanted_data)
milestones_df = pd.DataFrame.from_records(milestones_data)
# Last expression: show which fields each milestone record carries.
milestones_df.columns

Index(['url', 'html_url', 'labels_url', 'id', 'node_id', 'number', 'title',
       'description', 'creator', 'open_issues', 'closed_issues', 'state',
       'created_at', 'updated_at', 'due_on', 'closed_at'],
      dtype='object')

In [6]:
# Inspect the first milestone record (positional row 0) field by field.
milestones_df.iloc[0]

url              https://api.github.com/repos/rails/rails/miles...
html_url               https://github.com/rails/rails/milestone/75
labels_url       https://api.github.com/repos/rails/rails/miles...
id                                                         5391770
node_id                               MDk6TWlsZXN0b25lNTM5MTc3MA==
number                                                          75
title                                                        6.0.x
description                                                       
creator          {'login': 'eugeneius', 'id': 432189, 'node_id'...
open_issues                                                      3
closed_issues                                                   28
state                                                         open
created_at                                    2020-05-06T20:51:20Z
updated_at                                    2023-10-09T00:01:01Z
due_on                                                        

In [5]:
# Fetch all pages of the repository's "releases" collection.
# NOTE(review): `wanted_data` is a notebook-level name reassigned per section,
# so its value depends on which fetch cell ran last.
wanted_data = "releases"
releases_data = fetch_data(owner=owner, repo=repo, wanted_data=wanted_data)

In [7]:
# Build the DataFrame in its own cell so it can be re-run without fetching
# the releases again.
releases_df = pd.DataFrame.from_records(releases_data)
# Last expression: show which fields each release record carries.
releases_df.columns

Index(['url', 'assets_url', 'upload_url', 'html_url', 'id', 'author',
       'node_id', 'tag_name', 'target_commitish', 'name', 'draft',
       'prerelease', 'created_at', 'published_at', 'assets', 'tarball_url',
       'zipball_url', 'body', 'reactions'],
      dtype='object')

In [22]:
# Narrow the releases table to a handful of columns for display; the result
# is a new DataFrame and `releases_df` itself is left unchanged.
releases_df[["name", "prerelease", "created_at", "published_at", "body", "target_commitish"]]

Unnamed: 0,name,prerelease,created_at,published_at,body,target_commitish
0,,False,2024-02-21T21:45:37Z,2024-02-21T21:52:08Z,\r\n## Active Support\r\n\r\n* No changes.\r...,main
1,7.1.3.1,False,2024-02-21T18:46:05Z,2024-02-21T18:55:01Z,\r\n## Active Support\r\n\r\n* No changes.\r...,main
2,7.0.8.1,False,2024-02-21T18:42:34Z,2024-02-21T18:53:35Z,\r\n## Active Support\r\n\r\n* No changes.\r...,main
3,6.1.7.7,False,2024-02-21T18:38:36Z,2024-02-21T18:51:33Z,\r\n## Active Support\r\n\r\n* No changes.\r...,main
4,7.1.3,False,2024-01-16T22:53:08Z,2024-01-16T23:02:49Z,\r\n## Active Support\r\n\r\n* Handle nil `b...,main
...,...,...,...,...,...,...
109,5.1.7,False,2019-03-28T02:46:34Z,2019-03-28T02:53:55Z,## Active Support\r\n\r\n* No changes.\r\n\r...,master
110,5.2.3.rc1,True,2019-03-22T03:34:11Z,2019-03-22T03:38:45Z,## Active Support\r\n\r\n* Add `ActiveSuppor...,master
111,5.1.7.rc1,True,2019-03-22T04:11:59Z,2019-03-22T04:18:42Z,## Active Support\r\n\r\n* No changes.\r\n\r...,master
112,6.0.0.beta1,True,2019-01-18T20:43:03Z,2019-01-18T21:32:44Z,## Active Support\r\n\r\n* Remove deprecated...,master


In [23]:
# Fetch all pages of the repository's "branches" collection.
wanted_data = "branches"
branches_data = fetch_data(owner=owner, repo=repo, wanted_data=wanted_data)

In [24]:
# Build the DataFrame in its own cell so it can be re-run without fetching
# the branches again.
branches_df = pd.DataFrame.from_records(branches_data)
# Last expression: show which fields each branch record carries.
branches_df.columns

Index(['name', 'commit', 'protected'], dtype='object')

In [25]:
# Display the branches table; the `commit` column holds the nested dict
# returned by the API rather than a flat value.
branches_df

Unnamed: 0,name,commit,protected
0,1-2-stable,{'sha': '5b3f7563ae1b4a7160fda7fe34240d40c5777...,True
1,2-0-stable,{'sha': '81d828a14c82b882e31612431a56f830bdc10...,True
2,2-1-stable,{'sha': 'b5d759fd2848146f7ee7a4c1b1a4be39e2f1a...,True
3,2-2-stable,{'sha': 'c6cb5a5ab00ac9e857e5b2757d2bce6a5ad14...,True
4,2-3-stable,{'sha': '89322cd467fee8d4fcc16f67a9e7fce5817f7...,True
...,...,...,...
80,zzak/39921,{'sha': 'c57a7cf6fb4764576451adf744d13be7d4c98...,False
81,zzak-39921,{'sha': 'c57a7cf6fb4764576451adf744d13be7d4c98...,False
82,zzak-debug-ci,{'sha': 'cfb574ad444b69bf48110534f747c5b6c2d4a...,False
83,zzak-patch-1,{'sha': '93974b7afd75924da3d421befeb117e46fc39...,False
