# API Configuration

## API base URL, access token, and any required headers

In [3]:
import requests
import json
from google.colab import userdata

# Root of the GitHub REST API. It ends with a slash, so endpoint paths are
# appended without a leading "/".
BASE_URL = "https://api.github.com/"

# Token looked up through google.colab's `userdata` under the key "ACCESS_TOKEN".
ACCESS_TOKEN = userdata.get("ACCESS_TOKEN")

# Media types for the Accept header: the default JSON type and the
# text-match variant described in the search section below.
ACCEPT_HEADER_DEFAULT = "application/vnd.github+json"
ACCEPT_HEADER_TEXT_MATCH = "application/vnd.github.text-match+json"

# Headers passed to the requests in this notebook: bearer-token
# authorization plus the default media type.
HEADERS = {
    "Authorization": f"Bearer {ACCESS_TOKEN}",
    "Accept": ACCEPT_HEADER_DEFAULT,
}

def print_response(response):
  """
  Print a readable summary of an HTTP response: status, headers, and body.

  Args:
    response: A response object exposing ``status_code``, ``headers``,
      ``json()`` and ``text`` (e.g. ``requests.Response``).
  """
  # Any 2xx status is a success, not only 200 (e.g. 201 Created, 204 No Content).
  if 200 <= response.status_code < 300:
    print("Success")
  else:
    print(f"Error: {response.status_code}")

  print(f"Response Headers:\n{json.dumps(dict(response.headers), indent=2)}")
  print()

  # Error pages and empty bodies are not valid JSON; fall back to the raw text
  # so the helper can still display them instead of raising.
  try:
    body = json.dumps(response.json(), indent=2)
  except ValueError:
    body = response.text
  print(f"Response Body:\n{body}")

##Handling Rate Limit

In [4]:
import time

def get_remaining_rate_limit(headers: dict) -> int:
  """
  Read how many requests are still allowed according to the response headers.

  Args:
    headers (dict): Response headers; ``X-RateLimit-Remaining`` is looked up here.

  Returns:
    int: The header value converted to an integer, or 1 when the header is absent.
  """
  remaining_raw = headers.get("X-RateLimit-Remaining", 1)
  return int(remaining_raw)

def is_rate_limit_exceeded(status_code: int, headers: dict) -> bool:
  """
  Tell whether a response signals that the rate limit has run out.

  Args:
    status_code (int): HTTP status code of the response.
    headers (dict): Response headers carrying the rate-limit counters.

  Returns:
    bool: True only when the status is 403 or 429 and no requests remain.
  """
  requests_left = get_remaining_rate_limit(headers)
  if status_code not in (403, 429):
    return False
  return requests_left == 0

def handle_rate_limit(headers: dict) -> None:
  """
  Pause execution until the rate limit resets when no requests remain.

  Args:
    headers (dict): Response headers carrying the rate-limit counters.
  """
  requests_left = get_remaining_rate_limit(headers)
  # Without a reset header, fall back to "now" so the computed wait is zero.
  reset_epoch = int(headers.get("X-RateLimit-Reset", time.time()))

  if requests_left != 0:
    return

  wait_seconds = max(reset_epoch - time.time(), 0)
  print(f"Rate limit reached. Sleeping for {wait_seconds:.2f} seconds.")
  # Sleep a few extra seconds past the reset time as a safety buffer.
  time.sleep(wait_seconds + 5)

##Handling Pagination

In [5]:
def next_page_exists(headers: dict) -> bool:
  """
  Report whether the 'Link' header advertises a following page.

  Args:
    headers (dict): The response headers.

  Returns:
    bool: True when some entry of the 'Link' header carries rel="next",
      False when it does not or the header is missing.
  """
  if "Link" not in headers:
    return False

  # The header is a comma-separated list of `<url>; rel="..."` entries.
  for entry in headers["Link"].split(","):
    if 'rel="next"' in entry:
      return True
  return False

def fetch_all_data(
    url: str,
    headers: dict,
    params: dict,
    data_field_name: str | None = None,
) -> dict | list:
  """
  Fetch all data from a paginated API endpoint.

  Pages are requested one after another, starting at ``params["page"]``
  (or 1 when absent), until the response no longer advertises a next page
  or a non-200 status is returned. Rate-limited responses are retried
  after waiting.

  Args:
    url (str): The API endpoint URL.
    headers (dict): Headers for the request.
    params (dict): Query parameters for the request. The dictionary is
      copied, so the caller's object is left untouched.
    data_field_name (str | None): Optional. The field name in the response
      JSON whose list should be aggregated. When omitted, each page's JSON
      is aggregated directly.

  Returns:
    dict | list: A list with the data gathered from all pages fetched
      before the loop ended.
  """
  # Work on a private copy: advancing "page" must not leak into the caller's
  # dict, which is often reused for later requests.
  query = dict(params or {})
  query.setdefault("page", 1)

  all_data = []

  while True:
    response = requests.get(url, headers=headers, params=query)

    if is_rate_limit_exceeded(response.status_code, response.headers):
      handle_rate_limit(response.headers)
      continue  # Retry the same page once the limit has reset.

    if response.status_code != 200:
      # Report via the raw text: error bodies are not guaranteed to be JSON.
      print(f"Error: {response.status_code} - {response.text}")
      break

    # Parse JSON only for successful responses.
    response_data = response.json()

    if data_field_name:
      all_data.extend(response_data.get(data_field_name, []))
    else:
      all_data.extend(response_data if isinstance(response_data, list) else [response_data])

    if not next_page_exists(response.headers):
      break
    query["page"] += 1

  return all_data

#Search Repositories

In [6]:
# Relative path of the repository-search endpoint and the full URL built from it.
SEARCH_REPOSITORIES_PATH = "search/repositories"
SEARCH_REPOSITORIES_URL = f"{BASE_URL}{SEARCH_REPOSITORIES_PATH}"

##Headers

| Name     | Type   | Description|
|----------|--------|-------------------------------------------------------------------------------------------------------------------|
| `accept` | string | `application/vnd.github+json` - is recommended.<br>`application/vnd.github.text-match+json` - when searching for repositories, you can get text match metadata for the name and description fields when you pass the text-match media type. For more details about how to receive highlighted search results, see [Text match metadata](https://docs.github.com/rest/search/search#text-match-metadata).|

##Query Parameters
| Name       | Type      | Description|
|------------|-----------|--------------------------------------------------------------------------------------------------------------|
| `q`        | string    | **Required.** The query contains one or more search keywords and qualifiers. Qualifiers allow you to limit your search to specific areas of GitHub. The REST API supports the same qualifiers as the web interface for GitHub.<br>To learn more about the format of the query, see [Constructing a search query](https://docs.github.com/en/github/searching-for-information-on-github/constructing-a-search-query). See [Searching for repositories](https://docs.github.com/en/search-github/searching-on-github/searching-for-repositories) for a detailed list of qualifiers.                                                                   |
| `sort`     | string    | Sorts the results of your query by number of stars, forks, help-wanted-issues, or how recently the items were updated.<br>**Default:** `best match`<br>**Can be one of:** `stars`, `forks`, `help-wanted-issues`, `updated`                           |
| `order`    | string    | Determines whether the first search result returned is the highest number of matches (`desc`) or lowest number of matches (`asc`).<br>This parameter is ignored unless you provide `sort`.<br>**Default:** `desc`<br>**Can be one of:** `desc`, `asc`                                                                                                                                   |
| `per_page` | integer   | The number of results per page (**max 100**).<br>For more information, see ["Using pagination in the REST API"](https://docs.github.com/rest/guides/using-pagination-in-the-rest-api).<br>**Default:** `30`                                            |
| `page`     | integer   | The page number of the results to fetch.<br>For more information, see ["Using pagination in the REST API"](https://docs.github.com/rest/guides/using-pagination-in-the-rest-api).<br>**Default:** `1`                                             |


In [7]:
# Accepted values for the `sort` and `order` query parameters.
OPTIONS_SORT = ["stars", "help-wanted-issues", "forks", "updated"]
OPTIONS_ORDER = ["desc", "asc"]

# Search query: sorted by stars in descending order, 100 results per page,
# starting at the first page.
params = dict(
    q="machine learning",
    sort=OPTIONS_SORT[0],
    order=OPTIONS_ORDER[0],
    per_page=100,
    page=1,
)

##API Requests

###Single request

In [8]:
# Request one page of search results with the headers and query parameters
# defined earlier, then print the status, headers, and body.
response = requests.get(SEARCH_REPOSITORIES_URL, headers = HEADERS, params = params)
print_response(response)

Success
Response Headers:
{
  "Date": "Sat, 30 Nov 2024 18:03:31 GMT",
  "Content-Type": "application/json; charset=utf-8",
  "Cache-Control": "no-cache",
  "Vary": "Accept, Authorization, Cookie, X-GitHub-OTP,Accept-Encoding, Accept, X-Requested-With",
  "github-authentication-token-expiration": "2024-12-01 00:00:00 +0300",
  "X-GitHub-Media-Type": "github.v3; format=json",
  "Link": "<https://api.github.com/search/repositories?q=machine+learning&sort=stars&order=desc&per_page=100&page=2>; rel=\"next\", <https://api.github.com/search/repositories?q=machine+learning&sort=stars&order=desc&per_page=100&page=10>; rel=\"last\"",
  "x-accepted-github-permissions": "allows_permissionless_access=true",
  "x-github-api-version-selected": "2022-11-28",
  "X-RateLimit-Limit": "30",
  "X-RateLimit-Remaining": "29",
  "X-RateLimit-Reset": "1732989871",
  "X-RateLimit-Used": "1",
  "X-RateLimit-Resource": "search",
  "Access-Control-Expose-Headers": "ETag, Link, Location, Retry-After, X-GitHub-OTP,

###Extract All Repositories (up to 1000)

In [9]:
# Walk the paginated search results and collect the entries stored under the
# "items" key of each page, then print them as indented JSON.
all_repositories = fetch_all_data(SEARCH_REPOSITORIES_URL, headers = HEADERS, params = params, data_field_name="items")
print(f"All Repositories:\n{json.dumps(all_repositories, indent=2)}")

Output hidden; open in https://colab.research.google.com to view.

#List commits

In [10]:
def get_list_commits_path(owner: str, repo: str) -> str:
  """
  Build the URL of the "list commits" endpoint for one repository.

  Args:
    owner (str): The owner of the repository.
    repo (str): The repository name.

  Returns:
    str: BASE_URL followed by ``repos/<owner>/<repo>/commits``.
  """
  commits_url = "{}repos/{}/{}/commits".format(BASE_URL, owner, repo)
  return commits_url

##Headers

| Name     | Type   | Description                                    |
|----------|--------|------------------------------------------------|
| `accept` | string | `application/vnd.github+json` - is recommended.|

## Path Parameters
| Name    | Type   | Description                                               |
|---------|--------|-----------------------------------------------------------|
| `owner` | string | **Required**. The account owner of the repository. The name is not case sensitive.                                                    |
| `repo`  | string | **Required**. The name of the repository without the `.git` extension. The name is not case sensitive.                                |

In [11]:
# Commits endpoint URL for the example repository queried in the cells below.
list_commits_path = get_list_commits_path("qGiS", "QwC2")

##Query Parameters
| Name        | Type     | Description                                        |
|-------------|----------|----------------------------------------------------|
| `sha`       | string   | SHA or branch to start listing commits from. **Default**: the repository’s default branch (usually `main`).                |
| `path`      | string   | Only commits containing this file path will be returned.                                                                     |
| `author`    | string   | GitHub username or email address to use to filter by commit author.                                                                |
| `committer` | string   | GitHub username or email address to use to filter by commit committer.                                                             |
| `since`     | string   | Only show results that were last updated after the given time. This is a timestamp in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) format: `YYYY-MM-DDTHH:MM:SSZ`. Due to Git limitations, timestamps must be between `1970-01-01` and `2099-12-31` (inclusive), or unexpected results may occur.                                                            |
| `until`     | string   | Only commits before this date will be returned. This is a timestamp in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) format: `YYYY-MM-DDTHH:MM:SSZ`. Due to Git limitations, timestamps must be between `1970-01-01` and `2099-12-31` (inclusive), or unexpected results may occur.   |
| `per_page`  | integer  | The number of results per page (**max 100**). For more information, see ["Using pagination in the REST API"](https://docs.github.com/en/rest/guides/using-pagination-in-the-rest-api). **Default**: `30`.      |
| `page`      | integer  | The page number of the results to fetch. For more information, see ["Using pagination in the REST API"](https://docs.github.com/en/rest/guides/using-pagination-in-the-rest-api). **Default**: `1`.           |


In [12]:
# Commit filters for the single request: starting commit, file path, author,
# and date window, plus the pagination settings.
params = dict(
    sha="4ad2e232866b451d341129e69b877d563228d2a6",
    path=".github/workflows/jest.yml",
    author="BenediktSeidlSWM",
    since="2024-05-31",
    until="2024-10-31",
    per_page=100,
    page=1,
)

##API Requests

###Single request

In [13]:
# Request a single page of commits using the filters defined above and print
# the status, headers, and body.
response = requests.get(list_commits_path, headers = HEADERS, params = params)
print_response(response)

Success
Response Headers:
{
  "Date": "Sat, 30 Nov 2024 18:04:00 GMT",
  "Content-Type": "application/json; charset=utf-8",
  "Cache-Control": "private, max-age=60, s-maxage=60",
  "Vary": "Accept, Authorization, Cookie, X-GitHub-OTP,Accept-Encoding, Accept, X-Requested-With",
  "ETag": "W/\"09a2870a2438bdf5970bd4f99019c1e58e657cf09afa8eccb7016f5e60254dc0\"",
  "Last-Modified": "Thu, 27 Jun 2024 07:29:45 GMT",
  "github-authentication-token-expiration": "2024-12-01 00:00:00 +0300",
  "X-GitHub-Media-Type": "github.v3; format=json",
  "x-accepted-github-permissions": "contents=read",
  "x-github-api-version-selected": "2022-11-28",
  "X-RateLimit-Limit": "5000",
  "X-RateLimit-Remaining": "4999",
  "X-RateLimit-Reset": "1732993440",
  "X-RateLimit-Used": "1",
  "X-RateLimit-Resource": "core",
  "Access-Control-Expose-Headers": "ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Used, X-RateLimit-Resource, X-RateLimit-Reset, X-OAuth-Sco

###Extract All Commits

In [None]:
# Only pagination settings are sent here; the commit filters from the single
# request are not included.
params = {
    "per_page": 100,
    "page": 1,
}

# Follow pagination to gather commits from every page. No data_field_name is
# passed, so each page's JSON is added to the result directly.
all_commits = fetch_all_data(list_commits_path, headers = HEADERS, params = params)
print(f"All Commits:\n{json.dumps(all_commits, indent=2)}")

#Get Repository Content

In [None]:
def get_repository_content_path(owner: str, repo: str, path: str) -> str:
  """
  Construct the API endpoint URL for retrieving the contents of a repository.

  Args:
    owner (str): The username or organization name that owns the repository.
    repo (str): The name of the repository.
    path (str): The file or directory path within the repository.

  Returns:
    str: The constructed URL for accessing the repository contents.
  """
  # BASE_URL already ends with "/"; strip it before joining so the result does
  # not contain a double slash ("https://api.github.com//repos/...").
  return f"{BASE_URL.rstrip('/')}/repos/{owner}/{repo}/contents/{path}"

##Headers
| Name    | Type   | Description                                               |
|---------|--------|-----------------------------------------------------------|
| accept  | string | `application/vnd.github+json` - is recommended.<br>`application/vnd.github.raw+json` - returns the raw file contents for files and symlinks.<br>`application/vnd.github.html+json` - returns the file contents in HTML. Markup languages are rendered to HTML using GitHub's open-source Markup library.<br>`application/vnd.github.object+json` - returns the contents in a consistent object format regardless of the content type. For example, instead of an array of objects for a directory, the response will be an object with an `entries` attribute containing the array of objects.                   |


## Path Parameters
| Name  | Type   | Description                                                 |
|-------|--------|-------------------------------------------------------------|
| `owner` | string | **Required**. The account owner of the repository. The name is not case sensitive.                                                            |
| `repo`  | string | **Required**. The name of the repository without the `.git` extension. The name is not case sensitive.                                     |
| `path`  | string | **Required**. The file path or directory in the repository.     |

In [None]:
# Contents endpoint URL for the "src" path of the example repository.
repository_content_path = get_repository_content_path("qGiS", "QgIs", "src")

##Query Parameters
| Name  | Type   | Description                                                 |
|-------|--------|-------------------------------------------------------------|
| ref   | string | The name of the commit/branch/tag. Default: the repository’s default branch.                                                                |


In [None]:
# Read the contents at a specific ref instead of the default branch.
params = dict(ref="empty_interior_wkt")

##API Requests

In [None]:
# Request the contents at the selected ref and print the status, headers,
# and body.
response = requests.get(repository_content_path, headers = HEADERS, params = params)
print_response(response)

Success
Response Headers:
{
  "Date": "Sat, 30 Nov 2024 18:04:22 GMT",
  "Content-Type": "application/json; charset=utf-8",
  "Cache-Control": "private, max-age=60, s-maxage=60",
  "Vary": "Accept, Authorization, Cookie, X-GitHub-OTP,Accept-Encoding, Accept, X-Requested-With",
  "ETag": "W/\"43630bf533b5a3d4821a6a89feed85a222f07254\"",
  "Last-Modified": "Sat, 30 Nov 2024 16:22:27 GMT",
  "github-authentication-token-expiration": "2024-12-01 00:00:00 +0300",
  "X-GitHub-Media-Type": "github.v3; format=json",
  "x-accepted-github-permissions": "contents=read",
  "x-github-api-version-selected": "2022-11-28",
  "X-RateLimit-Limit": "5000",
  "X-RateLimit-Remaining": "4956",
  "X-RateLimit-Reset": "1732993440",
  "X-RateLimit-Used": "44",
  "X-RateLimit-Resource": "core",
  "Access-Control-Expose-Headers": "ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Used, X-RateLimit-Resource, X-RateLimit-Reset, X-OAuth-Scopes, X-Accepted-OAuth-S