In [17]:
!pip install requests
!pip install python-dotenv



In [22]:
import requests
import os

from dotenv import load_dotenv
load_dotenv()

True

In [None]:
GH_KEY = os.getenv('GH_KEY')
GH_ROOT = os.getenv('GH_ROOT')
GH_ACCEPT = os.getenv('GH_ACCEPT')

# Public repositories

**Endpoint**: https://api.gihub.com/repositories

**Method**  : **GET**

**Headers**
- Authorization: Bearer
- Accept       : application/vnd.github+json (optional)

| Status Code | Description |
|-----------|-----------|
| **200 **   | OK    |
| **304**    | Not Modified    |
| **422**    | Validation failed, or the endpoint has been spammed    |



In [None]:
headers = {
    'Authorization': f'Bearer {GH_KEY}',
    'Accept': GH_ACCEPT
}
params = {
    'since': 1000
}
response = requests.request('GET', f'{GH_ROOT}/repositories?since=1000000', headers=headers, params=params)
# Gets 100 repositories
print(f"""
Status Code: {response.status_code}
Records:     {len(response.json())}
Content:     {response.content}
""")


Status Code: 422
Records:     3
Content:     b'{"message":"The since parameter must be an integer.","documentation_url":"https://docs.github.com/rest/repos/repos#list-public-repositories","status":"422"}'



# Commits

**Endpoint**: https://api.gihub.com/repos/OWNER/REPO/commits

**Method**  : **GET**

**Headers**
- Authorization: Bearer
- Accept       : application/vnd.github+json (optional)

**Path Parameters**
- OWNER
- REPO

| Status Code | Description |
|-----------|-----------|
| **200 **   | OK    |
| **400**    | Not Modified    |
| **404**    | NOT FOUND    |
| **409**    | Conflict    |
| **500**    | Internal error / Server Error   |

In [None]:
headers = {
    'Authorization': f'Bearer {GH_KEY}',
    'Accept': GH_ACCEPT
}

response = requests.request('GET', f'{GH_ROOT}/repos/KevinMrkz3221/Data-Source-API-Analyst-Test/commits', headers=headers)

print(f"""
Status Code: {response.status_code}
Records:     {len(response.json())}
Content:     {response.content}
""")


Status Code: 200
Records:     1
Content:     b'[{"sha":"d4e38340606ee14335b2293a9bbd96efe8b0f7d9","node_id":"C_kwDOPAKgedoAKGQ0ZTM4MzQwNjA2ZWUxNDMzNWIyMjkzYTliYmQ5NmVmZThiMGY3ZDk","commit":{"author":{"name":"Kevin Andres Rosales Marquez","email":"kevin.rosales98@hotmail.com","date":"2025-06-24T02:13:07Z"},"committer":{"name":"Kevin Andres Rosales Marquez","email":"kevin.rosales98@hotmail.com","date":"2025-06-24T02:13:07Z"},"message":"first commit","tree":{"sha":"3088daf39cb2e870b8ff65d524cbb244c16f7d74","url":"https://api.github.com/repos/KevinMrkz3221/Data-Source-API-Analyst-Test/git/trees/3088daf39cb2e870b8ff65d524cbb244c16f7d74"},"url":"https://api.github.com/repos/KevinMrkz3221/Data-Source-API-Analyst-Test/git/commits/d4e38340606ee14335b2293a9bbd96efe8b0f7d9","comment_count":0,"verification":{"verified":false,"reason":"unsigned","signature":null,"payload":null,"verified_at":null}},"url":"https://api.github.com/repos/KevinMrkz3221/Data-Source-API-Analyst-Test/commits/d4e38340606ee1

# Contents

**Endpoint**: https://api.gihub.com/repos/OWNER/REPO/contents/PATH

**Method**  : **GET**

**Headers**
- Authorization: Bearer
- Accept       : application/vnd.github+json (optional)

**Path Parameters**
- OWNER
- REPO
- PATH -> Optional

| Status Code | Description |
|-----------|-----------|
| **200**   | OK    |
| **302**    | Found   |
| **304**    | Not modified    |
| **403**    | Forbidden    |
| **404**    | Not Found   |

In [None]:
headers = {
    'Authorization': f'Bearer {GH_KEY}',
    'Accept': GH_ACCEPT
}

response = requests.request('GET', f'{GH_ROOT}/repos/KevinMrkz3221/Data-Source-API-Analyst-Test/content', headers=headers)

print(f"""
Status Code: {response.status_code}
Content:     {response.content}
""")


Status Code: 404
Content:     b'{"message":"Not Found","documentation_url":"https://docs.github.com/rest","status":"404"}'



# Extra

In [33]:
import time

def handle_rate_limit(response):
    if response.status_code == 403 and 'X-RateLimit-Remaining' in response.headers:
        remaining = int(response.headers['X-RateLimit-Remaining'])
        if remaining == 0:
            reset_time = int(response.headers['X-RateLimit-Reset'])
            sleep_time = max(reset_time - int(time.time()), 0) + 1
            print(f"Rate limit reached. Sleeping for {sleep_time} seconds...")
            time.sleep(sleep_time)
            return True
    return False

In [37]:
# initial id from you want to start
# iterations: Number of times you want to made the request incrementing 100 by 100
def pagination_repositories(url, headers, params=None, initial=100,iterations=3):
    results = []
    page = 1
    while page <= iterations:
        if params:
            params['since'] = initial
        else:
            params = {'since': initial}
        response = requests.get(url, headers=headers, params=params)
        if handle_rate_limit(response):
            continue
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            break
        data = response.json()
        if not data:
            break
        results.extend(data)
        print(f"Number of repositories: {len(data)} records")
        print(f"First repository of page \n {data[0]}")

        page += 1
        initial += 100
    return results

In [38]:
commits_url = f"{GH_ROOT}/repositories"
all_commits = pagination_repositories(commits_url, headers)
print(f"Total commits extracted: {len(all_commits)}")
print("Sample commit:", all_commits[0] if all_commits else "No commits found.")

Number of repositories: 100 records
First repository of page 
 {'id': 102, 'node_id': 'MDEwOlJlcG9zaXRvcnkxMDI=', 'name': 'gsa-prototype', 'full_name': 'jnewland/gsa-prototype', 'private': False, 'owner': {'login': 'jnewland', 'id': 47, 'node_id': 'MDQ6VXNlcjQ3', 'avatar_url': 'https://avatars.githubusercontent.com/u/47?v=4', 'gravatar_id': '', 'url': 'https://api.github.com/users/jnewland', 'html_url': 'https://github.com/jnewland', 'followers_url': 'https://api.github.com/users/jnewland/followers', 'following_url': 'https://api.github.com/users/jnewland/following{/other_user}', 'gists_url': 'https://api.github.com/users/jnewland/gists{/gist_id}', 'starred_url': 'https://api.github.com/users/jnewland/starred{/owner}{/repo}', 'subscriptions_url': 'https://api.github.com/users/jnewland/subscriptions', 'organizations_url': 'https://api.github.com/users/jnewland/orgs', 'repos_url': 'https://api.github.com/users/jnewland/repos', 'events_url': 'https://api.github.com/users/jnewland/events{/