In [1]:
import requests # To make HTTP requests
import pandas as pd # For data manipulation, specifically to transform responses JSON data to DataFrame
import os # To handle file paths
import logging # For logging execution messages
import time # To introduce delays between requests when getting rate-limited

# Configure logging messages format
logging.basicConfig(format='%(levelname)s: %(message)s')
logging.getLogger().setLevel(logging.INFO)

BASE_URL = "https://api.github.com/" # Base URL for the GitHub API

# Optional github API token set

In [2]:
GITHUB_TOKEN = input("Enter your GitHub token: ").strip()  # Prompt user for GitHub token

if GITHUB_TOKEN is None or GITHUB_TOKEN == "":
    logging.info("GITHUB_TOKEN was not set. Some endpoints may not work, others may return limited data and the rate limit is smaller.")
    GITHUB_TOKEN = None
else:
    logging.info("GITHUB_TOKEN is set.")


INFO: GITHUB_TOKEN was not set. Some endpoints may not work, others may return limited data and the rate limit is smaller.


# Utility functions

Here I define some utility functions.

In [3]:
# Function to add authentication header if GITHUB_TOKEN was provided in the cell above
def add_auth_header_if_set(headers=None):
    """Add authentication header if GITHUB_TOKEN is provided."""
    if headers is None:
        headers = {}
    if GITHUB_TOKEN:
        headers['Authorization'] = f'Bearer {GITHUB_TOKEN}'
    return headers

# Function to save JSON response data to a JSON file
def save_json_to_file(data, filename, output_dir='output'):
    """Save JSON data to a file."""
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    file_path = os.path.join(output_dir, filename)
    with open(file_path, 'w') as f:
        import json
        json.dump(data, f, indent=4)
    logging.info(f"Data saved to {file_path}")

# Function to save JSON response data to a CSV file using pandas function 'json_normalize'
def save_json_to_csv(data, filename, output_dir='output'):
    """Save JSON data to a CSV file."""
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    file_path = os.path.join(output_dir, filename) 
    df = pd.json_normalize(data)
    df.to_csv(file_path, index=False)
    logging.info(f"Data saved to {file_path}")


# Function used to filter nested fields from the response dictionary got from the GitHub API
def extract_nested_fields(item, fields):
    """Recursively extract nested fields from a dictionary."""
    result = {}
    for field in fields:
        if isinstance(field, dict):
            # Handle nested fields like {"commit": ["message", {"author": ["name"]}]}
            for key, subfields in field.items():
                if key in item and isinstance(item[key], dict):
                    result[key] = extract_nested_fields(item[key], subfields)
        elif isinstance(field, str):
            # Handle simple string fields
            if field in item:
                result[field] = item[field]
    return result



# Rate limit check

In [4]:
def get_rate_limit():
    """Fetches the rate limit status for the authenticated user."""
    endpoint_path = "rate_limit"
    headers = {
    "Accept": "application/vnd.github+json",
    "X-GitHub-Api-Version": "2022-11-28"
    }
    headers = add_auth_header_if_set()
    response = requests.get(
        BASE_URL + endpoint_path,
        headers=headers
    )
    if response.status_code == 200:
        return response.json()
    else:
        logging.error(f"Failed to fetch rate limit: {response.status_code} - {response.text}")
        return []
    

In [5]:
rate_limit_json = get_rate_limit()
core_rate_limit = rate_limit_json.get('resources', {}).get('core', {})

# Calculate the time to reset the rate limit
reset_datetime = pd.to_datetime(core_rate_limit.get('reset', 'N/A'), unit='s', utc=True)
time_to_reset = reset_datetime - pd.Timestamp.utcnow()
time_to_reset_comps = time_to_reset.components

# Log the rate limit information
logging.info(f"Rate limit information for the current {'authenticated' if GITHUB_TOKEN else 'unauthenticated'} user:")
logging.info(f"Limit: {core_rate_limit.get('limit', 'N/A')}")
logging.info(f"Remaining: {core_rate_limit.get('remaining', 'N/A')}")
logging.info(f"Used: {core_rate_limit.get('used', 'N/A')}")
logging.info(f"Reset: {reset_datetime} (in {time_to_reset_comps.minutes}m {time_to_reset_comps.seconds}s)")


INFO: Rate limit information for the current unauthenticated user:
INFO: Limit: 60
INFO: Remaining: 52
INFO: Used: 8
INFO: Reset: 2025-06-24 04:41:45+00:00 (in 13m 24s)


# List public repositories

Documentation: https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-public-repositories

In [6]:
def get_public_repos(
        since_repository_id=None,
        filter_fields=False, fields=["name","description","html_url"]
    ):
    """Fetch repositories for a given organization."""
    
    endpoint_path = "/repositories"
    
    headers = {
    "Accept": "application/vnd.github+json",
    "X-GitHub-Api-Version": "2022-11-28"
    }
    headers = add_auth_header_if_set()
    
    query_params = {}
    if since_repository_id:
        if not isinstance(since_repository_id, int):
            raise ValueError("Since parameter must be an integer representing the repository ID.")
        query_params['since'] = since_repository_id
    
    response = requests.get(
        BASE_URL + endpoint_path + ("?" + "&".join(f"{key}={value}" for key, value in query_params.items()) if query_params else ""),
        headers=headers
    )

    if response.status_code == 200:
        if filter_fields:
            # Filter the response to include only specified fields
            contents = response.json()
            return [{key: item[key] for key in fields if key in item} for item in contents]
        else:
            return response.json()
    else:
        logging.error(f"Failed to fetch repositories: {response.status_code} - {response.text}")
        return []


In [7]:
response_json = get_public_repos(
    since_repository_id = 927842, 
    filter_fields=True)

save_json_to_file(response_json, "repos.json")  # Save the response to a file
save_json_to_csv(response_json, "repos.csv")  # Save the response to a CSV file
df = pd.json_normalize(response_json)  # Convert JSON to DataFrame
df

INFO: Data saved to output\repos.json
INFO: Data saved to output\repos.csv


Unnamed: 0,name,description,html_url
0,unit1,The unit 1 project for se362,https://github.com/SageKaz/unit1
1,soundslike,new search command filter based on soundex,https://github.com/marinus/soundslike
2,rack-esi,An implementation of a small (but still very u...,https://github.com/ook/rack-esi
3,batch_translations,Helpers to allow saving multiple Globalize2 tr...,https://github.com/eliasbaixas/batch_translations
4,sessionpicker,A session picker for ScaleCamp,https://github.com/dvydra/sessionpicker
...,...,...,...
95,MBProgressHUD,"MBProgressHUD, an iOS activity indicator view ...",https://github.com/NickTrienens/MBProgressHUD
96,myspec,,https://github.com/mahhek/myspec
97,python-calais,"my fork of python calais (for submodules, most...",https://github.com/eyeseast/python-calais
98,erlmediaserver,Erlang upnp mediaserver,https://github.com/joearms/erlmediaserver


# List organization repositories

Documentation: https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-organization-repositories

In [8]:
def get_org_repos(
        org,
        type=None, sort=None, direction=None,
        per_page=100, page=1,
        filter_fields=False, fields=["name","description","homepage","stargazers_count","watchers_count","created_at","updated_at","pushed_at","size","git_url","html_url"]
    ):

    """Fetch repositories for a given organization."""
    
    endpoint_path = "orgs/{org}/repos"
    
    headers = {
    "Accept": "application/vnd.github+json",
    "X-GitHub-Api-Version": "2022-11-28"
    }
    headers = add_auth_header_if_set()
    
    query_params = {}
    if type:
        if type not in ['all', 'public', 'private', 'forks', 'sources', 'member']:
            raise ValueError("Type must be one of: 'all', 'public', 'private', 'forks', 'sources', 'member'.")
        query_params['type'] = type
    if sort:
        if sort not in ['created', 'updated', 'pushed', 'full_name']:
            raise ValueError("Sort must be one of: 'created', 'updated', 'pushed', 'full_name'.")
        query_params['sort'] = sort
    if direction:
        if direction not in ['asc', 'desc']:
            raise ValueError("Direction must be either 'asc' or 'desc'.")
        query_params['direction'] = direction
    if per_page:
        if not isinstance(per_page, int) or per_page <= 0:
            raise ValueError("Per page must be a positive integer.")
        if per_page > 100:
            logging.warning("The maximum allowed per_page is 100, value was updated to 100.") # GitHub API allows a maximum of 100 items per page
            per_page = 100
        query_params['per_page'] = per_page
    if page:
        if not isinstance(page, int) or page <= 0:
            raise ValueError("Page must be a positive integer.")
        query_params['page'] = page

    response = requests.get(
        BASE_URL + endpoint_path.format(org=org) + ("?" + "&".join(f"{key}={value}" for key, value in query_params.items()) if query_params else "") ,
        headers=headers
    )
    
    if response.status_code == 200:
        if filter_fields:
            # Filter out fields that are not relevant
            contents = response.json()
            return [{key: item[key] for key in fields if key in item} for item in contents]
        else:
            # Return all contents including directories
            return response.json()
    
    else:
        logging.error(f"Failed to fetch repositories for '{org}': {response.status_code} - {response.json().get('message', 'No message')}")
        return []


In [9]:
response_json = get_org_repos(
    org = "freecodecamp", 
    type = "all",
    sort = "created",
    direction = "desc",
    per_page = 10,
    page = 1,
    filter_fields=True
    )

save_json_to_file(response_json, "org_repos.json")  # Save the response to a file
save_json_to_csv(response_json, "org_repos.csv")  # Save the response to a CSV file
df = pd.json_normalize(response_json)  # Convert JSON to DataFrame
df

INFO: Data saved to output\org_repos.json
INFO: Data saved to output\org_repos.csv


Unnamed: 0,name,description,homepage,stargazers_count,watchers_count,created_at,updated_at,pushed_at,size,git_url,html_url
0,exam-creator,,,0,0,2025-06-20T13:50:18Z,2025-06-23T12:25:58Z,2025-06-23T13:24:34Z,133,git://github.com/freeCodeCamp/exam-creator.git,https://github.com/freeCodeCamp/exam-creator
1,language-curricula,This repository will be used as a source for t...,,0,0,2025-05-27T20:07:18Z,2025-05-27T20:43:01Z,2025-05-27T20:15:43Z,483,git://github.com/freeCodeCamp/language-curricu...,https://github.com/freeCodeCamp/language-curri...
2,api-server,The legacy (loopback.io) based API,,1,1,2025-05-25T06:39:49Z,2025-06-18T20:48:02Z,2025-05-25T06:40:01Z,3548,git://github.com/freeCodeCamp/api-server.git,https://github.com/freeCodeCamp/api-server
3,daily-challenges,,,0,0,2025-03-21T13:42:10Z,2025-06-10T17:20:29Z,2025-06-10T17:20:26Z,100,git://github.com/freeCodeCamp/daily-challenges...,https://github.com/freeCodeCamp/daily-challenges
4,learn-relational-databases-by-building-a-datab...,,,3,3,2025-02-11T15:30:41Z,2025-06-13T18:06:09Z,2025-02-11T18:21:24Z,539,git://github.com/freeCodeCamp/learn-relational...,https://github.com/freeCodeCamp/learn-relation...
5,podcast-upload-app,,,0,0,2024-12-03T14:41:41Z,2025-02-27T07:41:34Z,2024-12-05T08:24:10Z,215,git://github.com/freeCodeCamp/podcast-upload-a...,https://github.com/freeCodeCamp/podcast-upload...
6,exam-env,The freeCodeCamp Exam Environment desktop appl...,,10,10,2024-11-11T23:38:45Z,2025-05-22T12:26:56Z,2025-05-22T12:42:39Z,2239,git://github.com/freeCodeCamp/exam-env.git,https://github.com/freeCodeCamp/exam-env
7,campfire-vscode-extension,,,2,2,2024-09-04T15:27:49Z,2025-02-27T07:41:26Z,2024-09-12T15:16:53Z,509,git://github.com/freeCodeCamp/campfire-vscode-...,https://github.com/freeCodeCamp/campfire-vscod...
8,articles-auto-translate-action,,,1,1,2024-07-28T07:12:28Z,2025-02-05T10:07:38Z,2025-02-05T10:07:34Z,543,git://github.com/freeCodeCamp/articles-auto-tr...,https://github.com/freeCodeCamp/articles-auto-...
9,article-webpage-to-markdown-action,Automatically generate Markdown files based on...,,2,2,2024-06-29T14:54:41Z,2025-02-27T07:41:20Z,2024-08-18T11:20:23Z,2228,git://github.com/freeCodeCamp/article-webpage-...,https://github.com/freeCodeCamp/article-webpag...


# Get repository content

Official documentation: https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28#get-repository-content

In [10]:
def get_repo_contents(
        owner, repo, path="", 
        filter_fields=False, fields=["name","path","type","size","html_url","download_url"]):
    """Fetch contents of a repository."""
    
    endpoint_path = "repos/{owner}/{repo}/contents/{path}"
    
    headers = {
    "Accept": "application/vnd.github+json",
    "X-GitHub-Api-Version": "2022-11-28"
    }
    headers = add_auth_header_if_set(headers)

    

    response = requests.get(
        BASE_URL + endpoint_path.format(owner=owner, repo=repo, path=path),
        headers=headers
    )
    
    if response.status_code == 200:
        if filter_fields:
            contents = response.json()
            return [{key: item[key] for key in fields if key in item} for item in contents]
        else:
            return response.json()
    
    else:
        logging.error(f"Failed to fetch contents for '{owner}/{repo}': {response.status_code} - {response.json().get('message', 'No message')}")
        return []


In [11]:
response_json = get_repo_contents(
    owner = "freeCodeCamp", 
    repo = "freeCodeCamp", 
    path = "api/src", 
    
    filter_fields=True)  # Example usage, replace with your owner and repo

save_json_to_file(response_json, "repo_contents.json")
save_json_to_csv(response_json, "repo_contents.csv")
df = pd.json_normalize(response_json)
df

INFO: Data saved to output\repo_contents.json
INFO: Data saved to output\repo_contents.csv


Unnamed: 0,name,path,type,size,html_url,download_url
0,app.ts,api/src/app.ts,file,8409,https://github.com/freeCodeCamp/freeCodeCamp/b...,https://raw.githubusercontent.com/freeCodeCamp...
1,db,api/src/db,dir,0,https://github.com/freeCodeCamp/freeCodeCamp/t...,
2,exam-environment,api/src/exam-environment,dir,0,https://github.com/freeCodeCamp/freeCodeCamp/t...,
3,instrument.ts,api/src/instrument.ts,file,633,https://github.com/freeCodeCamp/freeCodeCamp/b...,https://raw.githubusercontent.com/freeCodeCamp...
4,plugins,api/src/plugins,dir,0,https://github.com/freeCodeCamp/freeCodeCamp/t...,
5,reset.d.ts,api/src/reset.d.ts,file,37,https://github.com/freeCodeCamp/freeCodeCamp/b...,https://raw.githubusercontent.com/freeCodeCamp...
6,routes,api/src/routes,dir,0,https://github.com/freeCodeCamp/freeCodeCamp/t...,
7,schema.test.ts,api/src/schema.test.ts,file,1892,https://github.com/freeCodeCamp/freeCodeCamp/b...,https://raw.githubusercontent.com/freeCodeCamp...
8,schemas.ts,api/src/schemas.ts,file,3303,https://github.com/freeCodeCamp/freeCodeCamp/b...,https://raw.githubusercontent.com/freeCodeCamp...
9,schemas,api/src/schemas,dir,0,https://github.com/freeCodeCamp/freeCodeCamp/t...,


# Get commits list

Official documentation: https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28#list-commits

In [12]:
def get_commits(
        owner, repo, 
        path=None, author=None,since=None, until=None, 
        per_page=None, page=None, 
        filter_fields=False, fields=["html_url", {"commit": ["message", {"author": ["name", "email", "date"]}, {"verification": ["verified"]}]}]
    ):
    
    endpoint_path = "repos/{owner}/{repo}/commits"

    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28"
    }

    headers = add_auth_header_if_set(headers)
    
    query_params = {}
    if path:
        if not isinstance(path, str):
            raise ValueError("Path must be a string.")
        query_params['path'] = path
    if author:
        if not isinstance(author, str):
            raise ValueError("Author must be a string.")
        query_params['author'] = author
    if since:
        if not isinstance(since, str) or pd.to_datetime(since, errors='coerce') is pd.NaT:
            raise ValueError("Since must be a string in ISO 8601 format (e.g., '2023-01-01T00:00:00Z').") # GitHub API expects dates in ISO 8601 format
        query_params['since'] = since
    if until:
        if not isinstance(until, str) or pd.to_datetime(until, errors='coerce') is pd.NaT:
            raise ValueError("Until must be a string in ISO 8601 format (e.g., '2023-01-01T00:00:00Z').") # GitHub API expects dates in ISO 8601 format
        query_params['until'] = until
    if per_page:
        if not isinstance(per_page, int) or per_page <= 0:
            raise ValueError("Per page must be a positive integer.")
        if per_page > 100:
            logging.warning("The maximum allowed per_page is 100, value was updated to 100.") # GitHub API allows a maximum of 100 items per page
            per_page = 100
        query_params['per_page'] = per_page
    if page:
        if not isinstance(page, int) or page <= 0:
            raise ValueError("Page must be a positive integer.")
        query_params['page'] = page
    
    response = requests.get(
        BASE_URL + endpoint_path.format(owner=owner, repo=repo) + ("?" + "&".join(f"{key}={value}" for key, value in query_params.items()) if query_params else ""),
        headers=headers
    )
    
    if response.status_code == 200:
        if filter_fields:
            contents = response.json()
            return [extract_nested_fields(item, fields) for item in contents] # Extract only specified fields using the extract_nested_fields function defined at the beginning of the notebook
        else:
            return response.json()
    
    else:
        error_message = f"Failed to fetch commits: {response.status_code} - {response.json().get('message', 'No message')}"
        raise ValueError(error_message)


In [13]:
# response_json = get_commits("freeCodeCamp", "freeCodeCamp", filter_fields=True)
response_json = get_commits(
    per_page=10,
    page=1,
    owner = "freeCodeCamp", 
    repo = "freeCodeCamp",
    path = "api/src",
    author = "ojeytonwilliams@gmail.com",
    since = "2020-01-01T00:00:00Z",
    until = "2025-12-31T23:59:59Z",
    filter_fields=True
    )

save_json_to_file(response_json, "commits.json")
save_json_to_csv(response_json, "commits.csv")
df = pd.json_normalize(response_json)
df


INFO: Data saved to output\commits.json
INFO: Data saved to output\commits.csv


Unnamed: 0,html_url,commit.message,commit.author.name,commit.author.email,commit.author.date,commit.verification.verified
0,https://github.com/freeCodeCamp/freeCodeCamp/c...,fix(api): duplicate reply on error in /daily-c...,Oliver Eyton-Williams,ojeytonwilliams@gmail.com,2025-06-23T13:15:26Z,True
1,https://github.com/freeCodeCamp/freeCodeCamp/c...,chore(api): add user's id to errors sent to Se...,Oliver Eyton-Williams,ojeytonwilliams@gmail.com,2025-06-09T20:21:02Z,True
2,https://github.com/freeCodeCamp/freeCodeCamp/c...,chore(api): migrate to fastify v5 (#57576),Oliver Eyton-Williams,ojeytonwilliams@gmail.com,2025-06-02T02:37:57Z,True
3,https://github.com/freeCodeCamp/freeCodeCamp/c...,fix: handle when userinfo has no email address...,Oliver Eyton-Williams,ojeytonwilliams@gmail.com,2025-05-30T17:02:55Z,True
4,https://github.com/freeCodeCamp/freeCodeCamp/c...,test: inform devs when db connection not estab...,Oliver Eyton-Williams,ojeytonwilliams@gmail.com,2025-05-28T15:52:11Z,True
5,https://github.com/freeCodeCamp/freeCodeCamp/c...,refactor: remove unused format rules from resp...,Oliver Eyton-Williams,ojeytonwilliams@gmail.com,2025-05-27T16:16:32Z,True
6,https://github.com/freeCodeCamp/freeCodeCamp/c...,test(api): stop reporting request logs during ...,Oliver Eyton-Williams,ojeytonwilliams@gmail.com,2025-05-27T16:05:25Z,True
7,https://github.com/freeCodeCamp/freeCodeCamp/c...,fix(api): handle expected Auth0 errors (#60499),Oliver Eyton-Williams,ojeytonwilliams@gmail.com,2025-05-27T04:21:03Z,True
8,https://github.com/freeCodeCamp/freeCodeCamp/c...,fix(api): handle string challengeType (#60491),Oliver Eyton-Williams,ojeytonwilliams@gmail.com,2025-05-23T12:56:18Z,True
9,https://github.com/freeCodeCamp/freeCodeCamp/c...,fix(api): handle users without email addresses...,Oliver Eyton-Williams,ojeytonwilliams@gmail.com,2025-05-22T09:28:56Z,True


# Get all commits list (iteration over pages)

In [14]:
def get_all_commits(
        owner, repo, 
        path=None, author=None,since=None, until=None, 
        per_page=100, max_pages=30, 
        filter_fields=False, fields=["html_url", {"commit": ["message", {"author": ["name", "email", "date"]}, {"verification": ["verified"]}]}]
    ):
    """Fetch all commits for a repository with pagination."""
    all_commits = []
    page = 1

    consecutive_rate_limit_exceeded = 0  # Counter for consecutive rate limit exceeded errors

    while page <= max_pages:
        try:
            commits = get_commits(
                owner=owner,
                repo=repo,
                path=path,
                author=author,
                since=since,
                until=until,
                per_page=per_page,
                page=page,
                filter_fields=filter_fields,
                fields=fields
            )
            
            logging.info(f"Page {page} fetched with {len(commits)} commits, total so far: {len(all_commits) + len(commits)}")
            
            if not commits:
                break  # No more commits to fetch
            
            all_commits.extend(commits)
            consecutive_rate_limit_exceeded = 0  # Reset counter if successful
            page += 1
        except Exception as e:
            logging.error(f"Error fetching commits on page {page}: {e}")
            
            # If returns "Not Found" error, it might be due to an invalid owner, repo, path, author or date range
            if "Not Found" in str(e):
                logging.error("Please check your string parameters and date ranges, the API failed to found data with the given parameters. Stopping further requests.")
                break # Stop further requests if the error is "Not Found"
            
            # If the rate limit is reached, wait for 60 seconds before retrying
            if "API rate limit exceeded" in str(e):
                
                consecutive_rate_limit_exceeded += 1 # Increment the counter for consecutive rate limit exceeded errors
                if consecutive_rate_limit_exceeded >= 3: # If rate limit exceeded 3 times, stop further requests
                    logging.error("Rate limit exceeded 3 times. Stopping further requests.")
                    logging.warning("Returning all commits fetched so far, the information may be incomplete.")
                    break
                
                # If the rate limit is exceeded, wait for 60 seconds before retrying
                logging.info("Rate limit exceeded. Waiting for 60 seconds before retrying...")
                time.sleep(60)
                logging.info("Retrying...") # Retry the iteration (avoiding page+=1)

    return all_commits

In [15]:
all_commits_json = get_all_commits(
    per_page=1,
    max_pages=100000,
    owner = "freeCodeCamp", 
    repo = "freeCodeCamp",
    path = "",
    since = "2020-01-01T00:00:00Z",
    until = "2025-12-31T23:59:59Z",
    filter_fields=True
    )

INFO: Page 1 fetched with 1 commits, total so far: 1
INFO: Page 2 fetched with 1 commits, total so far: 2
INFO: Page 3 fetched with 1 commits, total so far: 3
INFO: Page 4 fetched with 1 commits, total so far: 4
INFO: Page 5 fetched with 1 commits, total so far: 5
INFO: Page 6 fetched with 1 commits, total so far: 6
INFO: Page 7 fetched with 1 commits, total so far: 7
INFO: Page 8 fetched with 1 commits, total so far: 8
INFO: Page 9 fetched with 1 commits, total so far: 9
INFO: Page 10 fetched with 1 commits, total so far: 10
INFO: Page 11 fetched with 1 commits, total so far: 11
INFO: Page 12 fetched with 1 commits, total so far: 12
INFO: Page 13 fetched with 1 commits, total so far: 13
INFO: Page 14 fetched with 1 commits, total so far: 14
INFO: Page 15 fetched with 1 commits, total so far: 15
INFO: Page 16 fetched with 1 commits, total so far: 16
INFO: Page 17 fetched with 1 commits, total so far: 17
INFO: Page 18 fetched with 1 commits, total so far: 18
INFO: Page 19 fetched with 1

In [16]:
save_json_to_file(all_commits_json, "all_commits.json")
save_json_to_csv(all_commits_json, "all_commits.csv")


INFO: Data saved to output\all_commits.json
INFO: Data saved to output\all_commits.csv
