## Loading a table by name

Imports and utilities:

In [None]:
import csv
import pandas as pd

def load_patentsview_table(table_name: str, **kwargs):
    """
    Download a table from PatentsView's bulk data downloads (https://patentsview.org/download/data-download-tables).

    The table is read from "<database_url>/<table_name>.tsv.zip". The database
    URL is selected from the prefix of the table name: "g_" for the granted
    patent tables and "pg_" for the pre-grant publication tables.

    args:
        table_name: Name of the table to read, e.g. "g_patent".
    **kwargs:
        Additional arguments passed to pandas' read_csv function. They take
        precedence over the defaults used here (tab delimiter,
        dtype='str[pyarrow]', quoting=csv.QUOTE_NONNUMERIC).

    returns:
        The result of pandas' read_csv for the requested table (a DataFrame
        unless the extra arguments request otherwise).

    raises:
        ValueError: If table_name starts with neither "g_" nor "pg_".
    """
    if table_name.startswith('g_'):
        database_url = 'https://s3.amazonaws.com/data.patentsview.org/download'
    elif table_name.startswith('pg_'):
        database_url = 'https://s3.amazonaws.com/data.patentsview.org/pregrant_publications'
    else:
        # Single formatted message: passing two positional arguments to
        # ValueError would render the error as a tuple.
        raise ValueError(f"Invalid table name: {table_name!r}")

    table_url = f"{database_url}/{table_name}.tsv.zip"

    # Defaults live in a dict so that caller-supplied keyword arguments
    # override them instead of raising "got multiple values for keyword".
    read_csv_options = {"dtype": 'str[pyarrow]', "quoting": csv.QUOTE_NONNUMERIC}
    # `sep` and `delimiter` are aliases in read_csv and pandas rejects
    # receiving both, so only default the delimiter when no `sep` was given.
    if "sep" not in kwargs:
        read_csv_options["delimiter"] = "\t"
    read_csv_options.update(kwargs)

    return pd.read_csv(table_url, **read_csv_options)

Load a granted patent data table:

In [None]:
# Preview a granted patent table; nrows=5 is forwarded to pandas' read_csv.
load_patentsview_table(table_name='g_applicant_not_disambiguated', nrows=5)

Load a pre-grant publication data table:

In [None]:
# Preview a pre-grant publication table; nrows=5 is forwarded to pandas' read_csv.
load_patentsview_table(table_name='pg_applicant_not_disambiguated', nrows=5)

## Dictionary with all table names

In [None]:
import yaml
from pprint import pprint

# Read the YAML file into `sources`. The encoding is given explicitly because
# open() otherwise falls back to the platform's locale encoding, which is not
# guaranteed to be UTF-8.
with open("sources.yml", encoding="utf-8") as file:
    # safe_load builds only plain Python objects (no arbitrary constructors).
    sources = yaml.safe_load(file)

# Compact, wide output keeps the printed structure short.
pprint(sources, compact=True, width=120)