### Loading a table by name

In [1]:
import csv
import pandas as pd

def load_patentsview_table(table_name: str, **kwargs):
    """Download a PatentsView bulk-data table and return it as a DataFrame.

    The prefix of ``table_name`` selects the S3 folder the file is read from:
    ``g_`` tables come from ``.../download`` and ``pg_`` tables come from
    ``.../pregrant_publications``. The file requested is
    ``<folder>/<table_name>.tsv.zip``.

    Parameters
    ----------
    table_name : str
        Name of the table, e.g. ``'g_applicant_not_disambiguated'`` or
        ``'pg_applicant_not_disambiguated'``.
    **kwargs
        Extra keyword arguments forwarded unchanged to ``pandas.read_csv``
        (for example ``nrows=5`` to read only a preview).

    Returns
    -------
    pandas.DataFrame
        The table, read as tab-separated text with ``dtype='str[pyarrow]'``.

    Raises
    ------
    ValueError
        If ``table_name`` starts with neither ``'g_'`` nor ``'pg_'``.
    """
    if table_name.startswith('g_'):
        database_url = 'https://s3.amazonaws.com/data.patentsview.org/download'
    elif table_name.startswith('pg_'):
        database_url = 'https://s3.amazonaws.com/data.patentsview.org/pregrant_publications'
    else:
        # Build ONE message string: passing the name as a second positional
        # argument makes str(exc) render as a tuple instead of a sentence.
        raise ValueError(
            f"Invalid table name: {table_name!r} (expected a 'g_' or 'pg_' prefix)"
        )

    table_url = f"{database_url}/{table_name}.tsv.zip"

    return pd.read_csv(table_url, delimiter="\t", dtype='str[pyarrow]', quoting=csv.QUOTE_NONNUMERIC, **kwargs)

Load a granted patent data table:

In [2]:
# Preview a granted-patent table; nrows is forwarded to pd.read_csv via **kwargs.
load_patentsview_table(table_name='g_applicant_not_disambiguated', nrows=5)

Unnamed: 0,patent_id,applicant_sequence,raw_applicant_name_first,raw_applicant_name_last,raw_applicant_organization,applicant_type,applicant_designation,applicant_authority,rawlocation_id
0,9069405,3,David,Bordui,,applicant,us-only,,emcq5f1g8kprye06olw1zqdv8
1,9117193,6,James W.,Seaman,,applicant,us-only,,fa4w8lgydmaocda6g3a1h65lg
2,9764256,2,Terence Arthur,Devlin,,applicant,us-only,,4pwy4ca2zmjod52ex94xh9sxy
3,10947428,1,,,"PPG Industries Ohio, Inc.",applicant,us-only,obligated-assignee,tkwrlo9614r87fa2f18d9gx8s
4,11212562,1,,,"Amazon Technologies, Inc.",applicant,us-only,assignee,csjl4jp4tpws94s01mlyq6h1i


Load a pre-grant publication data table:

In [3]:
# Preview a pre-grant publication table; nrows is forwarded to pd.read_csv via **kwargs.
load_patentsview_table(table_name='pg_applicant_not_disambiguated', nrows=5)

Unnamed: 0,pgpub_id,applicant_sequence,raw_applicant_name_first,raw_applicant_name_last,raw_applicant_organization,applicant_type,applicant_designation,applicant_authority,rawlocation_id
0,20210012690,1,David,General,,applicant,us-only,,a7f527d6-3b8a-11ee-ae79-0a1e30f810d7
1,20200326458,1,,,3M INNOVATIVE PROPERTIES COMPANY,applicant,us-only,assignee,a5ed2c55-3b8a-11ee-ae79-0a1e30f810d7
2,20200075033,1,,,"Babblelabs, Inc.",applicant,us-only,assignee,0000c51a-0d76-11eb-a344-121df0c29c1e
3,20200075034,1,,,Yissum Research Development Company of The Heb...,applicant,us-only,assignee,0000c676-0d76-11eb-a344-121df0c29c1e
4,20200075035,1,Thomas,"Garth, III",,applicant,us-only,,0000c7a1-0d76-11eb-a344-121df0c29c1e


**Dictionary of all available table names, loaded from `sources.yml`:**

In [4]:
import yaml
from pprint import pprint

# Read the table catalogue from sources.yml. The encoding is set explicitly so the
# result does not depend on the platform's locale default.
with open("sources.yml", encoding="utf-8") as file:
    sources = yaml.safe_load(file)

# Compact pretty-print so the long lists of table names wrap at 120 columns.
pprint(sources, compact=True, width=120)

{'granted': {'database': 'https://s3.amazonaws.com/data.patentsview.org/download',
             'description': 'https://patentsview.org/download/data-download-dictionary',
             'loader': '{database}/{table_name}.tsv.zip',
             'tables': ['g_applicant_not_disambiguated', 'g_application', 'g_assignee_disambiguated',
                        'g_assignee_not_disambiguated', 'g_attorney_disambiguated', 'g_attorney_not_disambiguated',
                        'g_botanic', 'g_cpc_at_issue', 'g_cpc_current', 'g_cpc_title', 'g_examiner_not_disambiguated',
                        'g_figures', 'g_foreign_citation', 'g_foreign_priority', 'g_gov_interest',
                        'g_gov_interest_contracts', 'g_gov_interest_org', 'g_inventor_disambiguated',
                        'g_inventor_not_disambiguated', 'g_ipc_at_issue', 'g_location_disambiguated',
                        'g_location_not_disambiguated', 'g_other_reference', 'g_patent', 'g_patent_abstract',
                    