In [1]:
'''
Script to get the metadata for each institution that each JPL author has a direct connection to (via affiliation)
'''

'\nScript to get the metadata for each institution that each JPL author has a direct connection to (via affiliation)\n'

In [2]:
import json
import requests

from helper_funcs import *


In [3]:
# Endpoint the pagination loop requests institution records from.
INSTITUTIONS_URL = "https://api.openalex.org/institutions"
# Sent as the "per_page" query parameter on every page request.
PAGE_SIZE = 200  # maximum per OpenAlex API

In [4]:
# Accumulator filled by process_institution_entry:
#   short institution ID (e.g. "I12345") -> {"metadata": <full institution record>}
inst_to_res = {}

In [5]:
def process_institution_entry(inst_record):
    """
    Record one institution from the /institutions endpoint.

    The record is stored, unmodified, in the module-level ``inst_to_res``
    dictionary under its short ID (the last path segment of the record's
    ``id`` URL), wrapped as ``{"metadata": inst_record}``. The first record
    seen for an ID wins; later records with the same ID are ignored.

    No ORG/GROUP classification is performed here; only the raw metadata
    is kept.

    Parameters
    ----------
    inst_record : dict
        One element of the ``results`` list returned by the endpoint.

    Returns
    -------
    None
        The function works purely by side effect on ``inst_to_res``.
    """
    # 1) Extract the short ID from the full URL,
    #    e.g. "https://openalex.org/I12345" -> "I12345".
    #    `or ''` also covers an explicit "id": None, which would otherwise
    #    raise AttributeError on .rsplit().
    full_id = inst_record.get('id') or ''
    i_id = full_id.rsplit("/", 1)[-1]

    # 2) A record without a usable ID cannot be keyed; skip it instead of
    #    filing it under the empty string.
    if not i_id:
        return

    # 3) Store the full record, keeping the first one seen for each ID.
    inst_to_res.setdefault(i_id, {"metadata": inst_record})

In [6]:
# Walk the whole /institutions listing with cursor paging: start at "*" and
# follow meta.next_cursor until the API stops returning one.

# Seconds to wait on connect/read before giving up. Without a timeout,
# requests waits indefinitely and a stalled connection hangs the cell.
REQUEST_TIMEOUT = 30

cursor = "*"
# A Session reuses the underlying connection across the many page requests
# instead of opening a new one per page.
with requests.Session() as session:
    while cursor is not None:
        params = {
            "per_page": PAGE_SIZE,
            "cursor":   cursor
        }
        response = session.get(INSTITUTIONS_URL, params=params, timeout=REQUEST_TIMEOUT)
        # Abort on any HTTP error rather than continuing with partial data.
        response.raise_for_status()
        data = response.json()

        # Process each institution record on this page
        for inst_record in data.get("results", []):
            process_institution_entry(inst_record)

        # Advance to the next cursor (will be None when no more pages)
        cursor = data.get("meta", {}).get("next_cursor")

In [None]:
# Save everything collected in inst_to_res to a local file.
# NOTE(review): write_json_to_local is not defined in this notebook; it
# presumably comes from the star-import of helper_funcs and writes its first
# argument as JSON to the given path — confirm against helper_funcs.
write_json_to_local(inst_to_res,'./JPL_Data/inst_open_alex_raw.json')