In [2]:
# zotero.ipynb, a Python script for retrieving data from the Zotero API.

# (c) 2024 Vanderbilt University. This program is released under a GNU General Public License v3.0 http://www.gnu.org/licenses/gpl-3.0
# Author: Steve Baskauf

# Script metadata: version string and creation date of this notebook.
version = '0.0.1'
created = '2024-01-25'

# Zotero API developer guide: https://www.zotero.org/support/dev/web_api/v3/start
# Example request URL: https://api.zotero.org/groups/2267085/items?format=json&include=bib,data,coins,citation&style=chicago-fullnote-bibliography

# Standard-library imports.
import json
import sys
from time import sleep
from typing import Any, Dict, List, Optional, Tuple

# Third-party imports.
import requests
import requests_cache

# Constants shared by every API call: the service root and the header that
# selects version 3 of the Zotero API.
BASE_URL = 'https://api.zotero.org'
VERSION_HTTP_HEADER = {'Zotero-API-Version': '3'}

# Cache HTTP responses in a local SQLite store named "zotero_cache" so that
# repeating a request shortly afterwards does not hit the server again.
requests_cache.install_cache(
    'zotero_cache',
    backend='sqlite',
    expire_after=300,
    allowable_methods=['GET', 'POST'],
)


In [11]:
# Define functions.

def get_zotero_data(
    agent_id: str,
    library_type='groups',
    what_to_include='bib,data,coins,citation',
    endpoint='items',
    citation_style='chicago-fullnote-bibliography',
    timeout: Optional[float] = 30.0,
) -> Tuple[int, Any, str]:
    """Make one Zotero API request and return the raw parts of the response.

    Parameters
    ----------
    agent_id : str
        Zotero user or group ID. A numeric ID is also accepted and converted to text.
    library_type : str
        "users" or "groups"
    what_to_include : str
        Text list of what to include in the response. Default is based on the example request URL.
        See https://www.zotero.org/support/dev/web_api/v3/basics for other options.
    endpoint : str
        Particular endpoint to query. Possible values are "collections", "items", "searches", and "tags".
    citation_style : str
        Citation style to use.
    timeout : float or None
        Number of seconds to wait for the server before the request is abandoned.
        Pass None to wait indefinitely.

    Returns
    -------
    Tuple consisting of the HTTP status code, the response headers (a mapping of
    header name to value), and the response body in text format.
        If status 200, the body is JSON. Otherwise it is probably an error message.

    Raises
    ------
    Whatever requests.get raises for connection problems or when the timeout elapses.
    """
    query_string_dict = {
        'format': 'json',
        'include': what_to_include,
        'style': citation_style
    }
    # str() keeps the URL construction from failing when the ID is given as an integer.
    url = '/'.join([BASE_URL, library_type, str(agent_id), endpoint])
    # Without a timeout a stalled connection would block the notebook forever.
    r = requests.get(url, params=query_string_dict, headers=VERSION_HTTP_HEADER, timeout=timeout)

    return r.status_code, r.headers, r.text


In [23]:
def _header_seconds(headers, name: str, default: int) -> int:
    """Return header `name` as a non-negative whole number of seconds, or `default`.

    The default is used when the header is absent, is negative, or is not a plain
    integer (for example a Retry-After value given as an HTTP date).
    """
    try:
        seconds = int(headers[name])
    except (KeyError, TypeError, ValueError):
        return default
    return seconds if seconds >= 0 else default


def retrieve_page_of_data(agent_id: str, backoff_time: int, library_type='groups', what_to_include='bib,data,coins,citation', endpoint='items', citation_style='chicago-fullnote-bibliography') -> Tuple[List[Dict[str, Any]], int]:
    """Retrieve a page of Zotero data, retrying when the server asks the client to wait.

    Parameters
    ----------
    agent_id : str
        Zotero user or group ID
    backoff_time : int
        Number of seconds to wait before making the request, normally the value
        returned by the previous call. Use 0 for the first call.
    library_type : str
        "users" or "groups"
    what_to_include : str
        Text list of what to include in the response. Default is based on the example request URL.
        See https://www.zotero.org/support/dev/web_api/v3/basics for other options.
    endpoint : str
        Particular endpoint to query. Possible values are "collections", "items", "searches", and "tags".
    citation_style : str
        Citation style to use.

    Returns
    -------
    Tuple consisting of a list of dictionaries with each dictionary representing one reference,
    and an integer indicating the number of seconds to wait before the next request
    (taken from the "Backoff" response header, 0 if the header is absent).
    The list is empty when the server answers 304, because this function holds no
    earlier copy of the data to hand back.

    Raises
    ------
    SystemExit
        With exit status 1 when the server returns an unrecoverable status code or
        keeps answering 429/503 for max_tries consecutive requests.
    """
    max_tries = 10
    default_retry_delay = 5  # Seconds to wait when the server sends no usable Retry-After header.
    retryable = {429: 'Too many requests.', 503: 'Service unavailable.'}

    data_structure: List[Dict[str, Any]] = []
    tries = 0
    while True:
        tries += 1

        if backoff_time > 0:
            print('Server overloaded. Waiting', backoff_time, 'seconds before trying again.')
            sleep(backoff_time)

        # Make HTTP request to API.
        code, headers, data_string = get_zotero_data(agent_id, library_type=library_type, what_to_include=what_to_include, endpoint=endpoint, citation_style=citation_style)
        print(headers)

        # The server may ask for a pause before the next request on any response.
        backoff_time = _header_seconds(headers, 'Backoff', 0)

        if code == 200:
            data_structure = json.loads(data_string)
            break
        elif code == 304:
            # Not modified: there is no body to parse, so the empty list is returned.
            break
        elif code in retryable:
            # Wait the indicated number of seconds and try again, at most max_tries requests in total.
            if tries >= max_tries:
                print('Too many tries. Giving up.')
                sys.exit(1)
            delay = _header_seconds(headers, 'Retry-After', default_retry_delay)
            print(retryable[code], 'Waiting', delay, 'seconds and trying again.')
            sleep(delay)
        elif code == 500:
            print('500 status code. Did you specify a valid agent ID?')
            print('message:', data_string)
            sys.exit(1)
        else:
            print(code, 'status code, message:', data_string)
            sys.exit(1) # Exit the program if there is an undetermined error.

    return data_structure, backoff_time


In [24]:
agent_id = '2267085'
backoff_time = 0

# retrieve_page_of_data returns a pair (items, seconds to wait before the next request).
# Unpack it so that `results` is the list of items and the wait time is kept for the next call.
results, backoff_time = retrieve_page_of_data(agent_id, backoff_time)
#print(json.dumps(results, indent=2))


{'Date': 'Fri, 26 Jan 2024 02:04:21 GMT', 'Content-Type': 'application/json', 'Content-Length': '8618', 'Connection': 'keep-alive', 'Server': 'Apache/2.4.57 ()', 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains; preload', 'Zotero-API-Version': '3', 'Zotero-Schema-Version': '28', 'Total-Results': '1273', 'Link': '<https://api.zotero.org/groups/2267085/items?include=bib%2Ccitation%2Ccoins%2Cdata&start=25&style=chicago-fullnote-bibliography>; rel="next", <https://api.zotero.org/groups/2267085/items?include=bib%2Ccitation%2Ccoins%2Cdata&start=1250&style=chicago-fullnote-bibliography>; rel="last", <https://www.zotero.org/groups/2267085/items>; rel="alternate"', 'Last-Modified-Version': '6018', 'Vary': 'Accept-Encoding', 'Content-Encoding': 'gzip'}
