# Download newspaper records from the SLV catalogue

This notebook downloads the results of a [search for Victorian newspapers](https://find.slv.vic.gov.au/discovery/search?query=lds03,exact,Australia--Victoria&tab=searchProfile&search_scope=slv_local&vid=61SLV_INST:SLV) from the State Library of Victoria's catalogue. The search looks for records where 'Australia--Victoria' appears in the 'Place newspaper published' field (`lds03`). It gets the results data from a Primo JSON API and paginates through the data by updating the `offset` parameter. The results are saved in a newline-delimited JSON (NDJSON) file.

The newspaper data is linked to place information in the [processing newspaper data](process_newspapers.ipynb) notebook.

In [1]:
import requests
import time
import pandas as pd
import re
from requests_cache import CachedSession
from tqdm.auto import tqdm
from pathlib import Path
import json


# Shared HTTP session used for all catalogue requests. It is a requests_cache
# CachedSession, so responses can be served from a local cache on later runs.
# NOTE(review): `timeout=60` is given to the CachedSession constructor, not to the
# individual requests — it is presumably forwarded to the cache backend rather than
# acting as an HTTP request timeout; confirm against the requests-cache docs.
sess = CachedSession(timeout=60)
# Register tqdm's progress-bar helpers with pandas (not used in the cells shown here).
tqdm.pandas()

In [None]:
# Download every record matching the newspaper search, one page at a time.
#
# Results are collected in `records`: one entry per catalogue record, holding the
# `pnx.display` section of the API response for that record.
SEARCH_URL = "https://find.slv.vic.gov.au/primaws/rest/pub/pnxs"
PAGE_SIZE = 50  # records requested per page; also the step used to advance `offset`
REQUEST_DELAY = 5  # seconds to pause after each live (non-cached) request
REQUEST_TIMEOUT = 60  # seconds to wait on the server before giving up on a request

records = []
params = {
    "inst": "61SLV_INST",
    "vid": "61SLV_INST:SLV",
    "limit": PAGE_SIZE,
    "offset": 0,
    "qInclude": "facet_genre,exact,Newspapers",
    "facet": "genre,include,Newspapers",
    "q": "lds03,exact,Australia--Victoria",
}

with tqdm(desc="Downloading", unit=" records") as progress:
    while True:
        response = sess.get(SEARCH_URL, params=params, timeout=REQUEST_TIMEOUT)
        # Fail loudly on an HTTP error rather than trying to parse an error page.
        response.raise_for_status()
        # Deliberately index with ["docs"]: a payload without it should raise,
        # not silently truncate the harvest.
        docs = response.json()["docs"]
        # An empty page means the offset has moved past the last result.
        if not docs:
            break
        records.extend(doc["pnx"]["display"] for doc in docs)
        progress.update(len(docs))
        params["offset"] += PAGE_SIZE
        # Only pause when the server was actually contacted; cached pages are free.
        if not response.from_cache:
            time.sleep(REQUEST_DELAY)

In [191]:
# Save the harvested records as NDJSON: one JSON document per line.
output_path = Path("newspapers.ndjson")
with output_path.open("w") as ndjson_file:
    ndjson_file.writelines(f"{json.dumps(record)}\n" for record in records)