Objective: Make a selection of nine paintings for the exhibition catalogue to be selected from Wikidata and rendered multi-format in Quarto.

The below Python code uses SPARQLWrapper to retrieve data from Wikidata based on a SPARQL query.

In [1]:
%pip install SPARQLWrapper
from SPARQLWrapper import SPARQLWrapper, JSON
from PIL import Image
import requests




In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON
from PIL import Image
import requests

In [3]:
# VARIABLES

sparql_endpoint_url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
wikibase_url = 'https://www.wikidata.org'
api_url = '/w/api.php'

# Wikidata requires a user agent header to prevent spam requests
user_agent = 'Paintings_conference_bot/0.0 (https://github.com/Pgxe9zu1/catalogue-003; lisa.sommer@stud.hs-hannover.de)'

# SPARQL query
# see in Wikidata's Query Service GUI at:
# https://w.wiki/6qQ2
query = """
#defaultView:ImageGrid
SELECT DISTINCT ?wissenschaftlicher_Name ?Bild ?item ?übergeordnetes_Taxon ?übergeordnetes_TaxonLabel ?Gefährdungsstufe__IUCN_ ?Gefährdungsstufe__IUCN_Label 
WHERE {
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],de". }
  ?item wdt:P31 wd:Q16521;
    wdt:P195 wd:Q119251813;
    wdt:P18 ?Bild.
  OPTIONAL {
    ?item wdt:P225 ?wissenschaftlicher_Name;
      wdt:P171 ?übergeordnetes_Taxon;
      wdt:P3342 ?relevante_Person;
      wdt:P141 ?Gefährdungsstufe__IUCN_.
  }
}
LIMIT 10
"""


In [4]:

# for each result, print various data fields
for item in result['results']['bindings']:
    print('Download RDF: ' + '[' + item['item']['value'] + ']' + '(' + item['item']['value'] + ')' + '\n')
    print('Title: ' + item['title']['value'] + '\n')
    print('Creator: ' + item['creatorLabel']['value'] + '\n')
    print('Year: ' + item['inception']['value'] + '\n')
    print('Inventory number: ' + item['inventory_number']['value'] + '\n')
    print('Copyright: ' + item['copyright_statusLabel']['value'] + '\n')
    print('Material: ' + item['made_from_materialLabel']['value'] + '\n')

NameError: name 'result' is not defined

In [None]:
# SUBROUTINES

def get_delay(date):
    try:
        date = datetime.datetime.strptime(date, '%a, %d %b %Y %H:%M:%S GMT')
        timeout = int((date - datetime.datetime.now()).total_seconds())
    except ValueError:
        timeout = int(date)
    return timeout

def get_image(url, headers):
    r = requests.get(url, headers=headers, stream=True)
    if r.status_code == 200:
        im = Image.open(r.raw)
        return im
    if r.status_code == 500:
        return None
    if r.status_code == 403:
        return None
    if r.status_code == 429:
        timeout = get_delay(r.headers['retry-after'])
        print('Timeout {} m {} s'.format(timeout // 60, timeout % 60))
        time.sleep(timeout)
        get_image(url, headers)


In [None]:
# MAIN PROGRAM

# create SPARQL query
sparql = SPARQLWrapper(sparql_endpoint_url, agent=user_agent)

# retrieve results and convert to JSON format
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
result = sparql.query().convert()

# for each result, print various data fields
for item in result['results']['bindings']:
    print('Wikidata link: ' + '[' + item['item']['value'] + ']' + '(' + item['item']['value'] + ')' + '\n')
    print('Title: ' + item['title']['value'] + '\n')
    print('Creator: ' + item['creatorLabel']['value'] + '\n')
    print('Year: ' + item['inception']['value'] + '\n')
    print('Inventory number: ' + item['inventory_number']['value'] + '\n')
    print('Copyright: ' + item['copyright_statusLabel']['value'] + '\n')
    print('Material: ' + item['made_from_materialLabel']['value'] + '\n')
    

    
    # get image from image URL and display resized version
    image_url=item['image']['value']
    headers = {'User-Agent': 'Ex_Books_conference_bot/0.0 (https://github.com/Pgxe9zu1/catalogue-003; lisa.sommer@stud.hs-hannover.de)'}
    im = get_image(image_url, headers)
    im.thumbnail((500, 500), Image.Resampling.LANCZOS)
    display(im)
    print('\n\n')