In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON
import requests
import json
import numpy as np

In [2]:
def fetch_coordinates(qid, timeout=30):
    """Look up the coordinate location (property P625) of a Wikidata entity.

    Calls the ``wbgetentities`` action of the Wikidata API for ``qid``,
    requesting only the entity's claims, and reads the first P625 claim
    that actually carries a value.

    Args:
        qid: Entity ID to look up. It is sent as the ``ids`` parameter and
            also used as the key into the ``entities`` map of the response.
        timeout: Seconds ``requests`` waits on the HTTP call before giving
            up. Without a timeout a stalled connection would block the
            notebook kernel indefinitely.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when the response
        holds no P625 claim with a value for ``qid``.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbgetentities",
        "ids": qid,
        "props": "claims",
        "format": "json"
    }
    response = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    data = response.json()

    # Extract coordinates from property P625
    claims = data.get("entities", {}).get(qid, {}).get("claims", {})
    for claim in claims.get("P625", []):
        # Snaks of type "novalue"/"somevalue" have no "datavalue" entry, so
        # indexing it directly would raise KeyError; skip those claims.
        value = claim.get("mainsnak", {}).get("datavalue", {}).get("value")
        if value is not None:
            return value["latitude"], value["longitude"]
    return None

def fetch_wikidata_entity(qid, timeout=30):
    """Fetch the full ``wbgetentities`` JSON response for a Wikidata entity.

    Args:
        qid: Entity ID, sent as the ``ids`` parameter of the API call.
        timeout: Seconds ``requests`` waits on the HTTP call before giving
            up. Without a timeout a stalled connection would block the
            notebook kernel indefinitely.

    Returns:
        The decoded JSON body of the response, returned unmodified.

    Raises:
        Exception: If the HTTP status code is anything other than 200.
    """
    # NOTE(review): only the HTTP status is checked here. The MediaWiki API
    # presumably can report request errors inside a 200 response body —
    # confirm whether callers need to look for an "error" key.
    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbgetentities",
        "ids": qid,
        "format": "json"
    }
    response = requests.get(url, params=params, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data: {response.status_code}")
    return response.json()

In [5]:
def query_wikidata(query):
    """Run a SPARQL query on the module-level ``sparql`` client and flatten the rows.

    The client is configured with ``query`` and the ``JSON`` return format
    on every call, so the shared ``sparql`` object is mutated as a side effect.

    Args:
        query: SPARQL query text handed to ``sparql.setQuery``.

    Returns:
        A list with one dict per result binding. Each dict maps a variable
        name listed in ``head.vars`` to the ``'value'`` field of that
        binding; variables absent from a binding are left out of its dict.
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    response = sparql.query().convert()

    rows = []
    for binding in response['results']['bindings']:
        row = {}
        # Walk the declared variables so keys follow the order of head.vars.
        for name in response['head']['vars']:
            if name in binding:
                row[name] = binding[name]['value']
        rows.append(row)
    return rows

# Shared client for the Wikidata Query Service SPARQL endpoint. query_wikidata()
# reads this module-level name, so it has to exist before the first call.
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

# Select every item that is an instance of human (P31 = Q5) and has
# P1343 = Q678259, along with its Swedish label and, when one exists, its
# Swedish description. The description is OPTIONAL, and query_wikidata() omits
# unbound variables, so rows without one have no "description" key.
# The f-string has no placeholders; the doubled braces are f-string escapes
# that produce the literal braces SPARQL needs.
persons = query_wikidata(f""" 
              SELECT ?person ?personLabel ?description WHERE {{
                ?person wdt:P1343 wd:Q678259;  # Described by Nordisk Familjeboken
                        wdt:P31 wd:Q5.         # Instance of human
                OPTIONAL {{ ?person schema:description ?description. FILTER(LANG(?description) = "sv"). }}
                SERVICE wikibase:label {{ bd:serviceParam wikibase:language "sv". }}
              }}
              """)

In [4]:
# Despite their names, both lists are boolean masks aligned with `persons`
# (one True/False per row), not lists of person records; summing a mask
# counts its True entries.
# NOTE(review): the cell prompts show this cell ran as In [4] while the query
# cell above is In [5], so the recorded counts may predate the latest query
# run — re-run top to bottom to confirm.
desc_persons = ["description" in person for person in persons]
non_desc_persons = [not has_description for has_description in desc_persons]
sum(non_desc_persons), sum(desc_persons)

(3413, 6019)