This notebook is a demonstration of the new Wikidata traversal-based functions.

The new functions can be accessed with the import shown below, from `exogenous_signals` in the `news_signals` library: **entity_name_to_wikidata_id**, **WikidataRelatedEntitiesSearcher**, and **wikidata_ids_to_labels**.

In [1]:
import news_signals
from news_signals.exogenous_signals import entity_name_to_wikidata_id, WikidataRelatedEntitiesSearcher, wikidata_ids_to_labels

Using **entity_name_to_wikidata_id**, we can get the Wikidata ID for a named entity.


:param entity_name: Name of the entity to search for

:return: Wikidata ID as a string, or None if not found

In [3]:
# Look up the Wikidata ID for the entity name; the result is the ID as a
# string, or None if not found. `entity_id` is reused by the next cell.
entity_id = entity_name_to_wikidata_id("Albert Einstein")
print(entity_id)

Q937


**WikidataRelatedEntitiesSearcher** performs a Breadth-First Search on the outgoing links of an entity's Wikidata page. 

:param wikidata_id: The starting Wikidata ID.

:param depth: The number of hops (levels) to traverse.

:param return_labels: Whether to return Wikidata IDs or human-readable labels.

:param query: Optional custom SPARQL query for retrieving related entities.

:return: A list of related Wikidata IDs.
        
For this example, let's set `return_labels` to `True` and pass the `entity_id` from the previous step to the function.

In [5]:
# Breadth-first search over the outgoing links of the entity found above,
# limited to a single hop (depth=1).
# NOTE(review): the recorded output for this cell is a list of Wikidata IDs
# even though return_labels=True — confirm the flag's semantics in the library.
related_ids = WikidataRelatedEntitiesSearcher(
    entity_id,
    depth=1,
    return_labels=True,
)
print(f"Related entities: {related_ids}")

Related entities: ['Q48835067', 'Q1001', 'Q2497232', 'Q659080', 'Q11942', 'Q153238', 'Q6173448', 'Q435651', 'Q118253', 'Q270794', 'Q7322195', 'Q200639', 'Q355245', 'Q8487137', 'Q39', 'Q4397938', 'Q55594631', 'Q188771', 'Q463303', 'Q64', 'Q216738', 'Q22095877', 'Q87554', 'Q37160', 'Q338432', 'Q468357', 'Q123885', 'Q708038', 'Q902624', 'Q70', 'Q26963166', 'Q103505599', 'Q414188', 'Q38193', 'Q68761', 'Q328195', 'Q2945826', 'Q76346', 'Q3603946', 'Q28861731', 'Q533534', 'Q206702', 'Q21200226', 'Q15056034', 'Q43287', 'Q684415', 'Q9095', 'Q42299', 'Q21578', 'Q35802', 'Q9009', 'Q191583', 'Q635642', 'Q253439', 'Q2095524', 'Q72', 'Q168756', 'Q1876751', 'Q685539', 'Q152087', 'Q390003', 'Q3012', 'Q25820', 'Q1726', 'Q30', 'Q1729754', 'Q675617', 'Q543804', 'Q31519', 'Q19185', 'Q942842', 'Q2370801', 'Q464344', 'Q39934978', 'Q183', 'Q57193', 'Q5460604', 'Q14708020', 'Q466089', 'Q156598', 'Q42309226', 'Q7213562', 'Q4175282', 'Q1085', 'Q138518', 'Q310794', 'Q41688', 'Q124500735', 'Q116635', 'Q93996', 'Q

Now that we have the IDs, we can use the function **wikidata_ids_to_labels** to map them to their human-readable labels.

:param wikidata_ids: List of Wikidata IDs (can include composite IDs).

:param language: Language code for labels.

:return: Dictionary mapping Wikidata IDs to labels.

In [7]:
# Map the IDs collected in the previous cell to their English labels;
# the result is a dictionary keyed by Wikidata ID.
labels = wikidata_ids_to_labels(
    related_ids,
    language="en",
)
print(f"Related entities: {labels}")

Related entities: {'Q48835067': 'outline of Albert Einstein', 'Q1001': 'Mahatma Gandhi', 'Q659080': 'University of Bern', 'Q2497232': 'Brazilian Academy of Sciences', 'Q11942': 'ETH Zurich', 'Q153238': 'Leó Szilárd', 'Q6173448': 'Wikipedia:Vital articles/Level/4', 'Q435651': 'old Kantonsschule (Albert Einstein House)', 'Q118253': 'Eduard Einstein', 'Q270794': 'National Academy of Sciences', 'Q7322195': 'Riazuddin', 'Q200639': 'Paul Valéry', 'Q355245': 'Henry George', 'Q329464': 'Royal Prussian Academy of Sciences', 'Q8487137': 'WikiProject Mathematics', 'Q39': 'Switzerland', 'Q4397938': 'Russian Academy of Sciences (1917–1925)', 'Q55594631': 'Lina Einstein', 'Q188771': 'French Academy of Sciences', 'Q463303': 'American Academy of Arts and Sciences', 'Q64': 'Berlin', 'Q216738': 'Maja Einstein', 'Q22095877': 'Albert Einstein Archives', 'Q87554': 'Ernst G. Straus', 'Q37160': 'David Hume', 'Q338432': 'Lincean Academy', 'Q468357': 'Lieserl (Einstein)', 'Q123885': 'Royal Society', 'Q708038':

Alternatively, we can skip the separate steps above by leaving `return_labels` at its default of `False` instead of setting it to `True`.

We just need to pass in the entity name string now.

In [8]:
# Start directly from the entity name and leave return_labels at its default,
# so no separate ID lookup or label-mapping call is needed.
entity = "Albert Einstein"
related_ids2 = WikidataRelatedEntitiesSearcher(
    entity,
    depth=1,
)
print(f"Related entities: {related_ids2}")

Related entities: {'Q48835067': 'outline of Albert Einstein', 'Q1001': 'Mahatma Gandhi', 'Q659080': 'University of Bern', 'Q2497232': 'Brazilian Academy of Sciences', 'Q11942': 'ETH Zurich', 'Q153238': 'Leó Szilárd', 'Q6173448': 'Wikipedia:Vital articles/Level/4', 'Q435651': 'old Kantonsschule (Albert Einstein House)', 'Q118253': 'Eduard Einstein', 'Q270794': 'National Academy of Sciences', 'Q7322195': 'Riazuddin', 'Q200639': 'Paul Valéry', 'Q355245': 'Henry George', 'Q329464': 'Royal Prussian Academy of Sciences', 'Q8487137': 'WikiProject Mathematics', 'Q39': 'Switzerland', 'Q4397938': 'Russian Academy of Sciences (1917–1925)', 'Q55594631': 'Lina Einstein', 'Q188771': 'French Academy of Sciences', 'Q463303': 'American Academy of Arts and Sciences', 'Q64': 'Berlin', 'Q216738': 'Maja Einstein', 'Q22095877': 'Albert Einstein Archives', 'Q87554': 'Ernst G. Straus', 'Q37160': 'David Hume', 'Q338432': 'Lincean Academy', 'Q468357': 'Lieserl (Einstein)', 'Q123885': 'Royal Society', 'Q708038':

The default query for the function is:

        """
            SELECT ?related ?relatedLabel WHERE {{
                wd:{item} ?prop ?related .
                FILTER(isIRI(?related))
                FILTER EXISTS {{
                    ?related wdt:P31/wdt:P279* ?type .
                    VALUES ?type {{ wd:Q5 wd:Q43229 wd:Q4830453 wd:Q2424752 wd:Q431289 wd:Q732577 wd:Q11424 wd:Q571 }}
                }}
                SERVICE wikibase:label {{ bd:serviceParam wikibase:language \"[AUTO_LANGUAGE],en\" }}
            }}
        """

However, using the query param, we can pass in our own custom query as well.

In [10]:
# Custom query that keeps only people: a related item must be an instance of
# (P31), optionally via subclass-of (P279) links, human (wd:Q5).
# The {item} placeholder and doubled braces mirror the default query's layout;
# presumably the library fills the template via str.format — TODO confirm.
new_query = """
    SELECT ?related ?relatedLabel WHERE {{
        wd:{item} ?prop ?related .
        FILTER(isIRI(?related))
        FILTER EXISTS {{
            ?related wdt:P31/wdt:P279* wd:Q5 .
        }}
        SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }}
    }}
"""
entity = "Albert Einstein"
related_ids3 = WikidataRelatedEntitiesSearcher(
    entity,
    depth=1,
    return_labels=False,
    query=new_query,
)
print(f"Related entities: {related_ids3}")

Related entities: {'Q87554': 'Ernst G. Straus', 'Q216738': 'Maja Einstein', 'Q37160': 'David Hume', 'Q9095': 'James Clerk Maxwell', 'Q468357': 'Lieserl (Einstein)', 'Q1001': 'Mahatma Gandhi', 'Q42299': 'Bernhard Riemann', 'Q57193': 'Moritz Schlick', 'Q35802': 'Benedictus de Spinoza', 'Q4175282': 'Alfred Kleiner', 'Q153238': 'Leó Szilárd', 'Q310794': 'Karl Pearson', 'Q41688': 'Hendrik Lorentz', 'Q116635': 'Heinrich Friedrich Weber', 'Q93996': 'Ernst Mach', 'Q118253': 'Eduard Einstein', 'Q7322195': 'Riazuddin', 'Q38193': 'Arthur Schopenhauer', 'Q88665': 'Hermann Einstein', 'Q68761': 'Elsa Einstein', 'Q25820': 'Thomas Young', 'Q200639': 'Paul Valéry', 'Q355245': 'Henry George', 'Q4357787': 'Pauline Koch', 'Q76346': 'Mileva Marić', 'Q55594631': 'Lina Einstein', 'Q19185': 'George Bernard Shaw', 'Q123371': 'Hans Albert Einstein', 'Q97154': 'Heinrich Burkhardt', 'Q991': 'Fyodor Dostoyevsky', 'Q57246': 'Hermann Minkowski', 'Q935': 'Isaac Newton'}
