In [2]:
import requests
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas


In [48]:
class Generator:
    def __init__(self, sister_predicates):
        self.sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
        self.sister_predicates = sister_predicates
    
    def run_query(self, query):
        '''
        Description:
            Takes in query and requests its output
        
        Arguments:
            query:string
        
        Returns:
            results:JSON
        '''

        # Set the query and the return format (JSON)
        self.sparql.setQuery(query)
        self.sparql.setReturnFormat(JSON)

        # Perform the query and convert the result to a Python dictionary
        results = self.sparql.query().convert()
        return results

    def find_uri_by_label(self, label):
        '''
        Description:
            Takes in label and outputs its URI
        
        Arguments:
            label:string
        
        Returns:
            uri:string - 'http://www.wikidata.org/entity/XXXXXXX'
        '''
        
        # Create SPARQL query to find the URI for a given label
        query = f'''SELECT ?item WHERE {{ 
                    ?item rdfs:label "{label.replace('"', '\"')}"@en.
                    SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }} 
                }} LIMIT 1'''
        
        try:
            results = self.run_query(query)

            # The first matching URI
            binding = results["results"]["bindings"][0]
            result = binding["item"]["value"]
            return result
        except Exception as e:
            print(f"An error occurred: {e}")
            return None

    def find_ID_by_label(self, label):
        '''
        Description:
            Takes in label and outputs its ID
        
        Arguments:
            label:string
        
        Returns:
            id:string - 'QXXXXXX'
        '''
        uri = self.find_uri_by_label(label)
        print(uri)
        id = uri.split("/")[-1]
        return id

    def sister_topic(self, label, exceptions=[]):
        '''
        Description:
            Takes in label and outputs related topics
        
        Arguments:
            label:string
            exceptions: list - Unimplemented
        
        Returns:
            results:JSON - Sister topics of label
            
        '''
        topicID = self.find_ID_by_label(label)
        #########
        # exceptions not yet tested
        exeptions = [f'FILTER NOT EXISTS {{?item "{exception}" }}\n' for exception in exceptions]
        #########
        
        #SPARQL query to find topics that are similar to the given topic
        query = f'''
        SELECT ?item ?label WHERE {{
            VALUES ?predicates {{ {self.sister_predicates} }}
            wd:{topicID} ?predicates ?class.
            ?item ?predicates ?class.
            ?item rdfs:label ?label.

            FILTER(LANG(?label) = "en")
            FILTER(?item != wd:{topicID})
            {"\n".join(exeptions)}
            SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
        }}
        
        LIMIT 100
        '''
        ###
        '''ORDER BY RAND() didn't work
        
        query = """SELECT ?item (GROUP_CONCAT(DISTINCT ?predicate; separator=", ") AS ?predicates)
                    (GROUP_CONCAT(DISTINCT ?label; separator=", ") AS ?labels) 
                    WHERE {
                        VALUES ?predicate { """ + predicates + """ }
                        wd:""" + topicQ + """ ?predicate ?class.
                        ?item ?predicate ?class.
                        ?item rdfs:label ?label.
                        FILTER(?item != wd:""" + topicQ + """)
                        
                        SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                    }
                    GROUP BY ?item
                    LIMIT 100"""'''
        ###
        results = self.run_query(query)
        return results

    def display_as_table(self, results, n_items):
        '''
        Description:
            Creates a table of the n_items of the queried results
        
        Arguments:
            results:JSON - sister topics
            n_items:int
        
        Returns:
            None
        '''
        df = pandas.DataFrame.from_dict(results["results"]["bindings"])
        df = df.applymap(lambda x: x["value"])
        pandas.set_option('display.max_rows', n_items)
        print(df)

    def sparql_question(self, topic_label, predicate_label):
        '''
        Description:
           Creates query based on topic and predicate and outputs results
        
        Arguments:
            topic_label:string
            topic_label:string
        
        Returns:
            results:JSON
        '''
        topicID = self.find_ID_by_label(topic_label)
        predicateID = self.find_ID_by_label(predicate_label)

        # Define the SPARQL query
        sparql_query = f"""
            SELECT ?thing WHERE {{
                ?thing wdt:{predicateID} wd:{topicID}.
                SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
            }}
            LIMIT 10
            """

        return self.run_query(sparql_query)

In [49]:
named_after = "wdt:P138"
occupation = "wdt:P106"
sensible_sister_predicates = f"wdt:P279 wdt:P361 wdt:P101 wdt:P361 wdt:P921 wdt:P131 wdt:P150 {named_after} {occupation}"

generator = Generator(sister_predicates=sensible_sister_predicates)

# Display mustard sister topics
results = generator.sister_topic("mustard")
generator.display_as_table(results, 100)





http://www.wikidata.org/entity/Q131748
                                         item                       label
0      http://www.wikidata.org/entity/Q841470                    Vegemite
1     http://www.wikidata.org/entity/Q1056554                      sambal
2     http://www.wikidata.org/entity/Q1268555                Rose hip jam
3         http://www.wikidata.org/entity/Q195                   chocolate
4     http://www.wikidata.org/entity/Q1499073                tomato paste
5     http://www.wikidata.org/entity/Q1541119                   yuzukoshō
6     http://www.wikidata.org/entity/Q1589638                      panade
7     http://www.wikidata.org/entity/Q2964630            horseradish root
8     http://www.wikidata.org/entity/Q2976457                     Satsivi
9     http://www.wikidata.org/entity/Q4426758        fermented bean paste
10    http://www.wikidata.org/entity/Q5098942                 chili sauce
11    http://www.wikidata.org/entity/Q5195232                 curry paste

  df = df.applymap(lambda x: x["value"])


In [50]:
print("On what topic do you want questions?")
topic = input()
print("How many questions do you want?")
num_questions = int(input())

generator.sparql_question(topic, "instance of")

On what topic do you want questions?
How many questions do you want?
http://www.wikidata.org/entity/Q131748
http://www.wikidata.org/entity/P31


{'head': {'vars': ['thing']},
 'results': {'bindings': [{'thing': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q2727838'}},
   {'thing': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q7856511'}},
   {'thing': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q21427661'}},
   {'thing': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q60336988'}}]}}

In [None]:

def get_wikidata_item(item_id):
    # Wikidata API endpoint
    url = "https://www.wikidata.org/w/api.php"
    
    # Parameters for the API request
    params = {
        "action": "wbgetentities",  # Action to get data about entities
        "ids": item_id,  # ID of the Wikidata item (e.g., Q64 for Berlin)
        "format": "json",  # Response format
        "props": "labels|descriptions|aliases",  # Properties to retrieve: labels, descriptions, and aliases
        "languages": "en"  # Language filter
    }
    
    # Make the GET request to the Wikidata API
    response = requests.get(url, params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the JSON response
        data = response.json()
        
        # Access the item's data
        item_data = data['entities'][item_id]
        
        # Extract and print the label, description, and aliases
        label = item_data['labels']['en']['value']
        description = item_data['descriptions']['en']['value']
        aliases = [alias['value'] for alias in item_data['aliases']['en']]
        
        print(f"Label: {label}")
        print(f"Description: {description}")
        print(f"Aliases: {', '.join(aliases)}")
    else:
        print("Failed to retrieve data")

# Example usage
get_wikidata_item("Q64")

Label: Berlin
Description: federated state, capital and largest city of Germany
Aliases: Berlin, Germany, Berlin (Germany), DE-BE
