In [1]:
import requests
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas
import question
import random

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas


In [2]:
# Predicates
# Bare Wikidata property IDs (no "wdt:" prefix); the prefix is added where they
# are interpolated into SPARQL, e.g. wdt:{SUBCLASS_OF}.
SUBCLASS_OF = "P279"
PART_OF = "P361"
INSTANCE_OF = "P31"

# Convenience list holding every predicate constant declared above.
predicates = [SUBCLASS_OF, PART_OF, INSTANCE_OF]

In [3]:
class Generator:
    '''
    Description:
        Builds multiple-choice questions about Wikidata items by querying the
        public Wikidata SPARQL endpoint.

    Arguments:
        sister_predicates:string - space-separated, prefixed predicates
            (e.g. "wdt:P279 wdt:P361") that are interpolated into the
            VALUES clauses of the queries built by this class
    '''
    def __init__(self, sister_predicates):
        self.sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
        self.sister_predicates = sister_predicates

    @staticmethod
    def _sparql_literal(text):
        '''
        Description:
            Quotes text as a SPARQL string literal, escaping the characters
            that would otherwise end the literal or break the query

        Arguments:
            text:string

        Returns:
            literal:string - text wrapped in double quotes
        '''
        escaped = (
            text.replace("\\", "\\\\")  # backslash first, so later escapes are not doubled
                .replace('"', '\\"')
                .replace("\n", "\\n")
                .replace("\r", "\\r")
        )
        return f'"{escaped}"'

    def run_query(self, query):
        '''
        Description:
            Takes in query and requests its output

        Arguments:
            query:string

        Returns:
            results:JSON
        '''
        # Set the query and the return format (JSON)
        self.sparql.setQuery(query)
        self.sparql.setReturnFormat(JSON)

        # Perform the query and convert the result to a Python dictionary
        return self.sparql.query().convert()

    def QID_to_label(self, QID):
        '''
        Description:
            Takes in QID and outputs its English label

        Arguments:
            QID:string

        Returns:
            label:string - the QID itself when the entity has no English label
        '''
        query = f'''
        SELECT ?itemLabel WHERE {{
            wd:{QID} rdfs:label ?itemLabel.
            FILTER(LANG(?itemLabel) = "en")
        }}
        '''
        bindings = self.run_query(query)["results"]["bindings"]
        if not bindings:
            # Showing the raw ID is more useful to the caller than an IndexError
            return QID
        return bindings[0]["itemLabel"]["value"]

    def find_uri_by_label(self, label):
        '''
        Description:
            Takes in label and outputs the URI of one item carrying exactly
            that English label (LIMIT 1, so ambiguous labels resolve to
            whichever item the endpoint returns first)

        Arguments:
            label:string

        Returns:
            uri:string - 'http://www.wikidata.org/element/XXXXXXX'
            None when the query fails or nothing matches
        '''
        # Escape the label so quotes or backslashes in it cannot break out of the literal
        literal = self._sparql_literal(label)

        # Create SPARQL query to find the URI for a given label
        query = f'''SELECT ?item WHERE {{
                    ?item rdfs:label {literal}@en.
                    SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
                }} LIMIT 1'''

        try:
            results = self.run_query(query)

            # The first matching URI
            return results["results"]["bindings"][0]["item"]["value"]
        except Exception as e:
            # Best effort: report the problem and signal "not found" to the caller
            print(f"An error occurred: {e}")
            return None

    def find_ID_by_label(self, label):
        '''
        Description:
            Takes in label and outputs its ID

        Arguments:
            label:string

        Returns:
            id:string - 'QXXXXXX'

        Raises:
            ValueError when no item could be found for the label
        '''
        uri = self.find_uri_by_label(label)
        if uri is None:
            raise ValueError(f"No Wikidata item found for label {label!r}")
        return self.find_ID_by_uri(uri)

    def find_ID_by_uri(self, uri):
        '''
        Description:
            Takes in uri and outputs its ID (the last path segment)

        Arguments:
            uri:string - http://www.wikidata.org/element/QXXXXX

        Returns:
            id:string - 'QXXXXXX'
        '''
        # Last segment rather than a fixed index, so property URIs such as
        # .../prop/direct/P279 resolve to 'P279' as well
        return uri.rsplit('/', 1)[-1]

    def choose_adequate_relation(self, label):
        '''
        Description:
            Takes in label and outputs a relation that applies to the label
            Relations are pairs of predicates and objects
        Arguments:
            label:string

        Returns:
            Relation object

        Raises:
            ValueError when the item has no statement using any of the
            sister predicates
        '''
        topicID = self.find_ID_by_label(label)

        # DISTINCT collapses the one-row-per-label-language duplicates that the
        # rdfs:label join produces, so every pair is equally likely to be drawn
        query = f'''
        SELECT DISTINCT ?predicate ?object WHERE {{
            VALUES ?predicate {{ {self.sister_predicates} }}
            
            ?object rdfs:label ?label.
            wd:{topicID} ?predicate ?object.
            SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
        }}
        '''
        bindings = self.run_query(query)["results"]["bindings"]
        if not bindings:
            raise ValueError(
                f"No relation found for {label!r} ({topicID}) using predicates: {self.sister_predicates}"
            )

        # Randomly choose a ?predicate ?object pair
        chosen = random.choice(bindings)
        predicate_id = self.find_ID_by_uri(chosen["predicate"]["value"])
        object_id = self.find_ID_by_uri(chosen["object"]["value"])
        return question.Relation(predicate_id, object_id)


    def sister_topic(self, label, exceptions=None, n_items=100):
        '''
        Description:
            Takes in label and outputs n_items related topics

        Arguments:
            label:string
            exceptions: list of "predicate object" strings, each split on
                whitespace into exactly two SPARQL terms; items related to
                the object (or one of its subclasses) through the predicate
                are excluded
            n_items:int

        Returns:
            results:JSON - Sister topics of label

        '''
        topicID = self.find_ID_by_label(label)

        exception_patterns = []
        for exception in (exceptions or []):
            predicate, object_term = exception.split()
            pattern = f"""FILTER NOT EXISTS {{
                            ?item {predicate} ?exceptionClass .
                            ?exceptionClass (wdt:P279)* {object_term} .
                        }}\n"""
            exception_patterns.append(pattern)

        exceptions_string = "".join(exception_patterns)

        # SPARQL query to find topics that are similar to the given topic.
        # DISTINCT keeps an item that matches through several predicates or
        # classes from using up more than one of the n_items rows.
        query = f'''
        SELECT DISTINCT ?item ?label WHERE {{
            VALUES ?predicates {{ {self.sister_predicates} }}
            wd:{topicID} ?predicates ?class.
            ?item ?predicates ?class.
            ?item rdfs:label ?label.

            FILTER(LANG(?label) = "en")
            FILTER(?item != wd:{topicID})
            {exceptions_string}
            SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
        }}
        
        LIMIT {n_items}
        '''

        return self.run_query(query)

    def random_sister_topic(self, sister_json, count):
        '''
        Description:
            Takes in a json of sister topics and outputs a list of random topic Q number strings

        Arguments:
            sister_json:JSON
            count:int

        Returns:
            list of distinct Q numbers - at most count of them; fewer when
            sister_json holds fewer distinct items
        '''
        # Ordered de-duplication: the same item may appear in several rows
        candidate_ids = list(dict.fromkeys(
            topic["item"]["value"].split("/")[-1]
            for topic in sister_json["results"]["bindings"]
        ))
        # random.sample raises when asked for more than the population holds
        return random.sample(candidate_ids, min(count, len(candidate_ids)))

    def display_as_table(self, results, n_items):
        '''
        Description:
            Creates a table of the n_items of the queried results

        Arguments:
            results:JSON - sister topics
            n_items:int

        Returns:
            None
        '''
        df = pandas.DataFrame.from_dict(results["results"]["bindings"][:n_items])

        def to_value(cell):
            return cell["value"]

        # DataFrame.applymap is deprecated in favour of DataFrame.map (pandas >= 2.1)
        df = df.map(to_value) if hasattr(df, "map") else df.applymap(to_value)

        # Scope the display option to this print instead of changing it globally.
        # NOTE(review): the original author remarked that n_items "doesnt work here";
        # not verified.
        with pandas.option_context('display.max_rows', n_items):
            print(df)

    def element_question(self, element_label, num_wrong_answers=3):
        '''
        Description:
           Creates a question with the element as the correct answer

        Arguments:
            element_label:string
            num_wrong_answers:int

        Returns:
            question:Question
        '''
        elementID = self.find_ID_by_label(element_label)

        relation = self.choose_adequate_relation(element_label)
        # str(relation) is handed to sister_topic as an exclusion pattern, so it
        # presumably renders as "<predicate term> <object term>" — confirm in question.Relation
        wrong_answers = self.sister_topic(element_label, [str(relation)], num_wrong_answers)

        selected_answers = self.random_sister_topic(wrong_answers, num_wrong_answers)
        # Rebuild the relation with human-readable labels for display
        relation = question.Relation(self.QID_to_label(relation.predicate), self.QID_to_label(relation.object))
        return question.Question(relation, elementID, selected_answers)

    ####
    def find_category(self, elementID, predicateID=SUBCLASS_OF):
        '''
        Description:
            Inputs an element ID and outputs a category ID it belongs to based on the predicate

        Arguments:
            elementID:string
            predicateID:string

        Returns:
            categoryID:string

        Raises:
            ValueError when the element has no category for the predicate
        '''
        query = f'''
        SELECT ?category ?label WHERE {{
            wd:{elementID} wdt:{predicateID} ?category.
            ?category rdfs:label ?label.

            SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
            FILTER(LANG(?label) = "en")
        }}
        '''

        categories = self.run_query(query)['results']['bindings']
        if not categories:
            raise ValueError(f"No category found for {elementID} via predicate {predicateID}")

        # Pick one of the matching categories at random
        random_category = random.choice(categories)['category']['value']
        return self.find_ID_by_uri(random_category)

    def sister_category(self, categoryID, elementID, n_items=3):
        '''
        Description:
            Takes in category and element IDS and outputs n_items related categories that don't contain the element

        Arguments:
            categoryID:string
            elementID:string
            n_items:int

        Returns:
            results:JSON - Sister categories of categoryID

        '''
        query = f'''
                SELECT ?superCategory ?superCategoryLabel ?category ?categoryLabel WHERE {{
                    SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
                    wd:{categoryID} wdt:{SUBCLASS_OF} ?superCategory.
                    ?category wdt:{SUBCLASS_OF} ?superCategory.
                    FILTER(?category != wd:{categoryID})
                    FILTER NOT EXISTS {{
                        wd:{elementID} wdt:{SUBCLASS_OF} ?category .
                    }}
                
                    ?superCategory rdfs:label ?superCategoryLabel .
                    FILTER(LANG(?superCategoryLabel) = "en")
                    ?category rdfs:label ?categoryLabel .
                    FILTER(LANG(?categoryLabel) = "en")
             
  
                }}
                LIMIT {n_items}
        '''
        return self.run_query(query)

    def category_question(self, element_label, num_wrong_answers=3):
        '''
        Description:
            Inputs an element and outputs a question whose answers are categories, one being the one that contains the element

        Arguments:
            element_label:string
            num_wrong_answers:int

        Returns:
            question:Question
        '''
        # Get category of the element
            # Different predicates (i.e., instance of, subclass of) can be used for this. Maybe randomly select them?
        elementID = self.find_ID_by_label(element_label)
        categoryID = self.find_category(elementID) # Default is SUBCLASS_OF

        # Get the sister categories that the element is not a part of
        sister_rows = self.sister_category(categoryID, elementID, n_items=num_wrong_answers)['results']['bindings']
        selected_answers = [self.find_ID_by_uri(row['category']['value']) for row in sister_rows]

        # Return the sisters and the correct category
        relation = question.Relation("category", f'{self.QID_to_label(elementID)} belongs to')

        return question.Question(relation, categoryID, selected_answers)

    def print_question(self, question):
        '''
        Description:
            Prints the question, the raw answer IDs, and the numbered answer labels

        Arguments:
            question:Question (inside this method the name hides the imported question module)

        Returns:
            None
        '''
        print(f"Which of the following is a {question.relation.predicate} {question.relation.object}?")

        print(question.all_answers)
        for index, answer in enumerate(question.all_answers):
            print(f"{index + 1}. {self.QID_to_label(answer)}")

In [4]:
# Ask which element should be the correct answer. The prompt is printed with
# end=": " so the reply follows on the same line, and the value is echoed so it
# appears in the cell output.
print("What element do you want the correct answer to the question to be?", end=": ")
element = input()
print(element)
# Ask how many questions to generate.
# NOTE(review): int() raises ValueError on non-numeric input, and num_questions
# is not read by any cell visible in this notebook.
print("How many questions do you want?", end=": ")
num_questions = int(input())
print(num_questions)

What element do you want the correct answer to the question to be?: mustard
How many questions do you want?: 3


In [5]:
# Extra predicates, written in the prefixed "wdt:Pxxx" form that the SPARQL
# queries interpolate directly.
named_after = "wdt:P138"
occupation = "wdt:P106"
has_use = "wdt:P366"
studied_in = "wdt:P2579"

# Space-separated predicate list placed inside the VALUES clauses built by
# Generator. Keep every entry unique: VALUES does not de-duplicate, so a
# repeated predicate makes the endpoint return each matching row twice.
sensible_sister_predicates = " ".join([
    "wdt:P279",
    "wdt:P361",
    "wdt:P101",
    "wdt:P921",
    "wdt:P131",
    "wdt:P150",
    named_after,
    occupation,
    has_use,
    studied_in,
])

generator = Generator(sister_predicates=sensible_sister_predicates)

In [6]:
import time
def question_answer(question):
    '''
    Description:
        Prints the question through the module-level generator, reads the
        user's numbered choice, and reports whether it matches the correct
        answer. Invalid replies (non-numeric or outside 1..number of answers)
        are rejected and the prompt is repeated.

    Arguments:
        question:Question - must expose all_answers and correct_answer

    Returns:
        None

    Raises:
        ValueError when the question has no answers to choose from
    '''
    generator.print_question(question)
    # Short pause before prompting — presumably so the printed question shows
    # up before the notebook's input box (TODO confirm)
    time.sleep(0.2)

    num_choices = len(question.all_answers)
    if num_choices == 0:
        raise ValueError("The question has no answers to choose from")

    while True:
        reply = input("What is your answer?")
        try:
            choice = int(reply)
        except ValueError:
            choice = None
        # Reject 0 and negatives explicitly: after the -1 shift they would
        # index from the end of the list and silently pick a different answer
        if choice is not None and 1 <= choice <= num_choices:
            break
        print(f"Please enter a number between 1 and {num_choices}.")

    user_answer = generator.QID_to_label(question.all_answers[choice - 1])
    print(f"Your answer was {user_answer}")
    correct_answer = generator.QID_to_label(question.correct_answer)
    print(f"The correct answer was {correct_answer}")
    if user_answer == correct_answer:
        print("You are correct!")
    else:
        print("You are incorrect!")

In [7]:
# Demo: build a category question for "baseball" and run it interactively.
cat_q = generator.category_question("baseball")
question_answer(cat_q)

Which of the following is a category baseball belongs to?
['Q2164355', 'Q2028935', 'Q216048', 'Q2094922']
1. roller sport
2. Kronum
3. team sport
4. flight figure
Your answer was team sport
The correct answer was team sport
You are correct!


In [8]:
# Demo: build a question whose correct answer is the element entered at the
# prompt earlier in the notebook, with 3 wrong answers, and run it interactively.
my_question = generator.element_question(element, 3)
question_answer(my_question)

Which of the following is a subclass of condiment?
['Q67892518', 'Q129031', 'Q115607763', 'Q131748']
1. crab flesh
2. tapenade
3. bean paste
4. mustard
Your answer was mustard
The correct answer was mustard
You are correct!


In [7]:

def get_wikidata_item(item_id):
    '''
    Description:
        Fetches one Wikidata item through the wbgetentities API and prints its
        English label, description and aliases. Prints "Failed to retrieve data"
        when the HTTP status is not 200 or the response holds no usable entity
        for item_id. Fields the item lacks in English are printed as empty.

    Arguments:
        item_id:string - e.g. 'Q64'

    Returns:
        None
    '''
    # Wikidata API endpoint
    url = "https://www.wikidata.org/w/api.php"

    # Parameters for the API request
    params = {
        "action": "wbgetentities",  # Action to get data about entities
        "ids": item_id,  # ID of the Wikidata item (e.g., Q64 for Berlin)
        "format": "json",  # Response format
        "props": "labels|descriptions|aliases",  # Properties to retrieve: labels, descriptions, and aliases
        "languages": "en"  # Language filter
    }

    # Make the GET request; the timeout keeps the cell from hanging forever
    # if the server never answers
    response = requests.get(url, params=params, timeout=10)

    # Check if the request was successful
    if response.status_code != 200:
        print("Failed to retrieve data")
        return

    # Parse the JSON response
    data = response.json()

    # The API can answer 200 with an error payload (no "entities") or with a
    # placeholder entity flagged "missing"; neither carries any data to print
    item_data = data.get('entities', {}).get(item_id)
    if not item_data or 'missing' in item_data:
        print("Failed to retrieve data")
        return

    def english(section):
        # `or {}` also covers a section that is absent or serialized as an empty list
        return (item_data.get(section) or {}).get('en')

    # Extract and print the label, description, and aliases
    label = (english('labels') or {}).get('value', '')
    description = (english('descriptions') or {}).get('value', '')
    aliases = [alias['value'] for alias in (english('aliases') or [])]

    print(f"Label: {label}")
    print(f"Description: {description}")
    print(f"Aliases: {', '.join(aliases)}")

# Example usage: look up item Q64 (Berlin) and print its English label,
# description and aliases
get_wikidata_item("Q64")

Label: Berlin
Description: federated state, capital and largest city of Germany
Aliases: Berlin, Germany, Berlin (Germany), DE-BE
