In [32]:
import requests
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas
import question
import random

In [84]:
class Generator:
    def __init__(self, sister_predicates):
        self.sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
        self.sister_predicates = sister_predicates
    
    def run_query(self, query):
        '''
        Description:
            Takes in query and requests its output
        
        Arguments:
            query:string
        
        Returns:
            results:JSON
        '''
        #print(query)
        # Set the query and the return format (JSON)
        self.sparql.setQuery(query)
        self.sparql.setReturnFormat(JSON)

        # Perform the query and convert the result to a Python dictionary
        results = self.sparql.query().convert()
        return results
    def QID_to_label(self, QID):
        '''
        Description:
            Takes in QID and outputs its label
        
        Arguments:
            QID:string
        
        Returns:
            label:string
        '''
        query = f'''
        SELECT ?itemLabel WHERE {{
            wd:{QID} rdfs:label ?itemLabel.
            FILTER(LANG(?itemLabel) = "en")
        }}
        '''
        results = self.run_query(query)
        return results["results"]["bindings"][0]["itemLabel"]["value"]
    def find_uri_by_label(self, label):
        '''
        Description:
            Takes in label and outputs its URI
        
        Arguments:
            label:string
        
        Returns:
            uri:string - 'http://www.wikidata.org/entity/XXXXXXX'
        '''
        
        # Create SPARQL query to find the URI for a given label
        query = f'''SELECT ?item WHERE {{ 
                    ?item rdfs:label "{label.replace('"', '\"')}"@en.
                    SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }} 
                }} LIMIT 1'''
        
        try:
            results = self.run_query(query)

            # The first matching URI
            binding = results["results"]["bindings"][0]
            result = binding["item"]["value"]
            return result
        except Exception as e:
            print(f"An error occurred: {e}")
            return None

    def find_ID_by_label(self, label):
        '''
        Description:
            Takes in label and outputs its ID
        
        Arguments:
            label:string
        
        Returns:
            id:string - 'QXXXXXX'
        '''
        uri = self.find_uri_by_label(label)
        print(uri)
        id = uri.split("/")[-1]
        return id
    def choose_adequate_relation(self, label):
        '''
        Description:
            Takes in label and outputs a relation that applies to the label
            Relations are pairs of predicates and objects
        Arguments:
            label:string
        
        Returns:
            Relation object
        '''
        topicID = self.find_ID_by_label(label)
        # Randomly choose a ?predicate ?object pair
        
        query = f'''
        SELECT ?predicate ?object WHERE {{
            VALUES ?predicate {{ {self.sister_predicates} }}
            
            ?object rdfs:label ?label.
            wd:{topicID} ?predicate ?object.
            SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
        }}
        '''
        results = self.run_query(query)
        # Choose a random pair
        random_index = random.randint(0, len(results["results"]["bindings"]) - 1)
        predicate = results["results"]["bindings"][random_index]["predicate"]["value"].split("/")[-1]
        object = results["results"]["bindings"][random_index]["object"]["value"].split("/")[-1]
        return question.Relation(predicate, object)
    def sister_topic(self, label, exceptions=[], n_items=100):
        '''
        Description:
            Takes in label and outputs n_items related topics
        
        Arguments:
            label:string
            exceptions: list - Unimplemented
            n_items:int
        
        Returns:
            results:JSON - Sister topics of label
            
        '''
        topicID = self.find_ID_by_label(label)
        
        #exeptions = [f'FILTER NOT EXISTS {{?item {exception} .}}\n' for exception in exceptions]
        exception_patterns = []
        for exception in exceptions:
            predicate, object = exception.split()
            pattern = f"""FILTER NOT EXISTS {{
                            ?item {predicate} ?exceptionClass .
                            ?exceptionClass (wdt:P279)* {object} .
                        }}\n"""
            exception_patterns.append(pattern)

        exceptions_string = "".join(exception_patterns)
        
        #SPARQL query to find topics that are similar to the given topic
        query = f'''
        SELECT ?item ?label WHERE {{
            VALUES ?predicates {{ {self.sister_predicates} }}
            wd:{topicID} ?predicates ?class.
            ?item ?predicates ?class.
            ?item rdfs:label ?label.

            FILTER(LANG(?label) = "en")
            FILTER(?item != wd:{topicID})
            {exceptions_string}
            SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
        }}
        
        LIMIT {n_items}
        '''
        
        results = self.run_query(query)
        return results
    
    def random_sister_topic(self, sister_json, count):
        '''
        Description:
            Takes in a json of sister topics and outputs a list of random topic Q number strings
        
        Arguments:
            sister_json:JSON
            count:int
        
        Returns:
            list of Q numbers
        '''
        sister_topics = sister_json["results"]["bindings"]
        random_sister_topics = random.sample(sister_topics, count)
        return [topic["item"]["value"].split("/")[-1] for topic in random_sister_topics]
    
    def display_as_table(self, results, n_items):
        '''
        Description:
            Creates a table of the n_items of the queried results
        
        Arguments:
            results:JSON - sister topics
            n_items:int
        
        Returns:
            None
        '''
        df = pandas.DataFrame.from_dict(results["results"]["bindings"][:n_items])
        df = df.applymap(lambda x: x["value"])
        pandas.set_option('display.max_rows', n_items) # n_items doesnt work here
        print(df)

    def sparql_question(self, topic_label, num_wrong_answers=3):
        '''
        Description:
           Creates a question for a given topic
        
        Arguments:
            topic_label:string
            num_wrong_answers:int
        
        Returns:
            question:Question
        '''
        topicID = self.find_ID_by_label(topic_label)
        
        relation = self.choose_adequate_relation(topic_label)
        wrong_answers = self.sister_topic(topic_label, [str(relation)], num_wrong_answers)

        selected_answers = self.random_sister_topic(wrong_answers, num_wrong_answers)
        print(selected_answers)
        return question.Question(relation, topicID, selected_answers)
    def print_question(self, question):
        '''
        Description:
            Prints the question
        
        Arguments:
            question:Question
        
        Returns:
            None
        '''
        print(f"Which of the following {self.QID_to_label(question.relation.predicate)} {self.QID_to_label(question.relation.object)}?")
       
        print(question.all_answers)
        for index, answer in enumerate(question.all_answers):
            print(f"{index + 1}. {self.QID_to_label(answer)}")

In [63]:
print("On what topic do you want questions?")
topic = input()
print("How many questions do you want?")
num_questions = int(input())



On what topic do you want questions?
How many questions do you want?


In [85]:
named_after = "wdt:P138"
occupation = "wdt:P106"
has_use = "wdt:P366"
sensible_sister_predicates = f"wdt:P279 wdt:P361 wdt:P101 wdt:P361 wdt:P921 wdt:P131 wdt:P150 {named_after} {occupation} {has_use}"

generator = Generator(sister_predicates=sensible_sister_predicates)

# Display mustard sister topics
# results = generator.sister_topic("mustard", n_items=100)
# generator.display_as_table(results, 50)

In [96]:
my_question = generator.sparql_question(topic, 3)

http://www.wikidata.org/entity/Q162202
http://www.wikidata.org/entity/Q162202
http://www.wikidata.org/entity/Q162202
['Q3716880', 'Q3100481', 'Q2307887']


In [97]:
generator.print_question(my_question)

Which of the following occupation musician?
['Q2307887', 'Q162202', 'Q3100481', 'Q3716880']
1. Mari Iijima
2. Nicki Minaj
3. Generación del 51
4. Dynamedion


In [38]:
answer = int(input("What is your answer?")) - 1
user_answer = generator.QID_to_label(my_question.all_answers[answer])
print(f"Your answer was {user_answer}")
correct_answer = generator.QID_to_label(my_question.correct_answer)
print(f"The correct answer was {correct_answer}")
if user_answer == correct_answer:
    print("You are correct!")
else:
    print("You are incorrect!")

Your answer was food
The correct answer was food
You are correct!


In [7]:

def get_wikidata_item(item_id):
    # Wikidata API endpoint
    url = "https://www.wikidata.org/w/api.php"
    
    # Parameters for the API request
    params = {
        "action": "wbgetentities",  # Action to get data about entities
        "ids": item_id,  # ID of the Wikidata item (e.g., Q64 for Berlin)
        "format": "json",  # Response format
        "props": "labels|descriptions|aliases",  # Properties to retrieve: labels, descriptions, and aliases
        "languages": "en"  # Language filter
    }
    
    # Make the GET request to the Wikidata API
    response = requests.get(url, params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the JSON response
        data = response.json()
        
        # Access the item's data
        item_data = data['entities'][item_id]
        
        # Extract and print the label, description, and aliases
        label = item_data['labels']['en']['value']
        description = item_data['descriptions']['en']['value']
        aliases = [alias['value'] for alias in item_data['aliases']['en']]
        
        print(f"Label: {label}")
        print(f"Description: {description}")
        print(f"Aliases: {', '.join(aliases)}")
    else:
        print("Failed to retrieve data")

# Example usage
get_wikidata_item("Q64")

Label: Berlin
Description: federated state, capital and largest city of Germany
Aliases: Berlin, Germany, Berlin (Germany), DE-BE
