In [1]:
import requests
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas


In [2]:
print("On what topic do you want questions?")
topic = input()
print("How many questions do you want?")
num_questions = int(input())

On what topic do you want questions?
How many questions do you want?


ValueError: invalid literal for int() with base 10: ''

In [11]:
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

In [3]:
wikidata_property_example = "P1855"

In [48]:
def display_as_table(results, items):
    df = pandas.DataFrame.from_dict(results["results"]["bindings"])
    df = df.applymap(lambda x: x["value"])
    pandas.set_option('display.max_rows', items)
    print(df)

In [5]:
def find_uri_by_label(label):
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    
    # SPARQL query to find the URI for a given label
    query = """
    SELECT ?item WHERE {
      ?item rdfs:label "%s"@en.
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
    LIMIT 1
    """ % label.replace('"', '\"')  # Basic string sanitization
    
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    
    try:
        results = sparql.query().convert()
        for result in results["results"]["bindings"]:
            return result["item"]["value"]  # Return the first matching URI
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

In [6]:
def run_sparql(sparql_query):
    print(sparql_query)
    # Set the query and the return format (JSON)
    sparql.setQuery(sparql_query)
    sparql.setReturnFormat(JSON)

    # Perform the query and convert the result to a Python dictionary
    results = sparql.query().convert()
    return results

In [7]:
def find_Q_by_uri(uri):
    return uri.split("/")[-1]

In [8]:
def find_Q_by_label(label):
    return find_Q_by_uri(find_uri_by_label(label))

In [146]:
# I checked all of the ?predicates offered by ChatGPT and they are sensible
# wdt:P31 "instance of" and wdt:P21 "sex or gender" removed
sensible_sister_predicates = "wdt:P279 wdt:P361 wdt:P101 wdt:P361 wdt:P921 wdt:P131 wdt:P150"
named_after = " wdt:P138"
occupation = " wdt:P106"
sensible_sister_predicates += named_after + occupation

def sister_topic(topicQ, predicates=sensible_sister_predicates, exceptions=[]):
    # exceptions not yet tested
    exeptions = ["FILTER NOT EXISTS \{?item " + exception + " }\n" for exception in exceptions]
    #SPARQL query to find topics that are similar to the given topic
    query = """
    SELECT ?item ?label WHERE {
        VALUES ?predicates { """ + predicates + """ }
        wd:""" + topicQ +  """ ?predicates ?class.
        ?item ?predicates ?class.
        ?item rdfs:label ?label.
        
        FILTER(LANG(?label) = "en")
        FILTER(?item != wd:""" + topicQ +  """)
        """ + "".join(exeptions) + """
        SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
    
    LIMIT 100
    """
    # ORDER BY RAND() didn't work
    
    # query = """SELECT ?item (GROUP_CONCAT(DISTINCT ?predicate; separator=", ") AS ?predicates)
    #             (GROUP_CONCAT(DISTINCT ?label; separator=", ") AS ?labels) 
    #             WHERE {
    #                 VALUES ?predicate { """ + predicates + """ }
    #                 wd:""" + topicQ + """ ?predicate ?class.
    #                 ?item ?predicate ?class.
    #                 ?item rdfs:label ?label.
    #                 FILTER(?item != wd:""" + topicQ + """)
                    
    #                 SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    #             }
    #             GROUP BY ?item
    #             LIMIT 100"""

    return run_sparql(query)

  exeptions = ["FILTER NOT EXISTS \{?item " + exception + " }\n" for exception in exceptions]


In [147]:
display_as_table(sister_topic(find_Q_by_label("Yeat")), 100)


    SELECT ?item ?label WHERE {
        VALUES ?predicates { wdt:P279 wdt:P361 wdt:P101 wdt:P361 wdt:P921 wdt:P131 wdt:P150 wdt:P138 wdt:P106 }
        wd:Q108761978 ?predicates ?class.
        ?item ?predicates ?class.
        ?item rdfs:label ?label.
        
        FILTER(LANG(?label) = "en")
        FILTER(?item != wd:Q108761978)
        
        SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
    
    LIMIT 100
    
                                     item                  label
0   http://www.wikidata.org/entity/Q12003                   Cher
1     http://www.wikidata.org/entity/Q303          Elvis Presley
2   http://www.wikidata.org/entity/Q12006             Dave Grohl
3   http://www.wikidata.org/entity/Q12007             Eric Saade
4   http://www.wikidata.org/entity/Q26780           Daiki Arioka
5   http://www.wikidata.org/entity/Q26834                  Poopy
6   http://www.wikidata.org/entity/Q26869           Major Harris
7     http:/

  df = df.applymap(lambda x: x["value"])


In [137]:
def sparql_question(topicQ, predicateQ):
    # Define the SPARQL query
    sparql_query = """
    SELECT ?thing WHERE {
      
    ?thing""" + " wdt:" + predicateQ + " wd:" + topicQ + """.

    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
    LIMIT 10
    """
    print("running query: " + sparql_query)

    return run_sparql(sparql_query)

In [21]:
find_Q_by_label(topic)

'Q3624078'

In [32]:
sparql_question(find_Q_by_label(topic), find_Q_by_label("instance of"))

running query: 
    SELECT ?thing WHERE {
      
    ?thing wdt:P31 wd:Q3624078.

    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
    LIMIT 10
    


{'head': {'vars': ['thing']},
 'results': {'bindings': [{'thing': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q16'}},
   {'thing': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q17'}},
   {'thing': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q20'}},
   {'thing': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q27'}},
   {'thing': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q28'}},
   {'thing': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q29'}},
   {'thing': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q30'}},
   {'thing': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q31'}},
   {'thing': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q32'}},
   {'thing': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q33'}}]}}

In [None]:

def get_wikidata_item(item_id):
    # Wikidata API endpoint
    url = "https://www.wikidata.org/w/api.php"
    
    # Parameters for the API request
    params = {
        "action": "wbgetentities",  # Action to get data about entities
        "ids": item_id,  # ID of the Wikidata item (e.g., Q64 for Berlin)
        "format": "json",  # Response format
        "props": "labels|descriptions|aliases",  # Properties to retrieve: labels, descriptions, and aliases
        "languages": "en"  # Language filter
    }
    
    # Make the GET request to the Wikidata API
    response = requests.get(url, params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the JSON response
        data = response.json()
        
        # Access the item's data
        item_data = data['entities'][item_id]
        
        # Extract and print the label, description, and aliases
        label = item_data['labels']['en']['value']
        description = item_data['descriptions']['en']['value']
        aliases = [alias['value'] for alias in item_data['aliases']['en']]
        
        print(f"Label: {label}")
        print(f"Description: {description}")
        print(f"Aliases: {', '.join(aliases)}")
    else:
        print("Failed to retrieve data")

# Example usage
get_wikidata_item("Q64")

Label: Berlin
Description: federated state, capital and largest city of Germany
Aliases: Berlin, Germany, Berlin (Germany), DE-BE
