In [1]:
import requests
from typing import List,Tuple, Optional
import json

# URL of the SPARQL endpoint (Wikidata Query Service) that HTTP GET calls are sent to.
# NOTE(review): not referenced by any cell visible in this part of the notebook.
endpointUrl = 'https://query.wikidata.org/sparql'

In [3]:
# Wikimedia's User-Agent policy is described at
# https://meta.wikimedia.org/wiki/User-Agent_policy
# search_cirrus.py: contains a function for performing a Cirrus search of
# Wikidata (or other wikibase) using its API.
version = '0.1.0'
created = '2023-02-22'
user_agent_header = f'VanderSearchBot/{version} (https://github.com/HeardLibrary/linked-data/tree/master/vanderbot; mailto:steve.baskauf@vanderbilt.edu)'
request_header_dictionary = {'Accept': 'application/json', 'User-Agent': user_agent_header}

# Install the headers as session defaults so they don't have to be sent with every request.
search_session = requests.Session()
search_session.headers.update(request_header_dictionary)

In [4]:

# This section defines search_cirrus(), the CirrusSearch helper used by the cells below.
def search_cirrus(search_string: str, http: requests.Session, api_endpoint='https://www.wikidata.org/w/api.php') -> List[dict]:
	"""Search for a string using CirrusSearch (Elastic-based search engine)

	One search request is sent first, followed by one ``wbgetentities`` request
	per hit to look up that hit's English label.

	Parameters
	----------
	search_string : str
		String to use in elastic search, produces same results as the Wikidata search box
	http : requests.Session
		Requests HTTP session to use for search calls
	api_endpoint : str
		URL of the endpoint, defaults to Wikidata

	Returns
	-------
	List[dict] : List of search results with keys "label", "qid", and "description".
		"qid" is the hit's page title, "description" is the hit's search snippet
		(empty string if the API did not supply one) and "label" is the English
		label (empty string if it could not be found). An empty or
		whitespace-only search string returns an empty list without any request.
	"""
	# Nothing to search for: skip the network round-trips entirely.
	if not search_string or not search_string.strip():
		return []

	# Pass the parameters as a dict so that requests does the encoding. Splicing
	# the search string into JSON text breaks on quotes and backslashes.
	search_params = {
		'action': 'query',
		'format': 'json',
		'list': 'search',
		'formatversion': '2',
		'srsearch': search_string
	}
	response = http.get(api_endpoint, params=search_params)
	hits = response.json()['query']['search']

	clean_hits = []
	for hit in hits:
		# "title" in the search results is the Q ID
		qid = hit['title']
		label_params = {
			'action': 'wbgetentities',
			'format': 'json',
			'ids': qid,
			'props': 'labels'
		}
		response = http.get(api_endpoint, params=label_params)
		data = response.json()

		# Match the Q ID to the label; a missing entity or missing English label
		# is expected and yields an empty label rather than an error.
		try:
			label = data['entities'][qid]['labels']['en']['value']
		except (KeyError, TypeError):
			label = ''

		# Build a fresh dict holding only the useful keys instead of deleting the
		# unwanted ones, so a key absent from the API response cannot raise.
		clean_hits.append({
			'label': label,
			'qid': qid,
			'description': hit.get('snippet', '')
		})

	return clean_hits

In [26]:
def disambiguate(input_question) -> Optional[list]:
    """Interactively ask the user which search hit they meant.

    Runs ``search_cirrus`` for ``input_question`` using the module-level
    ``search_session``, prints a numbered list of the hit descriptions and
    reads the user's choice from standard input.

    Parameters
    ----------
    input_question : str
        Text to search for.

    Returns
    -------
    list or None
        ``[description, qid]`` of the chosen hit, or ``None`` when there were
        no hits or the entered value was not one of the listed numbers.
    """
    hits = search_cirrus(input_question, search_session)
    if not hits:
        print(f"- No items came up when you searched '{input_question}'")
        return None

    print(f"- Multiple Items came up when you searched '{input_question}', Did you mean:")
    # Map the displayed (1-based) number to the hit's description and Q ID.
    multiple_question = {}
    for number, entry in enumerate(hits, start=1):
        multiple_question[number] = [entry.get("description"), entry.get("qid")]
        print("-", number, entry.get("description"))

    raw_choice = input("Enter the corresponding number you mean: ")
    try:
        user_input = int(raw_choice)
    except ValueError:
        # Non-numeric input is a user mistake, not a reason for a traceback.
        print(f"'{raw_choice}' is not a number between 1 and {len(hits)}")
        return None
    print('The user input is:', user_input)

    answer = multiple_question.get(user_input)
    if answer is None:
        print(f"{user_input} is not a number between 1 and {len(hits)}")
        return None

    print("The selected answer:", answer[0], "and the Qid:", answer[1])
    return answer

# disambiguate('Geese')


In [11]:
def get_answer_qid(input_question):
    """Search for ``input_question`` and collect the QIDs of the hits.

    The search is run with ``search_cirrus`` using the module-level
    ``search_session``. Every hit is printed as it is processed.

    Returns a list holding the value stored under "qid" in each hit, in
    result order (``None`` for a hit without that key).
    """
    search_results = search_cirrus(input_question, search_session)
    found_qids = []
    for result in search_results:
        print(result)
        found_qids.append(result.get("qid"))
    return found_qids
# Run the search for 'Geese' and display the resulting list of QIDs.
# NOTE: returned_qid is read as a global by construct_query().
returned_qid = get_answer_qid('Geese')
returned_qid

{'label': 'Geese', 'qid': 'Q37075574', 'description': 'family name'}
{'label': 'Geese', 'qid': 'Q110565450', 'description': 'American rock band'}
{'label': 'Geese', 'qid': 'Q18615480', 'description': '1440th strip of the webcomic xkcd'}
{'label': 'Geese', 'qid': 'Q106697285', 'description': 'short story by Daniel Coudriet'}
{'label': 'Geese', 'qid': 'Q119859778', 'description': 'painting by Will Roberts (1907–2000), National Museum Wales, National Museum Cardiff'}
{'label': 'Chinese geese', 'qid': 'Q1162425', 'description': 'breeds of geese'}
{'label': 'Geese', 'qid': 'Q119868417', 'description': 'painting by Victor Cirefice (b.1949), Northern Ireland Civil Service'}
{'label': 'Geese', 'qid': 'Q50988783', 'description': 'painting by Auguste Durst'}
{'label': 'The Wild Geese', 'qid': 'Q64211', 'description': '1978 film by Andrew V. McLaglen'}
{'label': 'Geese', 'qid': 'Q20200197', 'description': 'painting by F.I. Roherberg'}


['Q37075574',
 'Q110565450',
 'Q18615480',
 'Q106697285',
 'Q119859778',
 'Q1162425',
 'Q119868417',
 'Q50988783',
 'Q64211',
 'Q20200197']

In [9]:
# Generate and print a SPARQL CONSTRUCT query for each QID
def construct_query(qids: Optional[List[str]] = None):
  """Print one SPARQL CONSTRUCT query per Q ID.

  Each query constructs every triple in which the item appears as the
  subject (``wd:<qid> ?p1 ?o``) or as the object (``?s ?p2 wd:<qid>``).

  Parameters
  ----------
  qids : list of str, optional
    Q IDs to build queries for. When omitted, the module-level
    ``returned_qid`` list is used, so that variable must already have been
    assigned by an earlier cell.

  Returns
  -------
  None : the queries are printed, not returned.
  """
  if qids is None:
    # Backward-compatible fallback to the notebook-global search result.
    qids = returned_qid
  for item in qids:
    query = '''
    PREFIX wd: <http://www.wikidata.org/entity/>
    CONSTRUCT {
      wd:''' + item + ''' ?p1 ?o.
      ?s ?p2 wd:''' + item + '''.
    }
    WHERE {
      {wd:''' + item + ''' ?p1 ?o.}
      UNION
      {?s ?p2 wd:''' + item + '''.}
    }
    '''
    print(query)

In [10]:
construct_query()


    PREFIX wd: <http://www.wikidata.org/entity/>
    CONSTRUCT {
      wd:Q37075574 ?p1 ?o.
      ?s ?p2 wd:Q37075574.
    }
    WHERE {
      {wd:Q37075574 ?p1 ?o.}
      UNION
      {?s ?p2 wd:Q37075574.}
    }
    

    PREFIX wd: <http://www.wikidata.org/entity/>
    CONSTRUCT {
      wd:Q110565450 ?p1 ?o.
      ?s ?p2 wd:Q110565450.
    }
    WHERE {
      {wd:Q110565450 ?p1 ?o.}
      UNION
      {?s ?p2 wd:Q110565450.}
    }
    

    PREFIX wd: <http://www.wikidata.org/entity/>
    CONSTRUCT {
      wd:Q18615480 ?p1 ?o.
      ?s ?p2 wd:Q18615480.
    }
    WHERE {
      {wd:Q18615480 ?p1 ?o.}
      UNION
      {?s ?p2 wd:Q18615480.}
    }
    

    PREFIX wd: <http://www.wikidata.org/entity/>
    CONSTRUCT {
      wd:Q106697285 ?p1 ?o.
      ?s ?p2 wd:Q106697285.
    }
    WHERE {
      {wd:Q106697285 ?p1 ?o.}
      UNION
      {?s ?p2 wd:Q106697285.}
    }
    

    PREFIX wd: <http://www.wikidata.org/entity/>
    CONSTRUCT {
      wd:Q119859778 ?p1 ?o.
      ?s ?p2 wd:Q11985