In [12]:
import requests
from typing import List, Dict, Tuple, Any, Optional
import json

# Wikidata Q ID of the entity to retrieve; it is spliced into the SPARQL query text as wd:<item>
item = 'Q42'
# URL of the SPARQL endpoint (presumably the target for the query built from `item`;
# no cell visible here actually sends a request to it)
endpointUrl = 'https://query.wikidata.org/sparql'

In [13]:
# CONSTRUCT query that returns every triple in which the item appears
# either as the subject or as the object.
# Literal SPARQL braces are doubled because the template is an f-string.
query = f'''
PREFIX wd: <http://www.wikidata.org/entity/>
CONSTRUCT {{
  wd:{item} ?p1 ?o.
  ?s ?p2 wd:{item}.
}}
WHERE {{
  {{wd:{item} ?p1 ?o.}}
  UNION
  {{?s ?p2 wd:{item}.}}
}}
'''
print(query)


PREFIX wd: <http://www.wikidata.org/entity/>
CONSTRUCT {
  wd:Q42 ?p1 ?o.
  ?s ?p2 wd:Q42.
}
WHERE {
  {wd:Q42 ?p1 ?o.}
  UNION
  {?s ?p2 wd:Q42.}
}



In [14]:
# Identify this client to the Wikimedia servers.
# See https://meta.wikimedia.org/wiki/User-Agent_policy for details of Wikimedia User-Agent policy
version = '0.1.0'
created = '2023-02-22'
user_agent_header = f'VanderSearchBot/{version} (https://github.com/HeardLibrary/linked-data/tree/master/vanderbot; mailto:steve.baskauf@vanderbilt.edu)'
request_header_dictionary = {'Accept': 'application/json', 'User-Agent': user_agent_header}

# Install the headers as session defaults so they don't have to be sent with every request
search_session = requests.Session()
search_session.headers.update(request_header_dictionary)

In [15]:


def search_cirrus(search_string: str, http: requests.Session, api_endpoint='https://www.wikidata.org/w/api.php') -> List[dict]:
	"""Search for a string using CirrusSearch (Elastic-based search engine)

	One search request is sent, followed by one ``wbgetentities`` request per hit
	to look up that hit's English label.

	Parameters
	----------
	search_string : str
		String to use in elastic search, produces same results as the Wikidata search box
	http : requests.Session
		Requests HTTP session to use for search calls
	api_endpoint : str
		URL of the endpoint, defaults to Wikidata

	Returns
	-------
	List[dict] : List of search results with keys "qid", "description", and "label".
		"label" is the English label, or an empty string when none was found.

	Raises
	------
	KeyError
		If the search response lacks the "query" / "search" keys, or a hit lacks
		"title" or "snippet".
	"""
	# Hand the parameters to requests as a dict so that it performs the encoding.
	# Splicing search_string into a JSON literal and parsing it back failed (or silently
	# changed the text) whenever the string contained a double quote or a backslash.
	search_params = {
		'action': 'query',
		'format': 'json',
		'list': 'search',
		'formatversion': '2',
		'srsearch': search_string
	}
	hits = http.get(api_endpoint, params=search_params).json()['query']['search']

	results = []
	for hit in hits:
		# "title" in the search results is the Q ID
		qid = hit['title']

		# Look up the label for this item
		label_params = {
			'action': 'wbgetentities',
			'format': 'json',
			'ids': qid,
			'props': 'labels'
		}
		entity_data = http.get(api_endpoint, params=label_params).json()
		try:
			label = entity_data['entities'][qid]['labels']['en']['value']
		except (KeyError, TypeError):
			# No English label, or the lookup response did not have the expected nesting.
			# Only these lookup failures are treated as "no label"; anything else propagates.
			label = ''

		# Build a fresh dict with just the documented keys instead of deleting
		# the unwanted ones from the API response in place.
		results.append({'label': label, 'qid': qid, 'description': hit['snippet']})

	return results

In [21]:
def get_answer_qid(input_question):
    """Return the Q IDs of the CirrusSearch hits for a question or phrase.

    The search is run through ``search_cirrus`` using the notebook-level
    ``search_session``; the "qid" value of every hit is collected in the
    order the hits were returned.
    """
    search_results = search_cirrus(input_question, search_session)
    return [result.get("qid") for result in search_results]
# Run the search for a sample phrase; this sends HTTP requests via search_cirrus and search_session
returned_qid = get_answer_qid('America')
# Bare expression so the notebook displays the resulting list of Q IDs
returned_qid

['Q828',
 'Q30',
 'Q482262',
 'Q49',
 'Q126852',
 'Q18',
 'Q27611',
 'Q2842807',
 'Q2844832',
 'Q4630358']

In [20]:
#generate the answers
def construct_query(qids: Optional[List[str]] = None) -> List[str]:
  """Build and print one SPARQL CONSTRUCT query per Q ID.

  Each query returns every triple in which the item appears as the subject
  or as the object.

  Parameters
  ----------
  qids : list of str, optional
    Q IDs to build queries for. When omitted, the notebook-level
    ``returned_qid`` list is used, which is what the function always used
    before this parameter existed.

  Returns
  -------
  List[str] : The query strings, in the same order as the Q IDs. Each one is
    also printed, as before.
  """
  if qids is None:
    # Backward-compatible default: fall back to the notebook global
    qids = returned_qid

  queries = []
  for item in qids:
    # Literal SPARQL braces are doubled because the template is an f-string.
    # The leading indentation inside the text is kept so the output is unchanged.
    query = f'''
    PREFIX wd: <http://www.wikidata.org/entity/>
    CONSTRUCT {{
      wd:{item} ?p1 ?o.
      ?s ?p2 wd:{item}.
    }}
    WHERE {{
      {{wd:{item} ?p1 ?o.}}
      UNION
      {{?s ?p2 wd:{item}.}}
    }}
    '''
    print(query)
    queries.append(query)
  return queries


  PREFIX wd: <http://www.wikidata.org/entity/>
  CONSTRUCT {
    wd:Q8275035 ?p1 ?o.
    ?s ?p2 wd:Q8275035.
  }
  WHERE {
    {wd:Q8275035 ?p1 ?o.}
    UNION
    {?s ?p2 wd:Q8275035.}
  }
  
