In [1203]:
import datetime
import re
import string

import requests


In [1204]:
class QUESTION:
	"""
	A question template ("mask") plus the phrasings it recognises.

	The mask is a string in which every '*' is a slot, e.g. 'how * *'. Each
	condition is a tuple with one entry per slot: a literal string fixes the text
	for that slot, while the entry '*' leaves the slot as a wildcard whose matched
	text is passed to the handler function.

	Example: mask 'how * *' with condition ('old is', '*') recognises
	'how old is tom cruise' and calls the handler with 'tom cruise'.

	Matching is exact: the whole question must consist of the literal parts in
	order, with wildcards absorbing the text in between.
	"""

	def __init__(self, question_with_mask: str, condition_answers: list):
		"""
		:param question_with_mask: The mask, e.g. 'how * *' or 'what * *'
		:param condition_answers: List of (condition_tuple, function_on_true) pairs
		:raises ValueError: If any pair is malformed (message prefixed with 'Error in conditions: ')
		"""
		self.question_with_mask = question_with_mask ## e.g. 'how * *', 'what * *'

		self.mask_parts = self.__split_mask()  ## e.g. ['how ', '*', ' ', '*'] for 'how * *'

		## every '*' in the mask is one slot that each condition has to fill
		self.number_of_conditions = question_with_mask.count('*')

		try:
			self.__validate_condition_answers(condition_answers)
		except ValueError as e:
			raise ValueError(f'Error in conditions: {e}') from e

		## copy the list so that add_condition() never mutates the caller's list
		self.conditions = list(condition_answers) ## e.g. [(('old is', '*'), function_on_true), (('is the population of', '*'), function_on_true)]

		self.injected_parts = self.__inject_condition_parts()

		self.answer = None  # placeholder; nothing in this class assigns it after construction


	def __validate_condition_answers(self, condition_answers: list):
		"""
		Check that every entry is a (condition_tuple, callable) pair whose tuple has one entry per '*' in the mask.
		:param condition_answers: List of tuples containing the condition and the function to call
		:return: None
		:raises ValueError: On the first entry that does not meet the expectations
		"""
		for condition_answer in condition_answers:
			if len(condition_answer) != 2:
				raise ValueError(f'Each condition must be accompanied by a function to call when the condition is true. Got: {condition_answer}')
			condition, function_on_true = condition_answer
			if not isinstance(condition, tuple):
				raise ValueError(f'Condition must be a tuple. Got: {condition_answer[0]}')
			if len(condition) != self.number_of_conditions:
				raise ValueError(f'Number of conditions in {condition_answer[0]} does not match the number of * in the question. Expected {self.number_of_conditions}, got {len(condition)}')
			## fail at setup time rather than when the question is finally asked
			if not callable(function_on_true):
				raise ValueError(f'Function to call must be callable. Got: {function_on_true}')


	def add_condition(self, condition: tuple, function_on_true):
		"""
		Register one more phrasing for this mask.
		The cached injected parts are reset; call inject_conditions() to rebuild them
		(ask_question() rebuilds them lazily otherwise).
		:param condition: Tuple with one entry per '*' in the mask
		:param function_on_true: Function called with the wildcard texts when the condition matches
		:raises ValueError: If the pair is malformed (message prefixed with 'Error in condition: ')
		"""
		try:
			self.__validate_condition_answers([(condition, function_on_true)])
		except ValueError as e:
			raise ValueError(f'Error in condition: {e}') from e

		self.conditions.append((condition, function_on_true))

		self.injected_parts = None  # Reset injected parts


	def inject_conditions(self):
		"""
		Inject the conditions into the question mask if that has not been done since the last change.
		The result is stored in self.injected_parts: one list of parts per condition,
		where each '*' of the mask is replaced with the corresponding condition entry.
		:return: None
		"""
		if self.injected_parts is None:
			self.injected_parts = self.__inject_condition_parts()


	def __split_mask(self):
		"""
		Split the question mask into parts, where '*' is a wildcard.
		Empty literal pieces (e.g. between two adjacent '*') are dropped.
		:return: List of parts
		"""
		pieces = self.question_with_mask.split('*')
		last = len(pieces) - 1

		parts = []
		for index, piece in enumerate(pieces):
			if piece != '':
				parts.append(piece)
			## a '*' sat between every two consecutive pieces
			if index != last:
				parts.append('*')

		## echoed so the parsed mask is visible during setup
		print(parts)
		return parts


	def __inject_condition_parts(self):
		"""
		Build, for every condition, the mask parts with each '*' replaced by that condition's entry
		(e.g. 'how * *' with ('old is', '*') becomes ['how ', 'old is', ' ', '*']).
		:return: List with one list of parts per condition
		"""
		all_injected_parts = []
		for condition, _function_on_true in self.conditions:
			injected_parts = []
			star_index = 0
			for part in self.mask_parts:
				if part == '*':
					injected_parts.append(condition[star_index])
					star_index += 1
				else:
					injected_parts.append(part)
			all_injected_parts.append(injected_parts)

		## echoed so the resulting patterns are visible during setup
		print(all_injected_parts)
		return all_injected_parts


	@staticmethod
	def __compile_parts(parts: list):
		"""
		Turn one list of injected parts into a compiled regular expression.
		Literal parts are escaped and must appear verbatim; each '*' becomes a capture group.
		:param parts: One entry of self.injected_parts
		:return: Compiled pattern
		"""
		pattern = ''.join('(.*?)' if part == '*' else re.escape(part) for part in parts)
		## DOTALL so a wildcard can also absorb line breaks, like the plain slicing it replaces
		return re.compile(pattern, re.DOTALL)


	def __find_matching_condition(self, question: str):
		"""
		Find the first condition whose injected parts match the whole question.
		e.g. 'how old is tom cruise' matches ['how ', 'old is', ' ', '*'] with variables ['tom cruise'].
		:param question: The question to match
		:return: (index into self.conditions, list of wildcard texts) or (None, None) if nothing matches
		"""
		for index, parts in enumerate(self.injected_parts):
			## an empty mask has nothing to match against
			if not parts:
				continue

			## fullmatch anchors the literals: no stray text before, between or after them
			match = self.__compile_parts(parts).fullmatch(question)
			if match is not None:
				return index, list(match.groups())

		return None, None


	def ask_question(self, question: str):
		"""
		Check if the question matches any of the conditions and return the corresponding answer.
		:param question: The question to be asked
		:return: Whatever the matching condition's function returns, or None if no condition matches

		"""
		## check that injected_parts is not None
		if self.injected_parts is None:
			print('Injected parts are None. Please call inject_conditions() during setup to avoid delays in responses.')
			self.inject_conditions()

		# Find the matching condition
		matching_condition, variables = self.__find_matching_condition(question)

		if matching_condition is None:
			return None

		# Get the function to call for the matching condition
		function_to_call = self.conditions[matching_condition][1]

		# Call the function with the extracted variables
		return function_to_call(*variables)

In [1205]:
def GetFromWIKIDATA(query: str, timeout: float = 30):
	"""
	Fetch data from the Wikidata SPARQL endpoint.
	:param query: The SPARQL query to run
	:param timeout: Seconds to wait for the server before giving up
	:return: The list under results -> bindings of the JSON response (possibly empty),
	         or None if the request failed or the response could not be parsed
	"""
	url = "https://query.wikidata.org/sparql"
	headers = {
		"Accept": "application/sparql-results+json"
	}

	## without a timeout a stalled connection would block the caller forever
	try:
		response = requests.get(url, params={"query": query}, headers=headers, timeout=timeout)
	except requests.RequestException as error:
		print(f"Request failed: {error}")
		return None

	if response.status_code != 200:
		print(f"Request failed with status code {response.status_code}")
		return None

	## a 200 response with a non-JSON body is reported the same way as any other failure
	try:
		payload = response.json()
	except ValueError as error:
		print(f"Request returned an unreadable response: {error}")
		return None

	results = payload.get("results", {}).get("bindings", [])

	return results

In [1206]:
def GetAgeOf(name: str):
	"""
	Look up a person's date of birth on Wikidata by English label and return their current age.
	:param name: The person's name; surrounding whitespace is removed and each word is capitalized
	:return: The age in whole years as a string, or None if the lookup failed,
	         nothing was found, or the returned date could not be read
	"""
	## make each word capitalized
	name = name.strip().title()
	print(f"Checking age for {name}.")

	## escape backslashes and double quotes so the name cannot break out of the SPARQL string literal
	escaped_name = name.replace("\\", "\\\\").replace('"', '\\"')

	query = f"""
	SELECT ?birthDate WHERE {{
	  ?person rdfs:label "{escaped_name}"@en;
			  wdt:P569 ?birthDate.
	}} LIMIT 1
	"""
	results = GetFromWIKIDATA(query)
	if results is None:
		print(f"Failed to fetch data for {name}.")
		return None

	## an empty list means the query ran but matched nobody
	if not results:
		print(f"No birth date found for {name}.")
		return None

	try:
		birth_date_str = results[0]["birthDate"]["value"]
		birth_date = datetime.datetime.strptime(birth_date_str[:10], "%Y-%m-%d").date()  # Only use YYYY-MM-DD
	except (KeyError, TypeError, ValueError):
		print(f"Unexpected birth date format for {name}.")
		return None

	today = datetime.date.today()
	## the tuple comparison is True (counts as 1) when this year's birthday is still ahead
	age = today.year - birth_date.year - ((today.month, today.day) < (birth_date.month, birth_date.day))
	return str(age)

In [1207]:
def GetPopulationOf(area: str):
	"""
	Look up the population of a city on Wikidata by English label.

	A first query reads one type label for the area. If that label is not
	'city', 'town' or 'village', " City" is appended to the name before the
	population query (so 'New York' is looked up as 'New York City').
	:param area: The area name; surrounding whitespace is removed and each word is capitalized
	:return: The population value from the response, or None if a lookup failed or found nothing
	"""

	def escape_for_sparql(text: str) -> str:
		## escape backslashes and double quotes so the text cannot break out of a SPARQL string literal
		return text.replace("\\", "\\\\").replace('"', '\\"')

	## make each word capitalized
	area = area.strip().title()
	print(f"Checking population for {area}.")


	## check for ambiguity. If searching for an area that is both a state/county and a city, it should be made clear we are looking for the city.
	check_query = f"""
	SELECT ?typeLabel WHERE {{
		?area rdfs:label "{escape_for_sparql(area)}"@en.
		?area wdt:P31 ?type.
		SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
	}}
	LIMIT 1
	"""
	check_results = GetFromWIKIDATA(check_query)
	## covers both a failed request (None) and a query without matches ([])
	if not check_results:
		print(f"No results found for {area}. It might not be a recognized city or area in Wikidata.")
		return None

	try:
		type_label = check_results[0]['typeLabel']['value'].lower()
	except (KeyError, TypeError, AttributeError):
		print(f"Unexpected response format for {area}. Could not determine type label.")
		return None

	if type_label not in ['city', 'town', 'village']:
		print(f"Assuming {area} refers to a city due to ambiguity.")
		area = area + " City"


	print(f"Final area to query: {area}")


	query = f"""
		SELECT ?population WHERE {{
		  ?area rdfs:label "{escape_for_sparql(area)}"@en;
				wdt:P31/wdt:P279* wd:Q515; 
				wdt:P1082 ?population.
		}} LIMIT 1
		"""

	results = GetFromWIKIDATA(query)

	if results is None:
		print(f"Failed to fetch population data for {area}.")
		return None

	## an empty list means the query ran but no population was found
	if not results:
		print(f"No population found for {area}.")
		return None

	try:
		return results[0]["population"]["value"]
	except (KeyError, TypeError):
		print(f"Unexpected response format for {area}. Could not read population.")
		return None


In [1208]:
## one QUESTION per mask; each starts out with a single phrasing whose last slot stays a wildcard
q1 = QUESTION(
	question_with_mask='how * *',
	condition_answers=[(('old is', '*'), GetAgeOf)],
)
q2 = QUESTION(
	question_with_mask='what * *',
	condition_answers=[(('is the population of', '*'), GetPopulationOf)],
)

['how ', '*', ' ', '*']
[['how ', 'old is', ' ', '*']]
['what ', '*', ' ', '*']
[['what ', 'is the population of', ' ', '*']]


In [1209]:
## extra phrasings: both masks can reach either lookup function
q1.add_condition(
	condition=('big is the population of', '*'),
	function_on_true=GetPopulationOf,
)
q1.add_condition(
	condition=('many people live in', '*'),
	function_on_true=GetPopulationOf,
)
q2.add_condition(
	condition=('is the age of', '*'),
	function_on_true=GetAgeOf,
)

In [1210]:
## add_condition() resets the cached injected parts to None; rebuild them here so that
## ask_question() does not have to do it (and print its warning) on the first question
q1.inject_conditions()
q2.inject_conditions()

[['how ', 'old is', ' ', '*'], ['how ', 'big is the population of', ' ', '*'], ['how ', 'many people live in', ' ', '*']]
[['what ', 'is the population of', ' ', '*'], ['what ', 'is the age of', ' ', '*']]


In [1211]:
## the QUESTION objects that ask() tries, in this order
questions = [q1, q2]

In [None]:
def ask(question: str):
	"""
	Normalise a free-text question and return the answer of the first QUESTION in `questions` that matches it.
	:param question: The question as typed by the user
	:return: The first answer that is not None, or None if no question matched
	"""

	## strip whitespace together with the punctuation: stripping punctuation alone stops
	## at the first space, so 'how old is Tom Cruise? ' would keep its question mark
	question = question.strip(string.punctuation + string.whitespace).lower()

	for _question in questions:
		answer = _question.ask_question(question=question)
		if answer is not None:
			print(f'Answer: {answer}')
			return answer

	# If no question matched, return None
	print(f'No matching question found for: {question}')
	return None

In [1213]:
## Ages change on every birthday, so the expected value is derived from the birth date
## instead of being hard-coded (a literal such as '62' is only correct for one year).
_today = datetime.date.today()

def _age_today(birth_date: datetime.date) -> str:
	"""Age in whole years as of today, formatted as the string that GetAgeOf returns."""
	birthday_still_ahead = (_today.month, _today.day) < (birth_date.month, birth_date.day)
	return str(_today.year - birth_date.year - birthday_still_ahead)

assert _age_today(datetime.date(1962, 7, 3)) == ask('how old is Tom Cruise')
assert _age_today(datetime.date(1989, 12, 13)) == ask('how old is Taylor Swift')

## NOTE(review): these figures are whatever the live endpoint returned when the cell was last run;
## they need updating if the upstream data changes.
assert '8799728' == ask('what is the population of London')
assert '8804190' == ask('what is the population of New York?')


Checking age for Tom Cruise.
Answer: 62
Checking age for Taylor Swift.
Answer: 35
Checking population for London.
Final area to query: London
Answer: 8799728
Checking population for New York.
Assuming New York refers to a city due to ambiguity.
Final area to query: New York City
Answer: 8804190
