Skip to content

Commit

Permalink
Browse files: browse the repository at this point in the history
  • Loading branch information
simontaurus committed May 2, 2023
2 parents 385e451 + 14b051c commit b83d590
Showing 1 changed file with 17 additions and 11 deletions.
28 changes: 17 additions & 11 deletions src/osw/wiki_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,16 @@ def create_site_object(

# Standard Query
# api.php?action=query&list=prefixsearch&pssearch=Star Wars
def prefix_search(site: mwclient.client.Site, text: str):
def prefix_search(site: mwclient.client.Site, text: str, debug: bool = True):
"""
Parameters
----------
site :
Site object from mwclient lib
text :
debug :
Whether to print the results
Returns
-------
Expand All @@ -129,25 +131,28 @@ def prefix_search(site: mwclient.client.Site, text: str):
result = site.api(
"query", list="prefixsearch", pssearch=text, pslimit=1000, format="json"
)
if len(result["query"]["prefixsearch"]) == 0:
if len(result["query"]["prefixsearch"]) == 0 and debug:
print("No results")
else:
for page in result["query"]["prefixsearch"]:
title = page["title"]
print(title)
if debug:
print(title)
page_list.append(title)
return page_list


# Semantic Query
def semantic_search(site: mwclient.client.Site, query):
def semantic_search(site: mwclient.client.Site, query: str, debug: bool = True):
"""
Parameters
----------
site :
Site object from mwclient lib
query
query :
debug :
Whether to print the results
Returns
-------
Expand All @@ -156,19 +161,20 @@ def semantic_search(site: mwclient.client.Site, query):
page_list = []
query += "|limit=1000"
result = site.api("ask", query=query, format="json")
if len(result["query"]["results"]) == 0:
if len(result["query"]["results"]) == 0 and debug:
print("Query '{}' returned no results".format(query))
else:
print(
"Query '{}' returned {} results".format(
query, len(result["query"]["results"])
if debug:
print(
"Query '{}' returned {} results".format(
query, len(result["query"]["results"])
)
)
)
for page in result["query"]["results"].values():
# why do we do the following?
if "printouts" in page:
title = page["fulltext"]
if "#" not in title:
if "#" not in title and debug:
print(title)
# original position of "page_list.append(title)" line
page_list.append(title)
Expand Down

0 comments on commit b83d590

Please sign in to comment.