In [11]:
from SPARQLWrapper import SPARQLWrapper, JSON
from typing import Optional, Tuple, List

from pymongo import MongoClient

from geodata.db.client import WorldDataDB
from geodata.wikidata.sparql import results_from_query
from geodata.db.models.state import State
from geodata.db.models.city import City

In [12]:
db = WorldDataDB()

In [16]:
def fuzzy_search_wikidata(query_term: str, country_code: str) -> List[Tuple[str, str]]:
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    sparql.setQuery(f"""
        SELECT ?place ?placeLabel WHERE {{
            ?place wdt:P31/wdt:P279* wd:Q10864048;
                rdfs:label ?placeLabel;
                wdt:P17 ?country. # Country of the place.
            ?country wdt:P297 "{country_code}".
            FILTER(CONTAINS(LCASE(?placeLabel), "{query_term}")).
            SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
        }}
    """)
    sparql.setReturnFormat(JSON)
    
    try:
        results = sparql.query().convert()
        bindings = results["results"]["bindings"]
        fuzzy_results = [(binding["place"]["value"].split('/')[-1], binding["placeLabel"]["value"]) for binding in bindings]
        return fuzzy_results
    except Exception as e:
        print(f"Error during SPARQL query: {e}")
        return []


state = State(**db.states.coll.find_one({"state_id_csc": 3007}))
r = fuzzy_search_wikidata(state.state_name, state.country_code)
print(r)
state.model_dump()

[]


{'created_time': datetime.datetime(2024, 3, 10, 19, 55, 35, 110000),
 'updated_time': datetime.datetime(2024, 3, 10, 19, 55, 35, 110000),
 'country_code': 'DE',
 'country_id_csc': 82,
 'latitude': 53.6126505,
 'longitude': 12.4295953,
 'postal_codes_wikidata': [],
 'websites_wikidata': [],
 'state_id_csc': 3007,
 'state_name': 'Mecklenburg-Vorpommern',
 'state_code': 'MV',
 'state_type_csc': None,
 'state_id_wikidata': None}

In [None]:
def get_native_name_from_wikidata(id_wikidata: str) -> Optional[str]:
    query = f"""
        SELECT ?placeNativeLabel WHERE {{
            wd:{id_wikidata} wdt:P1448 ?placeNativeLabel.
            SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en,native". }}
        }}
    """

    results = results_from_query(query)
    bindings = results["results"]["bindings"]

    if len(bindings) > 0 and "placeNativeLabel" in bindings[0]:
        return bindings[0]["placeNativeLabel"]["value"]
    else:
        return None

results = []
for state_doc in db.states.coll.find({}):
    state = State(**state_doc)
    name = get_native_name_from_wikidata(state.id_wikidata)
    
    if name is not None:
        display(name)
        display(state.model_dump())
        results.append((state, name))