In [None]:
from typing import Tuple
from concurrent.futures import ThreadPoolExecutor, as_completed

from geopy.geocoders import Nominatim
from pymongo import MongoClient

from geodata.db.client import WorldDataDB
from geodata.db.models.city import City

def search_osm(geolocalizador: Nominatim, name: str, state_code: str, iso2: str) -> str:
    """Look up the postal code of a city through a Nominatim geocoder.

    The three location parts are joined into a single free-text query of the
    form ``"<name>, <state_code>, <iso2>"`` and sent to ``geolocalizador``.

    Args:
        geolocalizador: Geocoder whose ``geocode`` method performs the lookup.
        name: City name.
        state_code: State / region code the city belongs to.
        iso2: Country code of the city.

    Returns:
        The ``postcode`` found in the address breakdown of the result, or the
        sentinel string ``'No disponible'`` when the geocoder returns nothing
        or the result carries no postcode.
    """
    query = f"{name}, {state_code}, {iso2}"
    # The structured ``address`` dict is only present in ``raw`` when address
    # details are requested explicitly; without this flag the postcode lookup
    # below can never match and the sentinel would always be returned.
    ubicacion = geolocalizador.geocode(query, addressdetails=True)

    if not ubicacion:
        return 'No disponible'

    return ubicacion.raw.get('address', {}).get('postcode', 'No disponible')

In [None]:
# Open a MongoDB client with no arguments, i.e. whatever connection defaults pymongo applies.
mongo_client = MongoClient()
# Project wrapper bound to that client; later cells reach the cities collection via db.cities.coll.
db = WorldDataDB(mongo_client=mongo_client)

In [None]:
def sarasa(geolocalizador: Nominatim, city_doc: dict) -> Tuple[City, str]:
    """Build a ``City`` from a raw document and try to find its postal code via OSM.

    The OSM lookup is attempted only when the city has no entries in
    ``postal_codes_wikidata`` and its ``state_code`` is set. When the lookup
    yields something other than the ``"No disponible"`` sentinel, the city,
    the value found and a separator line are shown in the notebook output.

    Args:
        geolocalizador: Geocoder forwarded to ``search_osm``.
        city_doc: Mapping whose keys are passed as keyword arguments to ``City``.

    Returns:
        A ``(city, postal_code)`` pair, where ``postal_code`` is the
        ``search_osm`` result or ``"No disponible"`` when no lookup was made.
    """
    city = City(**city_doc)

    needs_lookup = len(city.postal_codes_wikidata) == 0 and city.state_code is not None
    if not needs_lookup:
        return city, "No disponible"

    postal_code = search_osm(geolocalizador, city.city_name, city.state_code, city.country_code)
    if postal_code != "No disponible":
        # Surface every hit in the notebook, followed by a visual separator.
        for shown in (city, postal_code, "-"*40):
            display(shown)
    return city, postal_code

# Geocoder client reused for every lookup issued from this notebook.
geolocalizador = Nominatim(user_agent="mi_aplicacion")

# Let the server count the documents instead of streaming the whole collection
# to the client just to tally it.
# NOTE(review): assumes db.cities.coll is a pymongo Collection — confirm.
len_cities = db.cities.coll.count_documents({})

# Sequential pass: one lookup per city document, printing progress as done/total.
for i, city_doc in enumerate(db.cities.coll.find({}), start=1):
    city, s = sarasa(geolocalizador, city_doc)
    print(f"{i}/{len_cities}")

In [None]:
# Concurrent variant of the per-city lookup: every document is submitted to a
# thread pool and progress is printed as each task finishes (completion order,
# not submission order). Relies on `geolocalizador` and `len_cities` from the
# previous cell.
# NOTE(review): the public Nominatim usage policy reportedly caps clients at
# about one request per second — confirm 100 workers is acceptable for the
# server this geocoder talks to.
with ThreadPoolExecutor(max_workers=100) as executor:
    # Lazy generator; as_completed() drains it, which is when the tasks get submitted.
    pending = (
        executor.submit(sarasa, geolocalizador, doc)
        for doc in db.cities.coll.find({})
    )

    for finished_count, finished in enumerate(as_completed(pending), start=1):
        # result() re-raises any exception raised inside the worker.
        city, s = finished.result()
        print(f"{finished_count}/{len_cities}")