In [None]:
from geodata.db.client import WorldDataDB
# Shared database handle: later cells read its `countries`, `states` and
# `cities` attributes, each of which exposes a `.coll` with a `.find()` method.
# NOTE(review): constructed with no arguments, so connection settings
# presumably come from the client's own defaults/config — confirm.
db = WorldDataDB()


In [None]:
import pandas as pd
# Pull every document of each collection into memory and wrap it in a
# DataFrame. The generator is consumed left to right, so the collections are
# queried in the order countries -> states -> cities.
df_countries, df_states, df_cities = (
    pd.DataFrame(list(source.coll.find()))
    for source in (db.countries, db.states, db.cities)
)

In [None]:
# Columns shown when previewing each table. The first six entries are the same
# for all three tables, so they are declared once and unpacked into each list.
_cols_shared = (
    "created_time",
    "updated_time",
    "country_code",
    "country_id_csc",
    "latitude",
    "longitude",
)

cols_countries = [
    *_cols_shared,
    "region_id_csc",
    "country_name",
    "country_name_native",
    "country_name_english",
    "phone_code",
    "country_id_wikidata",
    "status",
]

cols_states = [
    *_cols_shared,
    "state_id_csc",
    "state_name",
    "state_name_native",
    "state_name_english",
    "state_code",
    "state_type_csc",
    "state_id_wikidata",
    "status",
]

cols_cities = [
    *_cols_shared,
    "city_id_csc",
    "state_id_csc",
    "city_name",
    "city_name_native",
    "city_name_english",
    "state_code",
    "city_id_wikidata",
    "status",
]


In [None]:
import random
def _show_sample(label, df, cols, k=3):
    """Display a header line followed by up to ``k`` random rows of ``df[cols]``.

    Rows are drawn without replacement, so the preview never repeats a row,
    and the sample size is capped at ``len(df)`` so a frame with fewer than
    ``k`` rows (including zero rows) does not raise.

    Args:
        label: Table name inserted into the header line.
        df: DataFrame to preview.
        cols: Column labels to show; each must exist in ``df``.
        k: Maximum number of rows to show.

    Raises:
        KeyError: If any label in ``cols`` is missing from ``df``.
    """
    display(f"---------- {label} (Total data: {len(df)}) ----------")
    # min() guards against DataFrame.sample raising when k > len(df).
    display(df[cols].sample(n=min(k, len(df))))


_show_sample("Countries", df_countries, cols_countries)

_show_sample("States", df_states, cols_states)

_show_sample("Cities", df_cities, cols_cities)

In [None]:
from datetime import datetime
import json

def default_serializer(obj):
    """Fallback encoder for ``json.dump(..., default=...)``.

    Args:
        obj: A value the standard JSON encoder could not handle.

    Returns:
        The ISO-8601 string for ``obj`` when it is a ``datetime``.

    Raises:
        TypeError: For any value that is not a ``datetime``.
    """
    # Guard clause: reject everything that is not a datetime up front.
    if not isinstance(obj, datetime):
        raise TypeError(f"Type {type(obj)} not serializable")
    return obj.isoformat()

def _export_collection(coll, key, path):
    """Write every document of ``coll`` to ``path`` as JSON ``{key: [docs...]}``.

    All documents are read *before* the output file is opened, so a failure
    while querying cannot leave a truncated or empty file behind. The parent
    directory of ``path`` is created when it does not exist yet.

    Args:
        coll: Object whose ``find()`` yields dict-like documents.
        key: Top-level key under which the document list is stored.
        path: Destination file path.

    Returns:
        The list of exported documents (with ``_id`` removed).
    """
    # Function-scope import: this cell's import lines do not provide pathlib.
    from pathlib import Path

    docs = []
    for doc in coll.find():
        # Drop the database-internal identifier before export.
        # NOTE(review): presumably an ObjectId, which default_serializer
        # would reject — confirm. A missing "_id" is tolerated.
        doc.pop("_id", None)
        docs.append(doc)

    out_path = Path(path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w") as f:
        json.dump({key: docs}, f, default=default_serializer)
    return docs


# The exported lists stay bound to the same names the cell defined before.
countries = _export_collection(db.countries.coll, "countries", "data/countries.json")

states = _export_collection(db.states.coll, "states", "data/states.json")

cities = _export_collection(db.cities.coll, "cities", "data/cities.json")

In [None]:
import geopandas as gpd
def _points_frame(df):
    """Build a geometry-only GeoDataFrame of points from ``df``.

    Uses the ``longitude`` column as x and the ``latitude`` column as y, with
    CRS 4326. No other column of ``df`` is carried into the result.
    """
    xs, ys = df["longitude"], df["latitude"]
    return gpd.GeoDataFrame(geometry=gpd.points_from_xy(xs, ys), crs=4326)


gdf_countries = _points_frame(df_countries)
gdf_states = _points_frame(df_states)
gdf_cities = _points_frame(df_cities)

In [None]:
# Write each point layer to its GeoJSON file, in the order
# countries -> states -> cities.
for layer, target in (
    (gdf_countries, "data/countries.geojson"),
    (gdf_states, "data/states.geojson"),
    (gdf_cities, "data/cities.geojson"),
):
    layer.to_file(target, driver="GeoJSON")