In [5]:
import os
import pandas as pd
from sqlalchemy import create_engine

# PostgreSQL connection settings. Each value is read from the environment and
# falls back to the local default given here when the variable is not set.
HOST = os.environ.get("POSTGRES_HOST", "localhost")
# The fallback port is an int, while a value taken from the environment is a
# str; both render the same way when interpolated into a connection URL.
PORT = os.environ.get("POSTGRES_PORT", 5430)  # TODO: copy tables to DB
DATABASE = os.environ.get("POSTGRES_DB", "osm-paper")
USER = os.environ.get("POSTGRES_USER", "osm-paper")
PASSWORD = os.environ.get("POSTGRES_PASSWORD", "osm-paper")

In [6]:
# Names of the per-dataset aggregate tables to summarise. Every table shares
# the same suffix, so only the dataset-specific prefix is spelled out; the
# resulting order is the order in which the tables are queried.
dataset_tables = [
    f"{dataset_prefix}_building_count_urban_centers_grid_agg"
    for dataset_prefix in (
        "os",
        "nl",
        "nrw",
        "microsoft",
        "south_korea",
        "france_ign",
        "ecuador_quito",
        "japan_all",
        "brazil_sao_paolo",
        "argentina_buenos_aires",
        "colombia_bogota",
        "colombia_medellin",
        "czechia",
        "estonia",
        "germany_sachsen",
        "germany_berlin",
        "new_zealand",
        "poland",
    )
]

# SQLAlchemy engine shared by every query issued in this cell.
con = create_engine(f"postgresql://{USER}:{PASSWORD}@{HOST}:{PORT}/{DATABASE}")

# Columns returned by the summary query below. They are used to build an empty
# frame when `dataset_tables` is empty, so later cells still find the columns.
summary_columns = [
    "name",
    "region_wb",
    "countries",
    "country_codes",
    "urban_centers",
    "grids",
]

# One per-region summary frame per dataset table. The frames are concatenated
# once after the loop instead of re-copying the accumulated frame on every
# iteration.
region_frames = []

for table_name in dataset_tables:
    # The table name is interpolated into the statement because identifiers
    # cannot be bound as query parameters; this is only acceptable because the
    # names come from the hard-coded `dataset_tables` list.
    # NOTE(review): the WHERE filter on b.iso_a3 also drops rows that have no
    # match in urban_centers_1km_grid_details (NULL != '-99' is not true), so
    # the first LEFT JOIN effectively behaves like an inner join — confirm
    # that this is intended.
    sql = f"""
        select
          '{table_name}' as name
          ,c.region_wb
          ,count(distinct b.iso_a3) countries
          ,array_agg(distinct c.NAME) country_codes
          ,count(distinct urban_center_id) urban_centers
          ,count(*) grids
        from {table_name} a
        left join urban_centers_1km_grid_details b
            on a.id = b.fid
        left join ne_10m_admin_0_countries c
            on c.iso_a3 = b.iso_a3
        where
            '-99' != b.iso_a3
        group by c.region_wb
        order by c.region_wb, urban_centers
    """
    df = pd.read_sql(sql, con=con)
    region_frames.append(df)

    # This table is not part of `dataset_tables` as written, so the branch only
    # runs if the google table is added to the list.
    if table_name in ["google_building_count_urban_centers_grid_agg"]:
        google_countries = df["country_codes"].to_list()
        # List the countries of the first region row. Whole names are printed
        # (previously only the first character of each name was used), and an
        # empty query result prints an empty line instead of raising.
        string = (
            "".join(f"{country}, " for country in google_countries[0])
            if google_countries
            else ""
        )

        print(string)

# Row labels are kept exactly as returned per table (no `ignore_index`), which
# matches the frame previously produced by incremental concatenation.
merged_df = (
    pd.concat(region_frames)
    if region_frames
    else pd.DataFrame(columns=summary_columns)
)

In [7]:
# Show the combined summary ordered by region, then by number of urban centers.
display(merged_df.sort_values(["region_wb", "urban_centers"], ascending=True))

# Region names matched against the `region_wb` column. Each entry is a
# one-element list because it is handed directly to `Series.isin` below (and
# is printed as such in the section headers).
wb_regions = [
    ["East Asia & Pacific"],
    ["Europe & Central Asia"],
    ["Latin America & Caribbean"],
    ["Middle East & North Africa"],
    ["North America"],
    ["South Asia"],
    ["Sub-Saharan Africa"],
]

# The dataset filter does not depend on the region, so it is applied once
# outside the loop.
microsoft_rows = merged_df.loc[
    merged_df["name"] == "microsoft_building_count_urban_centers_grid_agg"
]

for region in wb_regions:
    country_list = microsoft_rows.loc[
        microsoft_rows["region_wb"].isin(region), "country_codes"
    ].to_list()

    # Each name is followed by ", " (including the last one), which keeps the
    # printed text identical to earlier runs. A region without any Microsoft
    # row yields an empty list here; print an empty line for it rather than
    # failing with an IndexError.
    country_string = (
        "".join(f"{country}, " for country in country_list[0])
        if country_list
        else ""
    )
    print(f"### {region} ###")
    print(country_string)

Unnamed: 0,name,region_wb,countries,country_codes,urban_centers,grids
0,new_zealand_building_count_urban_centers_grid_agg,East Asia & Pacific,1,[New Zealand],8,941
0,south_korea_building_count_urban_centers_grid_agg,East Asia & Pacific,1,[South Korea],36,5185
0,japan_all_building_count_urban_centers_grid_agg,East Asia & Pacific,1,[Japan],105,22011
0,microsoft_building_count_urban_centers_grid_agg,East Asia & Pacific,12,"[Australia, Cambodia, China, Indonesia, Laos, ...",825,58707
0,germany_berlin_building_count_urban_centers_gr...,Europe & Central Asia,1,[Germany],1,657
0,estonia_building_count_urban_centers_grid_agg,Europe & Central Asia,1,[Estonia],2,130
0,germany_sachsen_building_count_urban_centers_g...,Europe & Central Asia,2,"[Germany, Poland]",5,443
0,czechia_building_count_urban_centers_grid_agg,Europe & Central Asia,1,[Czechia],12,713
0,nrw_building_count_urban_centers_grid_agg,Europe & Central Asia,2,"[Germany, Netherlands]",18,3100
0,nl_building_count_urban_centers_grid_agg,Europe & Central Asia,2,"[Belgium, Netherlands]",38,2683


### ['East Asia & Pacific'] ###
Australia, Cambodia, China, Indonesia, Laos, Malaysia, Mongolia, Myanmar, Philippines, Singapore, Thailand, Vietnam, 
### ['Europe & Central Asia'] ###
Albania, Armenia, Austria, Azerbaijan, Belarus, Bosnia and Herz., Bulgaria, Croatia, Cyprus, Czechia, Denmark, Estonia, Finland, France, Georgia, Greece, Hungary, Iceland, Italy, Kazakhstan, Kyrgyzstan, Latvia, Lithuania, Macedonia, Moldova, Montenegro, Poland, Portugal, Romania, Russia, Serbia, Slovakia, Slovenia, Spain, Sweden, Tajikistan, Turkey, Ukraine, Uzbekistan, 
### ['Latin America & Caribbean'] ###
Argentina, Bahamas, Barbados, Belize, Bolivia, Brazil, Chile, Colombia, Costa Rica, Cuba, Curaçao, Dominican Rep., Ecuador, El Salvador, Guatemala, Haiti, Honduras, Jamaica, Mexico, Nicaragua, Panama, Paraguay, Peru, Puerto Rico, Suriname, Trinidad and Tobago, Venezuela, 
### ['Middle East & North Africa'] ###
Algeria, Djibouti, Egypt, Iran, Iraq, Israel, Jordan, Lebanon, Libya, Malta, Morocco, Oman, 