In [1]:
import pandas as pd

from lets_plot import *
from lets_plot.geo_data import *

The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).


In [2]:
# Run Lets-Plot's HTML setup once, before any plot in this notebook is built.
LetsPlot.setup_html()

In [5]:
def get_naturalearth_data(data_type, columns=None):
    """Load a Natural Earth shapefile from the lets-plot-docs repository.

    Parameters
    ----------
    data_type : str
        Dataset directory name under ``data/naturalearth/`` in the
        repository (this notebook uses "admin_0_countries" and
        "populated_places").
    columns : list of str, optional
        Columns to keep, spelled as they appear in the shapefile
        (plus "geometry" if the geometry should be kept). When given, the
        result is restricted to these columns and every column name is
        lower-cased. When ``None`` all attributes and the geometry are
        returned with their original names.

    Returns
    -------
    geopandas.GeoDataFrame or pandas.DataFrame
        One row per shapefile record. If ``columns`` omits "geometry",
        the selection carries no geometry column (geopandas returns a
        plain DataFrame for such a selection).
    """
    import shapefile
    import geopandas as gpd
    from shapely.geometry import shape

    naturalearth_url = (
        "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/"
        "data/naturalearth/{0}/data.shp?raw=true"
    ).format(data_type)

    # Read everything inside the context manager so the reader's underlying
    # file handles are closed deterministically once the data is in memory.
    with shapefile.Reader(naturalearth_url) as sf:
        # The first entry of sf.fields is pyshp's DeletionFlag pseudo-field,
        # which has no counterpart in the records, hence the [1:].
        field_names = [field[0] for field in sf.fields[1:]]
        records = [dict(zip(field_names, record)) for record in sf.records()]
        geometries = [shape(s) for s in sf.shapes()]

    gdf = gpd.GeoDataFrame(records, geometry=geometries)
    if columns is not None:
        gdf = gdf[columns]
        gdf.columns = [col.lower() for col in gdf.columns]

    return gdf

In [6]:
# Country attributes only: "geometry" is not requested, so this table
# carries no shapes despite the _gdf suffix.
world_gdf = get_naturalearth_data(
    data_type="admin_0_countries",
    columns=["NAME", "ADM0_A3", "CONTINENT"],
)
print(world_gdf.shape)
world_gdf.head()

(177, 3)


Unnamed: 0,name,adm0_a3,continent
0,Fiji,FJI,Oceania
1,Tanzania,TZA,Africa
2,W. Sahara,SAH,Africa
3,Canada,CAN,North America
4,United States of America,USA,North America


In [7]:
# Keep the European rows, drop the now-constant continent column, and
# spell out the abbreviated Bosnia and Herzegovina name.
europe_gdf = (
    world_gdf
    .loc[lambda countries: countries["continent"] == "Europe"]
    .drop(columns=["continent"])
    .reset_index(drop=True)
    .assign(name=lambda countries: countries["name"].replace({
        "Bosnia and Herz.": "Bosnia and Herzegovina"
    }))
)
print(europe_gdf.shape)
europe_gdf.head()

(39, 2)


Unnamed: 0,name,adm0_a3
0,Russia,RUS
1,Norway,NOR
2,France,FRA
3,Sweden,SWE
4,Belarus,BLR


In [8]:
# Populated places restricted to rows with adm0cap == 1 (presumably the
# national-capital flag - TODO confirm); the flag column is dropped afterwards.
capitals_gdf = (
    get_naturalearth_data(
        "populated_places",
        columns=["NAME", "ADM0_A3", "ADM0CAP", "geometry"],
    )
    .loc[lambda places: places["adm0cap"] == 1]
    .drop(columns=["adm0cap"])
    .reset_index(drop=True)
)
print(capitals_gdf.shape)
capitals_gdf.head()

(199, 3)


Unnamed: 0,name,adm0_a3,geometry
0,Vatican City,VAT,POINT (12.45339 41.90328)
1,San Marino,SMR,POINT (12.44177 43.9361)
2,Vaduz,LIE,POINT (9.51667 47.13372)
3,Luxembourg,LUX,POINT (6.13 49.61166)
4,Palikir,FSM,POINT (158.14997 6.91664)


In [14]:
# Pair every European country with its capital via the shared adm0_a3 code.
# The two "name" columns are renamed up front so the merged frame comes out
# as country / capital / geometry without relying on column positions.
europe_capitals_gdf = (
    europe_gdf
    .rename(columns={"name": "country"})
    .merge(
        capitals_gdf.rename(columns={"name": "capital"}),
        on="adm0_a3",
        how="inner",
    )
    .drop(columns=["adm0_a3"])
    .sort_values(by="country")
    .reset_index(drop=True)
)
europe_capitals_gdf.head()

Unnamed: 0,country,capital,geometry
0,Albania,Tirana,POINT (19.81888 41.32754)
1,Austria,Vienna,POINT (16.36469 48.20196)
2,Belarus,Minsk,POINT (27.56468 53.90192)
3,Belgium,Brussels,POINT (4.33137 50.83526)
4,Bosnia and Herzegovina,Sarajevo,POINT (18.383 43.85002)


In [15]:
def geocode_country(name, capital, geometry):
    """Collect the geocoded cities of one country and flag its capital.

    Parameters
    ----------
    name : str
        Country name; used as the geocoding scope and written to the
        ``country`` column of the result.
    capital : str
        Capital name. A geocoded city is flagged as the capital when its
        found name equals this string exactly.
    geometry : shapely geometry
        Fallback location of the capital, used only when no geocoded city
        matches ``capital``.

    Returns
    -------
    GeoDataFrame (assuming ``get_centroids()`` returns one - TODO confirm)
        Columns ``name``, ``geometry``, ``is_capital`` and ``country``,
        sorted by ``is_capital`` and then ``name``, so capital rows come last.
    """
    import geopandas as gpd

    result = (
        geocode_cities()
        .scope(name)
        .ignore_not_found()
        .get_centroids()[["found name", "geometry"]]
        .rename(columns={"found name": "name"})
    )
    result = result.assign(is_capital=result["name"] == capital)
    if not result["is_capital"].any():
        # Build the fallback row as a GeoDataFrame carrying the same CRS as
        # the geocoded cities. A plain DataFrame here has no CRS; mixing it in
        # appears to be what triggers geopandas' "CRS not set for some of the
        # concatenation inputs" warning when the per-country frames are
        # combined later - NOTE(review): confirm the warning disappears.
        capital_row = gpd.GeoDataFrame(
            {"name": [capital], "is_capital": [True]},
            geometry=[geometry],
            crs=getattr(result, "crs", None),
        )
        result = pd.concat([result, capital_row], ignore_index=True)
    return result.assign(country=name).sort_values(by=["is_capital", "name"])

# Geocode every country in turn and stack the per-country tables into one
# frame with a fresh 0..n-1 index.
gdf = pd.concat(
    [
        geocode_country(entry.country, entry.capital, entry.geometry)
        for entry in europe_capitals_gdf.itertuples(index=False)
    ],
    ignore_index=True,
)

  return GeometryArray(data, crs=_get_common_crs(to_concat))


In [67]:
# TODO: temporary five-row sample of gdf used by the plot below.
tmp_gdf = gdf.head(5)
tmp_gdf.head()

Unnamed: 0,name,geometry,is_capital,country
0,Aliko,POINT (20.06347 39.81703),False,Albania
1,Allkaj,POINT (19.76123 40.84303),False,Albania
2,Ana e Malit,POINT (19.41946 42.02265),False,Albania
3,Antigonë,POINT (20.21876 40.10062),False,Albania
4,Aranitas,POINT (19.79317 40.61398),False,Albania


In [68]:
# TODO
# Point-density layer over the sampled cities; the same frame is passed as
# both data and map and joined on (country, name).
(
    ggplot()
    + geom_pointdensity(
        aes(color="is_capital", fill="..count..", group="country"),
        data=tmp_gdf,
        map=tmp_gdf,
        map_join=[["country", "name"], ["country", "name"]],
        shape=21,
    )
    + coord_map(xlim=[-10.5, 44.0], ylim=[37.0, 60.5])
)