In [11]:
"""
CS 519 Project: World GDP Map Visualization

Author: Zong Fan
Date: 2020-10-15
"""
import geopandas as gpd 
import json
import pandas as pd

In [12]:
# load geometry data (Download from natural earth website: https://www.naturalearthdata.com/downloads/110m-cultural-vectors/)
country_shapefile = "ne_110m_admin_0_countries.shp"
geo_df = gpd.read_file(country_shapefile)
print(geo_df.columns)

geo_data = geo_df[["ADMIN", "ADM0_A3", "CONTINENT", "geometry"]]
print(len(geo_df))
# print(geo_df[geo_data["ADMIN"]=="France"])
geo_data.head()

Index(['featurecla', 'scalerank', 'LABELRANK', 'SOVEREIGNT', 'SOV_A3',
       'ADM0_DIF', 'LEVEL', 'TYPE', 'ADMIN', 'ADM0_A3', 'GEOU_DIF', 'GEOUNIT',
       'GU_A3', 'SU_DIF', 'SUBUNIT', 'SU_A3', 'BRK_DIFF', 'NAME', 'NAME_LONG',
       'BRK_A3', 'BRK_NAME', 'BRK_GROUP', 'ABBREV', 'POSTAL', 'FORMAL_EN',
       'FORMAL_FR', 'NAME_CIAWF', 'NOTE_ADM0', 'NOTE_BRK', 'NAME_SORT',
       'NAME_ALT', 'MAPCOLOR7', 'MAPCOLOR8', 'MAPCOLOR9', 'MAPCOLOR13',
       'POP_EST', 'POP_RANK', 'GDP_MD_EST', 'POP_YEAR', 'LASTCENSUS',
       'GDP_YEAR', 'ECONOMY', 'INCOME_GRP', 'WIKIPEDIA', 'FIPS_10_', 'ISO_A2',
       'ISO_A3', 'ISO_A3_EH', 'ISO_N3', 'UN_A3', 'WB_A2', 'WB_A3', 'WOE_ID',
       'WOE_ID_EH', 'WOE_NOTE', 'ADM0_A3_IS', 'ADM0_A3_US', 'ADM0_A3_UN',
       'ADM0_A3_WB', 'CONTINENT', 'REGION_UN', 'SUBREGION', 'REGION_WB',
       'NAME_LEN', 'LONG_LEN', 'ABBREV_LEN', 'TINY', 'HOMEPART', 'MIN_ZOOM',
       'MIN_LABEL', 'MAX_LABEL', 'NE_ID', 'WIKIDATAID', 'NAME_AR', 'NAME_BN',
       'NAME_DE', 'NAME_

Unnamed: 0,ADMIN,ADM0_A3,CONTINENT,geometry
0,Fiji,FJI,Oceania,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000..."
1,United Republic of Tanzania,TZA,Africa,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982..."
2,Western Sahara,SAH,Africa,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948..."
3,Canada,CAN,North America,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
4,United States of America,USA,North America,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."


In [13]:
# load geometric data directly in geopandas, but to many mismatches
# geo_data = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
# print(geo_data[geo_data["name"]=="France"])

In [14]:
# load gdp data (Download from world bank: https://data.worldbank.org/indicator/NY.GDP.MKTP.CD)
gdp_file = "API_NY.GDP.MKTP.CD_DS2_en_csv_v2_1429653.csv"
gdp_data = pd.read_csv(gdp_file, skiprows=[0,1,2,3])
# keep country name, code and gdp from 1980 to 2019 (2020 is NaN)
min_year = 1990
max_year = 2019
selected_years = [str(x) for  x in range(min_year, max_year+1)]
gdp_data = gdp_data[["Country Name", "Country Code"]+selected_years]
scale = 1e9  # convert unit to bilion dollars
gdp_data[selected_years] = gdp_data[selected_years]/scale
# replace NaN with 0
gdp_data = gdp_data.fillna(0)
gdp_data.head()

Unnamed: 0,Country Name,Country Code,1990,1991,1992,1993,1994,1995,1996,1997,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,Aruba,ABW,0.764887,0.872139,0.958463,1.08298,1.245688,1.320475,1.379961,1.531944,...,2.390503,2.549721,2.534637,2.701676,2.765363,2.919553,2.965922,3.056425,0.0,0.0
1,Afghanistan,AFG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,15.856575,17.804293,20.001599,20.56107,20.484885,19.907111,19.362642,20.19176,19.484385,19.101354
2,Angola,AGO,11.228765,10.603785,8.307811,5.76872,4.438321,5.538749,7.526447,7.648377,...,83.799497,111.789686,128.052854,136.709863,145.7122,116.193649,101.123851,122.123822,101.353231,94.635416
3,Albania,ALB,2.028554,1.099559,0.652175,1.185315,1.880952,2.392765,3.199643,2.258516,...,11.926929,12.890765,12.31983,12.776217,13.228144,11.386846,11.861201,13.019693,15.147021,15.278077
4,Andorra,AND,1.029048,1.106929,1.210014,1.007026,1.017549,1.178739,1.223945,1.180597,...,3.449967,3.629204,3.188809,3.193704,3.271808,2.78987,2.896679,3.000181,3.218316,3.154058


In [15]:
# statistic gdp range to select optimal color map scope
for year in range(1990, 2020):
    percentiles = gdp_data[str(year)].quantile([0.1, 0.25, 0.5, 0.75, 0.9, 0.95])
    print("year {}: GDP percentile is {} for 10%, 25%, 50%, 75%, 90%, 95%".format(year, percentiles.tolist()))
    print("year {}: Max GDP: {}".format(year, max(gdp_data[str(year)])))
# based on the statistic result: we would choose the gdp range as [0, 4,000]

year 1990: GDP percentile is [0.0, 0.5495894869444446, 6.19313038292234, 118.17062467867902, 887.3018118893413, 3409.203168668488] for 10%, 25%, 50%, 75%, 90%, 95%
year 1990: Max GDP: 22626.3691233133
year 1991: GDP percentile is [0.0, 0.5807043611111112, 6.740031339584325, 117.93461328372152, 852.3871924549206, 3704.2366899133544] for 10%, 25%, 50%, 75%, 90%, 95%
year 1991: Max GDP: 23966.5563685986
year 1992: GDP percentile is [0.0, 0.618691018127978, 6.60756109250824, 119.17524642690049, 828.490253810686, 3761.369262108896] for 10%, 25%, 50%, 75%, 90%, 95%
year 1992: Max GDP: 25452.8806011177
year 1993: GDP percentile is [0.0, 0.9043129972193721, 6.635280124146055, 131.68278867310602, 884.6122779051802, 4006.70300512126] for 10%, 25%, 50%, 75%, 90%, 95%
year 1993: Max GDP: 25857.861952437303
year 1994: GDP percentile is [0.0, 0.9256204667015338, 6.909312782278285, 135.04491684904602, 1003.6349141414735, 4358.387080129313] for 10%, 25%, 50%, 75%, 90%, 95%
year 1994: Max GDP: 27770.70

In [16]:
# merge geometry data with gdp data (use gpd merge instead of pd due to the polygon class)
# geo_gdp_df = gpd.merge(gdp_data, geo_data, how="left", left_on=["Country Name", "Country Code"], right_on=["name", "iso_a3"])
# geo_gdp_df = geo_gdp_df.drop(columns=["name", "iso_a3"])
geo_gdp_df = geo_data.merge(gdp_data, how="left", left_on=["ADM0_A3"], right_on=["Country Code"])
geo_gdp_df = geo_gdp_df.drop(columns=["ADMIN", "ADM0_A3"])
# create new columns with name year and gdp
geo_gdp_df = geo_gdp_df.melt(id_vars=["Country Name", "Country Code", "geometry", "CONTINENT"], var_name="year", value_name="gdp")
new_columns = ["country", "code", "geometry", "continent", "year", "gdp"]
geo_gdp_df.columns = new_columns
geo_gdp_df = geo_gdp_df.dropna()
print(geo_gdp_df.head())

         country code                                           geometry  \
0           Fiji  FJI  MULTIPOLYGON (((180.00000 -16.06713, 180.00000...   
1       Tanzania  TZA  POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...   
3         Canada  CAN  MULTIPOLYGON (((-122.84000 49.00000, -122.9742...   
4  United States  USA  MULTIPOLYGON (((-122.84000 49.00000, -120.0000...   
5     Kazakhstan  KAZ  POLYGON ((87.35997 49.21498, 86.59878 48.54918...   

       continent  year          gdp  
0        Oceania  1990     1.337025  
1         Africa  1990     4.258743  
3  North America  1990   593.929551  
4  North America  1990  5963.144000  
5           Asia  1990    26.932729  


In [18]:
from bokeh.io import output_notebook, show, output_file, curdoc
from bokeh.models import GeoJSONDataSource, ColumnDataSource, LinearColorMapper, ColorBar, tickers, Label, HoverTool, TapTool, PrintfTickFormatter, Slider, CustomJS, TextInput 
from bokeh.plotting import figure 
from bokeh.palettes import brewer, mpl, all_palettes
from bokeh.layouts import WidgetBox, column

def gdp_of_year(year):
    """Keep gdp data with given year"""
    df = geo_gdp_df
    select_df = df[df["year"]==str(year)]
    return json.dumps(json.loads(select_df.to_json()))

def gdp_above_thres(thres):
    """Keep gdp data above the given value"""
    df = geo_gdp_df 
    select_df = df[df["gdp"]>thres]
    return json.dumps(json.loads(select_df.to_json()))
    
# use turbo256 color palettes and construct color map
# print(list(all_palettes))
# print(mpl.keys())
palette = mpl["Viridis"][256][::-1] # the larger gdp, the deeper color
min_gdp=0
max_gdp=4000
# define linear color mapper
color_mapper = LinearColorMapper(low=min_gdp, high=max_gdp, palette=palette)
# define corresponding color bar
color_bar = ColorBar(color_mapper=color_mapper, major_label_text_font_size="12px",
                     ticker=tickers.AdaptiveTicker(desired_num_ticks=8),
                     formatter=PrintfTickFormatter(format="$ %d B"),
                     border_line_color=None,label_standoff=14,
                     location=(0, 0))
# load map for coloring
gdp_json = gdp_of_year(2019)
world_map = GeoJSONDataSource(geojson=gdp_json)
current_year = 2019
# initialize world map figure
p=figure(title="{} World GDP Map".format(current_year), plot_height=640, plot_width=960)
p.grid.grid_line_color=None
p.axis.axis_line_color=None
p.axis.major_tick_line_color=None

g = p.patches("xs", "ys", source=world_map, line_color="black", line_width=0.3, 
          fill_color={"field": "gdp", "transform": color_mapper})
p.add_layout(color_bar, "right")
# unrecognized countries or regions
unid = Label(x=-190, y=-55, text="* white part: unidentified countries", text_font_size="12px")
p.add_layout(unid)


In [19]:
# add hover function to display country name and gdp when mouse is put on specific country
hover = HoverTool(renderers=[g], tooltips=[("Country/Region", "@country"), ("GDP", "@gdp{(0,0.00)}")])
p.add_tools(hover)

In [20]:
# add slider function to change the year to display
slider = Slider(start=min_year, end=max_year, value=2019, step=1, title="YEAR", visible=True)
# callback function when changing the year on the slider
def slider_callback(attr, old, new):
    # renew data used for each year
    new_gdp = gdp_of_year(new)
    world_map.geojson = new_gdp
    p.title.text = "{} World GDP Map".format(new)
slider.on_change('value', slider_callback)

In [25]:
# add gdp threshold to visualize country above it
# texter = TextInput(value="0", title="Display country only above this GDP threshold (unit: $B)", width=100)
# def texter_callback(attr, old, new):
#     try:
#         new_gdp = gdp_above_thres(int(new)*1e9)
#         world_map.geojson = new_gdp 
#         p.title.text = "{} World GDP Map (only show those > ${}B))".format(slider.value, new)
#     except: 
#         print("Input is not a value!")
# texter.on_change('value', texter_callback)

In [26]:
# add click function to expand the trend of country's GDP in the years from 1990 to the selected year

# def select_country(country_code):
#     selected_country_gdp = geo_gdp_df[geo_gdp_df["code"]==country_code]["gdp"][:slider.value]
#     selected_country_year = [int(x) for x in geo_gdp_df[geo_gdp_df["code"]==country_code]["year"][:slider.value]]
#     return selected_country_gdp, selected_country_year

# def gdp_trend(attr, old, new):
#     selected_idx = world_map.selected.indices[0]
#     code = select_df.iloc[selected_idx]['code']
#     new_gdp, new_year = select_country(code)
#     gdp_source.data.x=new_year
#     gdp_source.data.y=new_gdp

def draw_country_gdp(country_code):
    """Get GDP line figure and corresponding gdp data with given country code"""
    # remove geometry data which cannnot be jsonized
    df = geo_gdp_df[["year", "code", "gdp", "country"]]
    g = figure(title="GDP trend from {} to {}".format(1990, slider.value), 
            plot_height=200, plot_width=400,
            x_range=(1990, 2020), border_fill_color='white', outline_line_color="grey", outline_line_alpha=0)
    g.background_fill_color = "white"
    g.xgrid.grid_line_color = "grey"
    g.xgrid.grid_line_alpha = 0.4
    g.ygrid.grid_line_color = "grey"
    g.ygrid.grid_line_alpha = 0.4
    g.axis.visible = True
    g.margin = (20, 30, 0, 0)
    # select data for a given country
    c_df = df[df["code"]==country_code]
    # null gdp value when initialization
    c_df["gdp"] = 0 
    gdp_source = ColumnDataSource(c_df)
    g.line('year', 'gdp', source=gdp_source, line_width=2, alpha=0.8, color="purple")
    cr = g.circle('year', 'gdp', source=gdp_source, size=10, hover_color="purple", hover_alpha=0.5) # hover setting
    circle_hover = HoverTool(tooltips=[("GDP", "@gdp{0,0.00}")], renderers=[cr])
    g.add_tools(circle_hover)
    return g, gdp_source

# all country gdp data
overall_data = ColumnDataSource(geo_gdp_df[["year", "code", "gdp", "country"]])
# get figure and gdp data of specific country
gdp_figure, gdp_source = draw_country_gdp("USA")  
# callback function for taptool
tap_country_callback = CustomJS(args=dict(overall=overall_data, curr=gdp_source), code="""
                var idx = cb_data.source.selected.indices[0];
                var country = cb_data.source.data.country[idx];
                var j=0;
                for (var i=0;i<=overall.data['gdp'].length;i++){
                    if (overall.data['country'][i] == country){
                        curr.data["gdp"][j] = overall.data["gdp"][i];
                        j = j+1;
                    }
                }
                graph.title.text = country.concat(" GDP trend from 1990 to 2019")
                graph.change.emit();
                curr.change.emit();
            """)

# taptool to select a country from the map
tap = TapTool(callback=tap_country_callback)
# assign graph to callback function
tap_country_callback.args["graph"] = gdp_figure
p.add_tools(tap)


In [27]:
# output iteractive map in notebook
output_notebook()
# stack slider, map figure, gdp figure 
layout = column(slider, p, gdp_figure)
curdoc().add_root(layout)
show(layout)

You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html

