In [1]:
import pandas as pd
import geopandas as gpd
# Reading csv files
shapefile = 'data/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp'

gdf = gpd.read_file(shapefile)[['ADMIN', 'ADM0_A3', 'geometry']]
gdf.columns = ['country', 'country_code', 'geometry']
gdf

Unnamed: 0,country,country_code,geometry
0,Fiji,FJI,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000..."
1,United Republic of Tanzania,TZA,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982..."
2,Western Sahara,SAH,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948..."
3,Canada,CAN,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
4,United States of America,USA,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."
...,...,...,...
172,Republic of Serbia,SRB,"POLYGON ((18.82982 45.90887, 18.82984 45.90888..."
173,Montenegro,MNE,"POLYGON ((20.07070 42.58863, 19.80161 42.50009..."
174,Kosovo,KOS,"POLYGON ((20.59025 41.85541, 20.52295 42.21787..."
175,Trinidad and Tobago,TTO,"POLYGON ((-61.68000 10.76000, -61.10500 10.890..."


In [2]:
print(gdf[gdf['country'] == 'Antarctica'])
gdf.drop(gdf.index[159], inplace=True)

        country country_code  \
159  Antarctica          ATA   

                                              geometry  
159  MULTIPOLYGON (((-48.66062 -78.04702, -48.15140...  


In [3]:
from copy import deepcopy

df = pd.read_csv('data/Worldpop_by_year.csv', usecols=[0,1,2])
df2 = pd.read_csv('data/population-and-demography.csv', usecols=[0,1,2])
df3 = pd.read_csv('data/countries-table.csv', usecols=['country','pop2022','pop2023'])

# Data wrangling
df2.columns = ['country', 'year', 'population']

df['population'] = df['population'] / 1000000
df2['population'] = df2['population'] / 1000000
df3['pop2022'] = df3['pop2022'] / 1000000
df3['pop2023'] = df3['pop2023'] / 1000000

df3_copy = deepcopy(df3)
df

Unnamed: 0,country,year,population
0,Afghanistan,1800,3.28
1,Afghanistan,1801,3.28
2,Afghanistan,1802,3.28
3,Afghanistan,1803,3.28
4,Afghanistan,1804,3.28
...,...,...,...
38977,Zimbabwe,2014,15.40
38978,Zimbabwe,2015,15.80
38979,Zimbabwe,2016,16.20
38980,Zimbabwe,2017,16.50


In [4]:
# Manually changing display names of country 
gdf.loc[gdf['country'] == 'United States of America', 'country'] = 'United States'
gdf.loc[gdf['country'] == 'The Bahamas', 'country'] = 'Bahamas'
gdf.loc[gdf['country'] == 'Ivory Coast', 'country'] = 'Cote d\'Ivoire'
gdf.loc[gdf['country'] == 'Republic of Serbia', 'country'] = 'Serbia'
gdf.loc[gdf['country'] == 'Czechia', 'country'] = 'Czech Republic'
gdf.loc[gdf['country'] == 'United Republic of Tanzania', 'country'] = 'Tanzania'
gdf.loc[gdf['country'] == 'Somaliland', 'country'] = 'Somalia'

In [5]:
df.loc[df['country'] == 'Congo, Rep.', 'country'] = 'Republic of the Congo'
df.loc[df['country'] == 'Lao', 'country'] = 'Laos'
df.loc[df['country'] == 'Kyrgyz Republic', 'country'] = 'Kyrgyzstan'
df.loc[df['country'] == 'Congo, Dem. Rep.', 'country'] = 'Democratic Republic of the Congo'
df.loc[df['country'] == 'Timor-Leste', 'country'] = 'East Timor'
df.loc[df['country'] == 'Slovak Republic', 'country'] = 'Slovakia'
df.loc[df['country'] == 'Macedonia, FYR', 'country'] = 'North Macedonia'
df3.loc[df3['country'] == 'DR Congo', 'country'] = 'Democratic Republic of the Congo'

In [6]:
df3_clean = df3.melt(id_vars="country", value_vars=["pop2022", "pop2023"], var_name="year", value_name="population")
df3_clean.reset_index()
df3_clean['year'] = df3_clean.year.transform(lambda x: x[3:]).astype(int)
# df3_clean. = df3['year'].applymap(lambda x: x[3:])
df3_clean

Unnamed: 0,country,year,population
0,India,2022,1417.173173
1,China,2022,1425.887337
2,United States,2022,338.289857
3,Indonesia,2022,275.501339
4,Pakistan,2022,235.824862
...,...,...,...
463,Montserrat,2023,0.004386
464,Falkland Islands,2023,0.003791
465,Niue,2023,0.001935
466,Tokelau,2023,0.001893


In [7]:
# Merge all dataframes
df_country_pop = pd.concat([df, df2, df3_clean])
df_country_pop

Unnamed: 0,country,year,population
0,Afghanistan,1800,3.280000
1,Afghanistan,1801,3.280000
2,Afghanistan,1802,3.280000
3,Afghanistan,1803,3.280000
4,Afghanistan,1804,3.280000
...,...,...,...
463,Montserrat,2023,0.004386
464,Falkland Islands,2023,0.003791
465,Niue,2023,0.001935
466,Tokelau,2023,0.001893


In [8]:
import json
from bokeh.io import output_notebook, show, output_file, curdoc, output_notebook
from bokeh.plotting import figure, output_file, save
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, CustomJS, Slider, HoverTool
from bokeh.palettes import brewer
from bokeh.layouts import row, column
from bokeh.resources import CDN
from bokeh.embed import file_html

    
def json_data(yr):
    # Filter the population data
    df_pop_filtered = df_country_pop[df_country_pop["year"]==yr]
    
    merged = gdf.merge(df_pop_filtered, left_on = 'country', right_on = 'country', how = 'left')
    merged.fillna(0, inplace = True)
    
    merged_json = json.loads(merged.to_json())
    json_data = json.dumps(merged_json)
    return json_data


In [9]:
geosource = GeoJSONDataSource(geojson = json_data(2023))

color_mapper = LinearColorMapper(palette = brewer['YlGnBu'][6][::-1], low = 0, high = 200, nan_color = '#d9d9d9')

hover = HoverTool(tooltips = [ ('Country/region','@country'),('population(in millions)', '@population')])

color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8, width = 500, height = 20,
                     border_line_color=None,location = (0,0), orientation = 'horizontal')

p = figure(title = 'World Population', height = 600 , width = 950, toolbar_location = None, tools = [hover])
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.xaxis.visible = False
p.yaxis.visible = False

p.patches('xs','ys', source = geosource,fill_color = {'field' :'population', 'transform' : color_mapper},
          line_color = 'black', line_width = 0.4, fill_alpha = 1)

p.add_layout(color_bar, 'below')

In [10]:
def update_plot(attr, old, new):
    yr = slider.value
    new_data = json_data(yr)
    geosource.geojson = new_data
    p.title.text = f'World Population ({yr})' 
    
slider = Slider(title = 'Year',start = 1800, end = 2023, step = 1, value = 2023)


slider.on_change('value', update_plot)

layout = column(p, slider)
curdoc().add_root(layout)
curdoc().theme = 'dark_minimal'

# output_notebook()

# show(layout)
html = file_html(layout, CDN)

You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html



ValueError: OutputDocumentFor expects a sequence of Models

To see the interactivitiy of the plot please execute <bokeh serve --show choropleth.ipynb>