In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, GeoJSONDataSource, LinearColorMapper
from bokeh.transform import factor_cmap, factor_mark
from bokeh.models import CategoricalColorMapper, Legend
from bokeh.palettes import Category10

from colorcet import fire as color_palette

from data import merged_data

output_notebook()

In [10]:
# Load the merged dataset (europe=False) and flatten its index into columns.
df = merged_data(europe=False).reset_index()

# Keep only the 2020 rows where both `level` and `gender` are 'total', and
# derive `education_spent` as gdppc * education_expenditure_gdp_rate / 100.
# `.assign` returns a new frame, so `df` itself is left untouched.
df_total = (
    df.loc[(df['level'] == 'total') & (df['gender'] == 'total') & (df['year'] == 2020)]
    .assign(
        education_spent=lambda d: d['gdppc'] * d['education_expenditure_gdp_rate'] / 100
    )
)

In [6]:
# Category lists used to build categorical colour mappings, plus a marker list.
REGIONS = sorted(df['region'].unique())
INCOME_GROUPS = sorted(df['income_group'].unique())
MARKERS = ['hex', 'circle_x', 'triangle', 'diamond']

# Scatter of learning outcome against GDP per capita (log-scaled x axis),
# one colour per region.
p = figure(
    title='Harmonized Learning outcome by GDPPC',
    width=800,
    height=400,
    x_axis_type='log',
    x_axis_label='log GDP per capita',
    y_axis_label='Harmonized Learning Outcome',
)

# Add the legend to the right-hand side panel before drawing, so that the
# glyph's `legend_group` entries are attached to it.
p.add_layout(Legend(title='Regions', location='top_left'), 'right')

p.scatter(
    x='gdppc',
    y='learning_outcome',
    source=df_total,
    size=9,
    fill_alpha=0.5,
    color=factor_cmap(field_name='region', palette='Category10_7', factors=REGIONS),
    legend_group='region',
)

show(p)

In [7]:
# Scatter of learning outcome against education spending per capita
# (log-scaled x axis), one colour per income group.
#
# `education_spent` is derived in the data-loading cell as
# gdppc * education_expenditure_gdp_rate / 100.
# NOTE(review): labelled here as "education expenditure per capita" on the
# assumption that the rate is a percentage of GDP — confirm against the data source.
#
# REGIONS and MARKERS are not redefined here: this cell does not use them and
# the previous plotting cell already defines them with identical values.
INCOME_GROUPS = sorted(df.income_group.unique())

p = figure(height=400, width=800,
           title='Harmonized Learning Outcome by Education Expenditure per Capita',
           x_axis_type='log')
p.xaxis.axis_label = 'Education expenditure per capita (log scale)'
p.yaxis.axis_label = 'Harmonized Learning Outcome'

# Add the legend to the right-hand side panel before drawing, so that the
# glyph's `legend_group` entries are attached to it.
p.add_layout(Legend(), 'right')
p.legend.title = 'Income Group'

p.scatter('education_spent', 'learning_outcome', source=df_total,
          color=factor_cmap('income_group', 'Category10_4', INCOME_GROUPS),
          size=9,
          legend_group='income_group',
          fill_alpha=0.5)
show(p)

In [8]:
# Bar chart of education expenditure per country, with bars ordered and
# coloured by income group. Uses INCOME_GROUPS from the preceding plotting cell.
df_asdf = df_total.sort_values('income_group')
p = figure(
    x_range=df_asdf.country_code,
    height=400, width=800,
    title='Education Expenditure by Country')

p.xaxis.axis_label = 'Country code'
# NOTE(review): "% of GDP" is assumed from the column name and the "/ 100" used
# when deriving `education_spent` — confirm the unit.
p.yaxis.axis_label = 'Education expenditure (% of GDP)'
# One tick label per country: rotate them so they do not overlap.
p.xaxis.major_label_orientation = 'vertical'

# Add the legend to the right-hand side panel before drawing, so that the
# glyph's `legend_group` entries are attached to it.
p.add_layout(Legend(), 'right')
p.legend.title = 'Income Group'

p.vbar(
    x='country_code', top='education_expenditure_gdp_rate', source=df_asdf,
    color=factor_cmap('income_group', 'Category10_4', INCOME_GROUPS),
    legend_group='income_group')

show(p)
           

In [None]:

# Base figure for the choropleth map: fixed size, coloured background, no grid.
# Uses `width`/`height` like the other figures in this notebook; the older
# `plot_width`/`plot_height` keywords are not accepted by Bokeh 3.
chloropleth = figure(
    width=500,
    height=400,
    background_fill_color='#7dade0')
chloropleth.grid.visible = False
# `color_palette` is colorcet's `fire` palette, imported at the top of the
# notebook (the previous `COLOR_PALETTE` name was never defined).
color_mapper = LinearColorMapper(palette=color_palette)

# Load the country shapes and keep only the code and geometry columns.
# NOTE(review): `gpd` is presumably geopandas, but it is not imported in the
# notebook's import cell — add `import geopandas as gpd` there before running.
PATH = Path('./data/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp')
geo_data = gpd.read_file(PATH)
geo_data = geo_data[['SOV_A3', 'geometry']]
geo_data = geo_data.rename(columns={'SOV_A3': 'country_code'})
# `set_index` returns a new frame; the result must be assigned to take effect.
geo_data = geo_data.set_index('country_code')
geo_data.head()