In [100]:
import pandas as pd
import ipywidgets as widgets
from plotnine import ggplot, geom_point, aes, geom_line
import matplotlib.pyplot as plt

In [19]:
# CSV export endpoint of the Google Sheet; the `sheet` query parameter selects the
# "All data" tab.
url = (
    "https://docs.google.com/spreadsheets/d/"
    "1jLNfP3iuteUJrH0zS9qWONskyKh9pFcl1hKSlgEc-I8"
    "/gviz/tq?tqx=out:csv&sheet=All+data"
)

In [20]:
# Download the sheet behind `url` as CSV and preview its first five rows
all_data = pd.read_csv(filepath_or_buffer=url)
all_data.head(n=5)

Unnamed: 0,country,year,value,series_code,dimension,measure,welfare_concept,short_reference,long_reference,reference url,preferred_definition,description,source_codes_used,legend
0,Argentina,1953,40.0,S1,Overall Income Inequality,Gini coefficient,Household income,Altimir (1986),Altimir (1986) Cuadro 7,https://www.jstor.org/stable/3466844,,,,Gini coefficient - Household income (Altimir (...
1,Argentina,1959,44.7,S1,Overall Income Inequality,Gini coefficient,Household income,Altimir (1986),Altimir (1986) Cuadro 7,https://www.jstor.org/stable/3466844,,,,Gini coefficient - Household income (Altimir (...
2,Argentina,1961,41.9,S1,Overall Income Inequality,Gini coefficient,Household income,Altimir (1986),Altimir (1986) Cuadro 7,https://www.jstor.org/stable/3466844,,,,Gini coefficient - Household income (Altimir (...
3,Argentina,1972,35.3,S1,Overall Income Inequality,Gini coefficient,Household income,Altimir (1986),Altimir (1986) Cuadro 7,https://www.jstor.org/stable/3466844,,,,Gini coefficient - Household income (Altimir (...
4,Argentina,1980,37.6,S1,Overall Income Inequality,Gini coefficient,Household income,Altimir (1986),Altimir (1986) Cuadro 7,https://www.jstor.org/stable/3466844,,,,Gini coefficient - Household income (Altimir (...


In [21]:
# Build the text used as the legend in the source comparison plots:
# "<short reference> - <welfare concept>" for every row, except rows whose series
# code starts with "F", which all share one fixed label.
reference_and_concept = all_data['short_reference'] + " - " + all_data['welfare_concept']
has_f_code = all_data['series_code'].str.startswith("F", na=False)

all_data['source_legend'] = reference_and_concept.mask(has_f_code, "Chartbook series")

## Compare sources and final series

In [122]:

#Function to filter the data according to input widgets and print chart
def compare_source_chart(final_series):
    """Plot one final series for the selected country together with its sources.

    The country is read from the ``countryW`` dropdown. Rows of ``all_data`` for
    that country whose ``series_code`` equals ``final_series`` are drawn as a line.
    The series codes listed in those rows' ``source_codes_used`` column (treated as
    a comma-separated string) are drawn as points coloured by ``source_legend``.

    Parameters
    ----------
    final_series : str
        Series code of the final series to plot.

    Returns
    -------
    None
        The chart is drawn as a side effect; a short message is printed instead
        when there are no rows for the selection.
    """
    Country = countryW.value

    country_data = all_data.loc[all_data['country'] == Country]
    # Filter with the subset's own columns rather than masks built on all_data,
    # so the selection does not depend on implicit index alignment.
    final_series_df = country_data.loc[country_data['series_code'] == final_series]

    if final_series_df.empty:
        # Nothing to draw (e.g. the dropdown currently has no selectable value)
        print(f"No data for series {final_series!r} in {Country}")
        return

    # Collect the source codes from every non-missing entry instead of requiring
    # exactly one distinct value; strip whitespace so "S1, S2" matches "S2".
    sources_used = []
    for raw_codes in final_series_df['source_codes_used'].dropna().unique():
        for code in str(raw_codes).split(","):
            code = code.strip()
            if code and code not in sources_used:
                sources_used.append(code)

    source_series_df = country_data.loc[country_data['series_code'].isin(sources_used)]

    p = ggplot()
    # Only add the points layer when there is something to plot
    if not source_series_df.empty:
        p = p + geom_point(source_series_df, aes(x = "year", y = "value", color = "source_legend"))
    p = p + geom_line(final_series_df, aes(x = "year", y = "value"))

    p.draw()
   
    
#Function to update the options shown in the 'final_series' widget based on selected country
def select_source_code(Country):
    """Set the options of ``final_seriesW`` to the 'F...' series codes of ``Country``.

    NOTE(review): if the first option for the new country equals the value that is
    already selected, the dependent chart may not be redrawn - confirm.
    """
    in_country = all_data['country'] == Country
    has_f_code = all_data['series_code'].str.startswith('F')
    matching_codes = all_data.loc[in_country & has_f_code].series_code
    final_seriesW.options = matching_codes.unique().tolist()

# Dropdown listing every country found in the data
countryW = widgets.Dropdown(options = all_data.country.unique().tolist())

# Country selected when the dropdown is first created
init = countryW.value

# Dropdown of 'F...' series codes, seeded with the codes of the initial country;
# select_source_code replaces these options whenever the country changes
init_country_rows = all_data['country'] == init
f_code_rows = all_data['series_code'].str.startswith('F')
final_seriesW = widgets.Dropdown(
    options = all_data.loc[init_country_rows & f_code_rows].series_code.unique().tolist()
)

# Tie each callback to the widget that drives it
j = widgets.interactive(compare_source_chart, final_series=final_seriesW)
i = widgets.interactive(select_source_code, Country=countryW)

# Display the heading, then the country selector followed by the series selector and chart
print("Compare sources and final series (final series in black)")
display(i, j)




Compare sources and final series (final series in black)


interactive(children=(Dropdown(description='Country', options=('Argentina', 'Australia', 'Brazil', 'Canada', '…

interactive(children=(Dropdown(description='final_series', options=('F1', 'F21', 'F22', 'F3'), value='F1'), Ou…

## Compare old and new final series

In [133]:
# Pull in the old chartbook data

# Use a dedicated name for this sheet's address: reusing `url` would overwrite the
# "All data" address defined earlier, so re-running the earlier load cell would
# silently read this sheet instead.
old_data_url = "https://docs.google.com/spreadsheets/d/1jLNfP3iuteUJrH0zS9qWONskyKh9pFcl1hKSlgEc-I8/gviz/tq?tqx=out:csv&sheet=Old+final+series"

old_data = pd.read_csv(old_data_url)
old_data.head()


Unnamed: 0,country,year,dimension of inequality,measure of inequality,series,description,value
0,Argentina,1900,Poverty Measures,Poverty rate,1.0,Individuals below 50% median household per cap...,
1,Argentina,1900,Dispersion of Earnings,,,,
2,Argentina,1900,Top Income Shares,Top 1%,1.0,Share of top 1 per cent in gross income (indiv...,
3,Argentina,1900,Overall Income Inequality,Gini Coefficient,1.0,Household equivalised income,
4,Argentina,1900,Wealth Inequality,,,,


In [134]:
# Collapse metadata columns to aid selection

# Cast the series identifier to text so it can be joined with the text columns
old_data = old_data.astype({"series": 'str'})

# One combined label per row: "<measure> - <description> - <series>".
# The label is missing wherever any of its parts is missing.
old_data = old_data.assign(
    old_series_desc=old_data['measure of inequality'].str.cat(
        [old_data['description'], old_data['series']], sep=" - "
    )
)

old_data.head()


Unnamed: 0,country,year,dimension of inequality,measure of inequality,series,description,value,old_series_desc
0,Argentina,1900,Poverty Measures,Poverty rate,1.0,Individuals below 50% median household per cap...,,Poverty rate - Individuals below 50% median ho...
1,Argentina,1900,Dispersion of Earnings,,,,,
2,Argentina,1900,Top Income Shares,Top 1%,1.0,Share of top 1 per cent in gross income (indiv...,,Top 1% - Share of top 1 per cent in gross inco...
3,Argentina,1900,Overall Income Inequality,Gini Coefficient,1.0,Household equivalised income,,Gini Coefficient - Household equivalised incom...
4,Argentina,1900,Wealth Inequality,,,,,


In [135]:

# Legend labels used to tell the two plotted lines apart
NEW_SERIES_LABEL = "New final series"
OLD_SERIES_LABEL = "Old final series"


def _new_final_series_options(Country):
    """Return the 'F...' series codes present in ``all_data`` for ``Country``."""
    is_match = (all_data['country'] == Country) \
             & all_data['series_code'].str.startswith('F', na=False)
    return all_data.loc[is_match, 'series_code'].unique().tolist()


def _old_series_options(Country):
    """Return the non-missing ``old_series_desc`` labels in ``old_data`` for ``Country``."""
    labels = old_data.loc[old_data['country'] == Country, 'old_series_desc']
    # Rows without a measure/description have a missing label; they cannot be
    # selected meaningfully, so keep them out of the dropdown.
    return labels.dropna().unique().tolist()


#Function to filter the data according to input widgets and print chart
def compare_chart(new_final_series_code, old_series_desc):
    """Plot a new final series against an old chartbook series for one country.

    The country is read from the ``compare_countryW`` dropdown. Both series are
    drawn as lines and coloured by a label so the legend shows which is which.

    Parameters
    ----------
    new_final_series_code : str
        ``series_code`` value selecting the rows of ``all_data`` to plot.
    old_series_desc : str
        ``old_series_desc`` value selecting the rows of ``old_data`` to plot.

    Returns
    -------
    None
        The chart is drawn as a side effect; a short message is printed instead
        when neither selection has any value to plot.
    """
    Country = compare_countryW.value

    new_df = all_data.loc[(all_data['country'] == Country)
                          & (all_data['series_code'] == new_final_series_code)]

    old_df = old_data.loc[(old_data['country'] == Country)
                          & (old_data['old_series_desc'] == old_series_desc)]

    # A layer is only worth adding when it has at least one non-missing value
    # (this is False for an empty selection as well).
    has_new = new_df['value'].notna().any()
    has_old = old_df['value'].notna().any()

    if not (has_new or has_old):
        print(f"No data to plot for {Country} with the current selection")
        return

    p = ggplot()
    if has_new:
        p = p + geom_line(new_df.assign(series_version=NEW_SERIES_LABEL),
                          aes(x = "year", y = "value", color = "series_version"))
    if has_old:
        p = p + geom_line(old_df.assign(series_version=OLD_SERIES_LABEL),
                          aes(x = "year", y = "value", color = "series_version"))

    p.draw()


#Function to update the options shown in both series widgets based on selected country
def update_widget_options(Country):
    """Refresh both series dropdowns for ``Country`` and redraw the comparison chart."""
    new_final_seriesW.options = _new_final_series_options(Country)
    old_series_descW.options = _old_series_options(Country)
    # compare_chart reads the country from the dropdown instead of receiving it as
    # an argument, so a country change is not one of the chart's own inputs; redraw
    # explicitly in case neither series dropdown ended up with a different value.
    compare_chart_view.update()


# Country widget. It has its own name so the `countryW` dropdown used by the
# source comparison chart above keeps driving that chart.
compare_countryW = widgets.Dropdown(options = all_data.country.unique().tolist())

# New final series widget (options update based on selected country)
new_final_seriesW = widgets.Dropdown(options = _new_final_series_options(compare_countryW.value))

# Old series widget (options update based on selected country)
old_series_descW = widgets.Dropdown(options = _old_series_options(compare_countryW.value))


# Initialise widgets (and chart)
compare_chart_view = widgets.interactive(compare_chart, new_final_series_code=new_final_seriesW, old_series_desc=old_series_descW)
compare_country_view = widgets.interactive(update_widget_options, Country=compare_countryW)

# Display widgets and chart
print("Compare old and new final series")
display(compare_country_view)
display(compare_chart_view)




Test


interactive(children=(Dropdown(description='Country', options=('Argentina', 'Australia', 'Brazil', 'Canada', '…

interactive(children=(Dropdown(description='new_final_series_code', options=('F1', 'F21', 'F22', 'F3'), value=…