In [1]:
pip install altair vega_datasets

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install geopandas

Note: you may need to restart the kernel to use updated packages.


In [103]:
#Import required packages (pandas, altair)
import pandas as pd
import altair as alt
# Read the OECD Better Life Index workbook into a DataFrame and, in the same
# expression, swap the long indicator headers for the short labels that the
# charts below encode on. Columns not listed here keep their original names.
bli_df = pd.read_excel('OECD_betterLifeIndex.xlsx').rename(
    columns={
        'Dwellings without basic facilities (Percentage)': 'Inadequate Housing',
        'Household net adjusted disposable income (US Dollar)': 'Household Income',
        'Employment rate (Percentage)': 'Employment Rate',
        'Quality of support network (Percentage)': 'Support Network Quality',
        'Educational attainment (Percentage)': 'Educational Attainment',
        'Water quality (Percentage)': 'Water Quality',
        'Voter turnout (Percentage)': 'Voter Turnout',
        'Life expectancy (Years)': 'Life Expectancy',
        'Life satisfaction (Average Score)': 'Life Satisfaction',
        'Feeling safe walking alone at night (Percentage)': 'Safety At Night',
    }
)

In [132]:
# Point selection keyed on the Country field. It is attached to the two legend
# columns below and also drives the colour condition and the filter on the
# parallel-coordinates plot.
# selection_point / add_params are the Altair 5 spellings (the map cell already
# uses them); selection_multi / add_selection are the deprecated equivalents.
catSelection = alt.selection_point(fields=['Country'])

# Selected countries keep their own colour; everything else is drawn light gray.
catColor = alt.condition(
    catSelection,
    alt.Color('Country:N', scale=alt.Scale(scheme='turbo', type='ordinal'), legend=None),  # hide the legend
    alt.value('lightgray'),
)

# Indicators shown as the axes of the parallel-coordinates plot, in display order.
pcp_indicators = [
    'Educational Attainment',
    'Employment Rate',
    'Inadequate Housing',
    'Water Quality',
    'Safety At Night',
    'Support Network Quality',
    'Voter Turnout',
]

# Parallel-coordinates plot:
#   * transform_window adds a running row count as `index`, used as the
#     `detail` channel so every row of bli_df is drawn as its own line;
#   * transform_fold melts the indicator columns into key/value pairs;
#   * transform_filter keeps only the rows matched by the selection.
pcp = (
    alt.Chart(bli_df)
    .transform_window(index='count()')
    .transform_fold(pcp_indicators)
    .mark_line()
    .encode(
        x='key:N',
        y='value:Q',
        color=catColor,
        detail='index:N',
        opacity=alt.value(1),
    )
    .transform_filter(catSelection)
    .properties(width=400, height=200)
)

# The built-in colour legend is hidden, so two point charts with a right-hand
# Country axis act as a clickable legend, split into two columns.
# NOTE: this array is deliberately not called `countries`; that name is used
# for the map layer in the map cell and is needed by the final layout cell.
country_names = bli_df['Country'].unique()
half = len(country_names) // 2
legend1_values = country_names[:half]
legend2_values = country_names[half:]

data_legend1 = bli_df[bli_df['Country'].isin(legend1_values)]
legend1 = (
    alt.Chart(data_legend1)
    .mark_point()
    .encode(
        y=alt.Y('Country', axis=alt.Axis(orient='right')),
        color=catColor,
        detail='Country:N',
    )
    .add_params(catSelection)
    .properties(height=300)
)

data_legend2 = bli_df[bli_df['Country'].isin(legend2_values)]
legend2 = (
    alt.Chart(data_legend2)
    .mark_point()
    .encode(
        # second column: no axis title, the first column already carries it
        y=alt.Y('Country', axis=alt.Axis(orient='right', title=None)),
        color=catColor,
        detail='Country:N',
    )
    .add_params(catSelection)
    .properties(height=300)
)



In [133]:
# Base scatter plot: Life Satisfaction on the y axis, points coloured through
# the shared country colour condition. The x channel is filled in per indicator.
scatter = (
    alt.Chart(bli_df)
    .mark_point()
    .encode(
        y='Life Satisfaction',
        color=catColor,
        detail='Country:N',
    )
    .properties(width=125, height=125)  # small multiples: keep each panel compact
)


def _satisfaction_against(indicator):
    """Return a copy of the base scatter with `indicator` on the x axis."""
    return scatter.encode(x=indicator)


# One named plot per indicator so the layout cell can arrange them freely.
eduPlot = _satisfaction_against('Educational Attainment')
# NOTE(review): 'Employees Working Long' is not one of the columns renamed when
# the data is loaded, and this plot is not placed in the final layout - confirm
# the column exists in the workbook before using it.
employeeHoursPlot = _satisfaction_against('Employees Working Long')
employmentPlot = _satisfaction_against('Employment Rate')
housingPlot = _satisfaction_against('Inadequate Housing')
safetyPlot = _satisfaction_against('Safety At Night')
supportPlot = _satisfaction_against('Support Network Quality')
voterPlot = _satisfaction_against('Voter Turnout')
waterPlot = _satisfaction_against('Water Quality')
incomePlot = _satisfaction_against('Household Income')
# The plots are arranged into rows, and given a title, in the final layout cell.

In [134]:
import altair as alt
import geopandas as gpd
import pandas as pd
from vega_datasets import data

# Map background: Natural Earth 110m admin-0 country polygons, read by
# geopandas directly from the zipped shapefile at this URL.
url = "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip"
# Keep only the attributes listed here plus the geometry itself.
shape_columns = ['NAME', 'CONTINENT', 'ISO_A3', 'geometry']
countries_shape = gpd.read_file(url)[shape_columns]

sphere = alt.sphere()
graticule = alt.graticule()

# Bottom layers of the map: a white globe outline with light-gray grid lines on top.
globe_fill = alt.Chart(sphere).mark_geoshape(fill='white')
grid_lines = alt.Chart(graticule).mark_geoshape(stroke='LightGray', strokeWidth=0.5)
basemap = alt.layer(globe_fill, grid_lines)

# Every country drawn in silver; the choropleth is layered over this later.
countries = alt.Chart(countries_shape).mark_geoshape(fill='Silver', stroke='white')
# Point selection on the shapefile's NAME field; clicking a country on the map
# (or its bar in the chart below the map) selects it.
click_countries = alt.selection_point(fields=["NAME"])

# Shared colour encoding for household income (map and bar chart).
# The field is quantitative, so no scale type is forced here: an 'ordinal'
# scale type is not valid for a quantitative field, and leaving it out gives
# the default continuous scale with the 'blues' scheme.
colorScale = alt.Color(
    "Household Income:Q",
    scale=alt.Scale(scheme='blues'),
    legend=alt.Legend(direction='vertical', orient='right', titleOrient='left'),
)

# Choropleth layer: country shapes joined to the Better Life Index rows by
# name (shapefile NAME == bli_df Country), coloured by household income.
# Unselected countries get opacity 0, so the layers underneath show through.
choropleth = (
    alt.Chart(countries_shape)
    .mark_geoshape()
    .transform_lookup(
        lookup='NAME',
        from_=alt.LookupData(data=bli_df, key='Country', fields=['Household Income', 'Country']),
    )
    .encode(
        color=colorScale,
        opacity=alt.condition(click_countries, alt.value(1), alt.value(0)),
        tooltip=['Country:N', 'Household Income:Q'],
    )
    .interactive()
    .add_params(click_countries)
    .properties(
        title=alt.TitleParams(
            text='Household Income Distribution Across OECD Countries',
            anchor='start',
            align='left',
        )
    )
)

def _bli_lookup(fields):
    """Lookup source that pulls `fields` from bli_df, matched on its Country column."""
    return alt.LookupData(data=bli_df, key='Country', fields=fields)


# Shown when a row is not part of the map selection.
dimmed = alt.value(0.2)

# Bar layer: household income per country. It starts from the shapefile rows so
# that it can share the NAME-based map selection, then joins in the index data.
base1 = (
    alt.Chart(countries_shape)
    .mark_bar()
    .transform_lookup(
        lookup='NAME',
        from_=_bli_lookup(['Household Income', 'Country']),
    )
    .encode(
        x=alt.X("Country:N"),
        y="Household Income:Q",
        opacity=alt.condition(click_countries, alt.value(1), dimmed),
        color=colorScale,
    )
    .add_params(click_countries)
)

# Line layer: educational attainment per country, drawn in red over the bars.
base2 = (
    alt.Chart(countries_shape)
    .mark_line()
    .transform_lookup(
        lookup='NAME',
        from_=_bli_lookup(['Household Income', 'Educational Attainment', 'Country']),
    )
    .encode(
        x="Country:N",
        y="Educational Attainment:Q",
        opacity=alt.condition(click_countries, alt.value(1), dimmed),
        color=alt.value("red"),
    )
)
    
# Overlay the income bars and the attainment line. The two measures use
# different units, so each layer gets its own y scale.
bars_title = alt.TitleParams(
    text='Household Income vs Educational Attainment Across OECD Countries',
    anchor='start',
    align='left',
)
bars = (
    (base1 + base2)
    .resolve_scale(y='independent')
    .properties(width=600, height=180, title=bars_title)
)

In [135]:
def _left_title(text):
    """Title anchored at the start of the view and left-aligned."""
    return alt.TitleParams(text=text, anchor='start', align='left')


# Right-hand column: the layered world map (Equal Earth projection) stacked
# above the income / attainment bar chart.
world_map = (
    (basemap + countries + choropleth)
    .project("equalEarth")
    .properties(width=600, height=400)
)
combinedChoro = world_map & bars

# Top-left: parallel-coordinates plot with its two clickable legend columns.
pcp_row = alt.hconcat(pcp, legend1, legend2)
combinedPcp = alt.vconcat(
    pcp_row,
    title=_left_title('Comparing Quality Of Life Variation Across OECD Countries'),
)

# Bottom-left: 2 x 4 grid of Life Satisfaction scatter plots.
scatter_row_top = alt.hconcat(eduPlot, employmentPlot, housingPlot, waterPlot)
scatter_row_bottom = alt.hconcat(safetyPlot, supportPlot, voterPlot, incomePlot)
combinedBottom = alt.vconcat(
    scatter_row_top,
    scatter_row_bottom,
    title=_left_title('Exploring Quality Of Life influence On Life Satisfaction Across OECD Countries'),
)

# Left-hand column; each part is wrapped in its own single-chart hconcat, as before.
combinedLeft = alt.vconcat(
    alt.hconcat(combinedPcp),
    alt.hconcat(combinedBottom),
)

# Full dashboard. The negative spacing presumably tightens the gap between the
# two columns - confirm visually if the layout changes.
combinedFinal = alt.hconcat(combinedLeft, combinedChoro, spacing=-30)
combinedFinal.save("Data Visualisation Coursework HTML.html")