# Analytic Question 2
Which regions show the greatest internal variation in democracy? And how does this variation correlate with fertility rates?

### Data preparation 

In [None]:
# Load packages
import altair as alt
import pandas as pd
import numpy as np
from vega_datasets import data

# Load data
# Processed table from the project's data directory; later cells read its
# `country`, `year`, `e_miferrat` and democracy-index columns.
geo_data = pd.read_csv("../../../data/processed/geo_data.csv")

# Load Country ID mapping and rename its columns so the join key is `country`
country_ids = pd.read_csv('https://raw.githubusercontent.com/kemiolamudzengi/dsci-320-datasets/main/country-ids-and-continents.csv')
country_ids = country_ids.rename(columns={"Country": "country",
                                          "Continent": "continent"})

# Changing country names to match the names in geo_data (missing 16 countries)
# NOTE(review): the replacement spelling for 'Turkey' looks mis-encoded; confirm
# it matches the value actually stored in geo_data before changing it.
country_ids['country'] = country_ids['country'].replace({
    'Democratic Republic of Congo': 'Democratic Republic of the Congo',
    'United States': 'United States of America',
    "Cote d'Ivoire": "Ivory Coast",
    'Turkey': 'TÃ¼rkiye',
    "Korea, Democratic People's Republic of": 'North Korea',
    'Myanmar': 'Burma/Myanmar',
    'Congo': 'Republic of the Congo',
    'Macedonia, the former Yugoslav Republic of': 'North Macedonia',
    'Gambia': 'The Gambia',
    'Taiwan, Province of China':'Taiwan',
    'Swaziland': 'Eswatini'
})

relevant_country_names = country_ids["country"]

# Merge Country IDs to geo_data and create view II dataframe, df2.
# Only countries that have an ID are kept. No `on=` is passed, so pandas joins
# on the columns the two frames share (intended: `country`).
df2 = geo_data.query(
    'country in @relevant_country_names'
).merge(
    country_ids,
)

# Fixed seed so the preview shows the same rows on every Restart & Run All.
df2.sample(10, random_state=42)

# Creating parameters


In [None]:
# Democracy index columns mapped to the labels shown in the dropdown.
# Insertion order is the order of the dropdown options.
index_labels = {
    "mean_democracy": "Overall Democracy (mean)",
    "electoral_democracy": "Electoral Democracy",
    "liberal_democracy": "Liberal Democracy",
    "participatory_democracy": "Participatory Democracy",
    "deliberative_democracy": "Deliberative Democracy",
    "egalitarian_democracy": "Egalitarian Democracy",
}

# Column names of the democracy indices (same order as the labels above)
indices = list(index_labels)

# Dropdown menu for the democracy index; options are the pretty labels
dropdown_options = list(index_labels.values())
dropdown = alt.binding_select(options=dropdown_options, name="Select index: ")

# Parameter holding the currently selected label; starts on the first option.
# Chart expressions refer to it by its name, `selected_label`.
index_param = alt.param(
    name="selected_label",
    bind=dropdown,
    value=dropdown_options[0],
)

# Continent selection shared by the charts: clicking a mark selects its
# `continent` value, and `toggle=True` lets further clicks add/remove continents.
continent_select = alt.selection_point(
    fields=["continent"],
    toggle=True,
    # An empty selection matches every row, so everything is shown normally.
    # Altair 5 expects a boolean here; the string "all" is the deprecated
    # Altair 4 spelling and only works via a deprecation warning.
    empty=True
)

# Checkbox parameter used to show or hide the individual country points
show_points = alt.param(
    name="Show_Points",
    value=True,  # points are visible initially
    bind=alt.binding_checkbox(name="Show Points"))

# Sequential blue scale for democracy scores, fixed to the [0, 1] range
color_scale = alt.Scale(scheme="blues", domain=[0,1])

# Categorical palette for continents; deliberately avoids blue so it cannot be
# confused with the score scale above
nonblue_colors = ["#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#66a61e", "#e6ab02"]

# Pair each continent (alphabetical order) with a colour from nonblue_colors
continents_sorted = sorted(df2['continent'].drop_duplicates())
continent_color_scale = alt.Scale(domain=continents_sorted, range=nonblue_colors)

# Visualization 1: Boxplot of Regional Variance

In [None]:
# Vega expression that maps the label held in the `selected_label` param to the
# matching democracy-index column. It is defined once and shared by every layer
# so the layers cannot drift apart: the continent-mean layer previously used a
# hand-copied version that lacked the 'Liberal Democracy' branch and therefore
# showed the egalitarian index when "Liberal Democracy" was selected.
selected_score_expr = (
    "selected_label === 'Overall Democracy (mean)' ? datum.mean_democracy : "
    "selected_label === 'Electoral Democracy' ? datum.electoral_democracy : "
    "selected_label === 'Liberal Democracy' ? datum.liberal_democracy : "
    "selected_label === 'Participatory Democracy' ? datum.participatory_democracy : "
    "selected_label === 'Deliberative Democracy' ? datum.deliberative_democracy : "
    "datum.egalitarian_democracy"
)

# All four layers draw the same snapshot: rows with year == 2024
democracy_2024 = df2[df2['year'] == 2024]

# Layer 1: Base boxplot (one horizontal box per continent)
boxplot = (
    alt.Chart(democracy_2024)
    .transform_calculate(selected_score=selected_score_expr)
    .mark_boxplot(
        size=30,
        color='lightgrey',
        median={"color": 'lightgrey'}
    ).encode(
        y=alt.Y("continent:N", title=None, axis=alt.Axis(
            domain=False,   # remove the axis line
            ticks=False     # remove ticks
        )),   # Horizontal grouping
        x=alt.X("selected_score:Q", title="Democracy Score", scale=alt.Scale(domain=(0, 1)))
    ).properties(
        width=300,
        height=400,
        title="Distribution of Democracy Scores by Continent (2024)"
    )
)


# Layer 2: Points for individual countries
points = (
    alt.Chart(democracy_2024)
    .transform_calculate(selected_score=selected_score_expr)
    .mark_circle(
        size=100,
        color='red'   # fallback only; the colour encoding below takes precedence
    ).encode(
        y=alt.Y("continent:N", title='Continent'),
        x=alt.X("selected_score:Q"),
        color=alt.Color("continent:N", title="Continent", scale=continent_color_scale, legend=None),
        tooltip=[
            alt.Tooltip('country:N', title='Country'),
            alt.Tooltip('selected_score:Q', title='Score', format='.3f')
        ],
        # The checkbox hides the points by making them fully transparent
        opacity=alt.condition(
            show_points,
            alt.value(0.70),
            alt.value(0)))
)

# Layer 3: Global mean of the selected index, drawn as one rule across all rows
global_mean = (
    alt.Chart(democracy_2024)
    .transform_calculate(selected_score=selected_score_expr)
    .transform_aggregate(
        mean_score='mean(selected_score)'
    ).mark_rule(
        color='black',
        size=2
    ).encode(
        x='mean_score:Q',
        tooltip=[alt.Tooltip('mean_score:Q', title='Global Mean', format='.3f')])
)


# Layer 4: Continent-level mean as vertical lines spanning box height
continent_mean_tick = (
    alt.Chart(democracy_2024)
    .transform_calculate(selected_score=selected_score_expr)
    .transform_aggregate(
        mean_score="mean(selected_score)",
        groupby=["continent"]
    ).mark_tick(
        color="black",
        thickness=2,  # matches global mean thickness
        size=30,      # height of the tick to fit the box
        orient="vertical"
    ).encode(
        x="mean_score:Q",
        y=alt.Y("continent:N"),
        tooltip=[
            alt.Tooltip('continent:N', title='Continent'),
            alt.Tooltip('mean_score:Q', title='Mean', format='.3f')])
)


# Layering them; the dropdown and checkbox params are attached to the layered chart
boxplot_points = alt.layer(boxplot, points, global_mean, continent_mean_tick
    ).add_params(index_param, show_points
    ).properties(
        width=300,
        height=600,
        title={
            "text": "Regional Democracy Variation",
            "subtitle": [
                "Use the checkbox to show/hide individual countries.",
                "Black lines indicate means."]})

boxplot_points

# Visualization 2: Choropleth map

In [None]:
# Switch Altair to the "vegafusion" data transformer
alt.data_transformers.enable("vegafusion")

# World country boundaries: the 'countries' feature of the built-in TopoJSON file
world_map = alt.topo_feature(data.world_110m.url, 'countries')

# Columns copied onto each map feature by the lookup (2024 rows only, keyed by ID)
map_lookup = alt.LookupData(
    df2[df2['year']==2024],
    'ID',
    ['country', 'continent'] + indices,
)

# Resolves the dropdown label in `selected_label` to the matching index column
map_score_expr = (
    "selected_label === 'Overall Democracy (mean)' ? datum.mean_democracy : "
    "selected_label === 'Electoral Democracy' ? datum.electoral_democracy : "
    "selected_label === 'Liberal Democracy' ? datum.liberal_democracy : "
    "selected_label === 'Participatory Democracy' ? datum.participatory_democracy : "
    "selected_label === 'Deliberative Democracy' ? datum.deliberative_democracy : "
    "datum.egalitarian_democracy"
)

# Vertical colour bar for the score legend
map_legend = alt.Legend(
    orient='right',
    direction='vertical',
    titleFontWeight='bold',
    gradientLength=200,    # height of the color bar
    gradientThickness=12,  # width of the color bar
    padding=10,
)

# Features matching the continent selection are filled by score; the rest are grey
map_fill = alt.condition(
    continent_select,
    alt.Color(
        'selected_score:Q',
        title="Democracy Score",
        scale=color_scale,
        legend=map_legend),
    alt.value("lightgray"),
)

map_tooltip = [
    alt.Tooltip('continent:N', title='Continent'),
    alt.Tooltip('country:N', title='Country'),
    alt.Tooltip('selected_score:Q', title='Score', format='.3f'),
]

# Choropleth: join map shapes with the democracy data, compute the score, encode
# NOTE(review): opacity is also encoded by `continent`, so continents presumably
# render at different opacity levels — confirm this is intended.
base_map = (
    alt.Chart(world_map)
    .mark_geoshape()
    .transform_lookup(lookup='id', from_=map_lookup)
    .transform_calculate(selected_score=map_score_expr)
    .encode(
        detail='continent:N',
        opacity=alt.Opacity('continent:N', legend=None),
        tooltip=map_tooltip,
        color=map_fill,
    )
    .project('equalEarth')
    .properties(
        width=500,
        height=300,
        title={
            "text": "Democracy Scores by Country",
            "subtitle": "Click a continent to explore country-level democracy and its relationship with fertility rate"},
    )
    .add_params(index_param, continent_select)
)

# Borders layer: outlines countries in their continent's colour.
# Both conditions below test `continent_select`: matching features get a coloured,
# fully opaque outline; all other features get no stroke and zero opacity.
# NOTE(review): the selection is declared so that an empty selection matches
# everything, so outlines presumably appear on every country until a continent
# is clicked — confirm this is the intended resting state.
continent_stroke = alt.Color(
    'continent:N',
    scale=continent_color_scale,
    legend=None,
)

borders = (
    alt.Chart(world_map)
    .mark_geoshape(fill=None, strokeWidth=1.0)
    .transform_lookup(
        lookup='id',
        # 2024 rows only, matched to map features by ID
        from_=alt.LookupData(df2[df2['year']==2024], 'ID', ['country', 'continent']),
    )
    .encode(
        stroke=alt.condition(continent_select, continent_stroke, alt.value(None)),
        opacity=alt.condition(continent_select, alt.value(1), alt.value(0)),
    )
)

# Stack the border outlines on top of the choropleth and attach the shared params
map_title = {
    "text": "Democracy Scores by Country",
    "subtitle": "Click a continent to explore country-level democracy and its relationship with fertility rate.",
}

democracy_map = (
    alt.layer(base_map, borders)
    .properties(width=500, height=300, title=map_title)
    .add_params(index_param, continent_select)
)

democracy_map


# Visualization 3: Scatterplot of Democracy vs Fertility Rate

In [None]:
# Resolves the dropdown label in `selected_label` to the matching index column
scatter_score_expr = (
    "selected_label === 'Overall Democracy (mean)' ? datum.mean_democracy : "
    "selected_label === 'Electoral Democracy' ? datum.electoral_democracy : "
    "selected_label === 'Liberal Democracy' ? datum.liberal_democracy : "
    "selected_label === 'Participatory Democracy' ? datum.participatory_democracy : "
    "selected_label === 'Deliberative Democracy' ? datum.deliberative_democracy : "
    "datum.egalitarian_democracy"
)

# Fertility rate (x) against the selected democracy index (y), one circle per
# country, coloured by continent and dimmed when outside the continent selection.
scatter = (
    alt.Chart(df2[df2['year'] == 2024])         # Filtering data to year==2024
    .transform_calculate(selected_score=scatter_score_expr)
    .mark_circle(
        size=100
    ).encode(
        x=alt.X(
            "e_miferrat:Q",
            title="Fertility Rate",
            scale=alt.Scale(zero=False)),
        y=alt.Y(
            "selected_score:Q",
            title="Democracy Score",
            scale=alt.Scale(domain=(0, 1)),
            axis=alt.Axis(
                # Ticks every 0.2 within the fixed [0, 1] domain; the previous
                # list ran on to 2.0, outside the axis domain.
                values=[i/5 for i in range(6)],
                grid=True,
                gridColor='lightgray',
                gridWidth=1)),
        color=alt.Color(
            "continent:N",
            title="Continent",
            scale=continent_color_scale),
        opacity=alt.condition(
            continent_select,
            alt.value(0.9),
            alt.value(0.2)),
        tooltip=[
            alt.Tooltip("country:N", title="Country"),
            alt.Tooltip("continent:N", title="Continent"),
            alt.Tooltip("e_miferrat:Q", title="Fertility Rate", format=".3f"),
            # Show the value that is actually plotted on the y axis; this was
            # hard-wired to mean_democracy and ignored the dropdown selection.
            alt.Tooltip("selected_score:Q", title="Democracy Score", format=".3f")]
    ).properties(
        width=450,
        height=250,
        title={
            "text": "Fertility Rate vs Democracy Score by Country",
            "subtitle": "Click a continent on the map to highlight its countries (and vice versa)."}
    ).add_params(
        index_param,
        continent_select)
)



# View II

In [None]:
# Use the "vegafusion" data transformer (enables large dataset support)
alt.data_transformers.enable("vegafusion")

# Empty 30px-wide text chart, used only as horizontal padding
spacer = alt.Chart(pd.DataFrame({'x':[0]})).mark_text(text='').properties(width=30)

# Dashboard-level title block
dashboard_title = {
    "text": "Democracy Variation and Fertility Patterns in 2024",
    "subtitle": ["Select a democracy index from the dropdown menu."],
    "anchor": "middle",   # centers title
    "font": "Helvetica",
    "fontSize": 28,
    "fontWeight": "bold",
    "subtitleFont": "Helvetica",
    "subtitleFontSize": 16,
    "subtitleColor": "gray",
    "offset": 30,         # spacing between title + visualization
}

# Right-hand column: map above scatterplot, each preceded by the spacer
right_column = alt.vconcat(
    alt.hconcat(spacer,democracy_map),
    alt.hconcat(spacer, scatter),
)

# Layout: boxplot on the left, map + scatter column on the right
dashboard_layout = alt.hconcat(boxplot_points, right_column).properties(
    title=dashboard_title,
    padding={"right": 10, "left": 10, "top": 10, "bottom": 10},
)

# Global styling applied through chart configuration
dashboard = (
    dashboard_layout
    .configure_legend(
        labelFont='Helvetica',
        titleFont='Helvetica',
        orient='right',
        titleFontSize=12,
        labelFontSize=12,
    )
    .configure_title(
        font='Helvetica',
        fontSize=16,
        anchor='start',     # 'start' = left, 'middle' = center, 'end' = right
        color='black',
    )
    .configure_axis(labelFont='Helvetica', titleFont='Helvetica')
    .configure_view(stroke=None)
)

dashboard
