In [None]:
import pandas as pd
import altair as alt
import geopandas as gpd

# The data charted in this notebook is larger than Altair's default max-rows
# limit, so that limit is switched off for the whole session.
alt.data_transformers.disable_max_rows()

# Component codes that are filtered out of the episode data before any
# aggregation or plotting takes place.
drop = [
    "v2x_mpi",
    "v2x_polyarchy",
    "v2x_libdem",
    "v2x_partipdem",
    "v2x_delibdem",
    "v2x_egaldem",
]

# Human-readable display labels for the component codes found in the episode
# data; the labels replace the raw codes in the `component` column.
# NOTE(review): "v2x_api" and "v2x_mpi" share the label "Polyarchy". They do not
# collide today only because "v2x_mpi" is listed in `drop`.
component_names = dict(
    # Top-level democracy indices
    v2x_polyarchy="Electoral Democracy",
    v2x_libdem="Liberal Democracy",
    v2x_partipdem="Participatory Democracy",
    v2x_delibdem="Deliberative Democracy",
    v2x_egaldem="Egalitarian Democracy",
    # Polyarchy indices and electoral sub-components
    v2x_api="Polyarchy",
    v2x_mpi="Polyarchy",
    v2x_freexp_altinf="Freedom of Expression",
    v2x_frassoc_thick="Freedom of Association",
    v2x_suffr="Share of Population with Suffrage",
    v2xel_frefair="Clean Elections",
    v2x_elecoff="Elected Officials",
    # Liberal component
    v2x_liberal="Liberal Component",
    v2xcl_rol="Equality Before the Law",
    v2x_jucon="Judicial Constraints",
    v2xlg_legcon="Legislative Constraints",
    # Participatory component
    v2x_partip="Participatory Component",
    v2x_cspart="Civil Society Participation",
    v2xdd_dd="Direct Popular Vote",
    v2xel_locelec="Local Government",
    v2xel_regelec="Regional Government",
    # Deliberative component
    v2xdl_delib="Deliberative Component",
    # Egalitarian component
    v2x_egal="Egalitarian Component",
    v2xeg_eqprotec="Equal Protection",
    v2xeg_eqaccess="Equal Access",
    v2xeg_eqdr="Equal Distribution of Resources",
)

# Load and preprocess episode data
# Built as a single method chain so that every step returns a new frame; this
# avoids assigning a column on a boolean-filtered slice (the pattern that
# triggers pandas' SettingWithCopyWarning).
episodes = (
    pd.read_csv("../../../data/processed/episodes_long.csv")
    .rename(columns={"type": "episode_type"})
    # Remove the component codes listed in `drop`.
    .loc[lambda df: ~df["component"].isin(drop)]
    # Swap component codes for their display labels. A code that is missing
    # from `component_names` keeps its raw value instead of turning into NaN,
    # so it is not silently discarded by the groupby aggregations further down.
    .assign(
        component=lambda df: df["component"]
        .map(component_names)
        .fillna(df["component"])
    )
)

In [None]:
# Create parameters and selectors

# Radio buttons that choose which episode type the charts display.
type_selector = alt.binding_radio(
    name="Episode Type: ",
    options=["autocratization", "democratization", "both"],
    labels=["Autocratization", "Democratization", "Both"],
)
type_param = alt.param(name="type_param", value="both", bind=type_selector)

# Strict predicate: the row's episode type must equal the chosen value.
type_filter_explicit = alt.datum.episode_type == type_param
# Relaxed predicate: as above, but choosing "both" lets every row through.
type_filter = type_filter_explicit | (type_param == "both")

# Click selection on the `component` field, initialised to
# "Freedom of Expression" and created with `clear=False`.
component_select = alt.selection_point(
    name="component_param",
    value="Freedom of Expression",
    fields=["component"],
    on="click",
    clear=False,
)

# Click selection on the `country_name` field; no initial value.
country_select = alt.selection_point(
    name="country_param",
    fields=["country_name"],
    on="click",
)

In [None]:
# Create dumbbell plot

# Per component / episode type / country: the mean of `delta_abs` and the mean
# of `peak_yoy_abs` over the matching episode rows.
means = (
    episodes
    .groupby(["component", "episode_type", "country_name"])
    .agg(
        mean_delta_abs=("delta_abs", "mean"),
        mean_peak_yoy_abs=("peak_yoy_abs", "mean"),
    )
    .reset_index()
)

# Y encoding shared by every dumbbell layer: one row per indicator, ordered by
# the aggregated `mean_delta_abs` in descending order.
indicator_axis = alt.Y(
    "component:N",
    title="Indicator",
    sort=alt.SortField("mean_delta_abs", order="descending"),
)

# Common foundation of the dumbbell layers. Rows are filtered by the chosen
# episode type and by the country selection, then averaged per component.
base = (
    alt.Chart(means)
    .transform_filter(type_filter)
    .transform_filter(country_select)
    .transform_aggregate(
        groupby=["component"],
        mean_delta_abs="mean(mean_delta_abs)",
        mean_peak_yoy_abs="mean(mean_peak_yoy_abs)",
    )
    .encode(y=indicator_axis)
)

def _by_component(selected: str, unselected: str = "lightgray", **condition_kwargs):
    """Build a colour condition driven by the indicator selection.

    Marks that satisfy ``component_select`` are given the ``selected`` colour and
    all remaining marks the ``unselected`` colour. Any extra keyword arguments
    are passed straight through to ``alt.condition``.
    """
    return alt.condition(
        component_select,
        alt.value(selected),
        alt.value(unselected),
        **condition_kwargs,
    )


# Axis title shared by the layers that encode x.
dumbbell_x_title = "Mean Change per Event"

# Large dot at the aggregated `mean_delta_abs`; red for the selected indicator.
delta = base.mark_circle(size=100, opacity=1).encode(
    x=alt.X("mean_delta_abs:Q", title=dumbbell_x_title),
    color=_by_component("red"),
)

# Small dot at the aggregated `mean_peak_yoy_abs`; steelblue when selected.
peak_yoy = base.mark_circle(size=40, opacity=1).encode(
    x=alt.X("mean_peak_yoy_abs:Q", title=dumbbell_x_title),
    color=_by_component("steelblue"),
)

# Connecting rule running from `mean_delta_abs` to `mean_peak_yoy_abs`.
# NOTE(review): `width=30` probably has no visible effect on a rule mark
# (thickness is normally set with `strokeWidth`) - confirm the intent.
diff = base.mark_rule(width=30, opacity=1).encode(
    x=alt.X("mean_delta_abs:Q", title=dumbbell_x_title),
    x2=alt.X2("mean_peak_yoy_abs:Q"),
    color=_by_component("black"),
)

# Faint rectangle per indicator row with a pointer cursor; black for the
# selected indicator, white otherwise (`empty=False` is passed to the condition).
highlight = base.mark_rect(opacity=0.1, cursor='pointer').encode(
    color=_by_component("black", "white", empty=False),
)

# Layer built on an empty DataFrame: it carries only a colour scale whose
# domain/range pair names the two dot colours (presumably so that a "Metric"
# legend is shown for colours that are otherwise set as constant values).
metric_scale = alt.Scale(
    domain=['Total Change', 'Peak Year-over-Year Change'],
    range=['red', 'steelblue'],
)
legend_layer = (
    alt.Chart(pd.DataFrame({}))
    .mark_circle(size=100)
    .encode(color=alt.Color('metric:N', scale=metric_scale, title='Metric'))
)

# Layer order is back to front: row highlight, connecting rule, small dot,
# large dot, then the legend layer.
dumbbell_layers = highlight + diff + peak_yoy + delta + legend_layer
dumbbell_plot = (
    dumbbell_layers
    .add_params(type_param, component_select)
    .properties(height=500, width=400)
)

In [None]:
# Create choropleth map

# Load country IDs, clean up names
country_ids = (
    pd.read_csv('https://raw.githubusercontent.com/kemiolamudzengi/dsci-320-datasets/main/country-ids-and-continents.csv')
    .rename(columns={"Country": "country", "Continent": "continent"})
)

# Respell country names, presumably so they line up with the spellings in
# episodes["country_name"], which is the key this table is merged on further
# down (verify against the episode data).
country_ids['country'] = country_ids['country'].replace({
    'Democratic Republic of Congo': 'Democratic Republic of the Congo',
    'United States': 'United States of America',
    "Cote d'Ivoire": "Ivory Coast",
    # Must be spelled with a real "ü": a mis-decoded byte sequence can never
    # match a correctly decoded UTF-8 country name.
    'Turkey': 'Türkiye',
    "Korea, Democratic People's Republic of": 'North Korea',
    'Myanmar': 'Burma/Myanmar',
    'Congo': 'Republic of the Congo',
    'Macedonia, the former Yugoslav Republic of': 'North Macedonia',
    'Gambia': 'The Gambia',
    'Taiwan, Province of China': 'Taiwan',
    'Swaziland': 'Eswatini'
})

# Compute mean deltas by country/component/episode type
country_means = (
    episodes
    .groupby(["component", "episode_type", "country_name"])
    .agg(mean_delta_abs=("delta_abs", "mean"))
    .reset_index()
)

# Keep only countries present in the ID table, attach the ID columns, discard
# the helper columns, and drop rows whose mean is missing.
has_country_id = country_means["country_name"].isin(country_ids["country"])
spatialized_episodes = (
    country_means[has_country_id]
    .merge(country_ids, left_on="country_name", right_on="country", how="left")
    .drop(columns=["country", "continent"])
    .loc[lambda df: df["mean_delta_abs"].notna()]
)

# Extra rows tagged "both": for each country/component, the mean of the
# per-episode-type means computed above.
both = (
    spatialized_episodes
    .groupby(["country_name", "component", "ID"])
    .agg(mean_delta_abs=("mean_delta_abs", "mean"))
    .reset_index()
    .assign(episode_type="both")
)
spatialized_episodes = pd.concat([spatialized_episodes, both], ignore_index=True)

# Load geometry data for world map
# The episode rows are joined onto the geometries here in pandas, which is a
# non-standard approach: Altair's transform_lookup cannot be used because it
# only looks up one row per key, while each country has several rows.
world_shapes = gpd.GeoDataFrame.from_file("../../../data/raw/world-110m.json", layer='countries')
# Cast the geometry id to int before joining it against the `ID` column.
world_shapes["id"] = world_shapes["id"].astype(int)
gdf = (
    world_shapes
    .merge(spatialized_episodes, left_on="id", right_on="ID")
    .drop(columns=["ID"])
)

# Title shared by the colour legend and the tooltip entry.
choropleth_metric_title = "Mean Total Change per Episode"

# World map for the selected indicator and episode type. Fill colour encodes
# `mean_delta_abs` on a fixed 0-1 "reds" scale; a country picked through
# `country_select` gets a thicker black outline instead of a thin white one.
choropleth_fill = alt.Color(
    "mean_delta_abs:Q",
    scale=alt.Scale(scheme="reds", domain=(0, 1)),
    title=choropleth_metric_title,
)
choropleth_tooltip = [
    alt.Tooltip("country_name:N", title="Country"),
    alt.Tooltip("mean_delta_abs:Q", title=choropleth_metric_title, format=".3f"),
]

choropleth = (
    alt.Chart(gdf)
    .mark_geoshape()
    .transform_filter(type_filter_explicit)
    .transform_filter(component_select)
    .encode(
        color=choropleth_fill,
        stroke=alt.condition(
            country_select, alt.value("black"), alt.value("white"), empty=False
        ),
        strokeWidth=alt.condition(
            country_select, alt.value(1), alt.value(0.5), empty=False
        ),
        tooltip=choropleth_tooltip,
    )
    .add_params(type_param, country_select)
    .project("equalEarth")
    .properties(height=500, width=800)
)

In [None]:
# Title block for the combined dashboard.
dashboard_title = {
    "text": "Magnitudes of Political Change By Country and Indicator",
    "subtitle": "Select an indicator to view its geographic distribution of change, or select a country to view its pattern of change across indicators.",
}

# Dumbbell plot and choropleth side by side, each keeping its own colour legend.
dashboard = (
    alt.hconcat(dumbbell_plot, choropleth)
    .resolve_legend(color="independent")
    .properties(title=dashboard_title)
)

dashboard