In [None]:
import pandas as pd
import altair as alt
import numpy as np

# The episode data is larger than Altair's default row limit for datasets
# embedded in a chart, so lift the limit for this notebook.
# NOTE(review): with the limit off, the full dataset is presumably serialised
# into each chart spec, so the saved notebook may get large — confirm this is
# acceptable.
alt.data_transformers.disable_max_rows()

# Component codes that are filtered out of the episode table before plotting.
drop = [
    "v2x_mpi",
    "v2x_polyarchy",
    "v2x_libdem",
    "v2x_partipdem",
    "v2x_delibdem",
    "v2x_egaldem",
]

# Display label for each component code appearing in the "component" column.
# Every code is a valid Python identifier, so keyword form keeps this compact.
# Note: "v2x_api" and "v2x_mpi" deliberately share the same label here.
component_names = dict(
    v2x_polyarchy="Electoral Democracy",
    v2x_libdem="Liberal Democracy",
    v2x_partipdem="Participatory Democracy",
    v2x_delibdem="Deliberative Democracy",
    v2x_egaldem="Egalitarian Democracy",
    v2x_api="Polyarchy",
    v2x_mpi="Polyarchy",
    v2x_freexp_altinf="Freedom of Expression",
    v2x_frassoc_thick="Freedom of Association",
    v2x_suffr="Share of Population with Suffrage",
    v2xel_frefair="Clean Elections",
    v2x_elecoff="Elected Officials",
    v2x_liberal="Liberal Component",
    v2xcl_rol="Equality Before the Law",
    v2x_jucon="Judicial Constraints",
    v2xlg_legcon="Legislative Constraints",
    v2x_partip="Participatory Component",
    v2x_cspart="Civil Society Participation",
    v2xdd_dd="Direct Popular Vote",
    v2xel_locelec="Local Government",
    v2xel_regelec="Regional Government",
    v2xdl_delib="Deliberative Component",
    v2x_egal="Egalitarian Component",
    v2xeg_eqprotec="Equal Protection",
    v2xeg_eqaccess="Equal Access",
    v2xeg_eqdr="Equal Distribution of Resources",
)

# Integer region code -> region label. Codes are consecutive starting at 1,
# so the mapping is derived from the ordered list of labels.
region_map = dict(
    enumerate(
        [
            "Western Europe",
            "Northern Europe",
            "Southern Europe",
            "Eastern Europe",
            "Northern Africa",
            "Western Africa",
            "Middle Africa",
            "Eastern Africa",
            "Southern Africa",
            "Western Asia",
            "Central Asia",
            "Eastern Asia",
            "South-Eastern Asia",
            "Southern Asia",
            "Oceania",
            "North America",
            "Central America",
            "South America",
            "Caribbean",
        ],
        start=1,
    )
)

# Read the episode table; raw_episodes is kept untouched so later cells can
# still reach the rows that are filtered out below.
raw_episodes = pd.read_csv("../../../data/processed/episodes_long.csv")

# Keep only rows whose component code is not in `drop`, then replace the
# component and region codes with their display labels.
# Series.map with a dict yields NaN for any code missing from the dict.
episodes = raw_episodes.loc[~raw_episodes["component"].isin(drop)].assign(
    component=lambda frame: frame["component"].map(component_names),
    region=lambda frame: frame["region"].map(region_map),
)

In [None]:
# Mean lead per indicator as a flat two-column frame (component, lead).
mean_lead = episodes.groupby("component", as_index=False)["lead"].mean()

# Indicator labels ordered from largest to smallest mean lead; reused as the
# y-axis ordering by the other panels.
sorted_components = (
    mean_lead
    .sort_values(by="lead", ascending=False)["component"]
    .tolist()
)

# Dot plot: one circle per indicator positioned at its mean lead.
by_component = (
    alt.Chart(mean_lead)
    .mark_circle(size=100)
    .encode(
        x=alt.X("lead:Q", title="Mean Lead Time (years)"),
        y=alt.Y("component:N", title="Indicator", sort=sorted_components),
    )
)

In [None]:
# Point selection driven by hovering a heatmap cell. It captures the cell's
# lead_capped value, is cleared on pointerout, and (empty=False) matches no
# rows while nothing is hovered.
hover = alt.selection_point(
    name="hover2",
    on="pointerover",
    fields=["lead_capped"],
    empty=False,
    clear="pointerout"
)

# Lead times are clipped to [-W, W]; values beyond that pile up in the two
# edge columns of the heatmap.
W = 10

# Derive a new frame rather than mutating in place, so re-running the cell
# always starts from `episodes`.
heatmap_df = episodes.dropna(subset=["lead"]).copy()
# astype(int) truncates any fractional lead toward zero after clipping.
heatmap_df["lead_capped"] = heatmap_df["lead"].clip(lower=-W, upper=W).astype(int)

# One row per (indicator, capped lead) with the number of matching rows.
heatmap_data = (
    heatmap_df
    .groupby(["component", "lead_capped"])
    .size()
    .reset_index(name="count")
)

# Per-column summary shown in the tooltip.
# NOTE(review): n_episodes counts non-null episode_id rows across all
# indicators, not distinct episodes — confirm that is the intended statistic.
col_summary = (
    heatmap_df
    .groupby("lead_capped")
    .agg(
        n_episodes=("episode_id", "count"),
        mean_delta_abs=("delta_abs", "mean"),
    )
    .reset_index()
)

heatmap_data = heatmap_data.merge(col_summary, on="lead_capped", how="left")

# Each integer lead k is drawn as the unit column [k, k + 1). The bin extent
# and the scale domain therefore run to W + 1: with an extent that stops at W,
# the bin transform folds the value W into the previous bin, so the capped +W
# column would overlap the W - 1 column and the hover highlight for W would
# fall outside the axis.
heatmap_cells = alt.Chart(heatmap_data).mark_rect().encode(
    x=alt.X(
        "lead_capped:Q",
        bin=alt.Bin(step=1, extent=[-W, W + 1]),
        title="Lead Time (years)",
        scale=alt.Scale(domain=[-W, W + 1]),
    ),
    y=alt.Y("component:N", title="Indicator", sort=sorted_components),
    color=alt.Color(
        "sum(count):Q",
        scale=alt.Scale(scheme="blues", type="log"),
        title="Number of Episodes",
    ),
    tooltip=[
        alt.Tooltip("lead_capped:Q", title="Lead Time (years)"),
        alt.Tooltip("sum(count):Q", title="Episodes in Cell"),
        alt.Tooltip("n_episodes:Q", title="Episodes in Column"),
        alt.Tooltip("mean_delta_abs:Q", title="Mean Absolute Change in Column"),
    ],
).add_params(hover)

# Translucent overlay covering every cell of the hovered column; it spans the
# same [lead_capped, lead_capped + 1) interval as the heatmap cells.
highlight = alt.Chart(heatmap_data).mark_bar(fill="black", opacity=0.15).transform_calculate(
    lead_capped2="datum.lead_capped + 1"
).encode(
    x="lead_capped:Q",
    x2="lead_capped2:Q",
    y=alt.Y("component:N", title="Indicator", sort=sorted_components),
).transform_filter(hover)

# Final layered panel used by the dashboard.
heatmap = heatmap_cells + highlight

In [None]:
# Episode magnitude is the delta_abs recorded on each episode's
# "v2x_polyarchy" row. Those rows are taken from raw_episodes because that
# code is excluded from `episodes`.
magnitudes = (
    raw_episodes
    .loc[
        raw_episodes["component"] == "v2x_polyarchy",
        ["episode_id", "country_name", "delta_abs"],
    ]
    .rename(columns={"delta_abs": "magnitude"})
)

# Per-indicator lead times, joined to the episode magnitude on episode_id
# (inner join); the key is dropped once the join is done.
lead_times = episodes.loc[:, ["episode_id", "lead", "component"]]
scatter_df = (
    lead_times
    .merge(magnitudes, on="episode_id")
    .drop(columns=["episode_id"])
)

# Dropdown bound to a point selection on "component", initialised to the
# first entry of sorted_components.
component_dropdown = alt.binding_select(options=sorted_components, name="Component: ")
component_select = alt.selection_point(
    fields=["component"],
    bind=component_dropdown,
    value=sorted_components[0],
)

# Scatter of lead time against episode magnitude, restricted to the indicator
# chosen in the dropdown.
magnitude_lead = (
    alt.Chart(scatter_df)
    .mark_circle(size=60, opacity=0.3)
    .encode(
        x=alt.X("lead:Q", title="Lead Time (years)"),
        y=alt.Y("magnitude:Q", title="Episode Magnitude"),
        color=alt.Color("component:N", title="Indicator", legend=None),
    )
    .transform_filter(component_select)
    .add_params(component_select)
)

In [None]:
# Title block for the combined figure.
dashboard_title = {
    "text": "Lead Times of Democratic Indicators",
    "subtitle": "Hover over a column in the heatmap for summary statistics; select an indicator in the scatter plot to filter by component.",
}

# Stack the three panels vertically: mean lead per indicator, the lead-time
# heatmap, and the magnitude scatter.
stacked_panels = by_component & heatmap & magnitude_lead
dashboard = stacked_panels.properties(title=dashboard_title)

# Last expression of the cell: render the dashboard.
dashboard