In [2]:
import pandas as pd
import plotly.graph_objs as go
from ipywidgets import Dropdown, HBox, VBox, Output, Button, Layout

# =========================================================
# Styling
# =========================================================

GBR_COLORSCALE = [
    [0.00, "#e5f5e0"],
    [0.30, "#a1d99b"],
    [0.55, "#2b8cbe"],
    [0.80, "#253494"],
    [1.00, "#d73027"],
]

# =========================================================
# Text cleanup + canonicalization
# =========================================================

_INVIS = ["\u200b", "\u200c", "\u200d", "\ufeff", "\u2060", "\u00a0"]
_PUNCT_TO_SPACE = [".", ",", "'", '"', "'", "(", ")", "-", "–", "—"]

def clean_country_string(x):
    if not isinstance(x, str):
        return x
    s = x
    for ch in _INVIS:
        s = s.replace(ch, "")
    return s.strip()

def canon(x: str) -> str:
    if x is None:
        return ""
    s = clean_country_string(str(x)).lower()
    for ch in _PUNCT_TO_SPACE:
        s = s.replace(ch, " ")
    return " ".join(s.split())

# =========================================================
# Data helpers
# =========================================================

def require_columns(df: pd.DataFrame, cols: list[str], name: str):
    missing = [c for c in cols if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns in {name}: {missing}")

def to_numeric_frame(df: pd.DataFrame, exclude: str = "Country") -> pd.DataFrame:
    out = df.copy()
    for c in out.columns:
        if c == exclude:
            continue
        out[c] = pd.to_numeric(out[c], errors="coerce").fillna(0.0)
    return out

def column_totals(df_num: pd.DataFrame, exclude: str = "Country") -> dict[str, float]:
    cols = [c for c in df_num.columns if c != exclude]
    return {c: float(df_num[c].sum()) for c in cols}

def hover_format(metric_label: str) -> str:
    if metric_label in ("WCI per capita", "WCI per GDP"):
        return ".2e"
    if metric_label.startswith("Respondents"):
        return ".0f"
    return ".4f"

# =========================================================
# App
# =========================================================

class WCIApp:
    def __init__(
        self,
        wci_path="data/df_wci_with_respondents.csv",
        acc_nat_path="data/accusations_nationality.csv",
        acc_res_path="data/accusations_residence.csv",
    ):
        # ----- Load -----
        self.df_wci = pd.read_csv(wci_path)
        self.df_wci["Country"] = self.df_wci["Country"].apply(clean_country_string)

        required_cols = [
            "Country", "ISO3",
            "WCI", "WCI_per_capita", "WCI_per_GDP",
            "respondents_nat", "respondents_res"
        ]
        require_columns(self.df_wci, required_cols, "df_wci")

        self.metric_to_col = {
            "WCI": "WCI",
            "WCI per capita": "WCI_per_capita",
            "WCI per GDP": "WCI_per_GDP",
            "Respondents (by nationality)": "respondents_nat",
            "Respondents (by residence)": "respondents_res",
        }

        self.acc_raw = {
            "By nationality": pd.read_csv(acc_nat_path),
            "By residence": pd.read_csv(acc_res_path),
        }

        for k, df in self.acc_raw.items():
            df["Country"] = df["Country"].apply(clean_country_string)
            df.columns = [clean_country_string(c) for c in df.columns]

        # ----- Precompute for speed -----
        self.acc_num = {k: to_numeric_frame(df) for k, df in self.acc_raw.items()}
        self.acc_row_lookup = {
            k: {canon(c): i for i, c in enumerate(df["Country"].tolist())}
            for k, df in self.acc_raw.items()
        }
        self.acc_totals = {k: column_totals(df) for k, df in self.acc_num.items()}

        # ----- State -----
        self.state = {"last_country": None}

        # ----- UI -----
        self.metric_dropdown = Dropdown(
            options=list(self.metric_to_col.keys()),
            value="WCI",
            description="Metric:",
            layout=Layout(width="360px"),
        )
        self.accuser_dropdown = Dropdown(
            options=["By nationality", "By residence"],
            value="By nationality",
            description="Attributions:",
            layout=Layout(width="240px"),
        )
        self.reset_button = Button(
            description="Reset",
            button_style="warning",
            layout=Layout(width="100px"),
        )

        self.info_out = Output()
        self.bar_out = Output()

        # ----- Figures -----
        self.map_widget = self._build_map(initial_metric="WCI")
        self.bar_widget = self._build_bar()

        with self.bar_out:
            self.bar_out.clear_output()
            display(self.bar_widget)

        # ----- Wire events -----
        self.metric_dropdown.observe(self._on_metric_change)
        self.accuser_dropdown.observe(self._on_accuser_mode_change)
        self.reset_button.on_click(self._on_reset)

        # bind click ONCE
        self.map_widget.data[0].on_click(self._on_map_click)

    # -------------------------
    # Figure construction
    # -------------------------

    def _metric_series(self, metric_label: str) -> pd.Series:
        col = self.metric_to_col[metric_label]
        return pd.to_numeric(self.df_wci[col], errors="coerce").fillna(0.0)

    def _build_map(self, initial_metric: str) -> go.FigureWidget:
        vals = self._metric_series(initial_metric)
        vmin, vmax = float(vals.min()), float(vals.max())
        if vmin == vmax:
            vmax = vmin + 1e-9

        chor = go.Choropleth(
            locations=self.df_wci["ISO3"],
            z=vals,
            text=self.df_wci["Country"],
            customdata=self.df_wci["Country"],
            colorscale=GBR_COLORSCALE,
            zmin=vmin,
            zmax=vmax,
            marker_line_color="black",
            marker_line_width=0.3,
            hovertemplate="<b>%{text}</b><br>" + initial_metric + ": %{z}<extra></extra>",
        )
        fig = go.FigureWidget(data=[chor])
        fig.update_geos(
            showcountries=True,
            countrycolor="black",
            projection_type="natural earth",
            bgcolor="#e6e8ff",
        )
        fig.update_layout(
            title="World Cybercrime Index - A collection of surveyed responses by cybercrime specialists",
            height=600,
            margin=dict(t=50, b=10, l=10, r=10),
        )
        return fig

    def _build_bar(self) -> go.FigureWidget:
        fig = go.FigureWidget()
        fig.add_trace(go.Bar(orientation="h", x=[], y=[]))
        fig.update_layout(
            title="Who attributes … ?",
            height=420,
            margin=dict(l=140, r=20, t=60, b=50),
            xaxis_title="Share of attributor's total attributions (%)",
        )
        return fig

    # -------------------------
    # Updates
    # -------------------------

    def update_map(self, metric_label: str):
        vals = self._metric_series(metric_label)
        vmin, vmax = float(vals.min()), float(vals.max())
        if vmin == vmax:
            vmax = vmin + 1e-9

        fmt = hover_format(metric_label)

        with self.map_widget.batch_update():
            tr = self.map_widget.data[0]
            tr.z = vals
            tr.zmin = vmin
            tr.zmax = vmax
            tr.colorscale = GBR_COLORSCALE
            tr.hovertemplate = (
                "<b>%{text}</b><br>"
                + metric_label
                + f": %{{z:{fmt}}}<extra></extra>"
            )
            self.map_widget.layout.title.text = (
                "World Cybercrime Index - A collection of surveyed responses by cybercrime specialists"
            )

    def _top_attributors_for(self, accused_country: str, mode: str, top_n: int = 10):
        df_num = self.acc_num[mode]
        row_lookup = self.acc_row_lookup[mode]
        totals = self.acc_totals[mode]

        key = canon(accused_country)
        i = row_lookup.get(key)
        if i is None:
            return []

        row = df_num.iloc[i]
        attributors = [c for c in df_num.columns if c != "Country"]

        out = []
        for attributor in attributors:
            count = float(row[attributor])
            denom = float(totals.get(attributor, 0.0))
            if count > 0 and denom > 0:
                out.append((attributor, count / denom, count, denom))

        out.sort(key=lambda t: t[1], reverse=True)
        return out[:top_n]

    def update_bar(self, accused_country: str):
        mode = self.accuser_dropdown.value
        items = self._top_attributors_for(accused_country, mode, top_n=10)

        with self.bar_widget.batch_update():
            if not items:
                self.bar_widget.data[0].x = [1.0]
                self.bar_widget.data[0].y = ["No data"]
                self.bar_widget.data[0].customdata = [(0.0, 0.0)]
                self.bar_widget.data[0].hovertemplate = "No attribution data<extra></extra>"
                self.bar_widget.data[0].marker = dict(color="#cccccc")
                self.bar_widget.layout.title = f"Who attributes {accused_country}? ({mode})"
                self.bar_widget.layout.xaxis.title = "Share of attributor's total attributions (%)"
                return

            names = [t[0] for t in items][::-1]
            shares = [t[1] * 100.0 for t in items][::-1]
            counts = [t[2] for t in items][::-1]
            denoms = [t[3] for t in items][::-1]

            tr = self.bar_widget.data[0]
            tr.x = shares
            tr.y = names
            tr.customdata = list(zip(counts, denoms))
            tr.marker = dict(
                color=shares,
                colorscale=GBR_COLORSCALE,
                reversescale=False,
                colorbar=dict(title="%", tickformat=".0f"),
            )
            tr.hovertemplate = (
                "%{y}<br>"
                "Share: %{x:.2f}%<br>"
                "Count: %{customdata[0]:.0f} of %{customdata[1]:.0f} attributor attributions"
                "<extra></extra>"
            )
            self.bar_widget.layout.title = f"Who attributes {accused_country}? ({mode})"
            self.bar_widget.layout.xaxis.title = "Share of attributor's total attributions (%)"

    def update_info(self, accused_country: str):
        metric_label = self.metric_dropdown.value
        col = self.metric_to_col[metric_label]
        s = self.df_wci.loc[self.df_wci["Country"] == accused_country, col]
        val = float(pd.to_numeric(s, errors="coerce").iloc[0]) if not s.empty else float("nan")

        fmt = hover_format(metric_label)

        with self.info_out:
            self.info_out.clear_output()
            print(f"=== {accused_country} ===")
            if fmt == ".2e":
                print(f"{metric_label}: {val:.2e}")
            elif fmt == ".0f":
                print(f"{metric_label}: {val:.0f}")
            else:
                print(f"{metric_label}: {val:.4f}")

    # -------------------------
    # Event handlers
    # -------------------------

    def _on_metric_change(self, change):
        if change.get("name") != "value":
            return
        self.update_map(change["new"])
        with self.info_out:
            self.info_out.clear_output()
        # bar stays until next click

    def _on_accuser_mode_change(self, change):
        if change.get("name") != "value":
            return
        last = self.state.get("last_country")
        if last:
            self.update_bar(last)

    def _on_map_click(self, trace, points, selector):
        if not points.point_inds:
            return
        idx = points.point_inds[0]
        accused = clean_country_string(trace.customdata[idx])
        self.state["last_country"] = accused
        self.update_info(accused)
        self.update_bar(accused)

    def _on_reset(self, _):
        self.metric_dropdown.value = "WCI"
        self.accuser_dropdown.value = "By nationality"
        self.state["last_country"] = None

        with self.info_out:
            self.info_out.clear_output()

        with self.bar_widget.batch_update():
            self.bar_widget.data[0].x = []
            self.bar_widget.data[0].y = []
            self.bar_widget.data[0].customdata = []
            self.bar_widget.layout.title = "Who attributes … ?"

    # -------------------------
    # Public: UI
    # -------------------------

    def ui(self):
        return VBox(
            [
                HBox([self.metric_dropdown, self.accuser_dropdown, self.reset_button]),
                self.map_widget,
                self.info_out,
                self.bar_out,
            ]
        )

# =========================================================
# Run
# =========================================================

app = WCIApp()
display(app.ui())


Message serialization failed with:
Out of range float values are not JSON compliant: nan
Supporting this message is deprecated in jupyter-client 7, please make sure your message is JSON-compliant



VBox(children=(HBox(children=(Dropdown(description='Metric:', layout=Layout(width='360px'), options=('WCI', 'W…