# WCI (World Cybercrime Index) — Exact Reproduction per Spec
This notebook rebuilds the per-country WCI exactly as specified:

1) Build a long-format nominations table (one row = one nomination) with columns:
   `ResponseID, Country, CrimeType, Impact, Professionalism, TechSkill`.

2) For each (Country, CrimeType):
   - `NomScore = (Impact + Professionalism + TechSkill) / 3` per nomination.
   - Let `n` = number of nominations for that pair.
   - `CountryScore_type = mean(NomScore)` across the `n` nominations.
   - Store `nominations_type = n`.

3) Per-type WCI (Eq. 2): `WCI_type = CountryScore_type * (nominations_type / 92) * 10`.
   Output columns: `Tech, Attacks, Data, Scams, Cash`.

4) Average raw type score (not WCI_type):
   `AvgTypeScore = (CountryScore_Tech + CountryScore_Attacks + CountryScore_Data + CountryScore_Scams + CountryScore_Cash) / 5`
   Missing types contribute 0.

5) Total nominations across all types:
   `TotalNominations = nominations_Tech + nominations_Attacks + nominations_Data + nominations_Scams + nominations_Cash`.
   Max possible = 92 respondents × 5 types = 460.

6) Overall WCI (Eq. 3): `WCI_overall = AvgTypeScore * (TotalNominations / 460) * 10`.

7) Overall I, P, TS: means of the three scores across all nominations of the country (ignoring types).

8) Final output (sorted by `WCI Score` desc):
   `Rank, Country, I, P, TS, WCI Score, Tech, Attacks, Data, Scams, Cash`.


In [None]:
import pandas as pd
from pathlib import Path

DATA_DIR = Path('data')
RAW_PATH = DATA_DIR / 'wci_data.csv'
OUT_PATH = DATA_DIR / 'WCI_recacl.csv'

df = pd.read_csv(RAW_PATH)
try:
    from IPython.display import display  # type: ignore
except Exception:
    display = lambda x: print(x.head() if hasattr(x, 'head') else x)

print('Loaded:', RAW_PATH, 'shape=', df.shape)
print('Unique ResponseID:', df['ResponseID'].nunique())


## 1) Long-format nominations table
We convert each of the 5 blocks (Technical, Attack, Data, Scams, Cash) and positions 1..5 into a long table.


In [None]:
type_map = [
    ('Tech', 'Technical'),   # output label Tech
    ('Attacks', 'Attack'),   # output label Attacks (note: source columns use singular Attack)
    ('Data', 'Data'),
    ('Scams', 'Scams'),
    ('Cash', 'Cash'),
]

records = []
for _, row in df.iterrows():
    rid = row.get('ResponseID')
    for out_label, prefix in type_map:
        for pos in range(1, 6):
            country = row.get(f'{prefix}{pos}')
            if pd.isna(country):
                continue
            country_str = str(country).strip()
            if country_str == '' or country_str == '--':
                continue
            impact = pd.to_numeric(row.get(f'{prefix}{pos}_impact'), errors='coerce')
            prof = pd.to_numeric(row.get(f'{prefix}{pos}_professional'), errors='coerce')
            tech = pd.to_numeric(row.get(f'{prefix}{pos}_techskill'), errors='coerce')
            # Keep the row even if some of the three are NaN; they will be averaged from available values
            records.append({
                'ResponseID': rid,
                'Country': country_str,
                'CrimeType': out_label,  # exact output names
                'Impact': impact,
                'Professionalism': prof,
                'TechSkill': tech,
            })

long_df = pd.DataFrame.from_records(records)

# Drop any residual empty countries (safety)
long_df = long_df.dropna(subset=['Country'])
long_df = long_df[long_df['Country'].astype(str).str.strip().isin(['', '--']) == False].copy()

print('Long table shape:', long_df.shape)
display(long_df.head())


## 2) CountryScore_type and nominations_type


In [None]:
# Nomination-level average (per row)
# Drop nominations with missing scores exactly as the authors did
long_df = long_df.dropna(subset=['Impact', 'Professionalism', 'TechSkill']).copy()
long_df['NomScore'] = long_df[['Impact', 'Professionalism', 'TechSkill']].mean(axis=1)

grp = long_df.groupby(['Country', 'CrimeType'])
agg = grp['NomScore'].agg(['mean', 'count']).reset_index()
agg = agg.rename(columns={
    'mean': 'CountryScore_type',
    'count': 'nominations_type',
})

display(agg.head())


## 3) Per-type WCI (Eq. 2)
We use the published denominator 92 (max nominations per type).


In [None]:
N_RESPONDENTS_PUBLISHED = 92
agg['WCI_type'] = agg['CountryScore_type'] * (agg['nominations_type'] / N_RESPONDENTS_PUBLISHED) * 10.0

# Pivot to wide per-type columns with required names
per_type = agg.pivot_table(
    index='Country',
    columns='CrimeType',
    values=['CountryScore_type', 'nominations_type', 'WCI_type'],
    aggfunc='first'
)

# Flatten columns
per_type.columns = [f"{a}__{b}" for a, b in per_type.columns]
per_type = per_type.reset_index()

# Ensure all five type columns exist; fill missing with 0
for t in ['Tech', 'Attacks', 'Data', 'Scams', 'Cash']:
    for base in ['CountryScore_type', 'nominations_type', 'WCI_type']:
        col = f'{base}__{t}'
        if col not in per_type.columns:
            per_type[col] = 0.0


## 4) Average Raw Type Score (missing types contribute 0)


In [None]:
# Collect all raw scores
raw_cols = [
    "CountryScore_type__Tech",
    "CountryScore_type__Attacks",
    "CountryScore_type__Data",
    "CountryScore_type__Scams",
    "CountryScore_type__Cash"
]

nom_cols = [
    "nominations_type__Tech",
    "nominations_type__Attacks",
    "nominations_type__Data",
    "nominations_type__Scams",
    "nominations_type__Cash"
]

# Replace zeros with NaN for averaging (R excludes missing types)
tmp = per_type[raw_cols].replace({0: pd.NA})

# Compute unweighted mean of non-missing types
per_type["AvgTypeScore"] = tmp.mean(axis=1, skipna=True)

## 5) Total nominations across all types (max 460)


In [None]:
per_type['TotalNominations'] = (
    per_type['nominations_type__Tech'] +
    per_type['nominations_type__Attacks'] +
    per_type['nominations_type__Data'] +
    per_type['nominations_type__Scams'] +
    per_type['nominations_type__Cash']
)

print('Max theoretical TotalNominations = 460 (92*5). Observed max =', int(per_type['TotalNominations'].max()))


## 6) Overall WCI Score (Eq. 3)


In [None]:
# 6) Overall WCI Score (as in the published table)

per_type['WCI Score'] = (
    per_type['WCI_type__Tech']
    + per_type['WCI_type__Attacks']
    + per_type['WCI_type__Data']
    + per_type['WCI_type__Scams']
    + per_type['WCI_type__Cash']
) / 5.0


## 7) Overall I, P, TS (unweighted across all nominations)


In [None]:
ipt = long_df.groupby('Country')[['Impact', 'Professionalism', 'TechSkill']].mean().reset_index()
ipt = ipt.rename(columns={'Impact': 'I', 'Professionalism': 'P', 'TechSkill': 'TS'})


## 8) Final output table


In [None]:
final = per_type.merge(ipt, on='Country', how='left')

# Add the five per-type WCI columns with required names
final['Tech'] = per_type['WCI_type__Tech']
final['Attacks'] = per_type['WCI_type__Attacks']
final['Data'] = per_type['WCI_type__Data']
final['Scams'] = per_type['WCI_type__Scams']
final['Cash'] = per_type['WCI_type__Cash']

cols_order = ['Country', 'I', 'P', 'TS', 'WCI Score', 'Tech', 'Attacks', 'Data', 'Scams', 'Cash']
final = final[cols_order].copy()

# Sort and rank
final = final.sort_values('WCI Score', ascending=False).reset_index(drop=True)
final.insert(0, 'Rank', final.index + 1)

display(final.head(15))


## Save to CSV


In [None]:
OUT_PATH.parent.mkdir(parents=True, exist_ok=True)
final.to_csv(OUT_PATH, index=False)
print('Wrote', OUT_PATH.resolve())


In [None]:
import pandas as pd
from pathlib import Path

# ============================================================
# LOAD RAW WCI SURVEY DATA
# ============================================================
DATA_DIR = Path("data")
RAW_PATH = DATA_DIR / "wci_data.csv"

df = pd.read_csv(RAW_PATH).fillna("")
print("Loaded:", RAW_PATH, "shape:", df.shape)

# ============================================================
# BUILD LONG TABLE (MATCHING R's LOGIC EXACTLY)
# ============================================================
type_map = [
    ("Tech",    "Technical"),
    ("Attacks", "Attack"),
    ("Data",    "Data"),
    ("Scams",   "Scams"),
    ("Cash",    "Cash"),
]

records = []

def as_numeric_r(x):
    """Mimic R's as.numeric(): convert '' or non-numeric to NA."""
    try:
        return float(x)
    except:
        return None

for _, row in df.iterrows():
    rid = row["ResponseID"]

    for out_label, prefix in type_map:
        for pos in range(1, 6):

            # Extract nominated country
            country = str(row[f"{prefix}{pos}"]).strip()

            # R code treats '--' as empty string
            if country == "--":
                country = ""

            # Extract the three scoring dimensions
            imp  = as_numeric_r(row.get(f"{prefix}{pos}_impact"))
            prof = as_numeric_r(row.get(f"{prefix}{pos}_professional"))
            tech = as_numeric_r(row.get(f"{prefix}{pos}_techskill"))

            # R logic: if ANY are NA → NomScore is NA
            if imp is None or prof is None or tech is None:
                NomScore = None
            else:
                NomScore = (imp + prof + tech) / 3

            # Append long record
            records.append({
                "ResponseID": rid,
                "Country": country,
                "CrimeType": out_label,
                "I": imp,
                "P": prof,
                "TS": tech,
                "NomScore": NomScore,
            })

long_df = pd.DataFrame(records)
print("Long DF:", long_df.shape)

# ============================================================
# SUMMARISE (MATCHING R)
# ============================================================
grp = long_df.groupby(["Country", "CrimeType"])
CountryScore_type = grp["NomScore"].mean()
nominations_type = grp.size()

agg = pd.concat([CountryScore_type, nominations_type], axis=1)
agg.columns = ["CountryScore_type", "nominations_type"]
agg = agg.reset_index()

# ============================================================
# WCI_type (Eq. 2)
# ============================================================
NRESP = 92
agg["WCI_type"] = agg["CountryScore_type"] * (agg["nominations_type"] / NRESP) * 10

# ============================================================
# PIVOT TO WIDE
# ============================================================
per_type = agg.pivot_table(
    index="Country",
    columns="CrimeType",
    values=["CountryScore_type", "nominations_type", "WCI_type"],
    aggfunc="first",
)

per_type.columns = [f"{a}__{b}" for a, b in per_type.columns]
per_type = per_type.reset_index()

# Ensure missing 0s exist
crime_types = ["Tech", "Attacks", "Data", "Scams", "Cash"]
bases = ["CountryScore_type", "nominations_type", "WCI_type"]

for t in crime_types:
    for base in bases:
        col = f"{base}__{t}"
        if col not in per_type.columns:
            per_type[col] = 0.0

# ============================================================
# COMPUTE AVERAGE TYPE SCORE
# ============================================================
per_type["AvgTypeScore"] = (
    per_type["CountryScore_type__Tech"] +
    per_type["CountryScore_type__Attacks"] +
    per_type["CountryScore_type__Data"] +
    per_type["CountryScore_type__Scams"] +
    per_type["CountryScore_type__Cash"]
) / 5

# ============================================================
# TOTAL NOMINATIONS
# ============================================================
per_type["TotalNominations"] = (
    per_type["nominations_type__Tech"] +
    per_type["nominations_type__Attacks"] +
    per_type["nominations_type__Data"] +
    per_type["nominations_type__Scams"] +
    per_type["nominations_type__Cash"]
)

# ============================================================
# OVERALL WCI SCORE (Eq. 3)
# ============================================================
DEN = 92 * 5
per_type["WCI Score"] = (
    per_type["AvgTypeScore"] *
    (per_type["TotalNominations"] / DEN) *
    10
)

# ============================================================
# Average I, P, TS
# ============================================================
ipt = long_df.groupby("Country")[["I", "P", "TS"]].mean().reset_index()

# ============================================================
# FINAL MERGE (I, P, TS + TYPE SCORES + WCI)
# ============================================================
final = per_type.merge(ipt, on="Country", how="left")

final["Tech"]    = final["WCI_type__Tech"]
final["Attacks"] = final["WCI_type__Attacks"]
final["Data"]    = final["WCI_type__Data"]
final["Scams"]   = final["WCI_type__Scams"]
final["Cash"]    = final["WCI_type__Cash"]

final = final[
    ["Country", "I", "P", "TS", "WCI Score",
     "Tech", "Attacks", "Data", "Scams", "Cash"]
]

final = final.sort_values("WCI Score", ascending=False).reset_index(drop=True)
final.insert(0, "Rank", final.index + 1)

display(final.head(20))

In [None]:
print("NomScore NA rows:", long_df['NomScore'].isna().sum())
print("NomScore total rows:", long_df.shape[0])

In [None]:
import pandas as pd
from pathlib import Path

# ============================================================
# Load data (as readr would)
# ============================================================
DATA_DIR = Path("data")
RAW_PATH = DATA_DIR / "wci_data.csv"

df = pd.read_csv(RAW_PATH)
df = df.fillna("")   # R keeps empty strings, not NaN

print("Loaded:", RAW_PATH, "shape:", df.shape)

# ============================================================
# Build long table EXACTLY like the R script does
# ============================================================
type_map = [
    ("Tech",    "Technical"),
    ("Attacks", "Attack"),
    ("Data",    "Data"),
    ("Scams",   "Scams"),
    ("Cash",    "Cash"),
]

records = []

def as_numeric_r(x):
    """Mimic R as.numeric: convert '' → NA"""
    try:
        return float(x)
    except:
        return None

for _, row in df.iterrows():
    rid = row["ResponseID"]

    for out_label, prefix in type_map:
        for pos in range(1, 6):

            raw_country = row[f"{prefix}{pos}"]
            raw_country = "" if pd.isna(raw_country) else str(raw_country).strip()

            # DO NOT skip empty countries here — R does not.
            country = raw_country

            # Extract ratings exactly like R does
            I  = as_numeric_r(row[f"{prefix}{pos}_impact"])
            P  = as_numeric_r(row[f"{prefix}{pos}_professional"])
            TS = as_numeric_r(row[f"{prefix}{pos}_techskill"])

            # Compute NomScore like R:
            # if any rating NA → NomScore NA
            if I is None or P is None or TS is None:
                NomScore = None
            else:
                NomScore = (I + P + TS) / 3.0

            records.append({
                "ResponseID": rid,
                "Country": country,
                "CrimeType": out_label,
                "I": I,
                "P": P,
                "TS": TS,
                "NomScore": NomScore,
            })

long_df = pd.DataFrame(records)
print("Long DF:", long_df.shape)

# ============================================================
# Summarise like the R code
# ============================================================
grp = long_df.groupby(["Country", "CrimeType"])

agg = pd.DataFrame({
    "CountryScore_type": grp["NomScore"].mean(),    # mean(..., na.rm=TRUE)
    "nominations_type": grp.size()                  # n()
}).reset_index()

# ============================================================
# Compute per-type WCI (Eq. 2)
# ============================================================
NRESP = 92
agg["WCI_type"] = agg["CountryScore_type"] * (agg["nominations_type"] / NRESP) * 10

# Pivot wide
per_type = agg.pivot_table(
    index="Country",
    columns="CrimeType",
    values=["CountryScore_type", "nominations_type", "WCI_type"],
    aggfunc="first"
)

per_type.columns = [f"{a}__{b}" for a, b in per_type.columns]
per_type = per_type.reset_index()

# ============================================================
# DROP blank-country rows here — this is what the published table does
# ============================================================
per_type = per_type[per_type["Country"].str.strip() != ""].copy()

# Ensure all 5 categories exist
for t in ["Tech", "Attacks", "Data", "Scams", "Cash"]:
    for base in ["CountryScore_type", "nominations_type", "WCI_type"]:
        col = f"{base}__{t}"
        if col not in per_type.columns:
            per_type[col] = 0.0

# ============================================================
# Compute OVERALL WCI exactly like the published table:
#
#     WCI Score = mean(Tech, Attacks, Data, Scams, Cash)
#
# NOT the Eq(3) formula.
# ============================================================
per_type["WCI Score"] = (
    per_type["WCI_type__Tech"]
    + per_type["WCI_type__Attacks"]
    + per_type["WCI_type__Data"]
    + per_type["WCI_type__Scams"]
    + per_type["WCI_type__Cash"]
) / 5.0

# ============================================================
# Compute overall I, P, TS (means across ALL nominations)
# ============================================================
ipt = long_df[long_df["Country"].str.strip() != ""].groupby("Country")[["I","P","TS"]].mean().reset_index()

# ============================================================
# Assemble final table
# ============================================================
final = per_type.merge(ipt, on="Country", how="left")

final["Tech"]    = final["WCI_type__Tech"]
final["Attacks"] = final["WCI_type__Attacks"]
final["Data"]    = final["WCI_type__Data"]
final["Scams"]   = final["WCI_type__Scams"]
final["Cash"]    = final["WCI_type__Cash"]

final = final[[
    "Country", "I", "P", "TS", "WCI Score",
    "Tech", "Attacks", "Data", "Scams", "Cash"
]]

final = final.sort_values("WCI Score", ascending=False).reset_index(drop=True)
final.insert(0, "Rank", final.index + 1)

display(final.head(20))

In [None]:
import pandas as pd
from pathlib import Path

DATA_DIR = Path("data")

# ============================================================
# 1. Attach nationality + residence to the long_df nomination table
# ============================================================

id_map = df[['ResponseID', 'Nationality', 'Residence']].copy()

votes = long_df.merge(id_map, on="ResponseID", how="left")

# Drop blank accused-country rows
votes = votes[votes['Country'].str.strip() != ""].copy()

# ============================================================
# 2. Build NATIONALITY accusation COUNT matrix
# ============================================================

nat_matrix = (
    votes.pivot_table(
        index="Country",
        columns="Nationality",
        values="ResponseID",
        aggfunc="count",
        fill_value=0
    )
    .sort_index()
)

# Row-normalise to percentages
nat_percent = nat_matrix.div(nat_matrix.sum(axis=1), axis=0) * 100

# Save
nat_path = DATA_DIR / "accusations_nationality_percent.csv"
nat_percent.to_csv(nat_path)
print("Wrote nationality % matrix:", nat_path)

# ============================================================
# 3. Build RESIDENCE accusation COUNT matrix
# ============================================================

res_matrix = (
    votes.pivot_table(
        index="Country",
        columns="Residence",
        values="ResponseID",
        aggfunc="count",
        fill_value=0
    )
    .sort_index()
)

# Row-normalise to percentages
res_percent = res_matrix.div(res_matrix.sum(axis=1), axis=0) * 100

# Save
res_path = DATA_DIR / "accusations_residence_percent.csv"
res_percent.to_csv(res_path)
print("Wrote residence % matrix:", res_path)

# ============================================================
# Preview
# ============================================================
print("\nNATIONALITY % (head)")
display(nat_percent.head())

print("\nRESIDENCE % (head)")
display(res_percent.head())

In [None]:
import pandas as pd
from pathlib import Path

DATA_DIR = Path("data")

# We assume df (wide) and long_df (correct long table) already exist.

# ------------------------------------------------------------
# 1) Attach nationality + residence to long_df
# ------------------------------------------------------------
id_map = df[['ResponseID', 'Nationality', 'Residence']].copy()

votes = long_df.merge(id_map, on="ResponseID", how="left")

# Drop blank accused-country rows
votes = votes[votes['Country'].str.strip() != ""].copy()

# ------------------------------------------------------------
# 2) SELF-NOMINATIONS by NATIONALITY
# ------------------------------------------------------------
self_nat = (
    votes[votes['Country'] == votes['Nationality']]
    .groupby('Country')
    .size()
    .reset_index(name='SelfNominations')
    .sort_values('SelfNominations', ascending=False)
    .reset_index(drop=True)
)

# Save
out_nat = DATA_DIR / "selfnominate_nationality.csv"
self_nat.to_csv(out_nat, index=False)
print("Wrote:", out_nat)

# ------------------------------------------------------------
# 3) SELF-NOMINATIONS by RESIDENCE
# ------------------------------------------------------------
self_res = (
    votes[votes['Country'] == votes['Residence']]
    .groupby('Country')
    .size()
    .reset_index(name='SelfNominations')
    .sort_values('SelfNominations', ascending=False)
    .reset_index(drop=True)
)

# Save
out_res = DATA_DIR / "selfnominate_residence.csv"
self_res.to_csv(out_res, index=False)
print("Wrote:", out_res)

# ------------------------------------------------------------
# Preview
# ------------------------------------------------------------
print("\nSelf-nomination by NATIONALITY:")
display(self_nat.head(20))

print("\nSelf-nomination by RESIDENCE:")
display(self_res.head(20))

In [None]:
!pip install pycountry

In [None]:
import pandas as pd
from pathlib import Path

DATA_DIR = Path("data")

# ------------------------------------------------------------
# 1) Attach nationality + residence to long_df
# ------------------------------------------------------------
id_map = df[['ResponseID', 'Nationality', 'Residence']].copy()

votes = long_df.merge(id_map, on="ResponseID", how="left")

# Keep only nominations with real accused country
votes = votes[votes['Country'].str.strip() != ""].copy()

# ------------------------------------------------------------
# 2) BUILD ANIMOSITY MATRIX (nationality → accused country)
# ------------------------------------------------------------

# Count accusations from nationality -> country
ani_counts = (
    votes
    .pivot_table(
        index="Nationality",     # accuser
        columns="Country",       # accused
        values="ResponseID",
        aggfunc="count",
        fill_value=0
    )
    .sort_index()
)

# Row-normalise to percentages
ani_percent = ani_counts.div(ani_counts.sum(axis=1), axis=0) * 100

# ------------------------------------------------------------
# 3) SAVE
# ------------------------------------------------------------
out_path = DATA_DIR / "animosity_index.csv"
ani_percent.to_csv(out_path)

print("Wrote animosity index:", out_path)

# ------------------------------------------------------------
# 4) Preview
# ------------------------------------------------------------
display(ani_percent.head(20))

In [None]:
import pandas as pd

# ============================================================
# 1. Load your recalculated WCI scores
# ============================================================
wci = pd.read_csv("data/WCI_recacl.csv")  # adjust path if needed

# Standardise columns
wci = wci.rename(columns={"WCI Score": "WCI"})
wci = wci[["Country", "WCI"]]

# ============================================================
# 2. Load the world population + GDP dataset for normalisation
# ============================================================
world = pd.read_csv("data/world-data-2023.csv")  # adjust if needed

# Ensure country names match your WCI dataset
# (Lowercase strip for matching)
wci['Country_clean'] = wci['Country'].str.lower().str.strip()
world['Country_clean'] = world['Country'].str.lower().str.strip()

# Merge to get population and GDP
df_wci = wci.merge(
    world[["Country_clean", "Abbreviation", "Population", "GDP"]],
    on="Country_clean",
    how="left"
)

df_wci = df_wci.rename(columns={
    "Abbreviation": "ISO3"
})
# ============================================================
# 3. Add normalised metrics
# ============================================================
df_wci["WCI_per_capita"] = df_wci["WCI"] / df_wci["Population"]
df_wci["WCI_per_GDP"] = df_wci["WCI"] / df_wci["GDP"]

# Clean final columns
df_wci = df_wci[["Country", "ISO3", "WCI", "WCI_per_capita", "WCI_per_GDP"]]

print("df_wci ready:")
display(df_wci.head())

# ============================================================
# 4. Build accusers dict (top accusers per country)
# ============================================================
acc_matrix = pd.read_csv("data/accusations_nationality_percent.csv")

# We want: dict[country] = List[(accuser, percentage)]
accusers = {}

# All unique countries appearing as columns
for country in acc_matrix["Country"].unique():
    row = acc_matrix[acc_matrix["Country"] == country].drop(columns=["Country"]).iloc[0]
    # Convert to list of (accuser, percentage)
    items = [(col, float(val)) for col, val in row.items() if float(val) > 0]
    # Sort descending
    items = sorted(items, key=lambda x: x[1], reverse=True)
    # Store
    accusers[country] = items

print("\nAccusers dict example:")
example_country = list(accusers.keys())[0]
print(example_country, accusers[example_country][:5])

In [None]:
import pandas as pd
import pycountry

# ============================================================
# 0. Helpers
# ============================================================

def iso2_to_iso3(code):
    """Convert ISO2 → ISO3; return None if not found."""
    try:
        return pycountry.countries.get(alpha_2=code.upper()).alpha_3
    except:
        return None

def clean_population(x):
    """Remove commas and convert to float."""
    if isinstance(x, str):
        x = x.replace(",", "").strip()
        if x == "" or x.lower() == "nan":
            return None
    try:
        return float(x)
    except:
        return None

def clean_gdp(value):
    """Parse GDP like '$2.1T', '$900B', '1,200,000,000', etc."""
    if isinstance(value, str):
        v = value.strip()
        if v == "" or v.lower() == "nan":
            return None

        # Remove currency symbols + commas + spaces
        v = (
            v.replace("$", "")
             .replace(",", "")
             .replace(" ", "")
             .strip()
        )

        # Handle suffixes
        if v.endswith(("T", "t")):
            return float(v[:-1]) * 1_000_000_000_000
        if v.endswith(("B", "b")):
            return float(v[:-1]) * 1_000_000_000
        if v.endswith(("M", "m")):
            return float(v[:-1]) * 1_000_000

        # Fallback direct parse
        try:
            return float(v)
        except:
            return None

    return None


# ============================================================
# 1. Load recalculated WCI
# ============================================================

wci = pd.read_csv("data/WCI_recacl.csv")
wci = wci.rename(columns={"WCI Score": "WCI"})
wci["Country_clean"] = wci["Country"].str.lower().str.strip()


# ============================================================
# 2. Load world data + clean numeric columns
# ============================================================

world = pd.read_csv("data/world-data-2023.csv")
world["Country_clean"] = world["Country"].str.lower().str.strip()

# ISO2 → ISO3
world["ISO3"] = world["Abbreviation"].apply(iso2_to_iso3)

# Clean population + GDP
world["Population_clean"] = world["Population"].apply(clean_population)
world["GDP_clean"] = world["GDP"].apply(clean_gdp)


# ============================================================
# 3. Merge WCI with world metrics
# ============================================================

df_wci = wci.merge(
    world[["Country_clean", "ISO3", "Population_clean", "GDP_clean"]],
    on="Country_clean",
    how="left"
)

df_wci["WCI_per_capita"] = df_wci["WCI"] / df_wci["Population_clean"]
df_wci["WCI_per_GDP"] = df_wci["WCI"] / df_wci["GDP_clean"]

df_wci = df_wci.rename(columns={
    "Population_clean": "Population",
    "GDP_clean": "GDP"
})

df_wci = df_wci[["Country", "ISO3", "WCI", "Population", "GDP", "WCI_per_capita", "WCI_per_GDP"]]

print("df_wci ready:")
display(df_wci.head(20))


# ============================================================
# 4. Build accusers dictionary (from accusation % matrix)
# ============================================================

acc_matrix = pd.read_csv("data/accusations_nationality_percent.csv")

accusers = {}

for i, row in acc_matrix.iterrows():
    country = row["Country"]
    items = []
    for col in row.index[1:]:  # skip 'Country'
        val = row[col]
        if isinstance(val, (int, float)) and val > 0:
            items.append((col, float(val)))

    # Sort by % descending
    items = sorted(items, key=lambda x: x[1], reverse=True)
    accusers[country] = items

print("\nAccusers example:")
first = list(accusers.keys())[0]
print(first, accusers[first][:5])

In [None]:
import pandas as pd

# ---------------------------------------------------
# 1. Load WCI recalculated results
# ---------------------------------------------------
wci = pd.read_csv("data/WCI_recacl.csv")
# Expecting columns: Country, I, P, TS, WCI Score, Tech, Attacks, Data, Scams, Cash

wci = wci.rename(columns={"WCI Score": "WCI"})

# ---------------------------------------------------
# 2. Load world data (2023)
# ---------------------------------------------------
world = pd.read_csv("data/world-data-2023.csv")

# Keep only what we need
world = world[["Country", "Abbreviation", "Population", "GDP"]].copy()
world = world.rename(columns={"Abbreviation": "ISO3"})

# ---------------------------------------------------
# 3. Clean numeric fields
# ---------------------------------------------------
def clean_numeric(x):
    if isinstance(x, str):
        x = x.replace("$","").replace(",", "").strip()
    return pd.to_numeric(x, errors="coerce")

world["Population"] = world["Population"].apply(clean_numeric)
world["GDP"] = world["GDP"].apply(clean_numeric)

# ---------------------------------------------------
# 4. Build merge key
# ---------------------------------------------------
wci["Country_clean"] = wci["Country"].str.lower().str.strip()
world["Country_clean"] = world["Country"].str.lower().str.strip()

# ---------------------------------------------------
# 5. Merge
# ---------------------------------------------------
df = wci.merge(world[["Country_clean","ISO3","Population","GDP"]],
               on="Country_clean",
               how="left")

# ---------------------------------------------------
# 6. Create normalised metrics
# ---------------------------------------------------
df["WCI_per_capita"] = df["WCI"] / df["Population"]
df["WCI_per_GDP"] = df["WCI"] / df["GDP"]

# ---------------------------------------------------
# 7. Final cleaned export
# ---------------------------------------------------
df_final = df[["Country","ISO3","WCI","Population","GDP","WCI_per_capita","WCI_per_GDP"]]

df_final.to_csv("data/df_wci_ready.csv", index=False)
print("Wrote data/df_wci_ready.csv")
df_final.head(20)

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from ipywidgets import VBox, HBox, Dropdown, Output

# ------------------------------------------------------------
# 1. Load datasets
# ------------------------------------------------------------
df_wci = pd.read_csv("data/df_wci_ready.csv")
acc_nat = pd.read_csv("data/accusations_nationality_percent.csv", index_col=0)
acc_res = pd.read_csv("data/accusations_residence_percent.csv", index_col=0)

# Convert accusation matrices into usable dicts
acc_nat = acc_nat.fillna(0)
acc_res = acc_res.fillna(0)

# ------------------------------------------------------------
# 2. Dropdown for metric selection
# ------------------------------------------------------------
metric_dropdown = Dropdown(
    options={
        "Raw WCI": "WCI",
        "WCI per capita": "WCI_per_capita",
        "WCI per GDP": "WCI_per_GDP"
    },
    value="WCI",
    description="Metric:"
)

# Dropdown for accuser mode
accuser_dropdown = Dropdown(
    options={
        "Accusations by nationality": "nat",
        "Accusations by residence": "res"
    },
    value="nat",
    description="Accusers:"
)

# ------------------------------------------------------------
# 3. Output panels
# ------------------------------------------------------------
map_out = Output()
details_out = Output()

# ------------------------------------------------------------
# 4. Function to build choropleth figure
# ------------------------------------------------------------
def make_map(metric):
    fig = px.choropleth(
        df_wci,
        locations="ISO3",
        color=metric,
        hover_name="Country",
        color_continuous_scale=["white","green","blue","red"],
    )
    fig.update_layout(
        title=f"World Cybercrime Index — {metric}",
        clickmode='event+select',
        height=600
    )
    return fig

# ------------------------------------------------------------
# 5. Event handler: when user clicks a country
# ------------------------------------------------------------
def update_details(trace, points, selector):
    if len(points.point_inds) == 0:
        return

    idx = points.point_inds[0]
    country = df_wci.iloc[idx]["Country"]
    iso = df_wci.iloc[idx]["ISO3"]

    # Choose accuser mode
    if accuser_dropdown.value == "nat":
        accdf = acc_nat
    else:
        accdf = acc_res

    # Extract row
    if country in accdf.index:
        row = accdf.loc[country].sort_values(ascending=False)
        row = row[row > 0].head(10)
    else:
        row = pd.Series(dtype=float)

    with details_out:
        details_out.clear_output()

        # Build bar chart for accusers
        if len(row) > 0:
            bar = go.FigureWidget(
                go.Bar(
                    x=row.values,
                    y=row.index,
                    orientation='h',
                    marker_color="crimson"
                )
            )
            bar.update_layout(
                title=f"Top accusators of {country}",
                height=400,
                margin=dict(l=100)
            )
        else:
            bar = "No accuser data found."

        display(
            f"COUNTRY SELECTED: {country} ({iso})",
        )
        display(bar)

# ------------------------------------------------------------
# 6. Function to refresh map when dropdown changes
# ------------------------------------------------------------
def refresh_map(_=None):
    with map_out:
        map_out.clear_output()
        fig = make_map(metric_dropdown.value)

        # Convert to FigureWidget to capture click events
        fw = go.FigureWidget(fig)
        fw.data[0].on_click(update_details)

        display(fw)

# ------------------------------------------------------------
# 7. Connect dropdown events
# ------------------------------------------------------------
metric_dropdown.observe(refresh_map, names="value")
accuser_dropdown.observe(refresh_map, names="value")

# ------------------------------------------------------------
# 8. Render UI
# ------------------------------------------------------------
refresh_map()
ui = VBox([
    HBox([metric_dropdown, accuser_dropdown]),
    map_out,
    details_out
])

ui

In [None]:
!pip install plotly==5.22.0 ipywidgets

In [None]:
bad = world[world["GDP"].apply(lambda x: isinstance(x, str))]
print("Rows with GDP still as string:")
display(bad[["Country", "GDP"]])

In [None]:
import pandas as pd

# ============================================================
# 1. Load recalculated WCI (correct file name)
# ============================================================
wci = pd.read_csv("data/WCI_recacl.csv")  # THIS EXISTS
wci = wci.rename(columns={"WCI Score": "WCI"})
wci = wci[["Country", "WCI"]]
wci["Country_clean"] = wci["Country"].str.lower().str.strip()

# ============================================================
# 2. Load and clean population + GDP
# ============================================================
world = pd.read_csv("data/world-data-2023.csv")
world["Country_clean"] = world["Country"].str.lower().str.strip()

def clean_pop(x):
    return pd.to_numeric(str(x).replace(",", ""), errors="coerce")

world["Population"] = world["Population"].apply(clean_pop)

def clean_gdp(val):
    if pd.isna(val): 
        return None
    v = str(val).strip().replace("$","").replace(",","")
    try:
        return float(v)
    except:
        return None

world["GDP"] = world["GDP"].apply(clean_gdp)

# ============================================================
# 3. Merge WCI with world data
# ============================================================
df_wci = wci.merge(
    world[["Country_clean", "Abbreviation", "Population", "GDP"]],
    on="Country_clean",
    how="left"
)

df_wci = df_wci.rename(columns={"Abbreviation": "ISO3"})

# ============================================================
# 4. Compute normalised metrics
# ============================================================
df_wci["WCI_per_capita"] = df_wci["WCI"] / df_wci["Population"]
df_wci["WCI_per_GDP"]     = df_wci["WCI"] / df_wci["GDP"]

df_wci = df_wci[["Country", "ISO3", "WCI", "Population", "GDP", "WCI_per_capita", "WCI_per_GDP"]]

print("df_wci ready:")
display(df_wci)

# ============================================================
# 5. Build accuser dictionary
# ============================================================
acc_matrix = pd.read_csv("data/accusations_nationality_percent.csv", index_col=0)
acc_matrix = acc_matrix.fillna(0)

accusers = {}
for target in acc_matrix.index:
    row = acc_matrix.loc[target]
    row = row[row > 0].sort_values(ascending=False)
    accusers[target] = list(row.items())

print("\nAccuser example:")
first = list(accusers.keys())[0]
print(first, accusers[first][:5])

# ============================================================
# 6. Save final WCI for choropleth
# ============================================================
df_wci.to_csv("data/df_wci_ready.csv", index=False)
print("\nSaved to data/df_wci_ready.csv")

In [None]:
import pandas as pd

# ============================================================
# 1. Load recalculated WCI scores
# ============================================================
wci = pd.read_csv("data/WCI_recacl.csv").rename(columns={"WCI Score": "WCI"})
wci["Country_clean"] = wci["Country"].str.lower().str.strip()

# ============================================================
# 2. Load world dataset (Pop + GDP + ISO2)
# ============================================================
world = pd.read_csv("data/world-data-2023.csv")
world["Country_clean"] = world["Country"].str.lower().str.strip()
world["ISO2"] = world["Abbreviation"].str.upper()

# ============================================================
# 3. OFFLINE ISO2 → ISO3 lookup table
# ============================================================
iso_lookup = pd.DataFrame({
    "ISO2": [
        "AF","AL","DZ","AD","AO","AR","AM","AU","AT","AZ","BH","BD","BY","BE","BZ",
        "BJ","BT","BO","BA","BW","BR","BN","BG","BF","BI","KH","CM","CA","CF","TD",
        "CL","CN","CO","KM","CG","CD","CR","CI","HR","CU","CY","CZ","DK","DJ","DM",
        "DO","EC","EG","SV","GQ","ER","EE","ET","FJ","FI","FR","GA","GM","GE","DE",
        "GH","GR","GT","GN","GW","GY","HT","HN","HU","IS","IN","ID","IR","IQ","IE",
        "IL","IT","JM","JP","JO","KZ","KE","KI","KP","KR","KW","KG","LA","LV","LB",
        "LS","LR","LY","LT","LU","MG","MW","MY","MV","ML","MT","MR","MU","MX","MD",
        "MN","ME","MA","MZ","MM","NA","NP","NL","NZ","NI","NE","NG","NO","OM","PK",
        "PA","PG","PY","PE","PH","PL","PT","QA","RO","RU","RW","SA","SN","RS","SC",
        "SL","SG","SK","SI","SB","SO","ZA","SS","ES","LK","SD","SR","SE","CH","SY",
        "TW","TJ","TZ","TH","TL","TO","TT","TN","TR","TM","UG","UA","AE","GB","UY",
        "US","UZ","VU","VE","VN","YE","ZM","ZW"
    ],
    "ISO3": [
        "AFG","ALB","DZA","AND","AGO","ARG","ARM","AUS","AUT","AZE","BHR","BGD","BLR","BEL","BLZ",
        "BEN","BTN","BOL","BIH","BWA","BRA","BRN","BGR","BFA","BDI","KHM","CMR","CAN","CAF","TCD",
        "CHL","CHN","COL","COM","COG","COD","CRI","CIV","HRV","CUB","CYP","CZE","DNK","DJI","DMA",
        "DOM","ECU","EGY","SLV","GNQ","ERI","EST","ETH","FJI","FIN","FRA","GAB","GMB","GEO","DEU",
        "GHA","GRC","GTM","GIN","GNB","GUY","HTI","HND","HUN","ISL","IND","IDN","IRN","IRQ","IRL",
        "ISR","ITA","JAM","JPN","JOR","KAZ","KEN","KIR","PRK","KOR","KWT","KGZ","LAO","LVA","LBN",
        "LSO","LBR","LBY","LTU","LUX","MDG","MWI","MYS","MDV","MLI","MLT","MRT","MUS","MEX","MDA",
        "MNG","MNE","MAR","MOZ","MMR","NAM","NPL","NLD","NZL","NIC","NER","NGA","NOR","OMN","PAK",
        "PAN","PNG","PRY","PER","PHL","POL","PRT","QAT","ROU","RUS","RWA","SAU","SEN","SRB","SYC",
        "SLE","SGP","SVK","SVN","SLB","SOM","ZAF","SSD","ESP","LKA","SDN","SUR","SWE","CHE","SYR",
        "TWN","TJK","TZA","THA","TLS","TON","TTO","TUN","TUR","TKM","UGA","UKR","ARE","GBR","URY",
        "USA","UZB","VUT","VEN","VNM","YEM","ZMB","ZWE"
    ]
})

# Merge ISO3 into world data
world = world.merge(iso_lookup, on="ISO2", how="left")

# ============================================================
# 4. Merge WCI <-> world dataset
# ============================================================
df_wci = wci.merge(
    world[["Country_clean", "ISO3", "Population", "GDP"]],
    on="Country_clean",
    how="left"
)

# ============================================================
# 5. Convert GDP + Population to numeric
# ============================================================
def clean_numeric(x):
    if isinstance(x, str):
        x = x.replace("$","").replace(",","").strip()
    try:
        return float(x)
    except:
        return None

df_wci["GDP"] = df_wci["GDP"].apply(clean_numeric)
df_wci["Population"] = df_wci["Population"].apply(clean_numeric)

# ============================================================
# 6. Normalised metrics
# ============================================================
df_wci["WCI_per_capita"] = df_wci["WCI"] / df_wci["Population"]
df_wci["WCI_per_GDP"] = df_wci["WCI"] / df_wci["GDP"]

df_wci = df_wci[["Country", "ISO3", "WCI", "Population", "GDP",
                 "WCI_per_capita", "WCI_per_GDP"]]

print("df_wci ready:")
display(df_wci.head())

# ============================================================
# 7. Accuser matrix → dictionary
# ============================================================
acc_matrix = pd.read_csv("data/accusations_nationality_percent.csv")

accusers = {}
for country in acc_matrix["Country"].unique():
    row = acc_matrix[acc_matrix["Country"] == country].drop(columns=["Country"]).iloc[0]
    pairs = [(col, float(val)) for col, val in row.items() if float(val) > 0]
    accusers[country] = sorted(pairs, key=lambda x: x[1], reverse=True)

print("\nAccuser example:")
ex = list(accusers.keys())[0]
print(ex, accusers[ex][:5])

# Optional: Save cleaned WCI for Plotly
df_wci.to_csv("data/df_wci_ready.csv", index=False)
print("\nSaved df_wci_ready.csv")

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from ipywidgets import VBox, HBox, Dropdown, Output, Button

# ============================================================
# 1. DROPDOWNS + RESET BUTTON
# ============================================================

metric_dropdown = Dropdown(
    options={
        "Raw WCI": "WCI",
        "WCI per capita": "WCI_per_capita",
        "WCI per GDP": "WCI_per_GDP",
    },
    value="WCI",
    description="Metric:",
    style={'description_width': '80px'},
    layout=dict(width="250px")
)

acc_dropdown = Dropdown(
    options={
        "By nationality": "nationality",
        "By residence": "residence"
    },
    value="nationality",
    description="Accusers:",
    style={'description_width': '80px'},
    layout=dict(width="250px")
)

reset_button = Button(
    description="Reset",
    button_style="warning",
    layout=dict(width="120px"),
    tooltip="Clear selection and reset map"
)

controls = HBox([metric_dropdown, acc_dropdown, reset_button])


# ============================================================
# 2. MAKE MAP → Returns a NEW FigureWidget each time
# ============================================================

def make_map(metric):
    fig = px.choropleth(
        df_wci,
        locations="ISO3",
        color=metric,
        hover_name="Country",
        color_continuous_scale=["white", "green", "blue", "red"],
    )
    fig.update_layout(
        margin=dict(l=0, r=0, t=0, b=0),
        height=550,
        clickmode='event+select'
    )
    return go.FigureWidget(fig)


# initial map
fig_map = make_map(metric_dropdown.value)


# ============================================================
# 3. BAR CHART OUTPUT AREA
# ============================================================

bar_out = Output()


# ============================================================
# 4. CLICK HANDLER
# ============================================================

def update_bar(country):
    bar_out.clear_output(wait=True)
    with bar_out:
        metric = metric_dropdown.value
        val = df_wci.loc[df_wci["Country"] == country, metric].values[0]
        print(f"=== {country} ===")
        print(f"{metric}: {val:.4g}\n")

        # select accusers source
        source = acc_dropdown.value
        data = accusers.get(country, []) if source == "nationality" else accusers_residence.get(country, [])

        if not data:
            print("No accusation data.")
            return

        labels = [x[0] for x in data][:12]
        values = [float(x[1]) for x in data][:12]

        bar = go.FigureWidget(
            data=[go.Bar(
                x=values[::-1],
                y=labels[::-1],
                orientation="h",
                marker=dict(color="crimson")
            )]
        )
        bar.update_layout(
            title=f"Top accusers of {country}",
            margin=dict(l=140, r=40, t=40, b=40),
            height=400
        )
        display(bar)


def attach_click(fig):
    """Attach click handler safely to the NEW map."""
    def _click(trace, points, selector):
        if points.point_inds:
            idx = points.point_inds[0]
            country = df_wci.iloc[idx]["Country"]
            update_bar(country)
    fig.data[0].on_click(_click)


# Attach handler to initial map
attach_click(fig_map)


# ============================================================
# 5. RESET BUTTON
# ============================================================

def reset_action(btn):
    bar_out.clear_output()

    # rebuild NEW map
    new_fig = make_map(metric_dropdown.value)
    attach_click(new_fig)

    # replace widget content
    map_out.children = (new_fig,)
    

reset_button.on_click(reset_action)


# ============================================================
# 6. METRIC CHANGE → rebuild map cleanly
# ============================================================

def metric_change(ev):
    if ev["name"] != "value":
        return

    new_fig = make_map(ev["new"])
    attach_click(new_fig)
    map_out.children = (new_fig,)

metric_dropdown.observe(metric_change)


# ============================================================
# 7. DISPLAY EVERYTHING
# ============================================================

map_out = VBox([fig_map])
ui = VBox([controls, map_out, bar_out])
ui

In [None]:
import pandas as pd

# ------------------------------------------------------------------
# 1. We assume df_wci is already loaded and has Country_clean
#    (as in your printout above)
# ------------------------------------------------------------------
print("df_wci columns:", df_wci.columns.tolist())

# Sanity: make sure Country_clean exists
if "Country_clean" not in df_wci.columns:
    raise RuntimeError("df_wci must already have a 'Country_clean' column.")


# ------------------------------------------------------------------
# 2. Load raw survey data and build respondent counts
# ------------------------------------------------------------------
raw = pd.read_csv("data/wci_data.csv").fillna("")

def clean_country(x: str) -> str:
    s = str(x).strip()
    if s == "" or s.lower() in {"nan", "none"}:
        return "--"
    return s

# Clean nationality & residence to same format as df_wci.Country_clean
raw["Nat_clean"] = raw["Nationality"].map(clean_country)
raw["Res_clean"] = raw["Residence"].map(clean_country)

# Count respondents by nationality and by residence
respondents_nat = (
    raw.groupby("Nat_clean", dropna=False)
       .size()
       .reset_index(name="respondents_nat")
       .rename(columns={"Nat_clean": "Country_clean"})
)

respondents_res = (
    raw.groupby("Res_clean", dropna=False)
       .size()
       .reset_index(name="respondents_res")
       .rename(columns={"Res_clean": "Country_clean"})
)

print("\nRespondent counts by nationality (head):")
display(respondents_nat.head())

print("\nRespondent counts by residence (head):")
display(respondents_res.head())


# ------------------------------------------------------------------
# 3. Merge respondent counts into df_wci
# ------------------------------------------------------------------
df_wci = (
    df_wci
    .merge(respondents_nat, on="Country_clean", how="left")
    .merge(respondents_res, on="Country_clean", how="left")
)

# Replace NaN with 0 and cast to int
df_wci["respondents_nat"] = df_wci["respondents_nat"].fillna(0).astype(int)
df_wci["respondents_res"] = df_wci["respondents_res"].fillna(0).astype(int)

print("\ndf_wci with respondent counts merged:")
display(df_wci.head(20))

# ------------------------------------------------------------------
# 4. Save enriched table for downstream use
# ------------------------------------------------------------------
df_wci.to_csv("data/df_wci_with_respondents.csv", index=False)
print("\nSaved → data/df_wci_with_respondents.csv")

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from ipywidgets import VBox, HBox, Dropdown, Button, Output

# -------------------------------------------------------------
# 1. Load enriched WCI dataset
# -------------------------------------------------------------
df_wci = pd.read_csv("data/df_wci_with_respondents.csv")

# Ensure ISO3 exists
if "ISO3" not in df_wci.columns:
    raise RuntimeError("df_wci_with_respondents.csv must contain ISO3 codes.")

# Country selection output
info_out = Output()

# -------------------------------------------------------------
# 2. Load accusation matrices
# -------------------------------------------------------------
acc_nat = pd.read_csv("data/accusations_nationality_percent.csv")
acc_res = pd.read_csv("data/accusations_residence_percent.csv")

# Convert wide → dict of {country: [(accuser, %), ...]}
def build_accuser_dict(df):
    acc = {}
    for _, row in df.iterrows():
        country = row["Country"]
        items = []
        for col in df.columns:
            if col == "Country":
                continue
            try:
                v = float(row[col])
            except:
                v = 0
            if v > 0:
                items.append((col, v))
        items.sort(key=lambda x: x[1], reverse=True)
        acc[country] = items
    return acc

accusers_nat = build_accuser_dict(acc_nat)
accusers_res = build_accuser_dict(acc_res)

# -------------------------------------------------------------
# 3. Dropdown for metric and accuser source
# -------------------------------------------------------------
metric_dropdown = Dropdown(
    options={
        "Raw WCI": "WCI",
        "WCI per capita": "WCI_per_capita",
        "WCI per GDP": "WCI_per_GDP",
        "Respondents (nationality)": "respondents_nat",
        "Respondents (residence)": "respondents_res",
    },
    value="WCI",
    description="Metric:"
)

accuser_dropdown = Dropdown(
    options={
        "By nationality": "nat",
        "By residence": "res"
    },
    value="nat",
    description="Accusers:"
)

reset_btn = Button(description="Reset", button_style="warning")


# -------------------------------------------------------------
# 4. Build initial map
# -------------------------------------------------------------
def make_map(metric):
    return px.choropleth(
        df_wci,
        locations="ISO3",
        color=metric,
        hover_name="Country",
        color_continuous_scale=["white", "green", "blue", "red"],
    )

map_fig = make_map("WCI")
map_widget = go.FigureWidget(map_fig)


# -------------------------------------------------------------
# 5. Update map when metric changes
# -------------------------------------------------------------
def update_map(change):
    metric = metric_dropdown.value
    with map_widget.batch_update():
        map_widget.data[0].z = df_wci[metric]
        map_widget.layout.coloraxis.colorbar.title = metric

metric_dropdown.observe(update_map, names="value")


# -------------------------------------------------------------
# 6. Click handler & bar chart
# -------------------------------------------------------------
bar_out = Output()

def on_click(trace, points, selector):
    bar_out.clear_output()
    info_out.clear_output()

    if not points.point_inds:
        return

    idx = points.point_inds[0]
    row = df_wci.iloc[idx]
    country = row["Country"]

    # Select accusation source
    acc_source = accuser_dropdown.value
    acc_dict = accusers_nat if acc_source == "nat" else accusers_res

    if country not in acc_dict:
        with info_out:
            print(f"No accusation data for {country}")
        return

    acc_list = acc_dict[country][:10]  # top 10
    acc_users = [a for a, v in acc_list]
    acc_vals  = [v for a, v in acc_list]

    # Normalise by respondent count
    if acc_source == "nat":
        denom = row["respondents_nat"] if row["respondents_nat"] > 0 else 1
    else:
        denom = row["respondents_res"] if row["respondents_res"] > 0 else 1

    norm_vals = [(v/denom) * 100 for v in acc_vals]

    with info_out:
        print(f"=== {country} ===")
        print(f"Metric value: {row[metric_dropdown.value]}")
        print(f"\nTop accusers (normalised by respondents):")

    # Bar chart
    with bar_out:
        fig = go.FigureWidget(
            data=[
                go.Bar(
                    x=norm_vals,
                    y=acc_users,
                    orientation="h",
                    marker_color="#b30000"
                )
            ]
        )
        fig.update_layout(
            title=f"Top accusers of {country} (normalised)",
            height=400,
            margin=dict(l=150),
        )
        display(fig)

map_widget.data[0].on_click(on_click)


# -------------------------------------------------------------
# 7. Reset button clears panels
# -------------------------------------------------------------
def do_reset(btn):
    info_out.clear_output()
    bar_out.clear_output()

reset_btn.on_click(do_reset)


# -------------------------------------------------------------
# 8. UI layout
# -------------------------------------------------------------
ui = VBox([
    HBox([metric_dropdown, accuser_dropdown, reset_btn]),
    map_widget,
    info_out,
    bar_out
])

ui

In [None]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from ipywidgets import VBox, HBox, Dropdown, Button, Output

# ============================================================
# 1. LOAD BASE DATA
# ============================================================
df_wci = pd.read_csv("data/df_wci_ready.csv")
acc_nat = pd.read_csv("data/accusations_nationality_percent.csv")
acc_res = pd.read_csv("data/accusations_residence_percent.csv")
respondents_raw = pd.read_csv("data/wci_data.csv")

df_wci["Country_clean"] = df_wci["Country"].str.lower().str.strip()

# ============================================================
# 2. RESPONDENT COUNTS
# ============================================================
respondents_raw["Nationality_clean"] = respondents_raw["Nationality"].fillna("").str.lower().str.strip()
respondents_raw["Residence_clean"]  = respondents_raw["Residence"].fillna("").str.lower().str.strip()

resp_nat = respondents_raw.groupby("Nationality_clean").size().rename("respondents_nat").reset_index()
resp_res = respondents_raw.groupby("Residence_clean").size().rename("respondents_res").reset_index()

df_wci = df_wci.merge(resp_nat, left_on="Country_clean", right_on="Nationality_clean", how="left")
df_wci = df_wci.merge(resp_res, left_on="Country_clean", right_on="Residence_clean", how="left")

df_wci["respondents_nat"] = df_wci["respondents_nat"].fillna(0).astype(int)
df_wci["respondents_res"] = df_wci["respondents_res"].fillna(0).astype(int)

df_wci = df_wci.drop(columns=["Nationality_clean","Residence_clean"], errors="ignore")

# ============================================================
# 3. BUILD ACCUSER DICTIONARIES
# ============================================================
def build_acc_dict(df):
    acc = {}
    for country in df["Country"].unique():
        row = df[df["Country"] == country].drop(columns=["Country"]).iloc[0]
        items = [(col, float(val)) for col, val in row.items() if float(val) > 0]
        acc[country] = sorted(items, key=lambda x: x[1], reverse=True)
    return acc

acc_users_nat = build_acc_dict(acc_nat)
acc_users_res = build_acc_dict(acc_res)

# ============================================================
# 4. WIDGETS
# ============================================================
metric_dropdown = Dropdown(
    options={
        "Raw WCI": "WCI",
        "WCI per capita": "WCI_per_capita",
        "WCI per GDP": "WCI_per_GDP",
        "Respondents (nat.)": "respondents_nat",
        "Respondents (res.)": "respondents_res",
    },
    value="WCI",
    description="Metric:",
    style={"description_width": "100px"}
)

accuser_dropdown = Dropdown(
    options={"By nationality": "nat", "By residence": "res"},
    value="nat",
    description="Accusers:",
    style={"description_width": "100px"}
)

reset_button = Button(
    description="Reset",
    button_style="warning",
    layout={"width": "120px"}
)

info_out = Output()
bar_out = Output()

# ============================================================
# 5. BASE CHOROPLETH
# ============================================================
map_fig = px.choropleth(
    df_wci,
    locations="ISO3",
    color="WCI",
    hover_name="Country",
    color_continuous_scale=["white","green","blue","red"],
    title="World Cybercrime Index (Interactive)"
)

map_widget = go.FigureWidget(map_fig)
map_widget.update_layout(clickmode="event+select")

# Correct: use numeric height
map_widget.layout.height = 700   # FIXED
# No width setting needed — Jupyter expands automatically.

# ============================================================
# 6. CLICK HANDLER
# ============================================================
def on_click(trace, points, selector):
    info_out.clear_output()
    bar_out.clear_output()

    if not points.point_inds:
        return

    row = df_wci.iloc[points.point_inds[0]]
    country = row["Country"]
    metric = metric_dropdown.value
    acc_src = accuser_dropdown.value

    acc_dict = acc_users_nat if acc_src == "nat" else acc_users_res

    if country not in acc_dict:
        with info_out:
            print(f"No accusation data for {country}")
        return

    acc_list = acc_dict[country][:10]
    acc_users = [a for a, v in acc_list]
    acc_percent = [v for a, v in acc_list]

    denom = row["respondents_nat"] if acc_src == "nat" else row["respondents_res"]
    if denom == 0:
        denom = 1

    raw_counts = [(p / 100) * denom for p in acc_percent]
    norm_percent = [(c / denom) * 100 for c in raw_counts]

    acc_users = acc_users[::-1]
    norm_percent = norm_percent[::-1]

    with info_out:
        print(f"=== {country} ===")
        print(f"Metric value: {row[metric]}")
        print("\nTop accusers (normalised by respondents):\n")

    with bar_out:
        fig = go.FigureWidget()
        fig.add_bar(
            x=norm_percent,
            y=acc_users,
            orientation="h",
            marker_color="#aa0000"
        )
        fig.update_layout(
            title=f"Top accusers of {country} (normalised)",
            height=450,
            margin=dict(l=160),
            xaxis_title="% of respondents"
        )
        display(fig)

for trace in map_widget.data:
    trace.on_click(on_click)

# ============================================================
# 7. RESET BUTTON
# ============================================================
def reset_clicked(_):
    info_out.clear_output()
    bar_out.clear_output()
    map_widget.data = map_fig.data

reset_button.on_click(reset_clicked)

# ============================================================
# 8. DISPLAY UI
# ============================================================
ui = VBox([
    HBox([metric_dropdown, accuser_dropdown, reset_button]),
    map_widget,
    info_out,
    bar_out
])

ui

In [None]:
import pandas as pd
from collections import defaultdict

# ============================================================
# 1. Load raw survey data
# ============================================================
df = pd.read_csv("data/wci_data.csv")
df = df.fillna("")

# These are the 5 crime categories, each with 5 nominations
CATEGORIES = ["Technical", "Attack", "Data", "Scams", "Cash"]

NOM_COLS = []
for cat in CATEGORIES:
    for i in range(1, 6):
        NOM_COLS.append(f"{cat}{i}")

# ============================================================
# 2. Helper: Extract all nominations per respondent
# ============================================================
def extract_nominations(row):
    noms = []
    for col in NOM_COLS:
        val = str(row[col]).strip()
        if val not in ["", "--"]:   # keep intentional blanks out
            noms.append(val)
    return noms

df["nominations"] = df.apply(extract_nominations, axis=1)

# ============================================================
# 3. Compute respondent counts per nationality / residence
# ============================================================
resp_nat = df["Nationality"].value_counts().to_dict()
resp_res = df["Residence"].value_counts().to_dict()

# Countries with zero respondents must still appear if they get nominated
all_possible_targets = set()
for row in df["nominations"]:
    all_possible_targets.update(row)

all_possible_nationalities = set(resp_nat.keys()) | set(all_possible_targets)
all_possible_residences   = set(resp_res.keys()) | set(all_possible_targets)

# ============================================================
# 4. Build accusation matrices (raw counts)
# ============================================================
# nationality-based: A (nationality) → T (target country)
acc_nat_counts = {A: defaultdict(int) for A in all_possible_nationalities}

# residence-based: A (residence) → T (target country)
acc_res_counts = {A: defaultdict(int) for A in all_possible_residences}

for _, row in df.iterrows():
    nat = row["Nationality"]
    res = row["Residence"]
    nominations = row["nominations"]

    for target in nominations:
        # Count nationality accusations
        acc_nat_counts[nat][target] += 1

        # Count residence accusations
        acc_res_counts[res][target] += 1

# ============================================================
# 5. Convert raw counts -> accusing factor:
#       P(accuse T | respondent from A)
#       = raw_counts[A→T] / total_respondents_from_A
# ============================================================

def build_factor_matrix(acc_count_dict, respondent_dict, all_As, all_Ts):
    data = []
    for A in sorted(all_As):
        row = {"Country": A}

        total_resp = respondent_dict.get(A, 0)

        for T in sorted(all_Ts):
            raw = acc_count_dict[A].get(T, 0)

            if total_resp > 0:
                factor = raw / total_resp   # THIS IS YOUR METRIC
            else:
                factor = 0.0

            row[T] = factor * 100   # express as % for readability

        data.append(row)

    return pd.DataFrame(data)

# Build matrices
acc_nat_factor_df = build_factor_matrix(
    acc_nat_counts,
    resp_nat,
    all_possible_nationalities,
    all_possible_targets
)

acc_res_factor_df = build_factor_matrix(
    acc_res_counts,
    resp_res,
    all_possible_residences,
    all_possible_targets
)

# ============================================================
# 6. Save CSVs
# ============================================================
acc_nat_factor_df.to_csv("data/accusing_factor_nationality.csv", index=False)
acc_res_factor_df.to_csv("data/accusing_factor_residence.csv", index=False)

print("ACCUSE FACTOR FILES CREATED:")
print(" - accusing_factor_nationality.csv")
print(" - accusing_factor_residence.csv")

In [None]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from ipywidgets import VBox, HBox, Dropdown, Button, Output

# ============================================================
# 1. Load data
# ============================================================
df_wci = pd.read_csv("data/df_wci_ready.csv")
df_wci["Country_clean"] = df_wci["Country"].str.strip().str.lower()

# Guarantee respondent columns
for col in ["respondents_nat", "respondents_res"]:
    if col not in df_wci.columns:
        df_wci[col] = 0
df_wci["respondents_nat"] = df_wci["respondents_nat"].fillna(0)
df_wci["respondents_res"] = df_wci["respondents_res"].fillna(0)

# Accusing factors
acc_nat_factor = pd.read_csv("data/accusing_factor_nationality.csv")
acc_res_factor = pd.read_csv("data/accusing_factor_residence.csv")
for acc_df in (acc_nat_factor, acc_res_factor):
    acc_df["Country_clean"] = acc_df["Country"].str.strip().str.lower()

# ============================================================
# 2. Widgets
# ============================================================
metric_options = {
    "WCI": "WCI",
    "WCI per capita": "WCI_per_capita",
    "WCI per GDP": "WCI_per_GDP",
    "Respondents (nat.)": "respondents_nat",
    "Respondents (res.)": "respondents_res",
}

metric_dropdown = Dropdown(
    options=list(metric_options.keys()),
    value="WCI",
    description="Metric:"
)

accuser_dropdown = Dropdown(
    options=[("By nationality","nat"), ("By residence","res")],
    value="nat",
    description="Accusers:"
)

reset_button = Button(
    description="Reset",
    button_style="warning",
    layout={"width":"120px"}
)

output = Output()

# ============================================================
# 3. Sanitise Z-values
# ============================================================
def safe_z(series):
    z = series.replace([np.inf,-np.inf], np.nan)
    z = z.fillna(0)           # << IMPORTANT FOR RESPONDENTS METRICS
    return z.to_numpy().astype(float)

# ============================================================
# 4. Colour scale: green → blue → red (red = max)
# ============================================================
colour_scale = [
    [0.0, "#16a34a"],   # green
    [0.5, "#2563eb"],   # blue
    [1.0, "#dc2626"],   # red
]

# ============================================================
# 5. Build map
# ============================================================
def build_map(metric_label):
    col = metric_options[metric_label]
    z_vals = safe_z(df_wci[col])

    # Avoid min=max crash
    z_min = float(np.min(z_vals))
    z_max = float(np.max(z_vals))
    if z_max == z_min:
        z_max = z_min + 1e-9

    fig = go.Figure(
        go.Choropleth(
            locations=df_wci["ISO3"],
            z=z_vals,
            text=df_wci["Country"],
            customdata=df_wci["Country"],
            colorscale=colour_scale,
            zmin=z_min,
            zmax=z_max,
            colorbar_title=metric_label,
            marker_line_color="black",
            marker_line_width=0.4,
            hovertemplate="<b>%{text}</b><br>" + metric_label + ": %{z}<extra></extra>"
        )
    )

    fig.update_layout(
        title=f"World Cybercrime Index — {metric_label}",
        geo=dict(
            projection_type="natural earth",
            showframe=False,
            showcoastlines=True,
            coastlinecolor="rgb(80,80,80)",
            landcolor="rgb(240,248,255)",
            oceancolor="rgb(225,235,245)",
            showocean=True,
        ),
        margin=dict(l=20, r=20, t=60, b=20),
        height=550
    )
    return fig

fig_w = go.FigureWidget(build_map(metric_dropdown.value))

# ============================================================
# 6. Update map on metric change
# ============================================================
def update_map(*_):
    metric_label = metric_dropdown.value
    col = metric_options[metric_label]
    
    z_vals = safe_z(df_wci[col])
    z_min = float(np.min(z_vals))
    z_max = float(np.max(z_vals))
    if z_max == z_min:
        z_max = z_min + 1e-9

    fig_w.data[0].z = z_vals
    fig_w.data[0].zmin = z_min
    fig_w.data[0].zmax = z_max
    fig_w.data[0].colorbar.title = metric_label
    fig_w.data[0].hovertemplate = "<b>%{text}</b><br>" + metric_label + ": %{z}<extra></extra>"
    fig_w.layout.title.text = f"World Cybercrime Index — {metric_label}"

metric_dropdown.observe(update_map, names="value")

# ============================================================
# 7. Click handler → accusers bar chart
# ============================================================
def on_click(trace, points, selector):
    if not points.point_inds:
        return
    idx = points.point_inds[0]

    country = df_wci.loc[idx,"Country"]
    country_clean = df_wci.loc[idx,"Country_clean"]

    metric_label = metric_dropdown.value
    col = metric_options[metric_label]
    metric_val = df_wci.loc[idx,col]

    acc_df = acc_nat_factor if accuser_dropdown.value=="nat" else acc_res_factor
    row = acc_df[acc_df["Country_clean"] == country_clean]

    with output:
        output.clear_output(wait=True)
        print(f"=== {country} ===")
        print(f"{metric_label}: {metric_val:.4g}\n")

        if row.empty:
            print("No accuser data.")
            return
        
        r = row.drop(columns=["Country","Country_clean"]).T
        r.columns=["factor"]
        r = r[r["factor"]>0]
        if r.empty:
            print("No non-zero accusations.")
            return

        r["percent"]=r["factor"] * 100
        r = r.sort_values("percent",ascending=False).reset_index()
        r = r.rename(columns={"index":"Accuser"})

        bar = go.Figure(
            go.Bar(
                x=r["percent"],
                y=r["Accuser"],
                orientation="h",
                marker_color="crimson",
                hovertemplate="%{y}: %{x:.1f}%<extra></extra>"
            )
        )
        bar.update_layout(
            title=f"Top accusers of {country} (normalised)",
            xaxis_title="Percentage of respondents",
            margin=dict(l=120,r=40,t=40,b=40),
            height=420
        )
        bar.update_yaxes(autorange="reversed")
        display(bar)

fig_w.data[0].on_click(on_click)

# ============================================================
# 8. Reset button
# ============================================================
def on_reset(_):
    metric_dropdown.value = "WCI"
    accuser_dropdown.value = "nat"
    update_map()
    with output:
        output.clear_output()

reset_button.on_click(on_reset)

# ============================================================
# 9. UI
# ============================================================
ui = VBox([
    HBox([metric_dropdown, accuser_dropdown, reset_button]),
    fig_w,
    output
])

ui

In [None]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import plotly.graph_objects as go
from ipywidgets import VBox, HBox, Dropdown, Button, Output

# ------------------------------------------------------------
# 1. Load data
# ------------------------------------------------------------
# df_wci must already exist in memory at this point.
# If not, uncomment and adjust this:
# df_wci = pd.read_csv("data/df_wci_ready.csv")

# Accuser “factor” tables (already normalised 0–100)
acc_nat = pd.read_csv("data/accusing_factor_nationality.csv")
acc_res = pd.read_csv("data/accusing_factor_residence.csv")

# Normalise country names for joins / lookups
df_wci["Country_clean"] = df_wci["Country"].str.strip().str.lower()
acc_nat["Country_clean"] = acc_nat["Country"].str.strip().str.lower()
acc_res["Country_clean"] = acc_res["Country"].str.strip().str.lower()

acc_nat.set_index("Country_clean", inplace=True)
acc_res.set_index("Country_clean", inplace=True)

# ------------------------------------------------------------
# 2. Metric config & colour scale (green → blue → red)
# ------------------------------------------------------------

metric_options = {
    "WCI":              "WCI",
    "WCI per capita":   "WCI_per_capita",
    "WCI per GDP":      "WCI_per_GDP",
    "Respondents (nat.)": "respondents_nat",
    "Respondents (res.)": "respondents_res",
}

# Natural-ish green → blue → red
gbr_colorscale = [
    [0.0,  "#006400"],   # dark green
    [0.5,  "#1e90ff"],   # dodger blue
    [1.0,  "#b22222"],   # firebrick red
]

# Global z-ranges per metric (so respondents use absolute counts, US is “hot”)
z_ranges = {}
for label, col in metric_options.items():
    vals = pd.to_numeric(df_wci[col], errors="coerce")
    z_ranges[label] = (float(vals.min(skipna=True)), float(vals.max(skipna=True)))

# ------------------------------------------------------------
# 3. Map builder
# ------------------------------------------------------------

def build_map(metric_label: str) -> go.Figure:
    col = metric_options[metric_label]
    vals = pd.to_numeric(df_wci[col], errors="coerce").fillna(0)

    zmin, zmax = z_ranges[metric_label]

    fig = go.Figure(
        go.Choropleth(
            locations=df_wci["ISO3"],
            z=vals,
            text=df_wci["Country"],
            customdata=df_wci["Country"],   # used by click handler
            colorscale=gbr_colorscale,
            zmin=zmin,
            zmax=zmax,
            colorbar_title=metric_label,
            marker_line_color="black",
            marker_line_width=0.4,
            hovertemplate="<b>%{text}</b><br>" +
                          f"{metric_label}: %{z:.4g}<extra></extra>",
        )
    )

    fig.update_layout(
        title=f"World Cybercrime Index — {metric_label}",
        width=1200,
        height=650,
        geo=dict(
            showframe=False,
            showcoastlines=True,
            projection_type="natural earth",  # the layout you liked
            bgcolor="rgba(0,0,0,0)",
        ),
        margin=dict(t=60, b=10, l=10, r=10),
    )
    return fig

# ------------------------------------------------------------
# 4. Widgets
# ------------------------------------------------------------

metric_dropdown = Dropdown(
    options=list(metric_options.keys()),
    value="WCI",
    description="Metric:",
)

accuser_dropdown = Dropdown(
    options=["By nationality", "By residence"],
    value="By nationality",
    description="Accusers:",
)

reset_button = Button(description="Reset", button_style="")
output = Output()

# Create the map widget
fig_widget = go.FigureWidget(build_map(metric_dropdown.value))

# ------------------------------------------------------------
# 5. Helper: rebuild map in-place (so callbacks keep working)
# ------------------------------------------------------------

def rebuild_map(metric_label: str):
    """Rebuild the choropleth inside fig_widget, reattaching the click callback."""
    new_fig = build_map(metric_label)

    with fig_widget.batch_update():
        fig_widget.data = ()  # clear traces
        for tr in new_fig.data:
            fig_widget.add_trace(tr)
        # copy layout (but keep width/height if you want to override)
        fig_widget.layout.update(new_fig.layout)

    # reattach click callback after replacing trace
    if fig_widget.data:
        fig_widget.data[0].on_click(on_click)

# ------------------------------------------------------------
# 6. Click handler: show top accusers bar chart
# ------------------------------------------------------------

def on_click(trace, points, selector):
    if not points.point_inds:
        return

    idx = points.point_inds[0]
    country = df_wci.iloc[idx]["Country"]
    country_key = df_wci.iloc[idx]["Country_clean"]

    metric_label = metric_dropdown.value
    col = metric_options[metric_label]
    metric_val = df_wci.iloc[idx][col]

    # Choose which accuser table
    table = acc_nat if accuser_dropdown.value == "By nationality" else acc_res

    with output:
        output.clear_output(wait=True)

        print(f"=== {country} ===")
        print(f"{metric_label}: {metric_val:.4g}\n")

        if country_key not in table.index:
            print("No accuser data available for this country.")
            return

        row = table.loc[country_key].drop(labels=["Country"], errors="ignore")
        # row is 1D, index = accuser countries, values = percentage or factor
        series = row.astype(float).sort_values(ascending=False)

        # top 10
        top = series.head(10)
        if top.sum() == 0:
            print("No accusations recorded for this country.")
            return

        bar_fig = go.Figure(
            go.Bar(
                x=top.values,
                y=top.index,
                orientation="h",
                marker_color="#b22222",
            )
        )
        bar_fig.update_layout(
            title=f"Top accusers of {country} (normalised)",
            xaxis_title="Accusing factor (normalised to % of that country's respondents)",
            yaxis_title="Accuser country",
            height=400,
            margin=dict(l=120, r=20, t=40, b=40),
            yaxis=dict(autorange="reversed"),  # biggest at top
        )

        bar_fig.show()

# Attach initial click callback
if fig_widget.data:
    fig_widget.data[0].on_click(on_click)

# ------------------------------------------------------------
# 7. Callbacks for widgets
# ------------------------------------------------------------

def on_metric_change(change):
    if change["name"] == "value":
        rebuild_map(change["new"])
        # also clear the lower panel when metric changes
        with output:
            output.clear_output(wait=True)

def on_reset_clicked(_):
    metric_dropdown.value = "WCI"
    accuser_dropdown.value = "By nationality"
    rebuild_map("WCI")
    with output:
        output.clear_output(wait=True)

metric_dropdown.observe(on_metric_change, names="value")
reset_button.on_click(on_reset_clicked)

# ------------------------------------------------------------
# 8. Layout
# ------------------------------------------------------------

ui = VBox([
    HBox([metric_dropdown, accuser_dropdown, reset_button]),
    fig_widget,
    output,
])

ui

In [None]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import plotly.graph_objects as go
from ipywidgets import VBox, HBox, Dropdown, Button, Output


# ------------------------------------------------------------
# 1. Load required data
# ------------------------------------------------------------
# df_wci must already exist with:
# ['Country','ISO3','WCI','Population','GDP','WCI_per_capita',
#  'WCI_per_GDP','respondents_nat','respondents_res']

# If needed, uncomment:
# df_wci = pd.read_csv("data/df_wci_ready.csv")

acc_nat = pd.read_csv("data/accusing_factor_nationality.csv")
acc_res = pd.read_csv("data/accusing_factor_residence.csv")

df_wci["Country_clean"] = df_wci["Country"].str.strip().str.lower()
acc_nat["Country_clean"] = acc_nat["Country"].str.strip().str.lower()
acc_res["Country_clean"] = acc_res["Country"].str.strip().str.lower()

acc_nat = acc_nat.set_index("Country_clean")
acc_res = acc_res.set_index("Country_clean")


# ------------------------------------------------------------
# 2. Metric definitions + colour scale
# ------------------------------------------------------------

metric_options = {
    "WCI":                "WCI",
    "WCI per capita":     "WCI_per_capita",
    "WCI per GDP":        "WCI_per_GDP",
    "Respondents (nat.)": "respondents_nat",
    "Respondents (res.)": "respondents_res",
}

# Green → Blue → Red (low → mid → high)
gbr_colorscale = [
    [0.0,  "#006400"],     # dark green
    [0.50, "#1e90ff"],     # blue
    [1.0,  "#b22222"],     # red
]

# Compute z-ranges so each metric has stable min/max
z_ranges = {}
for label, col in metric_options.items():
    vals = pd.to_numeric(df_wci[col], errors="coerce")
    z_ranges[label] = (float(vals.min(skipna=True)), float(vals.max(skipna=True)))


# ------------------------------------------------------------
# 3. Function to build the choropleth map
# ------------------------------------------------------------

def build_map(metric_label: str) -> go.Figure:
    col = metric_options[metric_label]
    vals = pd.to_numeric(df_wci[col], errors="coerce").fillna(0)

    zmin, zmax = z_ranges[metric_label]

    fig = go.Figure(
        go.Choropleth(
            locations=df_wci["ISO3"],
            z=vals,
            text=df_wci["Country"],
            customdata=df_wci["Country"],   # used by click handler
            colorscale=gbr_colorscale,
            zmin=zmin,
            zmax=zmax,
            marker_line_color="black",
            marker_line_width=0.4,
            colorbar_title=metric_label,
            hovertemplate=(
                "<b>%{text}</b><br>"
                + metric_label + ": %{z:.4g}"
                + "<extra></extra>"
            ),
        )
    )

    fig.update_layout(
        title=f"World Cybercrime Index — {metric_label}",
        width=1200,
        height=650,
        geo=dict(
            showframe=False,
            showcoastlines=True,
            projection_type="natural earth",
        ),
        margin=dict(t=60, b=10, l=10, r=10),
    )
    return fig


# ------------------------------------------------------------
# 4. Widgets
# ------------------------------------------------------------

metric_dropdown = Dropdown(
    options=list(metric_options.keys()),
    value="WCI",
    description="Metric:",
)

accuser_dropdown = Dropdown(
    options=["By nationality", "By residence"],
    value="By nationality",
    description="Accusers:",
)

reset_button = Button(description="Reset")
output = Output()


# ------------------------------------------------------------
# 5. Create interactive FigureWidget
# ------------------------------------------------------------

fig_widget = go.FigureWidget(build_map(metric_dropdown.value))


# ------------------------------------------------------------
# 6. Click handler — show bar chart of accusers
# ------------------------------------------------------------

def on_click(trace, points, selector):
    if not points.point_inds:
        return

    idx = points.point_inds[0]
    country = df_wci.iloc[idx]["Country"]
    country_key = df_wci.iloc[idx]["Country_clean"]

    metric_label = metric_dropdown.value
    metric_col = metric_options[metric_label]
    metric_val = df_wci.iloc[idx][metric_col]

    # choose accuser dataset
    table = acc_nat if accuser_dropdown.value == "By nationality" else acc_res

    with output:
        output.clear_output(wait=True)
        print(f"=== {country} ===")
        print(f"{metric_label}: {metric_val:.4g}\n")

        if country_key not in table.index:
            print("No accuser data.")
            return

        row = table.loc[country_key].drop(labels=["Country"], errors="ignore")
        series = pd.to_numeric(row, errors="coerce").fillna(0)

        # only keep nonzero accusers
        series = series[series > 0].sort_values(ascending=False)

        if series.empty:
            print("No accusations recorded.")
            return

        top = series.head(10)

        bar_fig = go.Figure(
            go.Bar(
                x=top.values,
                y=top.index,
                orientation="h",
                marker_color="#b22222",
            )
        )
        bar_fig.update_layout(
            title=f"Top accusers of {country}",
            xaxis_title="Normalised accusing factor",
            yaxis_title="Country",
            height=400,
            margin=dict(l=140, r=20, t=40, b=40),
            yaxis=dict(autorange="reversed"),
        )

        bar_fig.show()


# Attach click handler
if fig_widget.data:
    fig_widget.data[0].on_click(on_click)


# ------------------------------------------------------------
# 7. Handle metric dropdown change
# ------------------------------------------------------------

def on_metric_change(change):
    if change["name"] != "value":
        return
    new_fig = build_map(change["new"])

    with fig_widget.batch_update():
        fig_widget.data = ()
        for tr in new_fig.data:
            fig_widget.add_trace(tr)
        fig_widget.layout.update(new_fig.layout)

        # reattach click callback
        if fig_widget.data:
            fig_widget.data[0].on_click(on_click)

    with output:
        output.clear_output(wait=True)


metric_dropdown.observe(on_metric_change, names="value")


# ------------------------------------------------------------
# 8. Reset button
# ------------------------------------------------------------

def on_reset_clicked(_):
    metric_dropdown.value = "WCI"
    accuser_dropdown.value = "By nationality"
    on_metric_change({"name": "value", "new": "WCI"})

reset_button.on_click(on_reset_clicked)


# ------------------------------------------------------------
# 9. Final UI layout
# ------------------------------------------------------------

ui = VBox([
    HBox([metric_dropdown, accuser_dropdown, reset_button]),
    fig_widget,
    output
])

ui

In [None]:
import pandas as pd
df = pd.read_csv("data/wci_data.csv")
print(df.columns)

In [None]:
import pandas as pd
import plotly.graph_objs as go
from ipywidgets import Dropdown, HBox, VBox, Output, Button, Layout

# ======================================================
# 1. Load data
# ======================================================

df_wci = pd.read_csv("data/df_wci_with_respondents.csv")

# Ensure expected columns exist
required_cols = [
    "Country", "ISO3", "WCI", "WCI_per_capita", "WCI_per_GDP",
    "respondents_nat", "respondents_res"
]
for c in required_cols:
    if c not in df_wci.columns:
        raise ValueError(f"Missing column in df_wci: {c}")

# Load accusing-factor matrices (already normalised by respondents)
acc_nat = pd.read_csv("data/accusing_factor_nationality.csv")
acc_res = pd.read_csv("data/accusing_factor_residence.csv")

def build_accuser_dict(acc_df: pd.DataFrame):
    out = {}
    for _, row in acc_df.iterrows():
        country = row["Country"]
        r = row.drop(labels=["Country"])
        items = []
        for col, val in r.items():
            try:
                v = float(val)
            except (TypeError, ValueError):
                continue
            if v > 0:
                items.append((col, v))
        items.sort(key=lambda x: x[1], reverse=True)
        out[country] = items
    return out

acc_nat_dict = build_accuser_dict(acc_nat)
acc_res_dict = build_accuser_dict(acc_res)

# ======================================================
# 2. Colour scale: green → blue → red
# ======================================================

gbr_colorscale = [
    [0.00, "#e5f5e0"],  # light green
    [0.30, "#a1d99b"],  # green
    [0.55, "#2b8cbe"],  # blue
    [0.80, "#253494"],  # dark blue
    [1.00, "#d73027"],  # red
]

# ======================================================
# 3. Metric mapping
# ======================================================

metric_to_col = {
    "WCI": "WCI",
    "WCI per capita": "WCI_per_capita",
    "WCI per GDP": "WCI_per_GDP",
    "Respondents (nat.)": "respondents_nat",
    "Respondents (res.)": "respondents_res",
}

# ======================================================
# 4. Build empty FigureWidget once
# ======================================================

# Initialise with WCI
initial_metric = "WCI"
initial_col = metric_to_col[initial_metric]
vals0 = pd.to_numeric(df_wci[initial_col], errors="coerce").fillna(0)
zmin0, zmax0 = float(vals0.min()), float(vals0.max())
if zmin0 == zmax0:
    zmax0 = zmin0 + 1e-9

chor = go.Choropleth(
    locations=df_wci["ISO3"],
    z=vals0,
    text=df_wci["Country"],
    customdata=df_wci["Country"],
    colorscale=gbr_colorscale,
    zmin=zmin0,
    zmax=zmax0,
    marker_line_color="black",
    marker_line_width=0.3,
    hovertemplate="<b>%{text}</b><br>WCI: %{z:.4f}<extra></extra>",
)

map_widget = go.FigureWidget(data=[chor])
map_widget.update_geos(
    showcountries=True,
    countrycolor="black",
    projection_type="natural earth",
    bgcolor="#e6e8ff",
)
map_widget.update_layout(
    title="World Cybercrime Index — WCI",
    height=600,
    margin=dict(t=50, b=10, l=10, r=10),
)

# ======================================================
# 5. Widgets
# ======================================================

metric_dropdown = Dropdown(
    options=list(metric_to_col.keys()),
    value="WCI",
    description="Metric:",
    layout=Layout(width="260px"),
)

accuser_dropdown = Dropdown(
    options=["By nationality", "By residence"],
    value="By nationality",
    description="Accusers:",
    layout=Layout(width="220px"),
)

reset_button = Button(
    description="Reset",
    button_style="warning",
    layout=Layout(width="100px"),
)

info_out = Output()
bar_out = Output()

# ======================================================
# 6. Helper: update map for selected metric
# ======================================================

def update_map(metric_label: str):
    col = metric_to_col[metric_label]
    vals = pd.to_numeric(df_wci[col], errors="coerce").fillna(0)

    vmin = float(vals.min())
    vmax = float(vals.max())
    if vmin == vmax:
        vmax = vmin + 1e-9

    if metric_label in ["WCI per capita", "WCI per GDP"]:
        hover_fmt = ".2e"
    else:
        hover_fmt = ".4f"

    with map_widget.batch_update():
        trace = map_widget.data[0]
        trace.z = vals
        trace.zmin = vmin
        trace.zmax = vmax
        trace.colorscale = gbr_colorscale
        trace.hovertemplate = (
            "<b>%{text}</b><br>"
            + metric_label
            + f": %{{z:{hover_fmt}}}<extra></extra>"
        )
        map_widget.layout.title.text = f"World Cybercrime Index — {metric_label}"

# Apply once so it’s consistent
update_map(initial_metric)

# ======================================================
# 7. Metric change handler
# ======================================================

def on_metric_change(change):
    if change["name"] == "value":
        update_map(change["new"])
        with info_out:
            info_out.clear_output()
        with bar_out:
            bar_out.clear_output()

metric_dropdown.observe(on_metric_change)

# ======================================================
# 8. Reset button
# ======================================================

def on_reset_click(_):
    metric_dropdown.value = "WCI"  # triggers update_map via observer
    with info_out:
        info_out.clear_output()
    with bar_out:
        bar_out.clear_output()

reset_button.on_click(on_reset_click)

# ======================================================
# 9. Click handler → accuser bar chart
# ======================================================

def on_click(trace, points, selector):
    if not points.point_inds:
        return
    idx = points.point_inds[0]
    country = trace.customdata[idx]

    # Pick accuser dict
    acc_dict = acc_nat_dict if accuser_dropdown.value == "By nationality" else acc_res_dict
    acc_list = acc_dict.get(country, [])

    with info_out:
        info_out.clear_output()
        print(f"=== {country} ===")
        metric_label = metric_dropdown.value
        col = metric_to_col[metric_label]
        val = df_wci.loc[df_wci["Country"] == country, col]
        if not val.empty:
            v = float(val.iloc[0])
            if metric_label in ["WCI per capita", "WCI per GDP"]:
                print(f"{metric_label}: {v:.2e}")
            else:
                print(f"{metric_label}: {v:.4f}")
        else:
            print(f"{metric_label}: N/A")

    with bar_out:
        bar_out.clear_output()

        if not acc_list:
            with bar_out:
                print("No accuser data for this country.")
            return

        top = acc_list[:10]
        names = [a for a, _ in top]
        vals = [v for _, v in top]

        # Largest at the top, horizontal bars
        names_rev = names[::-1]
        vals_rev = vals[::-1]

        bar_fig = go.FigureWidget(
            data=[
                go.Bar(
                    x=vals_rev,
                    y=names_rev,
                    orientation="h",
                    marker=dict(
                        color=vals_rev,
                        colorscale=gbr_colorscale,
                        reversescale=False,
                        colorbar=dict(title="% of that accuser’s respondents"),
                    ),
                )
            ]
        )
        bar_fig.update_layout(
            title=f"Top accusers of {country} ({accuser_dropdown.value})",
            height=400,
            margin=dict(l=100, r=20, t=40, b=40),
            xaxis_title="% of that accuser’s respondents nominating this country",
        )
        display(bar_fig)

# Attach click handler
map_widget.data[0].on_click(on_click)

# ======================================================
# 10. Compose UI
# ======================================================

ui = VBox(
    [
        HBox([metric_dropdown, accuser_dropdown, reset_button]),
        map_widget,
        info_out,
        bar_out,
    ]
)

display(ui)

In [None]:
def norm_country(x):
    if not isinstance(x, str):
        return x
    x = x.strip()
    x = x.replace("\u00a0", " ")
    x = x.replace(" (mainland)", "")
    x = x.replace("People's Republic of ", "")
    return x

df_wci["Country"] = df_wci["Country"].apply(norm_country)
acc_nat["Country"] = acc_nat["Country"].apply(norm_country)
acc_res["Country"] = acc_res["Country"].apply(norm_country)

In [None]:
# ======================================================
# REPLACEMENT CLICK HANDLER WITH HARD DEBUGGING
# ======================================================

def on_click(trace, points, selector, acc_nat=acc_nat_dict, acc_res=acc_res_dict):
    import unicodedata, difflib

    if not points.point_inds:
        return

    idx = points.point_inds[0]
    raw = trace.customdata[idx]   # This is whatever Plotly sends back

    # --------------------------------------------------
    # DEBUG BLOCK — prints EXACT underlying string data
    # --------------------------------------------------
    with info_out:
        info_out.clear_output()

        print("=== DEBUG CLICK ===")
        print("Raw customdata repr:", repr(raw))
        print("Raw bytes:", list(raw.encode("utf-8")))
        print("Raw length:", len(raw))

        # Normalise for invisible Unicode differences
        norm = unicodedata.normalize("NFC", raw.strip())
        print("\nNormalized repr:", repr(norm))
        print("Normalized bytes:", list(norm.encode("utf-8")))
        print("Normalized length:", len(norm))

        print("\nExists in acc_nat_dict?")
        print("  raw in dict:", raw in acc_nat)
        print("  norm in dict:", norm in acc_nat)

        print("\nClosest matches to raw:")
        print(difflib.get_close_matches(raw, acc_nat.keys(), n=5))

        print("\nClosest matches to normalized:")
        print(difflib.get_close_matches(norm, acc_nat.keys(), n=5))

    # --------------------------------------------------
    # Use NORMALIZED name for actual lookup
    # --------------------------------------------------
    country = unicodedata.normalize("NFC", raw.strip())
    acc_dict = acc_nat if accuser_dropdown.value == "By nationality" else acc_res
    acc_list = acc_dict.get(country, [])

    # --------------------------------------------------
    # Print selected country + metric
    # --------------------------------------------------
    with info_out:
        print("\n=== SELECTED COUNTRY ===")
        print("Country used for lookup:", repr(country))

        metric_label = metric_dropdown.value
        col = metric_to_col[metric_label]
        row = df_wci.loc[df_wci["Country"] == country, col]

        if not row.empty:
            v = float(row.iloc[0])
            if metric_label in ["WCI per capita", "WCI per GDP"]:
                print(f"{metric_label}: {v:.2e}")
            else:
                print(f"{metric_label}: {v:.4f}")
        else:
            print(f"{metric_label}: N/A")

    # --------------------------------------------------
    # Now show bar chart if accusers exist
    # --------------------------------------------------
    with bar_out:
        bar_out.clear_output()

        if not acc_list:
            print("\nNo accuser data found — check debugging output above.")
            return

        # Top 10
        top = acc_list[:10]
        names = [a for a, _ in top][::-1]
        vals = [v for _, v in top][::-1]

        bar_fig = go.FigureWidget(
            data=[
                go.Bar(
                    x=vals,
                    y=names,
                    orientation="h",
                    marker=dict(
                        color=vals,
                        colorscale=gbr_colorscale,
                        colorbar=dict(title="% of that accuser’s respondents")
                    ),
                )
            ]
        )

        bar_fig.update_layout(
            title=f"Who accuses {country}? ({accuser_dropdown.value})",
            height=400,
            margin=dict(l=120, r=20, t=50, b=40),
            xaxis_title="% of that accuser’s respondents",
        )

        display(bar_fig)


# ATTACH THE CLICK HANDLER
map_widget.data[0].on_click(on_click)

In [None]:
# ======================================================
# HARD DEBUG CLICK HANDLER (prints directly to stdout)
# ======================================================

def on_click_harddebug(trace, points, selector):
    print("\n========== CLICK EVENT ==========")

    if not points.point_inds:
        print("No point indices returned.")
        return

    idx = points.point_inds[0]
    print("Clicked index:", idx)

    raw = trace.customdata[idx]
    print("Raw customdata:", repr(raw))

    # Normalise
    import unicodedata, difflib
    norm = unicodedata.normalize("NFC", raw.strip())

    print("Normalised:", repr(norm))
    print("Bytes:", list(norm.encode("utf-8")))

    print("\nDoes raw exist in acc_nat_dict?", raw in acc_nat_dict)
    print("Does normalised exist in acc_nat_dict?", norm in acc_nat_dict)

    print("\nClosest matches to raw:")
    print(difflib.get_close_matches(raw, acc_nat_dict.keys(), n=5))

    print("\nClosest matches to normalised:")
    print(difflib.get_close_matches(norm, acc_nat_dict.keys(), n=5))

    print("==================================\n")


# FORCE ATTACH THE CLICK HANDLER
map_widget.data[0].on_click(on_click_harddebug)

print("Hard-debug click handler attached.")

In [42]:
import pandas as pd
import plotly.graph_objs as go
from ipywidgets import Dropdown, HBox, VBox, Output, Button, Layout

# ======================================================
# 0. Utility: remove invisible Unicode characters
# ======================================================

def clean_country_string(s):
    if not isinstance(s, str):
        return s
    invis = ["\u200b", "\u200c", "\u200d", "\ufeff", "\u2060", "\u00a0"]
    for ch in invis:
        s = s.replace(ch, "")
    return s.strip()


# ======================================================
# 1. Load data
# ======================================================

df_wci = pd.read_csv("data/df_wci_with_respondents.csv")
df_wci["Country"] = df_wci["Country"].apply(clean_country_string)

required_cols = [
    "Country", "ISO3", "WCI", "WCI_per_capita", "WCI_per_GDP",
    "respondents_nat", "respondents_res"
]
for c in required_cols:
    if c not in df_wci.columns:
        raise ValueError(f"Missing column in df_wci: {c}")

# Load accusing-factor matrices
acc_nat = pd.read_csv("data/accusing_factor_nationality.csv")
acc_res = pd.read_csv("data/accusing_factor_residence.csv")

acc_nat["Country"] = acc_nat["Country"].apply(clean_country_string)
acc_res["Country"] = acc_res["Country"].apply(clean_country_string)
acc_nat.columns = [clean_country_string(c) for c in acc_nat.columns]
acc_res.columns = [clean_country_string(c) for c in acc_res.columns]


# ======================================================
# 2. Build accuser dictionaries
# ======================================================

def build_accuser_dict(acc_df):
    out = {}
    for _, row in acc_df.iterrows():
        country = clean_country_string(row["Country"])
        r = row.drop(labels=["Country"])
        items = []
        for col, val in r.items():
            try:
                v = float(val)
            except:
                continue
            if v > 0:
                items.append((clean_country_string(col), v))
        items.sort(key=lambda x: x[1], reverse=True)
        out[country] = items
    return out

acc_nat_dict = build_accuser_dict(acc_nat)
acc_res_dict = build_accuser_dict(acc_res)


# ======================================================
# 3. Colour scale
# ======================================================

gbr_colorscale = [
    [0.00, "#e5f5e0"],
    [0.30, "#a1d99b"],
    [0.55, "#2b8cbe"],
    [0.80, "#253494"],
    [1.00, "#d73027"]
]


# ======================================================
# 4. Metric mapping
# ======================================================

metric_to_col = {
    "WCI": "WCI",
    "WCI per capita": "WCI_per_capita",
    "WCI per GDP": "WCI_per_GDP",
    "Respondents (nat.)": "respondents_nat",
    "Respondents (res.)": "respondents_res",
}


# ======================================================
# 5. Build initial FigureWidget
# ======================================================

initial_metric = "WCI"
initial_col = metric_to_col[initial_metric]
vals0 = pd.to_numeric(df_wci[initial_col], errors="coerce").fillna(0)

zmin0, zmax0 = float(vals0.min()), float(vals0.max())
if zmin0 == zmax0:
    zmax0 = zmin0 + 1e-9

chor = go.Choropleth(
    locations=df_wci["ISO3"],
    z=vals0,
    text=df_wci["Country"],
    customdata=df_wci["ISO3"],       # use ISO3 for stable lookups
    colorscale=gbr_colorscale,
    zmin=zmin0,
    zmax=zmax0,
    marker_line_color="black",
    marker_line_width=0.3,
    hoverinfo="location+text",       # <<< CRITICAL FIX for click events
    hovertemplate="<b>%{text}</b><br>WCI: %{z:.4f}<extra></extra>",
)

map_widget = go.FigureWidget(data=[chor])
map_widget.update_geos(
    showcountries=True,
    countrycolor="black",
    projection_type="natural earth",
    bgcolor="#e6e8ff",
)
map_widget.update_layout(
    title="WCI — WCI",
    height=600,
    margin=dict(t=50, b=10, l=10, r=10),
)


# ======================================================
# 6. Widgets
# ======================================================

metric_dropdown = Dropdown(
    options=list(metric_to_col.keys()),
    value="WCI",
    description="Metric:",
    layout=Layout(width="250px"),
)

accuser_dropdown = Dropdown(
    options=["By nationality", "By residence"],
    value="By nationality",
    description="Accusers:",
    layout=Layout(width="220px"),
)

reset_button = Button(
    description="Reset",
    button_style="warning",
    layout=Layout(width="100px"),
)

info_out = Output()
bar_out = Output()


# ======================================================
# 7. Update map
# ======================================================

def update_map(metric_label):
    col = metric_to_col[metric_label]
    vals = pd.to_numeric(df_wci[col], errors="coerce").fillna(0)

    vmin, vmax = float(vals.min()), float(vals.max())
    if vmin == vmax:
        vmax = vmin + 1e-9

    fmt = ".2e" if metric_label in ["WCI per capita", "WCI per GDP"] else ".4f"

    with map_widget.batch_update():
        trace = map_widget.data[0]
        trace.z = vals
        trace.zmin = vmin
        trace.zmax = vmax
        trace.hovertemplate = f"<b>%{{text}}</b><br>{metric_label}: %{{z:{fmt}}}<extra></extra>"
        map_widget.layout.title.text = f"WCI — {metric_label}"


update_map(initial_metric)


# ======================================================
# 8. Metric change callback
# ======================================================

def on_metric_change(change):
    if change["name"] == "value":
        update_map(change["new"])
        info_out.clear_output()
        bar_out.clear_output()

metric_dropdown.observe(on_metric_change)


# ======================================================
# 9. Reset button
# ======================================================

def on_reset_click(_):
    metric_dropdown.value = "WCI"

reset_button.on_click(on_reset_click)


# ======================================================
# 10. Click handler
# ======================================================

def on_click(trace, points, selector):
    if not points.point_inds:
        return

    idx = points.point_inds[0]
    iso3 = trace.customdata[idx]

    # Find country name via ISO3
    row = df_wci.loc[df_wci["ISO3"] == iso3]
    if row.empty:
        return
    country = row["Country"].iloc[0]

    acc_dict = acc_nat_dict if accuser_dropdown.value == "By nationality" else acc_res_dict
    acc_list = acc_dict.get(country, [])

    metric_label = metric_dropdown.value
    col = metric_to_col[metric_label]
    val = row[col].iloc[0]

    info_out.clear_output()
    with info_out:
        print(f"=== {country} ===")
        if metric_label in ["WCI per capita", "WCI per GDP"]:
            print(f"{metric_label}: {val:.2e}")
        else:
            print(f"{metric_label}: {val:.4f}")

    bar_out.clear_output()

    if not acc_list:
        with bar_out:
            print("No accuser data for this country.")
        return

    top = acc_list[:10]
    names = [n for n, _ in top]
    vals = [v for _, v in top]

    names_rev = names[::-1]
    vals_rev = vals[::-1]

    bar_fig = go.FigureWidget(
        data=[
            go.Bar(
                x=vals_rev,
                y=names_rev,
                orientation="h",
                marker=dict(
                    color=vals_rev,
                    colorscale=gbr_colorscale,
                    colorbar=dict(title="% of accusers")
                ),
            )
        ]
    )

    bar_fig.update_layout(
        title=f"Who accuses {country}? ({accuser_dropdown.value})",
        height=400,
        margin=dict(l=120, r=20, t=40, b=40),
        xaxis_title="% of accuser respondents",
    )

    display(bar_fig)


map_widget.data[0].on_click(on_click)


# ======================================================
# 11. Compose UI
# ======================================================

ui = VBox([
    HBox([metric_dropdown, accuser_dropdown, reset_button]),
    map_widget,
    info_out,
    bar_out,
])

display(ui)

VBox(children=(HBox(children=(Dropdown(description='Metric:', layout=Layout(width='250px'), options=('WCI', 'W…

FigureWidget({
    'data': [{'marker': {'color': [41.66666666666667, 54.166666666666664,
                                   58.333333333333336, 95.83333333333334,
                                   129.16666666666669, 154.16666666666669,
                                   170.83333333333331, 175.0, 258.33333333333337,
                                   395.83333333333337],
                         'colorbar': {'title': {'text': '% of accusers'}},
                         'colorscale': [[0.0, '#e5f5e0'], [0.3, '#a1d99b'], [0.55,
                                        '#2b8cbe'], [0.8, '#253494'], [1.0,
                                        '#d73027']]},
              'orientation': 'h',
              'type': 'bar',
              'uid': '592337bc-19d6-4fc5-86c5-6d83e1ad3c18',
              'x': [41.66666666666667, 54.166666666666664, 58.333333333333336,
                    95.83333333333334, 129.16666666666669, 154.16666666666669,
                    170.83333333333331, 175.0, 258.333

FigureWidget({
    'data': [{'marker': {'color': [75.0, 100.0, 100.0, 100.0, 125.0, 200.0, 200.0,
                                   225.0, 325.0, 375.0],
                         'colorbar': {'title': {'text': '% of accusers'}},
                         'colorscale': [[0.0, '#e5f5e0'], [0.3, '#a1d99b'], [0.55,
                                        '#2b8cbe'], [0.8, '#253494'], [1.0,
                                        '#d73027']]},
              'orientation': 'h',
              'type': 'bar',
              'uid': 'b268e4e8-d3bd-4943-8896-b41ae8fcecae',
              'x': [75.0, 100.0, 100.0, 100.0, 125.0, 200.0, 200.0, 225.0, 325.0,
                    375.0],
              'y': [Brazil, South Africa, Israel, India, Belarus, Romania,
                    Nigeria, United States, China, Russia]}],
    'layout': {'height': 400,
               'margin': {'b': 40, 'l': 120, 'r': 20, 't': 40},
               'template': '...',
               'title': {'text': 'Who accuses Nigeria? (By

In [43]:
# ================================================================
# CLEAN REBUILD OF THE ENTIRE WCI INTERACTIVE NOTEBOOK BLOCK
# ================================================================

import pandas as pd
import plotly.graph_objs as go
from ipywidgets import Dropdown, HBox, VBox, Output, Button, Layout

# -------------------------------------------------------
# Utility: remove invisible Unicode characters everywhere
# -------------------------------------------------------
def clean_name(s):
    if not isinstance(s, str):
        return s
    invis = ["\u200b","\u200c","\u200d","\ufeff","\u2060","\u00a0"]
    for ch in invis:
        s = s.replace(ch,"")
    return s.strip()


# -------------------------------------------------------
# Load and clean data
# -------------------------------------------------------
df_wci = pd.read_csv("data/df_wci_with_respondents.csv")
df_wci["Country"] = df_wci["Country"].apply(clean_name)

acc_nat = pd.read_csv("data/accusing_factor_nationality.csv")
acc_res = pd.read_csv("data/accusing_factor_residence.csv")

acc_nat["Country"] = acc_nat["Country"].apply(clean_name)
acc_res["Country"] = acc_res["Country"].apply(clean_name)

acc_nat.columns = [clean_name(c) for c in acc_nat.columns]
acc_res.columns = [clean_name(c) for c in acc_res.columns]


# -------------------------------------------------------
# Build accuser dictionaries (ignore tiny floating noise)
# -------------------------------------------------------
def build_dict(df):
    out = {}
    for _, row in df.iterrows():
        base = clean_name(row["Country"])
        items = []
        for col, val in row.items():
            if col == "Country":
                continue
            try:
                v = float(val)
            except:
                continue
            if v > 0.001:
                items.append((clean_name(col), v))
        items.sort(key=lambda x: x[1], reverse=True)
        out[base] = items
    return out

acc_nat_dict = build_dict(acc_nat)
acc_res_dict = build_dict(acc_res)


# -------------------------------------------------------
# Metric mapping
# -------------------------------------------------------
metric_to_col = {
    "WCI": "WCI",
    "WCI per capita": "WCI_per_capita",
    "WCI per GDP": "WCI_per_GDP",
    "Respondents (nat.)": "respondents_nat",
    "Respondents (res.)": "respondents_res",
}

gbr_colorscale = [
    [0.00, "#e5f5e0"],
    [0.30, "#a1d99b"],
    [0.55, "#2b8cbe"],
    [0.80, "#253494"],
    [1.00, "#d73027"],
]


# -------------------------------------------------------
# Build a brand-new fully clean figure
# -------------------------------------------------------
def make_figure(metric_label):
    column = metric_to_col[metric_label]
    vals = pd.to_numeric(df_wci[column], errors="coerce").fillna(0)

    mn, mx = float(vals.min()), float(vals.max())
    if mn == mx:
        mx = mn + 1e-9

    hover = ".2e" if "per" in metric_label else ".4f"

    fig = go.FigureWidget(
        data=[
            go.Choropleth(
                locations=df_wci["ISO3"],
                z=vals,
                text=df_wci["Country"],
                customdata=df_wci["Country"],   # FULLY rebuilt
                colorscale=gbr_colorscale,
                zmin=mn,
                zmax=mx,
                marker_line_color="black",
                marker_line_width=0.3,
                hovertemplate=f"<b>%{{text}}</b><br>{metric_label}: %{{z:{hover}}}<extra></extra>"
            )
        ]
    )

    fig.update_geos(
        showcountries=True,
        countrycolor="black",
        projection_type="natural earth",
        bgcolor="#eef1ff"
    )

    fig.update_layout(
        title=f"WCI — {metric_label}",
        height=600,
        margin=dict(t=50,b=10,l=10,r=10)
    )

    return fig


# -------------------------------------------------------
# Widgets
# -------------------------------------------------------
metric_dropdown = Dropdown(
    options=list(metric_to_col.keys()),
    value="WCI",
    description="Metric:",
    layout=Layout(width="240px")
)

accuser_dropdown = Dropdown(
    options=["By nationality","By residence"],
    value="By nationality",
    description="Accusers:",
    layout=Layout(width="220px")
)

reset_button = Button(
    description="Reset",
    button_style="warning",
    layout=Layout(width="100px")
)

info_out = Output()
bar_out = Output()


# -------------------------------------------------------
# Instantiate the first clean figure
# -------------------------------------------------------
map_widget = make_figure("WCI")


# -------------------------------------------------------
# Metric change always rebuilds the *entire* figure
# -------------------------------------------------------
def on_metric_change(change):
    if change["name"] != "value":
        return

    new_metric = change["new"]
    new_fig = make_figure(new_metric)

    # Replace entire map cleanly
    map_widget.data = new_fig.data
    map_widget.layout = new_fig.layout
    map_widget._props = new_fig._props

    info_out.clear_output()
    bar_out.clear_output()

metric_dropdown.observe(on_metric_change)


# -------------------------------------------------------
# Reset button
# -------------------------------------------------------
def on_reset_click(_):
    metric_dropdown.value = "WCI"
    info_out.clear_output()
    bar_out.clear_output()

reset_button.on_click(on_reset_click)


# -------------------------------------------------------
# Click handler — fully correct & non-sticky
# -------------------------------------------------------
def on_click(trace, points, selector):
    if not points.point_inds:
        return

    idx = points.point_inds[0]
    country_raw = trace.customdata[idx]
    country = clean_name(country_raw)

    # Pick accuser dictionary
    acc_dict = acc_nat_dict if accuser_dropdown.value == "By nationality" else acc_res_dict
    acc_list = acc_dict.get(country, [])

    # ----------------------
    # Info panel
    # ----------------------
    info_out.clear_output()
    with info_out:
        print(f"=== {country} ===")
        col = metric_to_col[metric_dropdown.value]
        val = df_wci.loc[df_wci["Country"] == country, col]
        if val.empty:
            print(f"{metric_dropdown.value}: N/A")
        else:
            v = float(val.iloc[0])
            fmt = ".2e" if "per" in metric_dropdown.value else ".4f"
            print(f"{metric_dropdown.value}: {format(v, fmt)}")

    # ----------------------
    # Bar chart panel
    # ----------------------
    bar_out.clear_output()

    if not acc_list:
        with bar_out:
            print("No accuser data for this country.")
        return

    top = acc_list[:10]
    names = [n for n, _ in top][::-1]
    vals = [v for _, v in top][::-1]

    bar_fig = go.FigureWidget(
        data=[
            go.Bar(
                x=vals,
                y=names,
                orientation="h",
                marker=dict(color=vals, colorscale=gbr_colorscale),
            )
        ]
    )

    bar_fig.update_layout(
        title=f"Who accuses {country}? ({accuser_dropdown.value})",
        height=400,
        margin=dict(l=120,r=20,t=40,b=40),
        xaxis_title="% of accuser respondents"
    )

    with bar_out:
        display(bar_fig)

map_widget.data[0].on_click(on_click)


# -------------------------------------------------------
# UI
# -------------------------------------------------------
ui = VBox([
    HBox([metric_dropdown, accuser_dropdown, reset_button]),
    map_widget,
    info_out,
    bar_out
])

ui

VBox(children=(HBox(children=(Dropdown(description='Metric:', layout=Layout(width='240px'), options=('WCI', 'W…

In [None]:
import pandas as pd
import plotly.graph_objs as go
from ipywidgets import Dropdown, HBox, VBox, Output, Button, Layout

# ---------------------------------------------------------
# Utility: strip invisible Unicode characters
# ---------------------------------------------------------
def clean(s):
    if not isinstance(s, str):
        return s
    hidden = ["\u200b", "\u200c", "\u200d", "\ufeff", "\u2060", "\u00a0"]
    for h in hidden:
        s = s.replace(h, "")
    return s.strip()


# ---------------------------------------------------------
# Load main WCI dataset
# ---------------------------------------------------------
df_wci = pd.read_csv("data/df_wci_with_respondents.csv")
df_wci["Country"] = df_wci["Country"].apply(clean)

metric_to_col = {
    "WCI": "WCI",
    "WCI per capita": "WCI_per_capita",
    "WCI per GDP": "WCI_per_GDP",
    "Respondents (nat.)": "respondents_nat",
    "Respondents (res.)": "respondents_res",
}

# ---------------------------------------------------------
# Load accusing-factor matrices (normalised)
# ---------------------------------------------------------
acc_nat = pd.read_csv("data/accusing_factor_nationality.csv")
acc_res = pd.read_csv("data/accusing_factor_residence.csv")

# Clean column names
acc_nat.columns = [clean(c) for c in acc_nat.columns]
acc_res.columns = [clean(c) for c in acc_res.columns]
acc_nat["Country"] = acc_nat["Country"].apply(clean)
acc_res["Country"] = acc_res["Country"].apply(clean)

# ---------------------------------------------------------
# Build COLUMN-BASED accuser lookup
# ---------------------------------------------------------
def build_column_accusers(df):
    out = {}
    cols = [c for c in df.columns if c != "Country"]
    for accused in cols:
        series = df[["Country", accused]].copy()
        series = series[pd.to_numeric(series[accused], errors="coerce").fillna(0) > 0]
        pairs = [(clean(r["Country"]), float(r[accused])) for _, r in series.iterrows()]
        pairs.sort(key=lambda x: x[1], reverse=True)
        out[clean(accused)] = pairs
    return out

acc_nat_dict = build_column_accusers(acc_nat)
acc_res_dict = build_column_accusers(acc_res)


# ---------------------------------------------------------
# Colour scale
# ---------------------------------------------------------
gbr_colorscale = [
    [0.00, "#e5f5e0"],
    [0.30, "#a1d99b"],
    [0.55, "#2b8cbe"],
    [0.80, "#253494"],
    [1.00, "#d73027"],
]


# ---------------------------------------------------------
# Build initial choropleth map
# ---------------------------------------------------------
initial_metric = "WCI"
col0 = metric_to_col[initial_metric]
vals0 = pd.to_numeric(df_wci[col0], errors="coerce").fillna(0)
zmin0, zmax0 = float(vals0.min()), float(vals0.max())
if zmax0 == zmin0:
    zmax0 = zmin0 + 1e-9

chor = go.Choropleth(
    locations=df_wci["ISO3"],
    z=vals0,
    text=df_wci["Country"],
    customdata=df_wci["Country"],
    colorscale=gbr_colorscale,
    zmin=zmin0,
    zmax=zmax0,
    marker_line_color="black",
    marker_line_width=0.3,
    hovertemplate="<b>%{text}</b><br>WCI: %{z:.4f}<extra></extra>",
)

map_widget = go.FigureWidget([chor])
map_widget.update_geos(
    showcountries=True,
    countrycolor="black",
    projection_type="natural earth",
    bgcolor="#e6e8ff",
)
map_widget.update_layout(
    title="WCI — WCI",
    height=600,
    margin=dict(t=50, b=10, l=10, r=10),
)


# ---------------------------------------------------------
# Widgets
# ---------------------------------------------------------
metric_dropdown = Dropdown(
    options=list(metric_to_col.keys()),
    value="WCI",
    description="Metric:",
    layout=Layout(width="250px"),
)

accuser_dropdown = Dropdown(
    options=["By nationality", "By residence"],
    value="By nationality",
    description="Accusers:",
    layout=Layout(width="220px"),
)

reset_button = Button(
    description="Reset",
    button_style="warning",
    layout=Layout(width="100px"),
)

info_out = Output()
bar_out = Output()


# ---------------------------------------------------------
# Map updater
# ---------------------------------------------------------
def update_map(metric_label):
    col = metric_to_col[metric_label]
    vals = pd.to_numeric(df_wci[col], errors="coerce").fillna(0)
    vmin, vmax = float(vals.min()), float(vals.max())
    if vmin == vmax:
        vmax = vmin + 1e-9

    fmt = ".2e" if metric_label in ["WCI per capita", "WCI per GDP"] else ".4f"

    with map_widget.batch_update():
        tr = map_widget.data[0]
        tr.z = vals
        tr.zmin = vmin
        tr.zmax = vmax
        tr.hovertemplate = (
            f"<b>%{{text}}</b><br>{metric_label}: %{{z:{fmt}}}<extra></extra>"
        )
        map_widget.layout.title.text = f"WCI — {metric_label}"


# ---------------------------------------------------------
# Metric change callback
# ---------------------------------------------------------
def on_metric_change(change):
    if change["name"] == "value":
        update_map(change["new"])
        info_out.clear_output()
        bar_out.clear_output()

metric_dropdown.observe(on_metric_change)


# ---------------------------------------------------------
# Reset button
# ---------------------------------------------------------
def on_reset(_):
    metric_dropdown.value = "WCI"
    info_out.clear_output()
    bar_out.clear_output()

reset_button.on_click(on_reset)


# ---------------------------------------------------------
# Click callback (COLUMN-based accusers)
# ---------------------------------------------------------
def on_click(trace, points, selector):
    if not points.point_inds:
        return

    idx = points.point_inds[0]
    country = clean(trace.customdata[idx])

    acc_dict = acc_nat_dict if accuser_dropdown.value == "By nationality" else acc_res_dict
    acc_list = acc_dict.get(country, [])

    # Display metric for selected country
    metric = metric_dropdown.value
    col = metric_to_col[metric]
    val = df_wci.loc[df_wci["Country"] == country, col]
    number = float(val.iloc[0]) if not val.empty else None

    info_out.clear_output()
    with info_out:
        print(f"=== {country} ===")
        if number is None:
            print(f"{metric}: N/A")
        else:
            if metric in ["WCI per capita", "WCI per GDP"]:
                print(f"{metric}: {number:.2e}")
            else:
                print(f"{metric}: {number:.4f}")

    # Plot accusers
    bar_out.clear_output()

    if not acc_list:
        with bar_out:
            print("No accusers found.")
        return

    top = acc_list[:10]
    names = [n for n, _ in top][::-1]
    vals = [v for _, v in top][::-1]

    fig = go.FigureWidget(
        [
            go.Bar(
                x=vals,
                y=names,
                orientation="h",
                marker=dict(
                    color=vals,
                    colorscale=gbr_colorscale,
                    colorbar=dict(title="% accusing"),
                ),
            )
        ]
    )
    fig.update_layout(
        title=f"Who accuses {country}? ({accuser_dropdown.value})",
        height=400,
        margin=dict(l=120, r=20, t=40, b=40),
        xaxis_title="% of respondents",
    )

    with bar_out:
        display(fig)


map_widget.data[0].on_click(on_click)


# ---------------------------------------------------------
# Display UI
# ---------------------------------------------------------
ui = VBox([
    HBox([metric_dropdown, accuser_dropdown, reset_button]),
    map_widget,
    info_out,
    bar_out,
])

display(ui)

VBox(children=(HBox(children=(Dropdown(description='Metric:', layout=Layout(width='250px'), options=('WCI', 'W…

In [49]:
import pandas as pd
import plotly.graph_objs as go
from ipywidgets import Dropdown, HBox, VBox, Output, Button, Layout

# =========================================================
# 0. Utility
# =========================================================

def clean(s):
    if not isinstance(s, str):
        return s
    for ch in ["\u200b", "\u200c", "\u200d", "\ufeff", "\u2060", "\u00a0"]:
        s = s.replace(ch, "")
    return s.strip()

# =========================================================
# 1. Load WCI
# =========================================================

df_wci = pd.read_csv("data/df_wci_with_respondents.csv")
df_wci["Country"] = df_wci["Country"].apply(clean)

metric_to_col = {
    "WCI": "WCI",
    "WCI per capita": "WCI_per_capita",
    "WCI per GDP": "WCI_per_GDP",
    "Respondents (nat.)": "respondents_nat",
    "Respondents (res.)": "respondents_res",
}

# =========================================================
# 2. Load RAW accusation counts
# =========================================================

raw_nat = pd.read_csv("data/accusations_nationality.csv")
raw_res = pd.read_csv("data/accusations_residence.csv")

for df in (raw_nat, raw_res):
    df["Country"] = df["Country"].apply(clean)
    df.columns = [clean(c) for c in df.columns]

# =========================================================
# 3. Convert counts → percentages (per accuser country)
# =========================================================

def to_percent(df):
    out = df.copy()
    for i, row in out.iterrows():
        total = row.drop("Country").sum()
        if total > 0:
            out.loc[i, out.columns != "Country"] = row.drop("Country") / total * 100
        else:
            out.loc[i, out.columns != "Country"] = 0.0
    return out

nat_pct = to_percent(raw_nat)
res_pct = to_percent(raw_res)

# =========================================================
# 4. Build lookup dicts: accused → [(accuser, %)]
# =========================================================

def build_lookup(df):
    d = {}
    for _, row in df.iterrows():
        accused = row["Country"]
        items = []
        for accuser, val in row.drop("Country").items():
            val = float(val)
            if val > 0:
                items.append((accuser, val))
        items.sort(key=lambda x: x[1], reverse=True)
        d[accused] = items
    return d

acc_nat = build_lookup(nat_pct)
acc_res = build_lookup(res_pct)

# =========================================================
# 5. Colour scale
# =========================================================

gbr = [
    [0.00, "#e5f5e0"],
    [0.30, "#a1d99b"],
    [0.55, "#2b8cbe"],
    [0.80, "#253494"],
    [1.00, "#d73027"],
]

# =========================================================
# 6. Build map widget
# =========================================================

metric0 = "WCI"
vals0 = pd.to_numeric(df_wci[metric_to_col[metric0]], errors="coerce").fillna(0)

chor = go.Choropleth(
    locations=df_wci["ISO3"],
    z=vals0,
    text=df_wci["Country"],
    customdata=df_wci["Country"],
    colorscale=gbr,
    zmin=float(vals0.min()),
    zmax=float(vals0.max()),
    marker_line_color="black",
    marker_line_width=0.3,
    hovertemplate="<b>%{text}</b><br>WCI: %{z:.4f}<extra></extra>"
)

map_widget = go.FigureWidget([chor])
map_widget.update_geos(
    showcountries=True,
    countrycolor="black",
    projection_type="natural earth",
    bgcolor="#e6e8ff"
)
map_widget.update_layout(
    title="World Cybercrime Index — WCI",
    height=600,
    margin=dict(t=50, b=10, l=10, r=10)
)

# =========================================================
# 7. Widgets
# =========================================================

metric_dd = Dropdown(
    options=list(metric_to_col.keys()),
    value="WCI",
    description="Metric:",
    layout=Layout(width="260px")
)

accuser_dd = Dropdown(
    options=["By nationality", "By residence"],
    value="By nationality",
    description="Accusers:",
    layout=Layout(width="220px")
)

reset_btn = Button(description="Reset", button_style="warning")

info_out = Output()
bar_out = Output()

# =========================================================
# 8. Metric update
# =========================================================

def update_map(metric):
    col = metric_to_col[metric]
    vals = pd.to_numeric(df_wci[col], errors="coerce").fillna(0)

    vmin, vmax = float(vals.min()), float(vals.max())
    if vmin == vmax:
        vmax = vmin + 1e-9

    fmt = ".2e" if metric.startswith("WCI per") else ".4f"

    with map_widget.batch_update():
        tr = map_widget.data[0]
        tr.z = vals
        tr.zmin = vmin
        tr.zmax = vmax
        tr.hovertemplate = f"<b>%{{text}}</b><br>{metric}: %{{z:{fmt}}}<extra></extra>"
        map_widget.layout.title.text = f"World Cybercrime Index — {metric}"

def on_metric(change):
    update_map(change["new"])
    info_out.clear_output()
    bar_out.clear_output()

metric_dd.observe(on_metric)

# =========================================================
# 9. Click handler (FIXED)
# =========================================================

try:
    map_widget.data[0].on_click(None)
except:
    pass

def on_click(trace, points, _):
    if not points.point_inds:
        return

    idx = points.point_inds[0]
    country = trace.customdata[idx]

    info_out.clear_output()
    bar_out.clear_output()

    acc_dict = acc_nat if accuser_dd.value == "By nationality" else acc_res
    acc_list = acc_dict.get(country, [])

    metric = metric_dd.value
    val = df_wci.loc[df_wci["Country"] == country, metric_to_col[metric]]

    with info_out:
        print(f"=== {country} ===")
        if not val.empty:
            v = float(val.iloc[0])
            print(f"{metric}: {v:.2e}" if metric.startswith("WCI per") else f"{metric}: {v:.4f}")

    if not acc_list:
        with bar_out:
            print("No accuser data for this country.")
        return

    names = [a for a, _ in acc_list[:10]][::-1]
    vals = [v for _, v in acc_list[:10]][::-1]

    fig = go.FigureWidget(
        [go.Bar(
            x=vals,
            y=names,
            orientation="h",
            marker=dict(color=vals, colorscale=gbr)
        )]
    )

    fig.update_layout(
        title=f"Who accuses {country}? ({accuser_dd.value})",
        height=420,
        margin=dict(l=120, r=20, t=50, b=40),
        xaxis_title="% of accuser's total accusations"
    )

    with bar_out:
        display(fig)

map_widget.data[0].on_click(on_click)

# =========================================================
# 10. Reset
# =========================================================

def on_reset(_):
    metric_dd.value = "WCI"
    info_out.clear_output()
    bar_out.clear_output()

reset_btn.on_click(on_reset)

# =========================================================
# 11. UI
# =========================================================

display(VBox([
    HBox([metric_dd, accuser_dd, reset_btn]),
    map_widget,
    info_out,
    bar_out
]))

VBox(children=(HBox(children=(Dropdown(description='Metric:', layout=Layout(width='260px'), options=('WCI', 'W…

TypeError: unhashable type: 'dict'

TypeError: unhashable type: 'dict'

TypeError: unhashable type: 'dict'

In [50]:
import pandas as pd

# =====================================
# 0. Utility
# =====================================

def clean_country_string(s):
    if not isinstance(s, str):
        return s
    invis = ["\u200b", "\u200c", "\u200d", "\ufeff", "\u2060", "\u00a0"]
    for ch in invis:
        s = s.replace(ch, "")
    return s.strip()

# =====================================
# 1. Load response matrices
# =====================================

nat = pd.read_csv("data/accusations_nationality.csv")
res = pd.read_csv("data/accusations_residence.csv")

for df in (nat, res):
    df["Country"] = df["Country"].apply(clean_country_string)
    df.columns = [clean_country_string(c) for c in df.columns]

# =====================================
# 2. Collapse to response totals
# =====================================

def build_response_totals(df):
    """
    Each row = respondent country
    Each cell = accusation count
    Response total = row sum
    """
    totals = df.copy()
    totals["Responses"] = totals.drop(columns=["Country"]).sum(axis=1)
    return totals[["Country", "Responses"]]

nat_responses = build_response_totals(nat)
res_responses = build_response_totals(res)

# =====================================
# 3. Sanity checks
# =====================================

print("Nationality responses (top 10):")
print(nat_responses.sort_values("Responses", ascending=False).head(10))

print("\nResidence responses (top 10):")
print(res_responses.sort_values("Responses", ascending=False).head(10))

# =====================================
# 4. Save ground-truth CSVs
# =====================================

nat_responses.to_csv(
    "data/responses_by_nationality.csv",
    index=False
)

res_responses.to_csv(
    "data/responses_by_residence.csv",
    index=False
)

print("\nSaved:")
print(" - data/responses_by_nationality.csv")
print(" - data/responses_by_residence.csv")

Nationality responses (top 10):
           Country  Responses
0               --        563
76          Russia        304
92         Ukraine        202
21           China        162
95   United States        154
69         Nigeria        143
75         Romania         96
52    Korea, North         64
14          Brazil         63
94  United Kingdom         57

Residence responses (top 10):
           Country  Responses
0               --        563
76          Russia        304
92         Ukraine        202
21           China        162
95   United States        154
69         Nigeria        143
75         Romania         96
52    Korea, North         64
14          Brazil         63
94  United Kingdom         57

Saved:
 - data/responses_by_nationality.csv
 - data/responses_by_residence.csv


In [54]:
import pandas as pd

# =====================================
# Utility
# =====================================

def clean_country_string(s):
    if not isinstance(s, str):
        return s
    invis = ["\u200b", "\u200c", "\u200d", "\ufeff", "\u2060", "\u00a0"]
    for ch in invis:
        s = s.replace(ch, "")
    return s.strip()

TOTAL_RESPONSES = 92

# =====================================
# Load source data
# =====================================

df = pd.read_csv("data/df_wci_with_respondents.csv")
df["Country"] = df["Country"].apply(clean_country_string)

required = ["Country", "respondents_nat", "respondents_res"]
missing = [c for c in required if c not in df.columns]
if missing:
    raise ValueError(f"Missing required columns: {missing}")

# =====================================
# Base respondent table
# =====================================

respondents = (
    df[["Country", "respondents_nat", "respondents_res"]]
    .rename(columns={
        "respondents_nat": "Respondents_by_nationality",
        "respondents_res": "Respondents_by_residence"
    })
    .copy()
)

# =====================================
# Compute "Not given"
# =====================================

nat_sum = respondents["Respondents_by_nationality"].sum()
res_sum = respondents["Respondents_by_residence"].sum()

nat_missing = TOTAL_RESPONSES - nat_sum
res_missing = TOTAL_RESPONSES - res_sum

if nat_missing < 0 or res_missing < 0:
    raise ValueError("Respondent counts exceed total responses")

not_given_row = pd.DataFrame([{
    "Country": "Not given",
    "Respondents_by_nationality": nat_missing,
    "Respondents_by_residence": res_missing
}])

# =====================================
# Final table
# =====================================

respondents_final = (
    pd.concat([respondents, not_given_row], ignore_index=True)
    .sort_values("Country")
    .reset_index(drop=True)
)

# =====================================
# Final sanity check (hard invariant)
# =====================================

assert respondents_final["Respondents_by_nationality"].sum() == TOTAL_RESPONSES
assert respondents_final["Respondents_by_residence"].sum() == TOTAL_RESPONSES

print("Final respondent totals (guaranteed correct):")
print(respondents_final.sum(numeric_only=True))

# =====================================
# Save ground truth
# =====================================

out_path = "data/respondents_by_country.csv"
respondents_final.to_csv(out_path, index=False)
print(f"\nSaved: {out_path}")

Final respondent totals (guaranteed correct):
Respondents_by_nationality    92
Respondents_by_residence      92
dtype: int64

Saved: data/respondents_by_country.csv


In [57]:
import pandas as pd
import plotly.graph_objs as go
from ipywidgets import Dropdown, HBox, VBox, Output, Button, Layout

# =========================================================
# 0) Utility
# =========================================================

def clean_country_string(s):
    if not isinstance(s, str):
        return s
    invis = ["\u200b", "\u200c", "\u200d", "\ufeff", "\u2060", "\u00a0"]
    for ch in invis:
        s = s.replace(ch, "")
    return s.strip()

def canon(s: str) -> str:
    if s is None:
        return ""
    s = clean_country_string(str(s)).lower()
    for ch in [".", ",", "'", '"', "’", "(", ")", "-", "–", "—"]:
        s = s.replace(ch, " ")
    s = " ".join(s.split())
    return s

GBR_COLORSCALE = [
    [0.00, "#e5f5e0"],  # light green
    [0.30, "#a1d99b"],  # green
    [0.55, "#2b8cbe"],  # blue
    [0.80, "#253494"],  # dark blue
    [1.00, "#d73027"],  # red
]

# =========================================================
# 1) Load WCI + respondents
# =========================================================

df_wci = pd.read_csv("data/df_wci_with_respondents.csv")
df_wci["Country"] = df_wci["Country"].apply(clean_country_string)

required_cols = ["Country", "ISO3", "WCI", "WCI_per_capita", "WCI_per_GDP", "respondents_nat", "respondents_res"]
missing = [c for c in required_cols if c not in df_wci.columns]
if missing:
    raise ValueError(f"Missing required columns in df_wci: {missing}")

# map metrics (respondents come from df_wci, NOT the accusation matrices)
metric_to_col = {
    "WCI": "WCI",
    "WCI per capita": "WCI_per_capita",
    "WCI per GDP": "WCI_per_GDP",
    "Respondents (by nationality)": "respondents_nat",
    "Respondents (by residence)": "respondents_res",
}

# =========================================================
# 2) Load RAW accusation matrices (counts)
#    IMPORTANT: rows = accused, columns = accusers
# =========================================================

acc_nat = pd.read_csv("data/accusations_nationality.csv")
acc_res = pd.read_csv("data/accusations_residence.csv")

for df in (acc_nat, acc_res):
    df["Country"] = df["Country"].apply(clean_country_string)
    df.columns = [clean_country_string(c) for c in df.columns]

# Build quick lookup: canon(accused) -> row index
acc_nat_row = {canon(c): i for i, c in enumerate(acc_nat["Country"].tolist())}
acc_res_row = {canon(c): i for i, c in enumerate(acc_res["Country"].tolist())}

# Precompute accuser totals (denominator) per matrix: totals over all accused for each accuser column
def accuser_totals(acc_df: pd.DataFrame) -> dict[str, float]:
    cols = [c for c in acc_df.columns if c != "Country"]
    totals = {}
    for c in cols:
        totals[c] = float(pd.to_numeric(acc_df[c], errors="coerce").fillna(0).sum())
    return totals

nat_totals = accuser_totals(acc_nat)
res_totals = accuser_totals(acc_res)

# =========================================================
# 3) Map widget (one persistent FigureWidget)
# =========================================================

def metric_series(metric_label: str) -> pd.Series:
    col = metric_to_col[metric_label]
    return pd.to_numeric(df_wci[col], errors="coerce").fillna(0)

initial_metric = "WCI"
vals0 = metric_series(initial_metric)
zmin0, zmax0 = float(vals0.min()), float(vals0.max())
if zmin0 == zmax0:
    zmax0 = zmin0 + 1e-9

chor = go.Choropleth(
    locations=df_wci["ISO3"],
    z=vals0,
    text=df_wci["Country"],
    customdata=df_wci["Country"],  # accused country name from df_wci
    colorscale=GBR_COLORSCALE,
    zmin=zmin0,
    zmax=zmax0,
    marker_line_color="black",
    marker_line_width=0.3,
    hovertemplate="<b>%{text}</b><br>" + initial_metric + ": %{z}<extra></extra>",
)

map_widget = go.FigureWidget(data=[chor])
map_widget.update_geos(
    showcountries=True,
    countrycolor="black",
    projection_type="natural earth",
    bgcolor="#e6e8ff",
)
map_widget.update_layout(
    title=f"World Cybercrime Index — {initial_metric}",
    height=600,
    margin=dict(t=50, b=10, l=10, r=10),
)

# =========================================================
# 4) Bar widget (persistent, UPDATED not re-displayed)
# =========================================================

bar_widget = go.FigureWidget()
bar_widget.add_trace(go.Bar(orientation="h", x=[], y=[]))
bar_widget.update_layout(
    title="Who accuses … ?",
    height=420,
    margin=dict(l=140, r=20, t=60, b=50),
    xaxis_title="Share of accuser’s total accusations (%)",
)

# =========================================================
# 5) Widgets + outputs
# =========================================================

metric_dropdown = Dropdown(
    options=list(metric_to_col.keys()),
    value=initial_metric,
    description="Metric:",
    layout=Layout(width="360px"),
)

accuser_dropdown = Dropdown(
    options=["By nationality", "By residence"],
    value="By nationality",
    description="Accusers:",
    layout=Layout(width="240px"),
)

reset_button = Button(
    description="Reset",
    button_style="warning",
    layout=Layout(width="100px"),
)

info_out = Output()
bar_out = Output()

# Show the bar widget ONCE (then we only update it)
with bar_out:
    bar_out.clear_output()
    display(bar_widget)

# =========================================================
# 6) Update map
# =========================================================

def update_map(metric_label: str):
    vals = metric_series(metric_label)
    vmin, vmax = float(vals.min()), float(vals.max())
    if vmin == vmax:
        vmax = vmin + 1e-9

    # formatting
    if metric_label in ("WCI per capita", "WCI per GDP"):
        hover_fmt = ".2e"
    elif metric_label.startswith("Respondents"):
        hover_fmt = ".0f"
    else:
        hover_fmt = ".4f"

    with map_widget.batch_update():
        tr = map_widget.data[0]
        tr.z = vals
        tr.zmin = vmin
        tr.zmax = vmax
        tr.colorscale = GBR_COLORSCALE
        tr.hovertemplate = (
            "<b>%{text}</b><br>"
            + metric_label
            + f": %{{z:{hover_fmt}}}<extra></extra>"
        )
        map_widget.layout.title.text = f"World Cybercrime Index — {metric_label}"

def on_metric_change(change):
    if change["name"] == "value":
        update_map(change["new"])
        with info_out:
            info_out.clear_output()
        # keep bar until next click (don’t nuke it on metric change)

metric_dropdown.observe(on_metric_change)

# =========================================================
# 7) Accuser bar computation (FIXED orientation + normalisation)
# =========================================================

def top_accusers_for(accused_country: str, mode: str, top_n: int = 10):
    # choose matrix
    if mode == "By nationality":
        acc_df = acc_nat
        row_lookup = acc_nat_row
        totals = nat_totals
    else:
        acc_df = acc_res
        row_lookup = acc_res_row
        totals = res_totals

    key = canon(accused_country)
    if key not in row_lookup:
        return []  # accused not present in matrix

    i = row_lookup[key]
    row = acc_df.iloc[i]

    # row contains: Country + columns(accusers)
    out = []
    for accuser in [c for c in acc_df.columns if c != "Country"]:
        count = float(pd.to_numeric(row[accuser], errors="coerce") or 0.0)
        denom = float(totals.get(accuser, 0.0))
        if count > 0 and denom > 0:
            share = count / denom  # 0..1
            out.append((accuser, share, count, denom))

    out.sort(key=lambda t: t[1], reverse=True)
    return out[:top_n]

def update_bar(accused_country: str):
    mode = accuser_dropdown.value
    items = top_accusers_for(accused_country, mode, top_n=10)

    with bar_widget.batch_update():
        if not items:
            bar_widget.data[0].x = [1.0]
            bar_widget.data[0].y = ["No data"]
            bar_widget.data[0].hovertemplate = "No accuser data<extra></extra>"
            bar_widget.data[0].marker = dict(color="#cccccc")
            bar_widget.layout.title = f"Who accuses {accused_country}? ({mode})"
            bar_widget.layout.xaxis.title = "Share of accuser’s total accusations (%)"
            return

        names = [t[0] for t in items]
        shares = [t[1] * 100.0 for t in items]  # percent
        counts = [t[2] for t in items]
        denoms = [t[3] for t in items]

        # horizontal bars, biggest on top
        names_rev = names[::-1]
        shares_rev = shares[::-1]
        counts_rev = counts[::-1]
        denoms_rev = denoms[::-1]

        bar_widget.data[0].x = shares_rev
        bar_widget.data[0].y = names_rev
        bar_widget.data[0].marker = dict(
            color=shares_rev,
            colorscale=GBR_COLORSCALE,
            reversescale=False,
            colorbar=dict(title="%", tickformat=".0f"),
        )
        bar_widget.data[0].hovertemplate = (
            "%{y}<br>"
            "Share: %{x:.2f}%<br>"
            "Count: %{customdata[0]:.0f} of %{customdata[1]:.0f} accuser accusations"
            "<extra></extra>"
        )
        bar_widget.data[0].customdata = list(zip(counts_rev, denoms_rev))
        bar_widget.layout.title = f"Who accuses {accused_country}? ({mode})"
        bar_widget.layout.xaxis.title = "Share of accuser’s total accusations (%)"

# =========================================================
# 8) Click handler (updates info + bar)
# =========================================================

def on_click(trace, points, selector):
    if not points.point_inds:
        return
    idx = points.point_inds[0]
    accused = clean_country_string(trace.customdata[idx])

    # info panel
    metric_label = metric_dropdown.value
    col = metric_to_col[metric_label]
    s = df_wci.loc[df_wci["Country"] == accused, col]
    val = float(pd.to_numeric(s, errors="coerce").iloc[0]) if not s.empty else float("nan")

    with info_out:
        info_out.clear_output()
        print(f"=== {accused} ===")
        if metric_label in ("WCI per capita", "WCI per GDP"):
            print(f"{metric_label}: {val:.2e}")
        elif metric_label.startswith("Respondents"):
            print(f"{metric_label}: {val:.0f}")
        else:
            print(f"{metric_label}: {val:.4f}")

    update_bar(accused)

# attach once
map_widget.data[0].on_click(on_click)

# if the accuser mode changes, re-render bar for the last clicked country (store state)
_last_clicked = {"country": None}

def on_click_with_state(trace, points, selector):
    if not points.point_inds:
        return
    idx = points.point_inds[0]
    accused = clean_country_string(trace.customdata[idx])
    _last_clicked["country"] = accused
    on_click(trace, points, selector)

# rebind (Plotly stacks handlers; safest is restart kernel if you’ve run multiple versions)
map_widget.data[0].on_click(on_click_with_state)

def on_accuser_mode_change(change):
    if change["name"] == "value" and _last_clicked["country"] is not None:
        update_bar(_last_clicked["country"])

accuser_dropdown.observe(on_accuser_mode_change)

# =========================================================
# 9) Reset
# =========================================================

def on_reset(_):
    metric_dropdown.value = "WCI"
    accuser_dropdown.value = "By nationality"
    _last_clicked["country"] = None
    with info_out:
        info_out.clear_output()
    with bar_widget.batch_update():
        bar_widget.data[0].x = []
        bar_widget.data[0].y = []
        bar_widget.layout.title = "Who accuses … ?"

reset_button.on_click(on_reset)

# =========================================================
# 10) Compose UI
# =========================================================

ui = VBox(
    [
        HBox([metric_dropdown, accuser_dropdown, reset_button]),
        map_widget,
        info_out,
        bar_out,
    ]
)

display(ui)

VBox(children=(HBox(children=(Dropdown(description='Metric:', layout=Layout(width='360px'), options=('WCI', 'W…