# In [None]:  (Jupyter cell prompt left over from the notebook export)
import pandas as pd
import numpy as np

# Racial-diversity indices (Shannon entropy and Simpson) per row of a table
# holding male/female head-counts for each race group.

# 1. Pick the race columns you want in the index  ──────────
race_groups = {
    "White": ["WA_MALE", "WA_FEMALE"],
    "Black": ["BA_MALE", "BA_FEMALE"],
    "Indigenous": ["IA_MALE", "IA_FEMALE"],   # American Indian / Alaska Native
    "Asian": ["AA_MALE", "AA_FEMALE"],
    "NativeHawaiian": ["NA_MALE", "NA_FEMALE"],
    "TwoOrMore": ["TOM_MALE", "TOM_FEMALE"],  # Two-or-more races
}

total_pop_col = "TOTAL_RACE_POP"
pct_cols = [f"{g}_PCT" for g in race_groups]


def add_racial_diversity(df, groups=None, total_col=total_pop_col):
    """Add per-group totals, shares and two diversity indices to *df*.

    The frame is modified in place and also returned.  Columns written:

    * ``<group>_TOTAL`` - row-wise sum of the group's source columns.
    * *total_col*       - sum of all ``<group>_TOTAL`` columns.
    * ``<group>_PCT``   - group share of *total_col* (NaN when the total
      is not strictly positive).
    * ``racial_diversity_shannon`` - ``-sum(p * ln p)`` with ``0 * ln 0``
      taken as 0.
    * ``racial_diversity_simpson`` - ``1 - sum(p ** 2)``.

    Both indices are NaN for rows whose shares are not all finite and
    non-negative (e.g. zero total population, or negative "counts" coming
    from standardised inputs); such rows have no meaningful diversity.

    Args:
        df: DataFrame containing every column listed in *groups*.
        groups: Mapping ``group name -> list of source columns``.
            Defaults to the module-level ``race_groups``.
        total_col: Name of the column that receives the overall total.

    Returns:
        The same DataFrame object, with the columns above added.

    Raises:
        KeyError: If a source column listed in *groups* is missing.
    """
    groups = race_groups if groups is None else groups
    total_names = [f"{g}_TOTAL" for g in groups]
    share_names = [f"{g}_PCT" for g in groups]

    # 2. Aggregate counts for each group
    for name, cols in groups.items():
        df[f"{name}_TOTAL"] = df[cols].sum(axis=1)

    # 3. Convert to proportions.  Non-positive totals are masked to NaN so
    #    the division never produces inf or a misleading 0/0.
    df[total_col] = df[total_names].sum(axis=1)
    denom = df[total_col].where(df[total_col] > 0)
    for name in groups:
        df[f"{name}_PCT"] = df[f"{name}_TOTAL"] / denom

    shares = df[share_names].to_numpy(dtype=float)
    # A row is usable only if every share is a real, non-negative number.
    # Checking explicitly matters: pandas' row sums skip NaN, which would
    # otherwise report Simpson == 1 (maximum diversity) for an empty row.
    valid = np.isfinite(shares).all(axis=1) & (shares >= 0).all(axis=1)

    # 4-A. Shannon diversity (entropy).  Zero (and invalid) shares are
    #      replaced by 1 before the log so they contribute exactly 0,
    #      without the bias an additive epsilon would introduce.
    safe = np.where(shares > 0, shares, 1.0)
    entropy = -(safe * np.log(safe)).sum(axis=1) + 0.0  # "+ 0.0" turns -0.0 into 0.0

    # 4-B. Simpson diversity (1 – Σ p²)
    simpson = 1.0 - (shares ** 2).sum(axis=1)

    df["racial_diversity_shannon"] = np.where(valid, entropy, np.nan)
    df["racial_diversity_simpson"] = np.where(valid, simpson, np.nan)
    return df


# In a notebook or when run as a script __name__ is "__main__", so the
# load / compute / save pipeline still executes and `df` stays a global.
if __name__ == "__main__":
    # 5. Load
    df = pd.read_csv("imputed_and_standardised.csv")

    # 6. Compute
    add_racial_diversity(df)

    # 7. Save (optional)
    df.to_csv("imputed_and_standardised_with_diversity.csv", index=False)


# Cell output (truncated warning from pandas):
#   result = func(self.values, **kwargs)


: 