# 📊 MPIToolbox Quickstart Guide
This notebook replicates a typical Stata `.do` file for MPI estimation in Python, using the `MPIToolbox` class from `mpitb.core` and the result helpers from `mpitb.utils`.

## Step 1: Load Required Packages

In [None]:
import pandas as pd
from mpitb.core import MPIToolbox
from mpitb.utils import (
    extract_model_summary,
    extract_and_sort_by_time,
    extract_and_pivot_all_stats_by_time,
    extract_and_pivot_by_group,
    pivot_cot_summary,
    extract_cot_summary
)


## Step 2: Load Data

In [None]:
# Location of the Stata dataset; point this at your local or project copy.
path = 'mpitb/syn_cdta.dta'
df = pd.read_stata(path)
# Keep only the rows with t == 1 so the quickstart works on a single wave.
df = df.loc[df['t'].eq(1)].copy()

## Step 3: Initialize MPIToolbox and Set Survey Design

In [None]:
# Create the toolbox object that every later cell calls into.
mpi = MPIToolbox()
# Declare the survey design by name: PSU, sampling weight and strata
# (presumably column names of the dataset loaded above — confirm).
mpi.svyset(psu="psu", weight="weight", strata="stratum")

## Step 4: Define MPI Specification

In [None]:
# Each dimension is passed as a (list of indicator names, short label) pair.
health = (['d_cm', 'd_nutr'], 'hl')
education = (['d_satt', 'd_educ'], 'ed')
living_standards = (
    ['d_elct', 'd_wtr', 'd_sani', 'd_hsg', 'd_ckfl', 'd_asst'],
    'ls',
)

# Register the three-dimension specification under the name "trial01".
mpi.set(
    name="trial01",
    description="Preferred specification",
    dimensions=[health, education, living_standards],
    replace=True,
)

## Step 5: Estimate MPI (National, Region, Area)

In [None]:
# Options for the first estimation run of "trial01", gathered in one mapping
# and handed to mpi.est in the same keyword order as a direct call.
est_options = {
    "df": df,
    "name": "trial01",
    "klist": [20, 33, 50],
    "weights": "equal",
    "measures": ["M0", "H", "A"],
    "indmeasures": ["hdk", "actb", "pctb"],
    "aux": ["hd"],
    "svy": True,
    "over": ["region", "area"],  # subgroup breakdowns named in the step heading
    "lframe": "trial01_combined",
    "replace": True,
}
results = mpi.est(**est_options)

## Step 6: View National Model Summary

In [None]:
# Tabulate the estimation output, then keep the rows whose "loa" value is
# "nat" (the national results, per the step heading) and preview them.
df_results = pd.DataFrame(results)
national_rows = df_results["loa"].eq("nat")
df_nat = df_results.loc[national_rows]
df_nat.head()

## Step 7: Compare Core Measures Across Models (optional)

In [None]:
# Build the model summary from df_results and show it. Only "trial01" has been
# estimated at this point; the multi-model comparison comes later in the notebook.
summary = extract_model_summary(df_results)
display(summary)

## Step 8: Regional Summary at k=33

In [None]:
# Regional view of H and M0 at k = 33, read from the "trial01_combined" frame.
extract_and_pivot_by_group(
    mpi.results,
    "trial01_combined",
    measure=["H", "M0"],
    group_level="region",
    k=33,
)

## Step 9: Output Similar to the Stata Tutorial at k=33

In [None]:
# Table of the "hd" and "hdk" rows per indicator, split by area, at k = 33.
headcount_measures = ["hd", "hdk"]
hd_tab = df_results[
    df_results["measure"].isin(headcount_measures)
    & (df_results["loa"] == "area")
    & (df_results["k"] == 33)
].copy()

# Attach readable area labels. A code missing from the mapping keeps its raw
# value instead of turning into NaN, because pivot_table drops NaN column keys.
# NOTE(review): 0 = rural / 1 = urban is assumed — confirm against the data.
area_labels = {0: "rural", 1: "urban"}
hd_tab["area"] = hd_tab["subg"].map(lambda code: area_labels.get(code, code))

# Pivot on the labelled `area` column (not the raw `subg` codes) so that the
# column header actually shows rural/urban.
pivot = hd_tab.pivot_table(
    index="indicator",
    columns=["area", "measure"],
    values="b",
    aggfunc="first",
)

# Order the column MultiIndex so "hd" comes before "hdk" within each area.
pivot = pivot.sort_index(axis=1, level=[0, 1])

# Display formatted output
print(pivot.round(4))

## 📘 Example: Compare Multiple Models with Different Weights

In [None]:
# Alternative dimension-level weights for "trial01", keyed by the name each
# scheme is stored under; registered in the order listed.
dimension_schemes = {
    "educ50": [0.25, 0.5, 0.25],
    "ls50": [0.25, 0.25, 0.5],
    "health70": [0.7, 0.15, 0.15],
}
for scheme_name, scheme_weights in dimension_schemes.items():
    mpi.setwgts("trial01", scheme_name, dimw=scheme_weights, store=True)

# One indicator-level scheme: ten weights of 0.1 each.
mpi.setwgts("trial01", "ind_equal", indw=[0.1] * 10, store=True)

In [None]:
def _estimate_with_weights(scheme):
    """Estimate "trial01" at k = 33 using the stored weighting scheme `scheme`.

    The result frame is named after the scheme itself.
    """
    return mpi.est(
        df=df,
        name="trial01",
        klist=[33],
        weights=scheme,
        measures=["M0", "H", "A"],
        indmeasures=["hdk", "actb", "pctb"],
        aux=["hd"],
        svy=True,
        lframe=scheme,
        replace=True,
    )


# Run every stored scheme in turn and pool the returned rows into one table.
model_names = ["educ50", "ls50", "health70", "ind_equal"]
all_results = []
for model in model_names:
    res = _estimate_with_weights(model)
    all_results += res

results_models = pd.DataFrame(all_results)

In [None]:
# Compare core MPI values across the weighting schemes pooled in results_models.
extract_model_summary(results_models)

In [None]:
# extract_model_summary accepts several result tables at once, so further
# models can be added to the comparison. Here: "trial02", whose "ls" dimension
# lists no electricity indicator.
trial02_dimensions = [
    (["d_cm", "d_nutr"], "hl"),
    (["d_satt", "d_educ"], "ed"),
    (["d_wtr", "d_sani", "d_hsg", "d_ckfl", "d_asst"], "ls"),
]
mpi.set(
    name="trial02",
    description="w/o electricity",
    dimensions=trial02_dimensions,
    replace=True,
)

# Estimate the new specification at k = 33 with equal weights.
trial02_options = {
    "df": df,
    "name": "trial02",
    "klist": [33],
    "weights": "equal",
    "measures": ["M0", "H", "A"],
    "indmeasures": ["hdk", "actb", "pctb"],
    "aux": ["hd"],
    "svy": True,
    "lframe": "trial02",
    "replace": True,
}
results_trial02 = mpi.est(**trial02_options)
trial02_df = pd.DataFrame(results_trial02)

# Summary across the weighting-scheme models plus "trial02".
summary = extract_model_summary(results_models, trial02_df)

In [None]:
# Print the most recently built model summary.
print(summary)

## 📘 Example: Estimate Change Over Time

In [None]:
# Use full panel data: reload the file without the single-wave filter.
df = pd.read_stata(path)

# Show the number of rows per value of t. It is printed explicitly because a
# notebook cell only displays its last expression, and this one is not last.
print(df['t'].value_counts())

# (Re)define "trial02" for the time-trend example.
mpi.set(
    name="trial02",
    description="Time trend example",
    dimensions=[
        (['d_cm', 'd_nutr'], 'hl'),
        (['d_satt', 'd_educ'], 'ed'),
        (['d_wtr', 'd_sani', 'd_hsg', 'd_ckfl', 'd_asst'], 'ls')
    ],
    replace=True
)

In [None]:
# Overall results for "trial02" on df, broken down by region and by t.
time_est_options = {
    "df": df,
    "name": "trial02",
    "klist": [1, 33, 50],
    "weights": "equal",
    "measures": ["M0", "H", "A"],
    "indmeasures": ["hdk", "actb", "pctb"],
    "aux": ["hd"],
    "svy": True,
    "over": ["region", "t"],  # "t" is what adds the time breakdown
    "lframe": "myresults",
    "replace": True,
}
results = mpi.est(**time_est_options)

In [None]:
# View the "myresults" frame; the measure and k arguments narrow what is shown,
# and several measures can be requested at once.
measures_to_view = ["H", "A", "M0"]
cutoffs_to_view = [1, 33, 50]
extract_and_sort_by_time(
    mpi.results,
    frame_name="myresults",
    measure=measures_to_view,
    k=cutoffs_to_view,
    include_nat=True,
)

In [None]:
# Wide table for H, A and M0 at k = 1, 33 and 50 under wgts="equal",
# read from the "myresults" frame.
wide_options = {
    "results_dict": mpi.results,
    "frame_name": "myresults",
    "measure": ["H", "A", "M0"],
    "wgts": "equal",
    "k": [1, 33, 50],
}
df_wide = extract_and_pivot_all_stats_by_time(**wide_options)

df_wide

In [None]:
# Change-over-time analysis, with "t" as the year variable.
# NOTE(review): this runs on "trial01", while the other cells of this example
# estimate "trial02" — confirm that is intended.
cot_options = {
    "df": df,
    "name": "trial01",
    "yearvar": "t",
    "klist": [33, 50],
    "cotmeasures": ["M0", "H", "A"],
    "wgts": "equal",  # custom weight schemes can be passed here too
    "cotframe": "mycot",
    "replace": True,
    "raw": True,
    "ann": True,
    "total": True,
    "svy": True,
}
cot_results = mpi.est_cot(**cot_options)



In [None]:
# Tabulate the value returned by mpi.est_cot and display it.
cot_df = pd.DataFrame(cot_results)
cot_df

In [None]:
# Core change-over-time results from the "mycot" frame at k = 33 and k = 50.
cot_cutoffs = [33, 50]
extract_cot_summary(mpi.results, frame_name="mycot", k=cot_cutoffs)

In [None]:
# Pivoted view of the change-over-time summary for H and M0, with standard errors.
# k is limited to the cutoffs that est_cot was run with (klist=[33, 50]);
# k = 20 was never estimated into the "mycot" frame.
pivot_cot_summary(
    results_dict=mpi.results,
    frame_name="mycot",
    measure=["H", "M0"],
    k=[33, 50],
    index=["spec"],
    include_se=True
)