# Medicaid Data Viz Fun

HHS released a Medicaid Provider Spending [dataset](https://opendata.hhs.gov/datasets/medicaid-provider-spending/) in Feb. 2026 that covers Jan 2018 - Dec 2024.

The dataset references National Provider Identifiers for billing and servicing info. Those IDs can be:
- Looked up individually [here](https://npiregistry.cms.hhs.gov/search)
- Downloaded [here](https://download.cms.gov/nppes/NPI_Files.html)

While exploring the Texas payments in this dataset, I decided to build a "missile map" style visualization that shows the payments over time. Some of the data wrangling is covered [here](https://thatorjohn.hashnode.dev/medicaid-data-fun), but not all of it: the CSV used in this notebook was created after I wrote the blog post.

In [None]:
%pip install pandas plotly

In [None]:
import ipywidgets as widgets
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio

from IPython.display import display

In [None]:
def curved_arc(lon1, lat1, lon2, lat2, num_points=40, curvature=0.3):
    """Return sample points along a bowed path between two lon/lat positions.

    The path is the straight segment from ``(lon1, lat1)`` to
    ``(lon2, lat2)`` pushed sideways by half a sine wave, so it meets both
    endpoints (up to floating-point rounding) and bulges most at the
    midpoint. Coordinates are treated as plain planar x/y values; no
    great-circle math is involved.

    Args:
        lon1, lat1: Start point.
        lon2, lat2: End point.
        num_points: Number of samples along the path, endpoints included.
        curvature: Peak sideways displacement as a fraction of the
            straight-line distance between the endpoints. Its sign selects
            the side the arc bows toward; 0 yields a straight line.

    Returns:
        Tuple ``(lons, lats)`` of NumPy arrays, each ``num_points`` long.
    """
    t = np.linspace(0, 1, num_points)

    dx = lon2 - lon1
    dy = lat2 - lat1

    # The sideways direction is the unit vector (-dy, dx) / norm and the
    # displacement magnitude is -curvature * norm * sin(pi * t). The norm
    # cancels, so the offset is simply (dy, -dx) * curvature * sin(pi * t).
    # Skipping the division also keeps coincident endpoints (norm == 0)
    # from producing NaN coordinates.
    bulge = curvature * np.sin(np.pi * t)

    lons = lon1 + dx * t + dy * bulge
    lats = lat1 + dy * t - dx * bulge

    return lons, lats

In [None]:
# Default renderer for any fig.show() call that does not name one.
pio.renderers.default = "notebook_connected"
# pio.renderers.default = "notebook"

data_url = "https://raw.githubusercontent.com/ThatOrJohn/medicaid-missile-map/refs/heads/main/zip3_cumulative_centroids.csv"

# Load the per-month rows and turn the month column into real timestamps
# so it can be sorted and compared.
df = pd.read_csv(data_url)
df["month"] = pd.to_datetime(df["month"])

# Marker size grows with the square root of the cumulative amount relative
# to the largest value in the file: 5 at zero, 45 at the maximum.
max_cum = df["cumulative"].max()
df["size"] = (df["cumulative"] / max_cum).pow(0.5) * 40 + 5

# Washington DC coordinates (origin of every arc)
dc_lat = 38.8951
dc_lon = -77.0364

# Chronological list of distinct months; one animation segment per entry.
months = sorted(df["month"].unique())
frames = []

# Static DC marker that is reused in every frame
base_dc = go.Scattergeo(
    lat=[dc_lat],
    lon=[dc_lon],
    mode="markers+text",
    text=["Washington DC"],
    textposition="top center",
    marker={"size": 16, "color": "yellow"},
    showlegend=False,
)

# Number of animation frames generated per month (more = smoother arcs)
steps_per_month = 15

# Number of samples along each DC -> ZIP3 arc. The original assigned this
# value but never handed it to curved_arc, so the function default was
# silently used instead; it is now passed explicitly.
num_points = 30

for month in months:
    # Rows up to and including this month feed the growing 💵 layer.
    # NOTE(review): this keeps every earlier row, so a ZIP3 that appears in
    # several months gets several stacked markers — confirm whether only
    # the latest row per ZIP3 was intended.
    df_until = df[df["month"] <= month]
    # Only rows for the current month get an animated arc.
    df_current = df[df["month"] == month]

    # Arc geometry depends only on the month, not on the animation step,
    # so compute each arc once here instead of once per step.
    arcs = [
        curved_arc(dc_lon, dc_lat, lon, lat, num_points=num_points)
        for lon, lat in zip(df_current["lon"], df_current["lat"])
    ]

    # The 💵 layer is likewise identical for every step of the month.
    # One trace object is shared across the month's frames, the same way
    # base_dc is shared across all frames.
    money_layer = go.Scattergeo(
        lon=df_until["lon"],
        lat=df_until["lat"],
        mode="text",
        text=["💵"] * len(df_until),
        textfont=dict(size=df_until["size"]),
        hovertext=df_until.apply(lambda r: f"ZIP3 {r.zip3}<br>${r.cumulative:,.0f}", axis=1),
        hoverinfo="text",
        showlegend=False
    )

    for step in range(1, steps_per_month + 1):
        data = [base_dc]  # always include DC

        # Draw the leading part of every arc; the visible fraction grows
        # with the step so the line appears to travel from DC to Texas.
        for lons, lats in arcs:
            cutoff = int(len(lons) * step / steps_per_month)

            data.append(go.Scattergeo(
                lon=lons[:cutoff],
                lat=lats[:cutoff],
                mode="lines",
                line=dict(width=2, color="gold"),
                opacity=0.4,
                showlegend=False
            ))

            # Money-bag emoji rides on the current tip of the arc.
            if cutoff > 1:
                data.append(go.Scattergeo(
                    lon=[lons[cutoff - 1]],
                    lat=[lats[cutoff - 1]],
                    mode="text",
                    text=["💰"],
                    textfont=dict(size=16),
                    showlegend=False
                ))

        data.append(money_layer)

        # Frame names are "<month>_<step>"; the slider targets the last
        # step of each month by this name.
        frames.append(go.Frame(data=data, name=f"{month}_{step}"))

# One slider stop per month. Each stop jumps straight to that month's final
# animation frame (named "<month>_<steps_per_month>") with no transition,
# and is labelled with the first seven characters of the month's string form.
slider_steps = [
    dict(
        args=[
            [f"{month}_{steps_per_month}"],
            dict(frame=dict(duration=0, redraw=True), mode="immediate"),
        ],
        label=str(month)[:7],
        method="animate",
    )
    for month in months
]

sliders = [dict(steps=slider_steps, currentvalue=dict(prefix="Month: "))]

# Initial figure: show the first frame's traces before the animation runs
# and attach the full frame list for playback.
fig = go.Figure(
    data=frames[0].data,
    frames=frames
)

# Layout: dark-themed map window spanning DC and Texas, plus the
# Play/Pause buttons and the month slider.
fig.update_layout(
    geo=dict(
        projection_type="mercator",
        center=dict(lat=32, lon=-95),
        lataxis=dict(range=[22, 46]),
        lonaxis=dict(range=[-109, -66]),
        showland=True,
        landcolor="rgb(20,20,20)",
        bgcolor="black",
        showcountries=True,
        countrycolor="Black",
        showsubunits=True,
        subunitcolor="Blue",
        visible=False,
        showocean=True,
        oceancolor="rgb(10,10,30)"
    ),
    paper_bgcolor="black",
    plot_bgcolor="black",
    height=700,
    width=1100,
    # The arrow was mis-encoded (UTF-8 bytes shown as cp1252); restored.
    title="Federal Medicaid Spending Flow: DC → Texas ZIP3s",
    title_font=dict(size=28, color="white"),
    updatemenus=[{
        "type": "buttons",
        "x": 0.1,
        "y": 0.05,
        "buttons": [
            {
                # Play from the currently displayed frame, 100 ms per frame.
                "label": "Play",
                "method": "animate",
                "args": [None, {"frame": {"duration": 100, "redraw": True}, "fromcurrent": True}],
            },
            {
                # Animating to [None] in immediate mode halts playback.
                "label": "Pause",
                "method": "animate",
                "args": [[None], {"frame": {"duration": 0, "redraw": False}, "mode": "immediate"}],
            }
        ]
    }],
    sliders=sliders
)

# NOTE(review): the explicit renderer here takes precedence over
# pio.renderers.default set earlier in the notebook — confirm "colab" is
# the intended target, or drop the argument to use the default.
fig.show(renderer="colab")