In [4]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# Load data
# NOTE(review): absolute, machine-specific path — this cell only runs where that exact file exists.
vax = pd.read_csv("C:\\Users\\ettod\\Tubingen_exercises\\DataL\\cacertovacc\\who_vax_country.tsv", sep="\t", header=0)

# Subset data
# .copy() detaches the filtered rows from the full table, so the column
# assignments below write to an independent frame rather than to a slice
# (avoids pandas' SettingWithCopyWarning and any ambiguity about what is modified).
vax = vax[vax["ANTIGEN_DESCRIPTION"] == "HPV Vaccination program coverage, first dose, females"].copy()

# Convert COVERAGE to pandas' nullable integer dtype; missing values are kept as <NA>
vax["COVERAGE"] = vax["COVERAGE"].astype("Int64")

# Define WHO countries dictionary: WHO region code -> list of member country names.
# Spellings are the short names that name_recode maps the data's official names onto.
# Israel belongs to the WHO European Region (not EMR) and Lebanon is an EMR member,
# so Israel is listed under "EUR" and Lebanon under "EMR".
who_countries = {
    "AFR": ["Algeria", "Angola", "Benin", "Botswana", "Burkina Faso", "Burundi", "Cameroon", "Cape Verde", "Cabo Verde", "Central African Republic", "Chad", "Comoros", "Ivory Coast", "Democratic Republic of the Congo", "Equatorial Guinea", "Eritrea", "Ethiopia", "Gabon", "Gambia", "Ghana", "Guinea", "Guinea-Bissau", "Kenya", "Lesotho", "Liberia", "Madagascar", "Malawi", "Mali", "Mauritania", "Mauritius", "Mozambique", "Namibia", "Niger", "Nigeria", "Republic of the Congo", "Rwanda", "São Tomé and Príncipe", "Senegal", "Seychelles", "Sierra Leone", "South Africa", "South Sudan", "Eswatini", "Togo", "Uganda", "Tanzania", "Zambia", "Zimbabwe"],
    "AMR": ["Peru", "Paraguay", "Saint Kitts and Nevis", "Antigua and Barbuda", "Argentina", "Bahamas", "Barbados", "Belize", "Bolivia", "Brazil", "Canada", "Chile", "Colombia", "Costa Rica", "Cuba", "Dominica", "Dominican Republic", "Ecuador", "El Salvador", "Grenada", "Guatemala", "Guyana", "Haiti", "Honduras", "Jamaica", "Mexico", "Nicaragua", "Panama", "Saint Lucia", "Saint Vincent and the Grenadines", "Suriname", "Trinidad and Tobago", "the United States of America", "Uruguay", "Venezuela"],
    "SEAR": ["Bangladesh", "Bhutan", "Democratic People's Republic of Korea", "India", "Maldives", "Myanmar", "Nepal", "Sri Lanka", "Thailand", "Timor-Leste"],
    "EUR": ["Albania", "Andorra", "Armenia", "Austria", "Azerbaijan", "Belarus", "Belgium", "Bosnia and Herzegovina", "Bulgaria", "Croatia", "Cyprus", "Czech Republic", "Denmark", "Estonia", "Finland", "France", "Georgia", "Germany", "Greece", "Hungary", "Iceland", "Ireland", "Israel", "Italy", "Kazakhstan", "Kyrgyzstan", "Latvia", "Lithuania", "Luxembourg", "Malta", "Moldova", "Monaco", "Montenegro", "Netherlands", "North Macedonia", "Norway", "Poland", "Portugal", "Romania", "Russia", "San Marino", "Serbia", "Slovakia", "Slovenia", "Spain", "Sweden", "Switzerland", "Tajikistan", "Turkey", "Turkmenistan", "Ukraine", "United Kingdom", "Uzbekistan"],
    "EMR": ["Libya", "Afghanistan", "Bahrain", "Djibouti", "Egypt", "Iran", "Iraq", "Jordan", "Kuwait", "Lebanon", "Oman", "Pakistan", "Qatar", "Saudi Arabia", "Somalia", "Sudan", "Syria", "Tunisia", "United Arab Emirates", "Yemen", "Morocco"],
    "WPR": ["Australia", "Brunei", "Cambodia", "China", "Cook Islands", "Fiji", "Indonesia", "Japan", "Kiribati", "Laos", "Malaysia", "Marshall Islands", "Micronesia", "Mongolia", "Nauru", "New Zealand", "Niue", "Palau", "Papua New Guinea", "Philippines", "Samoa", "Singapore", "Solomon Islands", "South Korea", "Taiwan", "Tonga", "Tuvalu", "Vanuatu", "Vietnam"]
}

# Define name recode dictionary: official name as it appears in the data's NAME
# column -> short name used in who_countries. Any name that is neither listed in
# who_countries nor recoded here gets no REGION from the left join and silently
# drops out of the regional summaries.
name_recode = {
    "Bolivia (Plurinational State of)": "Bolivia",
    "Brunei Darussalam": "Brunei",
    "Côte d'Ivoire": "Ivory Coast",
    "Czechia": "Czech Republic",
    "Lao People's Democratic Republic": "Laos",
    "Republic of Moldova": "Moldova",
    "Russian Federation": "Russia",
    "Sao Tome and Principe": "São Tomé and Príncipe",
    "Türkiye": "Turkey",
    "United States of America": "the United States of America",
    "Micronesia (Federated States of)": "Micronesia",
    "Netherlands (Kingdom of the)": "Netherlands",
    "Republic of Korea": "South Korea",
    "Saint Kitts and Nevis": "Saint Kitts and Nevis",  # identity mapping, kept for compatibility (no effect)
    "United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
    "United Republic of Tanzania": "Tanzania",
    # UN-style official names whose short forms are listed in who_countries.
    # NOTE(review): presumed to be the spellings used in the WHO file — confirm against vax["NAME"].unique().
    "Congo": "Republic of the Congo",
    "Iran (Islamic Republic of)": "Iran",
    "Syrian Arab Republic": "Syria",
    "Venezuela (Bolivarian Republic of)": "Venezuela",
    "Viet Nam": "Vietnam"
}

# Harmonise the official country names with the spellings used in who_countries
recoded_names = vax["NAME"].replace(name_recode)
vax["NAME"] = recoded_names

# Build the NAME -> REGION lookup: one small frame per region, stacked together
region_frames = [
    pd.DataFrame({"NAME": members, "REGION": code})
    for code, members in who_countries.items()
]
who_region_df = pd.concat(region_frames)

# Left join: every vax row is kept; names without a match get REGION = NaN
vax = pd.merge(vax, who_region_df, how="left", on="NAME")

# Replace missing coverage with 0 (from here on, missing values count as 0 % in every statistic)
vax["COVERAGE"] = vax["COVERAGE"].fillna(0)


def _coverage_se(values):
    """Standard error of the mean over the non-missing values; 0 when at most one value is present."""
    n_obs = values.notna().sum()
    if n_obs <= 1:
        return 0
    return values.std(ddof=1) / np.sqrt(n_obs)


# Summarise per region and year: mean coverage and its standard error.
# Rows whose REGION is NaN (unmatched names) are not part of any group.
by_region_year = vax.groupby(["REGION", "YEAR"])
vax_name = by_region_year.agg(
    mean_coverage=("COVERAGE", "mean"),
    se_coverage=("COVERAGE", _coverage_se),
).reset_index()

# Store REGION as a categorical column
vax_name["REGION"] = vax_name["REGION"].astype("category")

# The category labels drive the subplot layout in the plotting cell
regions = vax_name["REGION"].cat.categories

In [5]:


# Create subplots
# The grid is sized from the number of regions (3 panels per row, never fewer
# than the original 2 rows), so an additional region gets its own panel instead
# of add_trace failing on a (row, col) outside a fixed 2x3 grid.
n_cols = 3
n_rows = max(2, (len(regions) + n_cols - 1) // n_cols)
fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=[f"Region: {region}" for region in regions])

for idx, region in enumerate(regions):
    # Fill the grid row by row (plotly rows/cols are 1-based)
    row = (idx // n_cols) + 1
    col = (idx % n_cols) + 1
    region_data = vax_name[vax_name["REGION"] == region]
    country_data = vax[vax["REGION"] == region]

    # Plot individual country lines.
    # One groupby pass replaces re-filtering the frame for every country;
    # sort=False keeps the countries in order of first appearance.
    for country, country_subset in country_data.groupby("NAME", sort=False):
        # Lines connect points in row order, so order each series by year
        country_subset = country_subset.sort_values("YEAR")
        fig.add_trace(go.Scatter(
            x=country_subset["YEAR"],
            y=country_subset["COVERAGE"],
            mode='lines',
            line=dict(color='red', width=0.5),
            opacity=0.3,
            name=country,
            hovertemplate=f'{country}<br>Year: %{{x}}<br>Coverage: %{{y}}%<extra></extra>',
            showlegend=False
        ), row=row, col=col)

    # Ribbon (upper and lower bounds).
    # The invisible upper bound must be added immediately before the lower bound:
    # fill='tonexty' shades the area between a trace and the one added just before it.
    fig.add_trace(go.Scatter(
        x=region_data["YEAR"],
        y=region_data["mean_coverage"] + region_data["se_coverage"],
        mode='lines',
        line=dict(width=0),
        showlegend=False,
        hoverinfo='skip'
    ), row=row, col=col)
    fig.add_trace(go.Scatter(
        x=region_data["YEAR"],
        y=region_data["mean_coverage"] - region_data["se_coverage"],
        mode='lines',
        line=dict(width=0),
        fill='tonexty',
        fillcolor='rgba(211,211,211,0.75)',
        showlegend=False,
        hoverinfo='skip'
    ), row=row, col=col)

    # Mean line (added last so it is drawn on top of the ribbon and country lines)
    fig.add_trace(go.Scatter(
        x=region_data["YEAR"],
        y=region_data["mean_coverage"],
        mode='lines',
        line=dict(color='black', width=1),
        name='Regional Mean',
        hovertemplate='Regional Mean<br>Year: %{x}<br>Coverage: %{y}%<extra></extra>',
        showlegend=False
    ), row=row, col=col)


# Figure-level title and fixed canvas size
fig.update_layout(title="HPV Vaccine Coverage by Region", height=800, width=1200)

# All panels share the same axis titles; the y-axis is pinned to 0-100 %
# with autoranging switched off so no panel rescales beyond that window.
fig.update_xaxes(title_text="Year")
y_axis_settings = dict(title_text="HPV Coverage (%)", range=[0, 100], autorange=False)
fig.update_yaxes(**y_axis_settings)

# Caption explaining the three visual layers, centred just below the plotting area
caption_text = "Shaded lines: individual countries · Ribbon: ±SE · Solid lines: regional mean"
fig.add_annotation(
    text=caption_text,
    xref="paper",
    yref="paper",
    x=0.5,
    y=-0.05,
    showarrow=False,
    font={"size": 12},
)

fig.show()