In [1]:
import pandas as pd
from great_tables import GT, md, vals, nanoplot_options, style, loc

import warnings
# Silence every warning of category UserWarning for the remainder of the session.
warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
# Location of the TidyTuesday (2025-11-11) WHO tuberculosis CSV
tb_csv_url = (
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/"
    "main/data/2025/2025-11-11/who_tb_data.csv"
)
# Download the file and parse it into a DataFrame
who_tb_data = pd.read_csv(tb_csv_url)

In [3]:
# Print a concise summary of the frame: column names, non-null counts and dtypes
who_tb_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5117 entries, 0 to 5116
Data columns (total 18 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   country                5117 non-null   object 
 1   g_whoregion            5117 non-null   object 
 2   iso_numeric            5117 non-null   int64  
 3   iso2                   5117 non-null   object 
 4   iso3                   5117 non-null   object 
 5   year                   5117 non-null   int64  
 6   c_cdr                  4824 non-null   float64
 7   c_newinc_100k          4932 non-null   float64
 8   cfr                    4989 non-null   float64
 9   e_inc_100k             5117 non-null   float64
 10  e_inc_num              5117 non-null   int64  
 11  e_mort_100k            5093 non-null   float64
 12  e_mort_exc_tbhiv_100k  5093 non-null   float64
 13  e_mort_exc_tbhiv_num   5093 non-null   float64
 14  e_mort_num             5093 non-null   float64
 15  e_mo

In [4]:
# Preview the first two rows to see how the columns are populated
who_tb_data.head(2)

Unnamed: 0,country,g_whoregion,iso_numeric,iso2,iso3,year,c_cdr,c_newinc_100k,cfr,e_inc_100k,e_inc_num,e_mort_100k,e_mort_exc_tbhiv_100k,e_mort_exc_tbhiv_num,e_mort_num,e_mort_tbhiv_100k,e_mort_tbhiv_num,e_pop_num
0,Afghanistan,Eastern Mediterranean,4,AF,AFG,2000,19.0,35.0,0.37,190.0,38000,68.0,68.0,14000.0,14000.0,0.17,34.0,20130323
1,Afghanistan,Eastern Mediterranean,4,AF,AFG,2001,26.0,50.0,0.35,189.0,38000,63.0,63.0,13000.0,13000.0,0.3,61.0,20284311


In [5]:
# Keep only the region, the year and the three per-100k estimates used below
tb_measure_cols = [
    "g_whoregion",
    "year",
    "e_inc_100k",
    "e_mort_exc_tbhiv_100k",
    "e_mort_tbhiv_100k",
]
sel_col = who_tb_data[tb_measure_cols]

# 2000 vs 2023 data preparation

In [6]:
# Restrict the data to the two comparison years
years = [2000, 2023]
in_selected_years = sel_col["year"].isin(years)
df_years1 = sel_col[in_selected_years]

In [7]:
# Unweighted mean of every measure per WHO region and year
df_years2 = df_years1.groupby(["g_whoregion", "year"]).mean().reset_index()

# Relative change between consecutive rows of each region. With only the
# years 2000 and 2023 kept upstream this is the 2000 -> 2023 change, not a
# year-over-year figure; the first row of each region has no predecessor and
# therefore gets NaN. The "_YoY_change" suffix is kept because later cells
# select the columns by that name.
for measure in ["e_inc_100k", "e_mort_exc_tbhiv_100k", "e_mort_tbhiv_100k"]:
    df_years2[f"{measure}_YoY_change"] = (
        df_years2.groupby("g_whoregion")[measure].pct_change(fill_method=None)
    )

In [8]:
# Put every measure directly in front of its own change column
years_col_order = [
    "g_whoregion",
    "year",
    "e_inc_100k",
    "e_inc_100k_YoY_change",
    "e_mort_exc_tbhiv_100k",
    "e_mort_exc_tbhiv_100k_YoY_change",
    "e_mort_tbhiv_100k",
    "e_mort_tbhiv_100k_YoY_change",
]
df_years3 = df_years2[years_col_order]

In [9]:
# Spread the years into columns: one row per region, one (measure, year) column pair each
per_region_year = df_years3.groupby(["g_whoregion", "year"]).mean()
df_years4 = per_region_year.unstack()
# Remove every column that contains a missing value (the change columns of the
# first year hold only NaN because pct_change has no earlier row to compare with)
df_years5 = df_years4.dropna(axis="columns")

In [10]:
# Keep the two-level (measure, year) column labels as a small lookup frame
df_reset = df_years5.columns.to_frame(index=False).reset_index(drop=True)

# Flatten each (measure, year) pair into a single "measure_year" name.
# The names are applied to a copy so that df_years5 keeps its two-level
# columns; renaming it in place made this cell fail to re-run cleanly,
# because joining an already-flat name splits it into single characters.
flat_names = ['_'.join(map(str, col)) for col in df_years5.columns]
df_years6 = df_years5.copy()
df_years6.columns = flat_names

# Move the region from the index back into a regular column
df_years6 = df_years6.reset_index()

In [11]:
# Add one blank placeholder column per measure for the direction arrow
for blank_col in ("e_inc_100k_sym", "e_mort_exc_tbhiv_100k_sym", "e_mort_tbhiv_100k_sym"):
    df_years6[blank_col] = ""

# Function to add "▲" or "▼"
def add_symbol(val):
    """Return an arrow describing the sign of a change value.

    Parameters
    ----------
    val : number
        The change to classify.

    Returns
    -------
    str
        "▲" when ``val`` is positive, "▼" when it is negative, and an empty
        string when it is zero or NaN, so that "no change" and "unknown" are
        not reported as a decrease.
    """
    if val > 0:
        return "▲"
    if val < 0:
        return "▼"
    # Zero and NaN fail both comparisons above.
    return ""
    
# Fill each arrow column from the matching 2023 change column
symbol_sources = {
    "e_inc_100k_sym": "e_inc_100k_YoY_change_2023",
    "e_mort_exc_tbhiv_100k_sym": "e_mort_exc_tbhiv_100k_YoY_change_2023",
    "e_mort_tbhiv_100k_sym": "e_mort_tbhiv_100k_YoY_change_2023",
}
for symbol_col, change_col in symbol_sources.items():
    df_years6[symbol_col] = df_years6[change_col].apply(add_symbol)

In [12]:
# Per measure: value in 2000, value in 2023, arrow, relative change
summary_col_order = [
    "g_whoregion",
    "e_inc_100k_2000",
    "e_inc_100k_2023",
    "e_inc_100k_sym",
    "e_inc_100k_YoY_change_2023",
    "e_mort_exc_tbhiv_100k_2000",
    "e_mort_exc_tbhiv_100k_2023",
    "e_mort_exc_tbhiv_100k_sym",
    "e_mort_exc_tbhiv_100k_YoY_change_2023",
    "e_mort_tbhiv_100k_2000",
    "e_mort_tbhiv_100k_2023",
    "e_mort_tbhiv_100k_sym",
    "e_mort_tbhiv_100k_YoY_change_2023",
]
df_years7 = df_years6[summary_col_order]

# Nanoplot data preparation

In [13]:
# Mean of the numeric columns for every region and year, with the group keys kept as columns
df_lists1 = sel_col.groupby(["g_whoregion", "year"], as_index=False).mean(numeric_only=True)

In [14]:
# Gather each region's yearly means into one list per measure; the year
# column is removed first because it is not plotted
df_lists2 = (
    df_lists1
    .drop(columns="year")
    .groupby("g_whoregion")
    .agg(list)
    .reset_index()
)

In [15]:
# Drop region column first so that only the list-valued measure columns are
# converted; joining the characters of a region name would mangle it.
df_lists3 = df_lists2.drop("g_whoregion", axis=1)

# Convert list columns to space separated strings.
# The conversion is written into df_lists3 only, so df_lists2 keeps its lists
# and this cell gives the same result every time it is run.
for col in df_lists3.columns:
    df_lists3[col] = df_lists3[col].apply(lambda x: " ".join(str(v) for v in x))

# Concatenation

In [16]:
# Put the trend strings next to the summary columns. Rows are paired by index
# label, so both frames are expected to list the regions in the same order
# (TODO confirm whenever the upstream filters change).
frames_side_by_side = [df_years7, df_lists3]
df = pd.concat(frames_side_by_side, axis="columns")

In [17]:
# Per measure: 2000 value, 2023 value, arrow, change, then the trend string
final_col_order = [
    "g_whoregion",
    "e_inc_100k_2000",
    "e_inc_100k_2023",
    "e_inc_100k_sym",
    "e_inc_100k_YoY_change_2023",
    "e_inc_100k",
    "e_mort_exc_tbhiv_100k_2000",
    "e_mort_exc_tbhiv_100k_2023",
    "e_mort_exc_tbhiv_100k_sym",
    "e_mort_exc_tbhiv_100k_YoY_change_2023",
    "e_mort_exc_tbhiv_100k",
    "e_mort_tbhiv_100k_2000",
    "e_mort_tbhiv_100k_2023",
    "e_mort_tbhiv_100k_sym",
    "e_mort_tbhiv_100k_YoY_change_2023",
    "e_mort_tbhiv_100k",
]
df = df[final_col_order]

# Final df

In [18]:
# Split dataframes by type: the region column (position 0) plus the five
# columns belonging to one measure. Explicit copies are taken because the
# frames receive a new column and new labels afterwards; writing to a bare
# slice of df is ambiguous and raises SettingWithCopyWarning.
df1 = df.iloc[:, :6].copy()
df2 = df.iloc[:, [0, *range(6, 11)]].copy()
df3 = df.iloc[:, [0, *range(11, 16)]].copy()

In [19]:
# Tag every frame with the name of its measure, then give all three frames
# the same column labels so they can be stacked
shared_columns = ["region", "y_2000", "y_2023", "sym", "change", "trend", "type"]
labelled_frames = [
    (df1, "Incidence per 100 000 population"),
    (df2, "Non-HIV mortality per 100 000 population"),
    (df3, "HIV mortality per 100 000 population"),
]
for frame, measure_label in labelled_frames:
    frame["type"] = measure_label
    frame.columns = shared_columns

In [20]:
# Stack the three per-measure frames into one long frame with a fresh 0..n-1 index
frames_to_stack = [df1, df2, df3]
result = pd.concat(frames_to_stack, ignore_index=True)

# Great tables

In [21]:
# Expose the row position as a regular "index" column so the striping rules
# below can test it. A separate name is used instead of overwriting `result`:
# resetting `result` in place stacked another index column on every re-run.
result_indexed = result.reset_index()


def _stripe(fill_color):
    """Return the background fill plus zero-width borders for one stripe colour."""
    return [
        style.fill(color=fill_color),
        # The side list used to name "top" twice; the duplicate is dropped.
        # NOTE(review): if "right" was meant instead, add it here — that would
        # also hide the vertical stub border.
        style.borders(sides=["top", "bottom", "left"], weight="0px"),
    ]


def _is_even(D):
    """Select the rows whose position is even."""
    return D["index"] % 2 == 0


def _is_odd(D):
    """Select the rows whose position is odd."""
    return D["index"] % 2 != 0


table = (
    GT(result_indexed)
    .tab_header(
        title=md("Global tuberculosis (TB) burden estimates")
    )
    # Regions become row labels, grouped under the measure they belong to
    .tab_stub(
        rowname_col="region", groupname_col="type"
    )
    .cols_label(
        index="Index",
        y_2000="2000",
        y_2023="2023",
        change="Change",
        sym="(%)",
        trend="Trend (2000-2023)",
    )
    .fmt_number(
        columns=["y_2000", "y_2023"], decimals=1, use_seps=False
    )
    .fmt_percent(
        "change", decimals=0
    )
    # Show the arrow directly after the percentage it describes
    .cols_move(
        columns="sym",
        after="change"
    )
    .cols_align(
        align="center"
    )
    .cols_align(
        align="left", columns="sym"
    )
    .cols_align(
        align="right", columns="change"
    )
    # The position column is only needed for striping, not for display
    .cols_hide(
        columns="index"
    )
    .fmt_nanoplot(
        columns="trend", plot_type="bar",
        options=nanoplot_options(
            data_bar_fill_color="#093362",
        ),
    )
    .tab_source_note(
        source_note="Data: TidyTuesday 2025-11-11 (WHO TB Burden Data: Incidence, Mortality, and Population) | Design: Bernadett Piros"
    )
    .tab_style(
        style=style.text(size="x-small", align="center"),
        locations=loc.source_notes()
    )
    .tab_style(
        style=style.text(size="x-large", color="#093362", weight="bold"),
        locations=loc.title()
    )
    .tab_style(
        style=style.text(color="#093362", weight="bold"),
        locations=loc.column_labels()
    )
    .tab_options(
        row_group_font_weight="bold",
        row_group_background_color="#E0C954"
    )
    # Alternating stripes: darker on even positions, lighter on odd ones,
    # applied to the row labels and to the body cells alike
    .tab_style(
        style=_stripe("#FFEB7E"),  # darker color
        locations=loc.stub(rows=_is_even)
    )
    .tab_style(
        style=_stripe("#FFF09A"),  # lighter color
        locations=loc.stub(rows=_is_odd)
    )
    .tab_style(
        style=_stripe("#FFEB7E"),  # darker color
        locations=loc.body(rows=_is_even)
    )
    .tab_style(
        style=_stripe("#FFF09A"),  # lighter color
        locations=loc.body(rows=_is_odd)
    )
    # Rising values in red, falling values in green
    .tab_style(
        style=style.text(color="#FD4357"),
        locations=loc.body(rows=lambda D: D["sym"] == "▲", columns="sym")
    )
    .tab_style(
        style=style.text(color="#049C3F"),
        locations=loc.body(rows=lambda D: D["sym"] == "▼", columns="sym")
    )
    .tab_style(
        style=style.fill(color="#BF9106"),
        locations=loc.column_labels()
    )
    .tab_style(
        style=style.fill(color="#BF9106"),
        locations=loc.stubhead()
    )
    .tab_style(
        style=style.fill(color="#BF9106"),
        locations=loc.title()
    )
    .tab_style(
        style=style.fill(color="#FDF5DC"),
        locations=loc.source_notes()
    )
    .opt_vertical_padding(
        scale=0.5
    )
    .tab_options(
        row_group_border_top_color="#E0C954",
        row_group_border_bottom_color="#E0C954",
        table_border_top_color="#BF9106",
        table_border_bottom_color="#FDF5DC",
        column_labels_border_top_color="#E0C954",
        stub_border_color="#E0C954",
        table_body_border_bottom_color="#E0C954",
    )
)
table

Global tuberculosis (TB) burden estimates,Global tuberculosis (TB) burden estimates,Global tuberculosis (TB) burden estimates,Global tuberculosis (TB) burden estimates,Global tuberculosis (TB) burden estimates,Global tuberculosis (TB) burden estimates
Unnamed: 0_level_1,2000,2023,Change,(%),Trend (2000-2023)
Incidence per 100 000 population,Incidence per 100 000 population,Incidence per 100 000 population,Incidence per 100 000 population,Incidence per 100 000 population,Incidence per 100 000 population
Africa,327.1,204.7,−37%,▼,3540327334341352354354351343333330322306293281268255243234225218211208207205
Americas,39.3,30.8,−22%,▼,39.3039.338.037.037.637.036.335.934.934.933.932.231.232.029.828.029.028.829.028.627.925.826.628.230.8
Eastern Mediterranean,99.2,61.5,−38%,▼,99.2099.298.189.388.184.785.283.783.885.685.786.781.780.176.670.571.369.365.864.564.161.862.463.261.5
Europe,52.1,23.2,−55%,▼,52.1052.152.151.051.349.548.546.545.144.041.740.839.537.036.533.531.629.427.625.925.423.821.923.623.2
South-East Asia,292.6,263.7,−10%,▼,3080293289308305300298292291286285283277271265261256255245242241238243256264
Western Pacific,137.2,147.7,8%,▲,1480137134140137136135136134133132132129127129129135135132148133124123124148
Non-HIV mortality per 100 000 population,Non-HIV mortality per 100 000 population,Non-HIV mortality per 100 000 population,Non-HIV mortality per 100 000 population,Non-HIV mortality per 100 000 population,Non-HIV mortality per 100 000 population
Africa,39.9,24.1,−40%,▼,43.3039.942.242.543.342.642.439.338.636.035.535.035.234.134.233.432.932.031.630.129.331.029.625.924.1
Americas,4.3,2.6,−40%,▼,4.2904.294.073.934.033.923.613.503.423.313.093.072.922.852.792.272.612.522.422.332.342.362.622.542.57


# Save html without selenium
html = table.as_raw_html()
# The markup contains non-ASCII glyphs (the "▲"/"▼" arrows), so the encoding
# is fixed to UTF-8 instead of relying on the platform default, which can
# fail or garble the characters.
# NOTE(review): the file name says 2023 while the dataset is dated 2025-11-11 —
# confirm before renaming.
with open("2023-11-11.html", "w", encoding="utf-8") as f:
    f.write(html)