In [None]:
# Install and read in packages needed to make a demonstration table

# If running in a .py file

# import subprocess
# import sys

# subprocess.check_call([sys.executable, "-m", "pip", "install", "toyplot"])

# import pandas as pd
# import toyplot
# import numpy as np


# If running in a .ipynb file

%pip install toyplot

import numpy as np
import pandas as pd
import toyplot

# Make sure figures appear as intended when using pandas:
# display floating-point values with zero decimal places.

pd.set_option("display.float_format", "{:.0f}".format)

In [None]:
# Load the vulnerable-population data from its CSV file

# NOTE(review): this is an absolute path on one particular machine; point it
# at your own copy of vulnerable.csv before running the notebook.
vulnerable_csv_path = "C:/Users/bestj/OneDrive - Office for National Statistics/Documents/Pandemic Preparedness Toolkit/Basic data vis/Python/vulnerable.csv"

vulnerable = pd.read_csv(vulnerable_csv_path)

In [None]:
# Build the demonstration dataframe: one row per country, one column per year

# The three years that become columns of the table
selected_years = [1997, 2002, 2007]

vuln_for_select_countries_3y = (
    # Keep only the rows for the selected years
    vulnerable.loc[vulnerable["year"].isin(selected_years)]
    # Keep only the columns the table needs
    .filter(items=["country", "continent", "year", "vulnerable_pop"])
    # Go from "long" to "wide": every year becomes its own column
    .pivot_table(values="vulnerable_pop", index=["continent", "country"], columns="year")
    # Turn continent/country back into ordinary columns (no multi-index)
    .reset_index()
    # Keep the first 5 rows listed under each continent
    .groupby("continent")
    .head()
    # Drop the "year" label that pivoting leaves on the column axis
    .rename_axis(None, axis="columns")
    # Nullable integers keep the counts from being shown in scientific notation
    .astype({year: "Int64" for year in selected_years})
)

# Display the finished dataframe

vuln_for_select_countries_3y

In [None]:
# Draw the dataframe as a plain table with no extra styling

# A 700 x 500 canvas to hold the table
canvas = toyplot.Canvas(height=500, width=700)

# Place the dataframe on the canvas as a table
table = canvas.table(data=vuln_for_select_countries_3y)

In [None]:
# Draw the table with a title and a subtitle

# Fresh canvas and table
canvas = toyplot.Canvas(width=700, height=500)
table = canvas.table(data=vuln_for_select_countries_3y)

# Style settings common to both lines of heading text
heading_style = {
    "font-family": "sans-serif",
    "text-anchor": "start",
    "fill": "black",
}

# Title: 16px bold text at x=45, y=20
canvas.text(
    45,
    20,
    "Table 1: Vulnerable populations increase in size over time",
    style={"font-size": "16px", "font-weight": "bold", **heading_style},
)

# Subtitle: 14px regular text at x=45, y=35
canvas.text(
    45,
    35,
    "Size of vulnerable population in select countries in 1997, 2002, and 2007",
    style={"font-size": "14px", **heading_style},
)

In [None]:
# Draw the table with a title, a subtitle, a source note and a footnote

# Fresh canvas and table
canvas = toyplot.Canvas(width=700, height=500)
table = canvas.table(data=vuln_for_select_countries_3y)

# Style settings common to every piece of text added below
annotation_style = {
    "font-family": "sans-serif",
    "text-anchor": "start",
    "fill": "black",
}

# Title: 16px bold text at x=45, y=20
canvas.text(
    45,
    20,
    "Table 1: Vulnerable populations increase in size over time",
    style={"font-size": "16px", "font-weight": "bold", **annotation_style},
)

# Subtitle: 14px regular text at x=45, y=35
canvas.text(
    45,
    35,
    "Size of vulnerable population in select countries in 1997, 2002, and 2007",
    style={"font-size": "14px", **annotation_style},
)

# Caption for source information, at x=390, y=465
canvas.text(
    390,
    465,
    "Source: Pandemic Preparedness Toolkit",
    style={"font-size": "14px", **annotation_style},
)

# Caption used as a regular footnote, at x=445, y=485
canvas.text(
    445,
    485,
    "* Data from 1997 is incomplete",
    style={"font-size": "14px", **annotation_style},
)

In [None]:
# Add a spanner label in the style of R's gt package

# Fresh canvas
canvas = toyplot.Canvas(width=700, height=500)

# Build the table with trows=2
# NOTE(review): presumably this gives two top (header) rows, leaving row 0
# free for the spanner - confirm against the toyplot table documentation.
table = canvas.table(data=vuln_for_select_countries_3y, trows=2)

# Write the label "year" into the top-region cell at row 0, column 3
spanner_cell = table.top.cell[0, 3].merge()
spanner_cell.data = "year"

In [None]:
# Right-align the population counts

# Fresh canvas and table
canvas = toyplot.Canvas(width=700, height=500)
table = canvas.table(data=vuln_for_select_countries_3y)

# Columns 2, 3 and 4 hold the 1997, 2002 and 2007 counts
count_column_positions = [2, 3, 4]
table.cells.column[count_column_positions].align = "right"

In [None]:
# Show the population counts with comma thousands separators

# Work on a copy so the original dataframe stays as it is
vuln_for_select_countries_3y_commas = vuln_for_select_countries_3y.copy()

# Replace each year column that is present with comma-formatted strings
for col in [1997, 2002, 2007]:
    if col not in vuln_for_select_countries_3y_commas.columns:
        continue
    vuln_for_select_countries_3y_commas[col] = vuln_for_select_countries_3y_commas[col].apply(
        "{:,}".format
    )

# Redraw the table from the formatted copy
canvas = toyplot.Canvas(width=700, height=500)
table = canvas.table(data=vuln_for_select_countries_3y_commas)

# The year columns now hold strings, so right-align them by hand
table.cells.column[[2, 3, 4]].align = "right"

In [None]:
# Round the population counts to a chosen number of decimal places

# Work on a copy so the original dataframe stays as it is
vuln_for_select_countries_3y_rounded = vuln_for_select_countries_3y.copy()

# Number of decimal places to keep
decimal_places = 2

# Only touch the year columns that actually exist in the dataframe
year_columns_present = [
    year for year in [1997, 2002, 2007] if year in vuln_for_select_countries_3y_rounded.columns
]

for col in year_columns_present:
    rounded_values = vuln_for_select_countries_3y_rounded[col].round(decimal_places)
    vuln_for_select_countries_3y_rounded[col] = rounded_values

# Redraw the table from the rounded copy
canvas = toyplot.Canvas(width=700, height=500)
table = canvas.table(data=vuln_for_select_countries_3y_rounded)

# Right-align the three year columns
table.cells.column[[2, 3, 4]].align = "right"

In [None]:
# Modify the original dataframe to use an 'M' suffix for values of a million or more

# Make a copy of the original df
vuln_for_select_countries_3y_millions = vuln_for_select_countries_3y.copy()


# Define the formatting function
def format_millions(value):
    """Return ``value`` as display text, abbreviating a million or more as e.g. "1.2M".

    Values below a million, and missing values, are returned via ``str(value)``.
    The year columns use the nullable Int64 dtype, so a missing entry arrives
    here as ``pd.NA``; comparing ``pd.NA`` with a number and using the result
    in an ``if`` raises TypeError, hence the explicit ``pd.notna`` check.
    """
    if pd.notna(value) and value >= 1e6:
        return f"{value / 1e6:.1f}M"
    return str(value)


# Apply the formatting to each year column that is present

for col in [1997, 2002, 2007]:

    if col in vuln_for_select_countries_3y_millions.columns:

        vuln_for_select_countries_3y_millions[col] = [
            format_millions(v) for v in vuln_for_select_countries_3y_millions[col]
        ]

# Create the canvas

canvas = toyplot.Canvas(width=700, height=500)

# Add the data

table = canvas.table(data=vuln_for_select_countries_3y_millions)

# Set alignment for population counts (we need to do this manually because the
# 'M' formatting converted the columns to strings)

table.cells.column[[2, 3, 4]].align = "right"

In [None]:
# Show the columns in a different left-to-right order

# The order the columns should appear in
column_order = ["continent", "country", 2002, 1997, 2007]

# Copy the original df, then select the columns in that order
vuln_for_select_countries_3y_rearranged = vuln_for_select_countries_3y.copy()[column_order]

# Draw the reordered dataframe on a fresh canvas
canvas = toyplot.Canvas(width=700, height=500)
table = canvas.table(data=vuln_for_select_countries_3y_rearranged)

In [None]:
# Show the rows in a different order, continent by continent

# The order the continents should appear in
desired_row_order = ["Oceania", "Europe", "Americas", "Asia", "Africa"]

# Copy the original df and index it by continent so rows can be picked by name
rows_by_continent = vuln_for_select_countries_3y.copy().set_index("continent")

# Pull the continents out in the desired order, then make "continent" a column again
vuln_for_select_countries_3y_rearranged = rows_by_continent.loc[desired_row_order].reset_index()

# Draw the reordered dataframe on a fresh canvas
canvas = toyplot.Canvas(width=700, height=500)
table = canvas.table(data=vuln_for_select_countries_3y_rearranged)

In [None]:
# Add "sum" and "mean" summary rows for each continent, placed after that continent's countries

# The year columns being summarised (and later converted to whole numbers)

columns_to_format = [1997, 2002, 2007]

# Group by "continent" and calculate the sum and mean of each year column.
# reset_index() turns the continent index into a "continent" column, and the
# "country" column is filled with the label "sum" or "mean", so both summary
# frames have the same columns as the original df.

population_by_continent = vuln_for_select_countries_3y.groupby("continent")[columns_to_format]

continent_sum = population_by_continent.sum().reset_index().assign(country="sum")

continent_mean = population_by_continent.mean().reset_index().assign(country="mean")

# Append the summary rows to the original df

vuln_for_select_countries_3y_with_summary = pd.concat(
    [vuln_for_select_countries_3y, continent_sum, continent_mean], ignore_index=True
)

# Add a helper column to prioritize sorting: countries 0, "sum" 1, "mean" 2

summary_rank = {"sum": 1, "mean": 2}

vuln_for_select_countries_3y_with_summary["sort_key"] = vuln_for_select_countries_3y_with_summary[
    "country"
].apply(lambda name: summary_rank.get(name.lower(), 0))

# Sort by continent first, then by the helper column, and finally by country,
# then drop the helper column again

vuln_for_select_countries_3y_with_summary = (
    vuln_for_select_countries_3y_with_summary.sort_values(
        by=["continent", "sort_key", "country"], ascending=[True, True, True]
    )
    .reset_index(drop=True)
    .drop(columns=["sort_key"])
)

# Ensure the columns appear as full integers, not scientific notations (this
# becomes necessary after running the code above). int() drops any fractional
# part of the means. Series.map is applied column by column because
# DataFrame.applymap is deprecated in recent pandas releases.

vuln_for_select_countries_3y_with_summary[
    columns_to_format
] = vuln_for_select_countries_3y_with_summary[columns_to_format].apply(
    lambda column: column.map(int)
)

# Create the canvas

canvas = toyplot.Canvas(width=700, height=500)

# Add the data

table = canvas.table(data=vuln_for_select_countries_3y_with_summary)

# Set alignment for population counts

table.cells.column[[2, 3, 4]].align = "right"

In [None]:
# Add grand summary rows (minimum, maximum and mean over all rows) to the bottom of the table

# The year columns being summarised (and later converted to whole numbers)

columns_to_format = [1997, 2002, 2007]

# Calculate grand summary values for minimum, maximum, and mean

grand_min = vuln_for_select_countries_3y[columns_to_format].min()

grand_max = vuln_for_select_countries_3y[columns_to_format].max()

grand_mean = vuln_for_select_countries_3y[columns_to_format].mean()

# Create summary rows as one-row DataFrames, labelled in the "country" column

grand_min_row = pd.DataFrame([grand_min], columns=columns_to_format)

grand_min_row["country"] = "min"

grand_max_row = pd.DataFrame([grand_max], columns=columns_to_format)

grand_max_row["country"] = "max"

grand_mean_row = pd.DataFrame([grand_mean], columns=columns_to_format)

grand_mean_row["country"] = "mean"

# Append summary rows to the original DataFrame

vuln_for_select_countries_3y_with_summary = pd.concat(
    [vuln_for_select_countries_3y, grand_min_row, grand_max_row, grand_mean_row], ignore_index=True
)

# Ensure the columns appear as full integers, not scientific notations.
# int() drops any fractional part of the mean. Series.map is applied column by
# column because DataFrame.applymap is deprecated in recent pandas releases.

vuln_for_select_countries_3y_with_summary[
    columns_to_format
] = vuln_for_select_countries_3y_with_summary[columns_to_format].apply(
    lambda column: column.map(int)
)

# Optional: Replace the NaN values in the continent column with 'all'.
# (The summary rows have no "continent" value, so it is missing after concat.)
# The result is assigned back to the column: calling fillna(inplace=True) on
# df["continent"] is a chained assignment, which warns in pandas 2.x and does
# not update the dataframe when Copy-on-Write is enabled.

vuln_for_select_countries_3y_with_summary["continent"] = vuln_for_select_countries_3y_with_summary[
    "continent"
].fillna("all")

# Create the canvas

canvas = toyplot.Canvas(width=700, height=500)

# Add the data

table = canvas.table(data=vuln_for_select_countries_3y_with_summary)

# Set alignment for population counts

table.cells.column[[2, 3, 4]].align = "right"

In [None]:
# Modify the original dataframe to include a summary column of average vulnerable populations

# Make a copy of the original df

vuln_for_select_countries_3y_with_summary = vuln_for_select_countries_3y.copy()

# Add a summary column for the average values across 1997, 2002, and 2007

vuln_for_select_countries_3y_with_summary["average"] = vuln_for_select_countries_3y_with_summary[
    [1997, 2002, 2007]
].mean(axis=1)

# Ensure the column uses full integers, not scientific notations
# (int() drops any fractional part of the average)

vuln_for_select_countries_3y_with_summary["average"] = vuln_for_select_countries_3y_with_summary[
    "average"
].apply(lambda x: int(x))

# Create the canvas

canvas = toyplot.Canvas(width=700, height=500)

# Add the data

table = canvas.table(data=vuln_for_select_countries_3y_with_summary)

# Set alignment for the new "average" column (column 5). This has to come
# after the table is created: setting it earlier would act on the `table`
# left over from a previous cell instead of the one drawn here.

table.cells.column[[5]].align = "right"

In [None]:
# Widen the continent and country columns

# Fresh canvas and table
canvas = toyplot.Canvas(width=700, height=500)
table = canvas.table(data=vuln_for_select_countries_3y)

# Column 0 is continent and column 1 is country; set each to 200 pixels wide
for column_position in (0, 1):
    table.cells.column[column_position].width = 200

In [None]:
# Highlight the row with the largest 1997 vulnerable population (bold text in colour #E2BC22)

# Renumber the rows 0, 1, 2, ... so index labels match row positions
vuln_for_select_countries_3y = vuln_for_select_countries_3y.reset_index(drop=True)

# Largest value in the 1997 column
population_1997 = vuln_for_select_countries_3y[1997]
highest_1997_value = population_1997.max()

# Index of the first row whose 1997 value equals that maximum
rows_at_maximum = vuln_for_select_countries_3y.loc[population_1997 == highest_1997_value]
highest_1997_row = rows_at_maximum.index[0]

# pandas and toyplot number rows differently, so the matching
# table row is the one below the dataframe row found above
highest_1997_row += 1

# Fresh canvas and table
canvas = toyplot.Canvas(width=700, height=500)
table = canvas.table(data=vuln_for_select_countries_3y)

# Style applied to the highlighted row
highlight_style = {"font-weight": "bold", "fill": "#E2BC22"}
table.cells.row[highest_1997_row].lstyle = highlight_style

In [None]:
# Highlight the row for Brazil (bold text in colour #E2BC22)

# Renumber the rows 0, 1, 2, ... so index labels match row positions
vuln_for_select_countries_3y = vuln_for_select_countries_3y.reset_index(drop=True)

# Index of the first row whose country is 'Brazil'
is_brazil = vuln_for_select_countries_3y["country"] == "Brazil"
brazil_row = vuln_for_select_countries_3y.loc[is_brazil].index[0]

# pandas and toyplot number rows differently, so the matching
# table row is the one below the dataframe row found above
brazil_row += 1

# Fresh canvas and table
canvas = toyplot.Canvas(width=700, height=500)
table = canvas.table(data=vuln_for_select_countries_3y)

# Style applied to the highlighted row
highlight_style = {"font-weight": "bold", "fill": "#E2BC22"}
table.cells.row[brazil_row].lstyle = highlight_style

In [None]:
# Draw the table with gridlines

# Fresh canvas and table
canvas = toyplot.Canvas(width=700, height=500)
table = canvas.table(data=vuln_for_select_countries_3y)

# Left-align the first two columns (continent and country) only
text_column_positions = [0, 1]
table.cells.column[text_column_positions].align = "left"

# Give the country column (column 1) a width of 150
table.cells.column[[1]].width = 150

# Vertical lines: every row, at column positions 1 to 4
vertical_line_positions = [1, 2, 3, 4]
table.cells.grid.vlines[..., vertical_line_positions] = "single"

# Horizontal line: row position 1, across every column
table.cells.grid.hlines[1, ...] = "single"

# Draw the gridlines as grey strokes of width 1
table.cells.grid.style = {"stroke-width": "1", "stroke": "grey"}

In [None]:
# Separate the continents with extra whitespace

# Fresh canvas and table
canvas = toyplot.Canvas(width=700, height=500)
table = canvas.table(data=vuln_for_select_countries_3y)

# Body rows that get a 0.4cm gap; these positions are every fifth row,
# matching the five rows kept per continent
gap_row_positions = [4, 9, 14, 19]
table.body.gaps.rows[gap_row_positions] = "0.4cm"

In [None]:
# Give the rows a 'grouped' look: name each continent once, then leave the cell blank

# Copy the original df and renumber its rows 0, 1, 2, ...
vuln_for_select_countries_3y_grouped = vuln_for_select_countries_3y.copy().reset_index(drop=True)

# False for the first row of each continent, True for every repeat after it
duplicate_continents = vuln_for_select_countries_3y_grouped["continent"].duplicated()

# Blank out the continent name on the repeated rows
vuln_for_select_countries_3y_grouped.loc[duplicate_continents, "continent"] = ""

# Draw the grouped dataframe on a fresh canvas
canvas = toyplot.Canvas(width=700, height=500)
table = canvas.table(data=vuln_for_select_countries_3y_grouped)

# Body rows that get a 0.4cm gap, to separate the continents
gap_row_positions = [4, 9, 14, 19]
table.body.gaps.rows[gap_row_positions] = "0.4cm"

In [None]:
# Save the most recently created canvas as an HTML file

# NOTE(review): absolute path on one particular machine; change it to a
# location that exists on yours before running.
html_output_path = "D:/TEMP/test_python.html"

toyplot.html.render(canvas, html_output_path)