In [1]:
# Install and read in packages needed to make a demonstration table

# If running in a .py file

#import subprocess
#import sys

#subprocess.check_call([sys.executable, "-m", "pip", "install", "toyplot"])

#import pandas as pd
#import toyplot
#import numpy as np

# If running in a .ipynb file

#pip install toyplot

import pandas as pd
import toyplot
import toyplot.html

def make_table_with_summary_columns():

    # Make sure figues appear as intended when using pandas

    pd.set_option('display.float_format', '{:.0f}'.format)
    vulnerable = pd.read_csv("D:/repos/analysis-for-action/data/vulnerable.csv")

    vuln_for_select_countries_3y = (vulnerable[vulnerable["year"].isin([1997, 2002, 2007])] 



        .filter(["country", "continent", "year", "vulnerable_pop"])  



        .pivot_table(index = ["continent","country"],  



        columns = "year", values = "vulnerable_pop") # Make the data "wider" so each year is a col 



        .reset_index() # Reset our index; so we don't have a multi index 



        .groupby("continent").head())

    vuln_for_select_countries_3y.columns.name = None #Removed the "year" label on the index 

    # Prevent figures from turning into scientific notations by ensuring they are full integers 

    vuln_for_select_countries_3y[1997] = vuln_for_select_countries_3y[1997].astype("Int64") 

    vuln_for_select_countries_3y[2002] = vuln_for_select_countries_3y[2002].astype("Int64") 

    vuln_for_select_countries_3y[2007] = vuln_for_select_countries_3y[2007].astype("Int64") 

    # Calculate grand summary rows for minimum, maximum, and mean 

    grand_min = vuln_for_select_countries_3y[[1997, 2002, 2007]].min() 

    grand_max = vuln_for_select_countries_3y[[1997, 2002, 2007]].max() 

    grand_mean = vuln_for_select_countries_3y[[1997, 2002, 2007]].mean() 

    # Create summary rows as DataFrames 

    grand_min_row = pd.DataFrame([grand_min], columns=[1997, 2002, 2007]) 

    grand_min_row["country"] = "min" 

    grand_max_row = pd.DataFrame([grand_max], columns=[1997, 2002, 2007]) 

    grand_max_row["country"] = "max" 

    grand_mean_row = pd.DataFrame([grand_mean], columns=[1997, 2002, 2007]) 

    grand_mean_row["country"] = "mean" 

    vuln_for_select_countries_3y_with_summary = pd.concat( 

            [vuln_for_select_countries_3y, grand_min_row, grand_max_row, grand_mean_row], 

            ignore_index=True
        )

    # Ensure the columns appear as full integers, not scientific notations 

    columns_to_format = [1997, 2002, 2007] 

    vuln_for_select_countries_3y_with_summary[columns_to_format] = vuln_for_select_countries_3y_with_summary[columns_to_format].map(lambda x: int(x)) 

    # Group by "continent" and calculate summmary sums and means for each continent 

    continent_sum = vuln_for_select_countries_3y.groupby("continent")[[1997, 2002, 2007]].sum() 

    continent_mean = vuln_for_select_countries_3y.groupby("continent")[[1997, 2002, 2007]].mean() 

        # Add a "continent" column and populate the "country" column with 'total' and 'mean' 

    continent_sum["continent"] = continent_sum.index 

    continent_sum["country"] = "sum" 

    continent_mean["continent"] = continent_mean.index 

    continent_mean["country"] = "mean" 

    # Reset the index to make the summary rows compatible with the original df 

    continent_sum = continent_sum.reset_index(drop=True) 

    # Append the summary rows to the original df 

    vuln_for_select_countries_3y_with_summary = pd.concat( 
        [vuln_for_select_countries_3y, continent_sum, continent_mean, grand_min_row, grand_max_row, grand_mean_row], 
        ignore_index=True
        ) 

    columns_to_format = [1997, 2002, 2007] 

    vuln_for_select_countries_3y_with_summary[columns_to_format] = vuln_for_select_countries_3y_with_summary[columns_to_format].map(lambda x: int(x)) 

    # Add a summary column for the average values across 1997, 2002, and 2007 

    vuln_for_select_countries_3y_with_summary["average"] = vuln_for_select_countries_3y_with_summary[[1997, 2002, 2007]].mean(axis=1) 

    # Ensure the column uses full integers, not scientific notations 

    vuln_for_select_countries_3y_with_summary["average"] = vuln_for_select_countries_3y_with_summary["average"].apply(lambda x: int(x)) 

    # Optional: Replace the NaN values in the continent column with 'all' 

    vuln_for_select_countries_3y_with_summary["continent"].fillna("all", inplace=True) 

    # Create the canvas

    canvas = toyplot.Canvas(width=700, height=500)

    # Add the Data

    table = canvas.table(data = vuln_for_select_countries_3y_with_summary)

    table
    
    # Save the table as an HTML file

    html_path = "D:/repos/analysis-for-action/output"

    toyplot.html.render(canvas, html_path + "/table_with_summary_columns.html")


make_table_with_summary_columns()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  vuln_for_select_countries_3y_with_summary["continent"].fillna("all", inplace=True)
