In [3]:
import polars as pl
import altair as alt

# Disable Altair's maximum-row check so that charts built from larger
# datasets do not raise an error when rendered.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [4]:
# Load the emissions data, keep only rows from the year 2000 onward, and
# derive non-operational emissions as total minus operational emissions.
# NOTE(review): skip_rows=1 presumably drops a preamble line that precedes the
# CSV header -- confirm against the data file.
emissions = (
    pl.read_csv("data/emissions_high_granularity.csv", skip_rows=1)
    .filter(pl.col("year") >= 2000)
    .with_columns(
        (
            pl.col("total_emissions_MtCO2e")
            - pl.col("total_operational_emissions_MtCO2e")
        ).alias("non_operational_emissions_MtCO2e")
    )
)

In [5]:
def top_producers_commodity(df: pl.DataFrame, n_producers: int = 20) -> alt.Chart:
    """Chart the emissions of the highest-emitting parent entities by commodity.

    Parent entities are ranked by the sum of ``total_emissions_MtCO2e`` over
    all rows of ``df``. Every row belonging to one of the top ``n_producers``
    entities is then plotted as a bar chart with the parent entity on the
    x-axis (sorted by descending y), emissions on the y-axis, and bars colored
    by commodity.

    Parameters
    ----------
    df : pl.DataFrame
        Emissions data containing the columns ``parent_entity``,
        ``total_emissions_MtCO2e`` and ``commodity``.
    n_producers : int, default 20
        Number of top-emitting parent entities to include in the chart.

    Returns
    -------
    alt.Chart
        The bar chart; it is not displayed or saved by this function.
    """
    # Rank entities by their summed emissions and keep the names of the largest
    # n_producers. Only membership is used below, so no separate sort is needed
    # before top_k.
    top_producer_names = (
        df.group_by("parent_entity")
        .agg(pl.col("total_emissions_MtCO2e").sum())
        .top_k(n_producers, by="total_emissions_MtCO2e")
        .get_column("parent_entity")
        .to_list()
    )

    # Keep every original row (not the aggregated totals) for those entities so
    # the commodity breakdown is still available for coloring.
    top_producer_rows = df.filter(pl.col("parent_entity").is_in(top_producer_names))

    return (
        alt.Chart(top_producer_rows)
        .mark_bar()
        .encode(
            alt.X("parent_entity:N", title="Parent entity").sort("-y"),
            alt.Y("total_emissions_MtCO2e:Q", title="Total emissions (MtCO2e)"),
            alt.Color("commodity:N", title="Commodity Type"),
        )
    )

# Build the chart from the filtered emissions data; as the last expression in
# the cell, the returned chart is displayed as the cell's output.
top_producers_commodity(emissions)