In [19]:
import polars as pl
import altair as alt

# Lift Altair's maximum-row limit so that charts built directly from the
# emissions table do not fail with a max-rows error.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [139]:
# Load the high-granularity emissions table (first line of the file is skipped),
# keep records from 2000 onward, and derive non-operational emissions as
# total emissions minus total operational emissions.
emissions = (
    pl.read_csv("data/emissions_high_granularity.csv", skip_rows=1)
    .filter(pl.col("year") >= 2000)
    .with_columns(
        (
            pl.col("total_emissions_MtCO2e")
            - pl.col("total_operational_emissions_MtCO2e")
        ).alias("non_operational_emissions_MtCO2e")
    )
)

emissions.head(10)

year,parent_entity,parent_type,reporting_entity,commodity,production_value,production_unit,product_emissions_MtCO2,flaring_emissions_MtCO2,venting_emissions_MtCO2,own_fuel_use_emissions_MtCO2,fugitive_methane_emissions_MtCO2e,fugitive_methane_emissions_MtCH4,total_operational_emissions_MtCO2e,total_emissions_MtCO2e,source,non_operational_emissions_MtCO2e
i64,str,str,str,str,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,str,f64
2000,"""Abu Dhabi National Oil Company""","""State-owned Entity""","""Abu Dhabi National Oil Company""","""Oil & NGL""",695.4,"""Million bbl/yr""",258.290764,4.118351,0.989923,0.0,13.911111,0.496825,19.019385,277.310149,"""Oil & Gas Journal OGJ100 data …",258.290764
2001,"""Abu Dhabi National Oil Company""","""State-owned Entity""","""Abu Dhabi National Oil Company""","""Oil & NGL""",669.8,"""Million bbl/yr""",248.782217,3.966741,0.953481,0.0,13.398996,0.478536,18.319218,267.101435,"""Oil & Gas Journal OGJ100 data …",248.782217
2002,"""Abu Dhabi National Oil Company""","""State-owned Entity""","""Abu Dhabi National Oil Company""","""Oil & NGL""",616.9,"""Million bbl/yr""",229.133696,3.653452,0.878176,0.0,12.34076,0.440741,16.872388,246.006085,"""Oil & Gas Journal OGJ100 data …",229.133696
2003,"""Abu Dhabi National Oil Company""","""State-owned Entity""","""Abu Dhabi National Oil Company""","""Oil & NGL""",675.3,"""Million bbl/yr""",250.825069,3.999313,0.96131,0.0,13.509021,0.482465,18.469645,269.294714,"""Oil & Gas Journal OGJ100 data …",250.825069
2004,"""Abu Dhabi National Oil Company""","""State-owned Entity""","""Abu Dhabi National Oil Company""","""Oil & NGL""",713.6,"""Million bbl/yr""",265.050747,4.226137,1.015832,0.0,14.275192,0.509828,19.51716,284.567907,"""OGJ data for 2004""",265.050747
2005,"""Abu Dhabi National Oil Company""","""State-owned Entity""","""Abu Dhabi National Oil Company""","""Oil & NGL""",839.5,"""Million bbl/yr""",311.813484,4.971751,1.195054,0.0,16.793755,0.599777,22.960561,334.774044,"""Oil & Gas Journal, 17Sep07, OG…",311.813484
2006,"""Abu Dhabi National Oil Company""","""State-owned Entity""","""Abu Dhabi National Oil Company""","""Oil & NGL""",894.3,"""Million bbl/yr""",332.167717,5.296292,1.273064,0.0,17.890001,0.638929,24.459356,356.627073,"""Oil & Gas Journal, 17Sep07, OG…",332.167717
2007,"""Abu Dhabi National Oil Company""","""State-owned Entity""","""Abu Dhabi National Oil Company""","""Oil & NGL""",846.8,"""Million bbl/yr""",314.524905,5.014984,1.205446,0.0,16.939788,0.604992,23.160218,337.685123,"""Oil & Gas Journal, 21Sep09, OG…",314.524905
2008,"""Abu Dhabi National Oil Company""","""State-owned Entity""","""Abu Dhabi National Oil Company""","""Oil & NGL""",888.8,"""Million bbl/yr""",330.124865,5.263719,1.265234,0.0,17.779976,0.634999,24.30893,354.433795,"""OGJ100 6 Sep 2010 pg 69.""",330.124865
2009,"""Abu Dhabi National Oil Company""","""State-owned Entity""","""Abu Dhabi National Oil Company""","""Oil & NGL""",779.3,"""Million bbl/yr""",289.453541,4.61523,1.109358,0.0,15.589486,0.556767,21.314074,310.767615,"""Oil & Gas Journal, OGJ 100, 3O…",289.453541


## Annual CO2 Emissions

In [150]:
def annual_emissions(df):
    """Plot annual emissions as stacked bars with an overall-mean reference line.

    The emission columns are summed per year. Operational and non-operational
    emissions are stacked into one bar per year, and a red horizontal rule is
    drawn at the mean of the yearly totals.

    The rule deliberately has no x encoding: grouping the mean by year (as a
    line over ``year`` would) makes the "mean" of each single-row group equal
    to that year's total, so it would only retrace the top of the bars.

    Parameters
    ----------
    df : polars.DataFrame
        Must contain ``year``, ``total_emissions_MtCO2e``,
        ``non_operational_emissions_MtCO2e`` and
        ``total_operational_emissions_MtCO2e``.

    Returns
    -------
    Altair layered chart (bars + mean rule).
    """
    # one row per year, sorted once and shared by both layers
    annual = (
        df.group_by("year")
        .agg(
            pl.col(
                [
                    "total_emissions_MtCO2e",
                    "non_operational_emissions_MtCO2e",
                    "total_operational_emissions_MtCO2e",
                ]
            ).sum()
        )
        .sort("year")
    )

    # fold the two components into key/value pairs so they stack per year
    total_emissions = alt.Chart(annual, title="Annual CO2 Emissions").transform_fold(
        ["total_operational_emissions_MtCO2e", "non_operational_emissions_MtCO2e"],
    ).mark_bar().encode(
        alt.X("year:O").title("Year"),
        alt.Y("value:Q").title("Total CO2 Emissions (Mt)"),
        alt.Color("key:N", title="Emission Type"),
    )

    # horizontal reference line at the mean of the yearly totals
    mean_line = alt.Chart(annual, title="Average CO2 Emissions").mark_rule(color="red").encode(
        alt.Y("mean(total_emissions_MtCO2e):Q").title("Total CO2 Emissions (Mt)")
    )

    return total_emissions + mean_line


annual_emissions(emissions)
    

## Annual Emissions by Entity Type

In [147]:
def emissions_by_entity_type(df):
    """Plot total, operational and non-operational emissions per entity type.

    For each of the three emission measures the data is summed by year and
    ``parent_type``, pivoted to one column per entity type, and drawn as one
    line per entity type. The three panels are concatenated side by side and
    share a single colour scale so the legend is consistent across panels.

    Parameters
    ----------
    df : polars.DataFrame
        Must contain ``year``, ``parent_type``, ``total_emissions_MtCO2e``,
        ``total_operational_emissions_MtCO2e`` and
        ``non_operational_emissions_MtCO2e``.

    Returns
    -------
    Altair horizontally concatenated chart with three panels.
    """
    entity_types = ['Nation State', 'State-owned Entity', 'Investor-owned Company']

    # fixed colour per entity type, shared by all panels
    color_scale = alt.Scale(domain=entity_types, range=['red', 'blue', 'black'])

    # y-range applied to the operational and non-operational panels so the two
    # can be compared directly; the total panel keeps Altair's automatic range
    component_y_domain = [0, 14000]

    def entity_type_lines(value_column, title, y_domain=None):
        """Build one line-per-entity-type panel for ``value_column``."""
        wide = (
            df.group_by(["year", "parent_type"])
            .agg(pl.col(value_column).sum())
            .pivot("parent_type", index="year", values=value_column)
            .sort("year")
        )

        if y_domain is None:
            y_axis = alt.Y("value:Q")
        else:
            y_axis = alt.Y("value:Q", scale=alt.Scale(domain=y_domain))

        # fold the per-entity-type columns back into key/value pairs for plotting
        return alt.Chart(wide, title=title).transform_fold(
            entity_types,
        ).mark_line().encode(
            alt.X("year:O").title("Year"),
            y_axis.title("Total CO2 Emissions"),
            alt.Color("key:N", scale=color_scale, title="Entity Type"),
        )

    total_emissions_by_types = entity_type_lines(
        "total_emissions_MtCO2e", "Total Emissions by Entity Type"
    )
    op_emissions_by_types = entity_type_lines(
        "total_operational_emissions_MtCO2e",
        "Operational Emissions by Entity Type",
        y_domain=component_y_domain,
    )
    non_op_emissions_by_types = entity_type_lines(
        "non_operational_emissions_MtCO2e",
        "Non-Operational Emissions by Entity Type",
        y_domain=component_y_domain,
    )

    return total_emissions_by_types | op_emissions_by_types | non_op_emissions_by_types


emissions_by_entity_type(emissions)
    

## Emissions by Commodity

In [215]:
def emissions_by_commodity(df):
    """Line chart of total emissions per year, one line per commodity.

    ``df`` must contain ``year``, ``commodity`` and ``total_emissions_MtCO2e``.
    Returns the Altair chart.
    """
    # total emissions for every (year, commodity) pair
    yearly_by_commodity = df.group_by(["year", "commodity"]).agg(
        pl.col("total_emissions_MtCO2e").sum()
    )

    # TODO: a fixed per-commodity colour scale was attempted but is not working
    # yet, so the default categorical palette is used.
    x_axis = alt.X("year:O").title("Year")
    y_axis = alt.Y("total_emissions_MtCO2e:Q").title("Total CO2 Emissions")
    line_color = alt.Color("commodity:N", title="Commodity")

    return (
        alt.Chart(yearly_by_commodity, title="CO2 Emissions by Commodity")
        .mark_line()
        .encode(x_axis, y_axis, line_color)
    )


emissions_by_commodity(emissions)

## Operational Emissions by Type

In [120]:
def faceted_operational_emissions(df, y_max=4000):
    """Plot annual operational emissions in a 2x2 grid, one panel per type.

    Flaring, venting, own-fuel-use and fugitive-methane emissions are summed
    per year and each drawn as an area chart with its own colour. All panels
    use the same fixed y-range so their magnitudes can be compared directly.

    Parameters
    ----------
    df : polars.DataFrame
        Must contain ``year`` and the four operational emission columns
        listed in ``panels`` below.
    y_max : int or float, default 4000
        Upper bound of the y-axis applied to every panel.

    Returns
    -------
    Altair vertically concatenated chart (two rows of two panels).
    """
    # (column, panel title, fill colour) for each operational emission type
    panels = [
        ("flaring_emissions_MtCO2", "Flaring Emissions", "red"),
        ("venting_emissions_MtCO2", "Venting Emissions", "blue"),
        ("own_fuel_use_emissions_MtCO2", "Own Fuel Use Emissions", "green"),
        ("fugitive_methane_emissions_MtCO2e", "Fugitive Methane Emissions", "orange"),
    ]

    # annual totals for every operational emission type
    annual = df.group_by("year").agg(
        pl.col([column for column, _, _ in panels]).sum()
    )

    def area_panel(column, title, color):
        """Build the area chart for a single emission type."""
        return alt.Chart(annual, title=title).mark_area(color=color).encode(
            alt.X("year:N", title="Year"),
            alt.Y(f"{column}:Q", title="CO2 (Mt)", scale=alt.Scale(domain=[0, y_max])),
        )

    flaring, venting, own_fuel_use, fugitive_methane = (
        area_panel(*panel) for panel in panels
    )

    # concatenate charts into a grid
    custom_title = alt.TitleParams(
        'Annual Operational CO2 Emissions by Emission Type', anchor='middle'
    )
    upper = flaring | venting
    lower = own_fuel_use | fugitive_methane

    return alt.vconcat(upper, lower).properties(title=custom_title)


faceted_operational_emissions(emissions)

In [83]:
# Sanity check: annual totals for each operational emission type.
# Column names must match the emissions table exactly.
emissions.group_by("year").agg(
    pl.col(
        [
            "flaring_emissions_MtCO2",
            "venting_emissions_MtCO2",
            "own_fuel_use_emissions_MtCO2",
            "fugitive_methane_emissions_MtCO2e",
        ]
    ).sum()
)

year,flaring_emissions_MtCO2,venting_emissions_MtCO2,own_fuel_use_emissions_MtCO2,fugitive_methane_emissions_MtCO2e
i64,f64,f64,f64,f64
2012,160.402866,181.138054,290.394173,3621.920123
2001,139.965971,135.077002,206.587565,2369.501442
2002,137.046999,139.258209,216.532335,2436.533937
2016,172.961912,187.729448,297.670394,3524.391655
2008,157.697275,168.884343,266.761831,3205.597879
…,…,…,…,…
2013,160.816933,183.981517,295.982687,3678.035996
2015,170.85474,187.740581,298.724643,3655.382701
2011,155.940857,175.470287,281.035147,3488.17203
2009,154.419521,165.052948,260.563147,3226.306665


In [121]:
def op_emissions_by_commodity(df):
    """Plot each commodity's share of every operational emission type.

    The four operational emission columns are summed by year and commodity.
    Each is drawn as a normalised stacked area chart, so every year's stack
    sums to 1 and shows the commodity mix rather than absolute amounts. The
    four panels are arranged in a 2x2 grid.

    Parameters
    ----------
    df : polars.DataFrame
        Must contain ``year``, ``commodity`` and the four operational
        emission columns listed in ``panels`` below.

    Returns
    -------
    Altair vertically concatenated chart (two rows of two panels).
    """
    # (column, panel title) for each operational emission type
    panels = [
        ("flaring_emissions_MtCO2", "Flaring Emissions"),
        ("venting_emissions_MtCO2", "Venting CO2 Emissions"),
        ("own_fuel_use_emissions_MtCO2", "Own Fuel Use Emissions"),
        ("fugitive_methane_emissions_MtCO2e", "Fugitive Methane Emissions"),
    ]

    # aggregate emissions by year and commodity
    by_commodity = df.group_by(["year", "commodity"]).agg(
        pl.col([column for column, _ in panels]).sum()
    )

    # TODO: a fixed per-commodity colour scale was attempted but is not working
    # yet, so the default categorical palette is used.

    def share_panel(column, title):
        """Build the normalised stacked area chart for one emission type."""
        return alt.Chart(by_commodity, title=title).mark_area().encode(
            alt.X("year:O").title("Year"),
            # the stack is normalised, so the axis shows shares, not Mt
            alt.Y(f"{column}:Q").title("Share of CO2 Emissions").stack("normalize"),
            alt.Color("commodity:N", title="Commodity"),
        )

    flaring, venting, own_fuel_use, fugitive_methane = (
        share_panel(*panel) for panel in panels
    )

    # concatenate charts into a grid
    custom_title = alt.TitleParams(
        'Commodity Distribution by Operational Emission Type', anchor='middle'
    )
    upper = flaring | venting
    lower = own_fuel_use | fugitive_methane

    return alt.vconcat(upper, lower).properties(title=custom_title)


op_emissions_by_commodity(emissions)

In [199]:
def top_emissions_producers(df, top_n=20):
    """Bar chart of the largest emitters over the whole period covered by ``df``.

    Total emissions are summed per (parent_entity, parent_type) across all
    rows of ``df``, and the ``top_n`` largest are kept. Bars are ordered by
    descending total and coloured by entity type.

    Parameters
    ----------
    df : polars.DataFrame
        Must contain ``parent_entity``, ``parent_type`` and
        ``total_emissions_MtCO2e``.
    top_n : int, default 20
        Number of producers to show.

    Returns
    -------
    Altair bar chart.
    """
    # no explicit sort is needed: top_k selects the largest rows and the chart
    # orders the bars itself via sort("-y")
    top_producers = (
        df.group_by(["parent_entity", "parent_type"])
        .agg(pl.col("total_emissions_MtCO2e").sum())
        .top_k(top_n, by="total_emissions_MtCO2e")
    )

    chart = alt.Chart(
        top_producers, title=f"Top {top_n} Emissions Producers Since 2000"
    ).mark_bar().encode(
        alt.X("parent_entity:N").sort("-y").title("Organization"),
        alt.Y("total_emissions_MtCO2e:Q").title("Total CO2 Emissions (Mt)"),
        alt.Color("parent_type:N", title="Entity Type"),
    )

    return chart


top_emissions_producers(emissions)
    

In [221]:
def top_producers_by_year(df):
    """Heatmap of annual total emissions for the 20 largest emitters.

    The 20 parent entities with the highest summed emissions are identified.
    Their emissions are then summed per (year, parent_entity), so each heatmap
    cell holds exactly one value: that entity's total for the year.

    Without this aggregation the input has several rows per year and entity,
    which makes rect marks overlap and each cell show a single row's value
    instead of the annual total.

    Parameters
    ----------
    df : polars.DataFrame
        Must contain ``year``, ``parent_entity`` and
        ``total_emissions_MtCO2e``.

    Returns
    -------
    Altair heatmap chart.
    """
    # identify names of top producers
    top_producer_names = (
        df.group_by("parent_entity")
        .agg(pl.col("total_emissions_MtCO2e").sum())
        .top_k(20, by="total_emissions_MtCO2e")
        .get_column("parent_entity")
        .to_list()
    )

    # annual total per top producer: one row per (year, parent_entity)
    top_producer_annual_emissions = (
        df.filter(pl.col("parent_entity").is_in(top_producer_names))
        .group_by(["year", "parent_entity"])
        .agg(pl.col("total_emissions_MtCO2e").sum())
    )

    # NOTE(review): the fixed colour range [0, 2800] is kept from the original;
    # confirm it still covers the aggregated annual totals.
    chart = alt.Chart(
        top_producer_annual_emissions, title="Annual Emissions by Top Producers"
    ).mark_rect().encode(
        alt.X("year:N", title="Year"),
        alt.Y("parent_entity:N", title="Parent Entity"),
        alt.Color(
            "total_emissions_MtCO2e:Q",
            title="Total CO2 Emissions (Mt)",
            scale=alt.Scale(domain=[0, 2800]),
        ),
    )

    return chart


top_producers_by_year(emissions)

In [234]:
def top_producers_commodity(df):
    """Stacked bar chart of the 20 largest emitters, split by commodity.

    The 20 parent entities with the highest summed emissions are identified.
    Their emissions are then summed per (parent_entity, commodity), so each
    bar has one stacked segment per commodity instead of one per source row.

    Parameters
    ----------
    df : polars.DataFrame
        Must contain ``parent_entity``, ``commodity`` and
        ``total_emissions_MtCO2e``.

    Returns
    -------
    Altair stacked bar chart.
    """
    # identify names of top producers
    top_producer_names = (
        df.group_by("parent_entity")
        .agg(pl.col("total_emissions_MtCO2e").sum())
        .top_k(20, by="total_emissions_MtCO2e")
        .get_column("parent_entity")
        .to_list()
    )

    # total emissions of each top producer per commodity
    top_producer_commodities = (
        df.filter(pl.col("parent_entity").is_in(top_producer_names))
        .group_by(["parent_entity", "commodity"])
        .agg(pl.col("total_emissions_MtCO2e").sum())
    )

    # bars are ordered by descending stacked total
    chart = alt.Chart(
        top_producer_commodities, title="Top Producers' Emissions by Commodity"
    ).mark_bar().encode(
        alt.X("parent_entity:N").sort("-y").title("Organization"),
        alt.Y("total_emissions_MtCO2e:Q").title("Total CO2 Emissions (Mt)"),
        alt.Color("commodity:N", title="Commodity Type"),
    )

    return chart


top_producers_commodity(emissions)
    

In [244]:
import geopandas as gpd
def top_producers_location(df):
    """Draw a country basemap with a circle at each country's centroid.

    Country shapes are read from the Natural Earth 110m admin-0 zip archive
    and only the first 21 rows are kept.

    NOTE(review): ``df`` is accepted but not used by this function yet.
    """
    natural_earth_url = "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip"
    kept_columns = ["NAME", "CONTINENT", "POP_EST", "geometry"]

    # zipped shapefile -> GeoDataFrame, trimmed to the needed columns and rows
    countries = gpd.read_file(natural_earth_url)
    countries = countries[kept_columns][:21]

    # light grey country outlines
    land = (
        alt.Chart(countries)
        .mark_geoshape(fill="lightgrey", stroke="white", strokeWidth=0.5)
        .project(type="albers")
    )

    # one circle per country, placed at the centroid computed in the chart
    # (sizing by POP_EST is currently disabled)
    centroid_expr = alt.expr.geoCentroid(None, alt.datum)
    centroid_marks = (
        alt.Chart(countries)
        .transform_calculate(centroid=centroid_expr)
        .mark_circle(stroke="black")
        .encode(
            longitude="centroid[0]:Q",
            latitude="centroid[1]:Q",
        )
    )

    return (land + centroid_marks).project(type="identity", reflectY=True)


top_producers_location(emissions)