In [1]:
import re
import numpy as np
import pandas as pd
from dotmap import DotMap
from pathlib import Path
from collections.abc import Mapping, Callable

from IPython.display import display

# Config

In [2]:
# Root folder of every input file; relative, so it resolves against the working directory
DATA = Path(".") / "data"

# Load Standards

In [3]:
# Flag definitions, indexed by flag code.
# keep_default_na=False keeps the flag codes "" and "NA" as literal strings
# instead of letting pandas parse both of them as NaN.
flags_csv = DATA / "definitions_and_standards" / "flags.csv"
df_flags = pd.read_csv(flags_csv, index_col="Flag", keep_default_na=False)

# Load data

## Helper Functions

In [8]:
# Helper function

# Snake case conversion
# Pass 1: put "_" before a capitalised word that follows any character ("HTTPResponse" -> "HTTP_Response")
_first_cap_re = re.compile(r"(.)([A-Z][a-z]+)")
# Pass 2: put "_" between a lowercase letter or digit and a following capital ("fooBar" -> "foo_Bar")
_all_cap_re = re.compile(r"([a-z0-9])([A-Z])")


def to_snakecase(name):
    """
    Converts a CamelCase or space separated name to snake_case.

    Args:
        name: string to convert

    Returns:
        The lowercased name, with "_" inserted at word boundaries and every space replaced by "_".
        A space followed by a capitalised word therefore yields two underscores ("Area Code" -> "area__code").
    """
    words_split = _first_cap_re.sub(r"\1_\2", name)
    caps_split = _all_cap_re.sub(r"\1_\2", words_split)
    lowered = caps_split.lower()
    return "_".join(lowered.split(" "))

def join_flags(df, filepath, df_flags):
    """
    Replaces the flag codes of a dataframe by their meaning.

    The "Flag" column is read again from the CSV file with keep_default_na=False, so that
    the flag codes "" and "NA" stay distinct strings instead of both being parsed as NaN.

    Args:
        df: dataframe loaded from filepath; the re-read flags are matched to its rows by index label
        filepath: path of the CSV file df was loaded from, it must contain a "Flag" column
        df_flags: flag definitions, keyed by "Flag" (index level or column) with the meaning in a "Flags" column

    Returns:
        A new dataframe whose "Flag" column holds the meaning of each flag.
        Rows whose flag code has no entry in df_flags are dropped (inner merge).
        The dataframe passed as df is left unmodified.
    """
    flag_codes = pd.read_csv(
        filepath, encoding="latin1", usecols=["Flag"], keep_default_na=False
    )["Flag"]
    # assign() returns a copy, so the caller's dataframe does not get its "Flag" column overwritten
    with_codes = df.assign(Flag=flag_codes)
    return (
        with_codes
        .merge(df_flags, on="Flag")
        .drop(columns="Flag")
        .rename(columns={"Flags": "Flag"})
    )

def split_on(df, column, rename=None, only_named=False):
    """
    Splits a dataframe into multiple dataframes based on the value of a column.

    Args:
        df: dataframe to split
        column: name of the column to split on
        rename (optional): mapping or callable that turns a value of the column into its name
            in the final dictionary. Anything else (including the default None) renames nothing.
        only_named (False): keep only the dataframes whose value in column is renamed

    Returns:
        A DotMap mapping the unique values in df[column] to the dataframe of the rows with that value,
        without the column itself.
        Values (i.e. dict keys) can be renamed by the rename argument. A callable can return None
        (and a mapping can omit the value) to not rename it; any other result, even a falsy one
        such as "", is used as the key.
    """
    result = DotMap(_dynamic=False)
    for value in df[column].unique():
        if callable(rename):
            name = rename(value)
        elif isinstance(rename, Mapping):
            name = rename.get(value)
        else:
            name = None

        # Only None means "not renamed"; this keeps the key choice consistent with only_named
        if name is None:
            if only_named:
                continue
            name = value

        # A missing value never compares equal to itself, so its rows are selected with isna()
        if pd.api.types.is_scalar(value) and pd.isna(value):
            mask = df[column].isna()
        else:
            mask = df[column] == value
        result[name] = df[mask].drop(columns=column)
    return result

## Animal emissions data

## Computed emissions data

In [28]:
# The raw file is read twice (data, then flags): name its path once
env_emission_intensity_csv = (
    DATA
    / "global-food-agriculture-statistics/raw_files"
    / "Environment_Emissions_intensities_E_All_Data_(Normalized).csv"
)

# Load base info, drop the redundant "Year Code" column
df_env_emission_intensity = pd.read_csv(
    env_emission_intensity_csv,
    encoding="latin1",
).drop(columns="Year Code")

# Reload the flags to prevent a NaN collision between the flags "" and "NA",
# and replace the flag codes by their meaning
df_env_emission_intensity = join_flags(
    df_env_emission_intensity,
    env_emission_intensity_csv,
    df_flags
)

# Split on the Element column
dfs_env_em_intensity = split_on(
    df_env_emission_intensity, "Element",
    rename=to_snakecase
)

# Each table has a single unit: put the unit in the title of the value column and drop the "Unit" column
# Emissions intensity - everything is in kg CO2eq / kg product
value_column_by_table = {
    "emissions_intensity": "Value (kg CO2eq/kg product)",
    "emissions_(co2eq)": "Value (gigagrams)",
    "production": "Value (tonnes)",
}
for table_name, value_column in value_column_by_table.items():
    dfs_env_em_intensity[table_name] = (
        dfs_env_em_intensity[table_name]
        .rename(columns={"Value": value_column})
        .drop(columns="Unit")
    )

# Fixed seed so that the displayed samples are the same on every run
sample_seed = 0

for table_name, df in dfs_env_em_intensity.items():
    print(f"\n\nTable {table_name}")
    # min() guards against tables with fewer than 5 rows, for which sample(5) would raise
    display(df.sample(min(5, len(df)), random_state=sample_seed))
    if table_name == "emissions_intensity":
        print("Available products:")
        for product in df['Item'].unique():
            print(f"\n - {product}")



Table emissions_intensity


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Year,Value (kg CO2eq/kg product),Flag
164655,195,Senegal,1020,"Milk, whole fresh goat",71761,1981,10.7582,Calculated data
79413,89,Guatemala,27,"Rice, paddy",71761,2006,0.5748,Calculated data
76318,84,Greece,1058,"Meat, chicken",71761,1996,0.3304,Calculated data
135007,157,Nicaragua,882,"Milk, whole fresh cow",71761,1978,2.3764,Calculated data
156136,183,Romania,982,"Milk, whole fresh sheep",71761,2006,2.5335,Calculated data


Available products:

 - Cereals excluding rice

 - Rice, paddy

 - Meat, cattle

 - Milk, whole fresh cow

 - Meat, goat

 - Milk, whole fresh goat

 - Meat, sheep

 - Milk, whole fresh sheep

 - Milk, whole fresh camel

 - Meat, chicken

 - Eggs, hen, in shell

 - Milk, whole fresh buffalo

 - Meat, pig

 - Meat, buffalo


Table emissions_(co2eq)


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Year,Value (gigagrams),Flag
134103,156,New Zealand,882,"Milk, whole fresh cow",7231,1992,8679.8396,Calculated data
15850,15,Belgium-Luxembourg,977,"Meat, sheep",7231,1989,47.9807,Calculated data
175203,206,Sudan (former),1718,Cereals excluding rice,7231,1982,600.4394,Calculated data
171553,202,South Africa,1718,Cereals excluding rice,7231,1986,2840.3413,Calculated data
188046,220,Trinidad and Tobago,1062,"Eggs, hen, in shell",7231,1980,12.0463,Calculated data




Table production


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Year,Value (tonnes),Flag
338016,182,Réunion,1017,"Meat, goat",5510,1964,18.0,FAO estimate
304001,95,Honduras,882,"Milk, whole fresh cow",5510,1986,269338.0,Official data
141225,221,Oman,982,"Milk, whole fresh sheep",5510,1977,348.0,Calculated data
346418,226,Uganda,977,"Meat, sheep",5510,1982,7122.0,Unofficial figure
232411,52,Azerbaijan,1718,Cereals excluding rice,5510,2012,2753585.1,"Aggregate, may include official, semi-official..."


## Trade data