In [33]:
import re
import numpy as np
import pandas as pd
from dotmap import DotMap
from pathlib import Path
from collections.abc import Mapping, Callable

from IPython.display import display

# Config

In [34]:
# Root directory of the input data files (relative path); every dataset path below is built from it
DATA = Path("./data")

# Load Standards

In [35]:
# Lookup table of the flag standards, indexed by the "Flag" column.
# keep_default_na=False keeps the flag codes "" and "NA" as plain strings
# instead of letting pandas parse both of them as NaN.
df_flags = pd.read_csv(DATA / "definitions_and_standards/flags.csv", index_col="Flag", keep_default_na=False)

# Load data

## Helper Functions

In [36]:
# Helper function

#snake case conversion
# Word-boundary patterns, applied in this order:
#  1. any character followed by a capitalised word ("lC" in "CamelCase")
#  2. a lowercase letter or digit followed by a capital ("tH" in "getHTTP")
_first_cap_re = re.compile("(.)([A-Z][a-z]+)")
_all_cap_re = re.compile("([a-z0-9])([A-Z])")


def to_snakecase(name):
    """
    Converts a CamelCase or space separated name to lowercase snake_case

    An underscore is inserted at every word boundary found by the two patterns above,
    then the text is lowercased and every space is replaced by an underscore.
    A space followed by a capitalised word therefore gives a double underscore
    ("Milk Animals" -> "milk__animals"); other characters such as "/" or "(" are kept.

    Args:
        name: string to convert

    Returns:
        The converted string
    """
    text = name
    for boundary in (_first_cap_re, _all_cap_re):
        text = boundary.sub(r"\1_\2", text)
    lowered = text.lower()
    return lowered.replace(" ", "_")

def join_flags(df, filepath, df_flags):
    """
    Replaces the flag codes of a dataframe by their meaning

    The "Flag" column is read again from the CSV file with keep_default_na=False, so that
    the codes "" and "NA" stay distinct strings instead of both being parsed as NaN.
    The re-read codes are aligned with df on the row index.

    Args:
        df: dataframe loaded from filepath. It is not modified.
        filepath: CSV file (latin1 encoded) df was loaded from; it must have a "Flag" column
        df_flags: dataframe joined on "Flag" (column or index level name) and expected to
            provide the meaning of each code in a "Flags" column

    Returns:
        A new dataframe in which the "Flag" column holds the "Flags" value matching each code.
        The join is an inner join: rows whose code is not in df_flags are dropped.
    """
    raw_flags = pd.read_csv(filepath, encoding="latin1", usecols=["Flag"], keep_default_na=False)
    # assign() works on a copy: the caller's dataframe keeps its original "Flag" column,
    # which keeps the calling cell idempotent when re-run
    with_codes = df.assign(Flag=raw_flags["Flag"])
    return (
        with_codes
        .merge(df_flags, on="Flag")
        .drop(columns="Flag")
        .rename(columns={"Flags": "Flag"})
    )

def split_on(df, column, rename={}, only_named=False):
    """
    Splits a dataframe into several dataframes, one per distinct value of a column

    Args:
        df: dataframe to split
        column: name of the column to split on
        rename (optional): mapping or callable giving, for a value of the column, the key to use
            in the result. Any other kind of object means that nothing is renamed.
        only_named (False): when True, keep only the dataframes whose value got a name from rename

    Returns:
        A DotMap mapping each unique value of df[column] to the rows having that value,
        with the split column removed.
        A value is stored under the name given by rename; when rename gives None (or another
        falsy name such as "") the value itself is used as the key.
    """
    # Pick the renaming strategy once, it does not depend on the value
    if callable(rename):
        lookup = rename
    elif isinstance(rename, Mapping):
        lookup = rename.get
    else:
        lookup = lambda _value: None

    tables = DotMap(_dynamic=False)
    for key in df[column].unique():
        alias = lookup(key)
        if only_named and alias is None:
            # no name was provided for this value and only named tables are wanted
            continue
        rows = df[df[column] == key]
        tables[alias or key] = rows.drop(columns=column)
    return tables

## Animal emissions data

## Computed emissions data

In [28]:
# Single definition of the CSV path: join_flags() reads the "Flag" column again from disk
# and aligns it with the loaded dataframe, so both reads must target the same file.
env_emission_intensity_csv = (
    DATA / "global-food-agriculture-statistics/raw_files/Environment_Emissions_intensities_E_All_Data_(Normalized).csv"
)

# Load base info, drop redundant year code column
df_env_emission_intensity = pd.read_csv(
    env_emission_intensity_csv,
    encoding="latin1",
).drop("Year Code", axis="columns")

# Reload the flags to prevent NaN collision on flags "" and "NA", and replace them by their meaning
df_env_emission_intensity = join_flags(
    df_env_emission_intensity,
    env_emission_intensity_csv,
    df_flags
)

# Split on the Element column
dfs_env_em_intensity = split_on(
    df_env_emission_intensity, "Element",
    rename=to_snakecase
)

# Put the unit in the title of the Value column and drop the Unit column.
# NOTE: the unit of each table is hard-coded here and the Unit column is dropped without
# being checked against it -- this assumes every row of a table shares the listed unit.
env_em_value_units = {
    "emissions_intensity": "kg CO2eq/kg product",
    "emissions_(co2eq)": "gigagrams",
    "production": "tonnes",
}
for table_name, unit in env_em_value_units.items():
    dfs_env_em_intensity[table_name] = (
        dfs_env_em_intensity[table_name]
        .rename(columns={"Value": f"Value ({unit})"})
        .drop(columns="Unit")
    )

# Show a random sample of each table, and the list of products of the intensity table
for table_name, df in dfs_env_em_intensity.items():
    print(f"\n\nTable {table_name}")
    display(df.sample(5))
    if table_name == "emissions_intensity":
        print("Available products:")
        for product in df['Item'].unique():
            print(f"\n - {product}")



Table emissions_intensity


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Year,Value (kg CO2eq/kg product),Flag
164655,195,Senegal,1020,"Milk, whole fresh goat",71761,1981,10.7582,Calculated data
79413,89,Guatemala,27,"Rice, paddy",71761,2006,0.5748,Calculated data
76318,84,Greece,1058,"Meat, chicken",71761,1996,0.3304,Calculated data
135007,157,Nicaragua,882,"Milk, whole fresh cow",71761,1978,2.3764,Calculated data
156136,183,Romania,982,"Milk, whole fresh sheep",71761,2006,2.5335,Calculated data


Available products:

 - Cereals excluding rice

 - Rice, paddy

 - Meat, cattle

 - Milk, whole fresh cow

 - Meat, goat

 - Milk, whole fresh goat

 - Meat, sheep

 - Milk, whole fresh sheep

 - Milk, whole fresh camel

 - Meat, chicken

 - Eggs, hen, in shell

 - Milk, whole fresh buffalo

 - Meat, pig

 - Meat, buffalo


Table emissions_(co2eq)


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Year,Value (gigagrams),Flag
134103,156,New Zealand,882,"Milk, whole fresh cow",7231,1992,8679.8396,Calculated data
15850,15,Belgium-Luxembourg,977,"Meat, sheep",7231,1989,47.9807,Calculated data
175203,206,Sudan (former),1718,Cereals excluding rice,7231,1982,600.4394,Calculated data
171553,202,South Africa,1718,Cereals excluding rice,7231,1986,2840.3413,Calculated data
188046,220,Trinidad and Tobago,1062,"Eggs, hen, in shell",7231,1980,12.0463,Calculated data




Table production


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Year,Value (tonnes),Flag
338016,182,Réunion,1017,"Meat, goat",5510,1964,18.0,FAO estimate
304001,95,Honduras,882,"Milk, whole fresh cow",5510,1986,269338.0,Official data
141225,221,Oman,982,"Milk, whole fresh sheep",5510,1977,348.0,Calculated data
346418,226,Uganda,977,"Meat, sheep",5510,1982,7122.0,Unofficial figure
232411,52,Azerbaijan,1718,Cereals excluding rice,5510,2012,2753585.1,"Aggregate, may include official, semi-official..."


## Productions

In [52]:
# Single definition of the CSV path: join_flags() reads the "Flag" column again from disk
# and aligns it with the loaded dataframe, so both reads must target the same file.
prod_livestock_primary_csv = (
    DATA / "global-food-agriculture-statistics/raw_files/Production_LivestockPrimary_E_All_Data_(Normalized).csv"
)

# Load, remove duplicate column "year code"
df_prod_animals = pd.read_csv(
    prod_livestock_primary_csv,
    encoding="latin1"
).drop(columns="Year Code")

# Join the flag column with the standard meaning
df_prod_animals = join_flags(
    df_prod_animals,
    prod_livestock_primary_csv,
    df_flags
)

# Split according to Element column
dfs_prod_animals = split_on(
    df_prod_animals,
    "Element",
    rename=to_snakecase
)

# Show a random sample of each table and the units it contains
# (the Unit column is kept here: the outputs show that one table can mix several units)
for table_name, df in dfs_prod_animals.items():
    print(f"\n\nTable '{table_name}'")
    display(df.sample(5))
    display(df["Unit"].unique())



Table 'laying'


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Year,Unit,Value,Flag
865954,133,Mali,1783,Eggs Primary,5313,2005,1000 Head,6500.0,"Aggregate, may include official, semi-official..."
90414,144,Mozambique,1062,"Eggs, hen, in shell",5313,1974,1000 Head,330.0,FAO estimate
22,2,Afghanistan,1062,"Eggs, hen, in shell",5313,1988,1000 Head,5500.0,FAO estimate
14724,19,Bolivia (Plurinational State of),1062,"Eggs, hen, in shell",5313,1971,1000 Head,2400.0,FAO estimate
22985,115,Cambodia,1091,"Eggs, other bird, in shell",5313,2010,1000 Head,820.0,FAO estimate


array(['1000 Head'], dtype=object)



Table 'production'


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Year,Unit,Value,Flag
201524,112,Jordan,982,"Milk, whole fresh sheep",5510,2006,tonnes,84544.0,Official data
945618,5101,Eastern Africa,1094,"Meat indigenous, chicken",5323,1997,1000 Head,313115.0,"Aggregate, may include official, semi-official..."
93434,148,Nauru,1094,"Meat indigenous, chicken",5323,1989,1000 Head,5.0,FAO estimate
1006361,5303,Southern Asia,1806,Beef and Buffalo Meat,5510,2010,tonnes,4825429.0,"Aggregate, may include official, semi-official..."
773227,84,Greece,957,"Hides, buffalo, fresh",5510,1999,tonnes,,Data not available


array(['tonnes', '1000 No', 'Head', '1000 Head'], dtype=object)



Table 'producing__animals/_slaughtered'


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Year,Unit,Value,Flag
704916,60,El Salvador,1017,"Meat, goat",5320,1977,Head,5000.0,Unofficial figure
161563,233,Burkina Faso,919,"Hides, cattle, fresh",5320,1992,Head,532467.0,Official data
902049,196,Seychelles,1806,Beef and Buffalo Meat,5320,1997,Head,210.0,"Aggregate, may include official, semi-official..."
99154,159,Nigeria,977,"Meat, sheep",5320,1967,Head,870000.0,FAO estimate
721725,16,Bangladesh,1017,"Meat, goat",5320,1997,Head,15437500.0,FAO data based on imputation methodology


array(['Head', '1000 Head'], dtype=object)



Table 'milk__animals'


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Year,Unit,Value,Flag
246571,220,Trinidad and Tobago,882,"Milk, whole fresh cow",5318,2002,Head,14350.0,Official data
59891,175,Guinea-Bissau,1020,"Milk, whole fresh goat",5318,2011,Head,99500.0,FAO estimate
971107,5200,Americas,982,"Milk, whole fresh sheep",5318,2004,Head,1373914.0,"Aggregate, may include official, semi-official..."
124331,38,Sri Lanka,1020,"Milk, whole fresh goat",5318,1994,Head,206000.0,FAO estimate
183525,68,France,1020,"Milk, whole fresh goat",5318,2005,Head,841894.0,Official data


array(['Head'], dtype=object)



Table 'prod__popultn'


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Year,Unit,Value,Flag
1042834,5500,Oceania,1182,"Honey, natural",5314,1970,No,8880.0,"Aggregate, may include official, semi-official..."
1056863,5504,Polynesia,1182,"Honey, natural",5314,1963,No,2870.0,"Aggregate, may include official, semi-official..."
954155,5103,Northern Africa,1182,"Honey, natural",5314,1994,No,0.0,"Aggregate, may include official, semi-official..."
1056392,5504,Polynesia,1183,Beeswax,5314,2001,No,,"Aggregate, may include official, semi-official..."
1021900,5400,Europe,987,"Wool, greasy",5319,1968,Head,17407700.0,"Aggregate, may include official, semi-official..."


array(['Head', 'No'], dtype=object)



Table 'yield'


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Year,Unit,Value,Flag
308562,233,Burkina Faso,1780,"Milk,Total",5420,1974,hg/An,1037.0,Calculated data
401202,89,Guatemala,1025,"Skins, goat, fresh",5420,1991,hg/An,25.0,Calculated data
577977,210,Sweden,882,"Milk, whole fresh cow",5420,2012,hg/An,83369.0,Calculated data
263996,4,Algeria,1780,"Milk,Total",5420,1985,hg/An,1037.0,Calculated data
644662,5105,Western Africa,1020,"Milk, whole fresh goat",5420,1962,hg/An,559.0,Calculated data


array(['100mg/An', 'hg/An', 'hg'], dtype=object)



Table 'yield/_carcass__weight'


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Year,Unit,Value,Flag
650383,5204,Central America,1808,"Meat, Poultry",5424,2012,0.1g/An,17163.0,Calculated data
523966,173,Poland,1035,"Meat, pig",5417,2006,hg/An,865.0,Calculated data
689732,5801,Least Developed Countries,1807,Sheep and Goat Meat,5417,1961,hg/An,112.0,Calculated data
290932,17,Bermuda,1807,Sheep and Goat Meat,5417,1994,hg/An,120.0,Calculated data
503030,159,Nigeria,977,"Meat, sheep",5417,2013,hg/An,100.0,Calculated data


array(['hg/An', '0.1g/An'], dtype=object)

In [51]:
# List the distinct items present in the 'production' table
dfs_prod_animals["production"]["Item"].unique()

array(['Eggs, hen, in shell', 'Eggs, hen, in shell (number)',
       'Honey, natural', 'Meat indigenous, camel',
       'Meat indigenous, cattle', 'Meat indigenous, chicken',
       'Meat indigenous, goat', 'Meat indigenous, sheep', 'Meat, cattle',
       'Meat, game', 'Meat, goat', 'Meat, sheep', 'Milk, whole fresh cow',
       'Milk, whole fresh goat', 'Milk, whole fresh sheep',
       'Silk-worm cocoons, reelable', 'Wool, greasy',
       'Eggs, other bird, in shell',
       'Eggs, other bird, in shell (number)', 'Meat indigenous, pig',
       'Meat, chicken', 'Meat, nes', 'Meat, pig',
       'Milk, whole fresh buffalo', 'Meat indigenous, horse',
       'Meat indigenous, rabbit', 'Meat indigenous, turkey',
       'Meat, camel', 'Meat, horse', 'Milk, whole fresh camel', 'Beeswax',
       'Meat indigenous, duck', 'Meat indigenous, geese',
       'Skins, sheep, with wool', 'Hides, cattle, fresh', 'Meat, duck',
       'Meat, turkey', 'Meat, goose and guinea fowl', 'Meat, rabbit',
       

## Trade data