In [10]:
import numpy as np
import pandas as pd
from dotmap import DotMap
from pathlib import Path
from collections.abc import Mapping, Callable

from IPython.display import display

# Config

In [35]:
# Root directory of the input data files; the path is relative, so it resolves against the current working directory
DATA = Path("./data")

# Load Standards

In [14]:
# Flag definitions, indexed by flag code.
# keep_default_na=False keeps codes such as "" and "NA" as literal strings instead of parsing them as NaN.
df_flags = pd.read_csv(DATA / "definitions_and_standards/flags.csv", index_col="Flag", keep_default_na=False)

# Load data

## Helper Functions

In [16]:
# Helper function

def join_flags(df, filepath, df_flags):
    """
    Replaces the flag codes of a dataframe by the matching entries of df_flags.

    The "Flag" column is re-read from the CSV file with keep_default_na=False so that
    codes such as "" and "NA" stay distinct strings instead of being parsed as NaN.

    Args:
        df: dataframe loaded from filepath (flag codes are matched to its rows by index)
        filepath: path of the CSV file df comes from; it must contain a "Flag" column
        df_flags: dataframe keyed by "Flag" (column or index level) whose "Flags" column
            replaces the code (presumably the meaning of the flag)

    Returns:
        A new dataframe in which "Flag" holds the "Flags" value of df_flags for each row.
        df itself is not modified. Rows whose code is absent from df_flags are dropped,
        because the merge is an inner join.
    """
    flag_codes = pd.read_csv(
        filepath, encoding="latin1", usecols=["Flag"], keep_default_na=False
    )["Flag"]
    # assign() returns a new dataframe, so the caller's dataframe is left untouched
    with_codes = df.assign(Flag=flag_codes)
    return (
        with_codes
        .merge(df_flags, on="Flag")
        .drop(columns="Flag")
        .rename(columns={"Flags": "Flag"})
    )

def split_on(df, column, rename=None, only_named=False):
    """
    Splits a dataframe into multiple dataframes based on the value of a column.

    Args:
        df: dataframe to split
        column: name of the column to split on
        rename (optional): mapping or callable that renames values to the names used in the final dictionary
        only_named (False): keep only the dataframes whose value in column is renamed

    Returns:
        A dictionary (DotMap) mapping the unique values in df[column] to the dataframe holding the rows
        with that value, with the split column dropped.
        Values (i.e. dict keys) can be renamed by the rename argument. A mapping without the value, or a
        callable returning None, leaves the value unrenamed. Any other name, including falsy ones such
        as 0 or "", is used as is.
    """
    result = DotMap(_dynamic=False)
    values = df[column]
    for value in values.unique():
        if callable(rename):
            name = rename(value)
        elif isinstance(rename, Mapping):
            name = rename.get(value)
        else:
            name = None
        if name is None:
            if only_named:
                continue
            # None is the only "not renamed" marker; fall back to the raw value
            name = value
        # NaN never compares equal to itself, so missing values have to be selected with isna()
        if pd.api.types.is_scalar(value) and pd.isna(value):
            mask = values.isna()
        else:
            mask = values == value
        result[name] = df[mask].drop(columns=column)
    return result

## Animal emissions data

## Computed emissions data

In [33]:
# The file is read twice (values here, flag codes inside join_flags), so its path is named once
emission_intensity_csv = (
    DATA / "global-food-agriculture-statistics/raw_files/Environment_Emissions_intensities_E_All_Data_(Normalized).csv"
)

# Load base info and drop the redundant "Year Code" column, then reload the flags to prevent
# the NaN collision between the flags "" and "NA", and replace them by their meaning
df_env_emission_intensity = join_flags(
    pd.read_csv(emission_intensity_csv, encoding="latin1").drop(columns="Year Code"),
    emission_intensity_csv,
    df_flags,
)

print(df_env_emission_intensity["Unit"].unique())

# Split on the Element column; no rename is given, so the keys are the raw Element values
dfs_env_emission_intensity = split_on(df_env_emission_intensity, "Element")

# TODO: make all values the same unit and put the unit in the column title
# (Emissions, intensity)

for element, df in dfs_env_emission_intensity.items():
    print(f"\n\nTable {element}")
    display(df.head())
    print(df["Unit"].unique())

['kg CO2eq/kg product' 'gigagrams' 'tonnes']


Table Emissions intensity


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Year,Unit,Value,Flag
0,2,Afghanistan,1718,Cereals excluding rice,71761,Emissions intensity,1961,kg CO2eq/kg product,0.1191,Calculated data
1,2,Afghanistan,1718,Cereals excluding rice,71761,Emissions intensity,1962,kg CO2eq/kg product,0.1209,Calculated data
2,2,Afghanistan,1718,Cereals excluding rice,71761,Emissions intensity,1963,kg CO2eq/kg product,0.1261,Calculated data
3,2,Afghanistan,1718,Cereals excluding rice,71761,Emissions intensity,1964,kg CO2eq/kg product,0.1214,Calculated data
4,2,Afghanistan,1718,Cereals excluding rice,71761,Emissions intensity,1965,kg CO2eq/kg product,0.1204,Calculated data


['kg CO2eq/kg product']


Table Emissions (CO2eq)


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Year,Unit,Value,Flag
54,2,Afghanistan,1718,Cereals excluding rice,7231,Emissions (CO2eq),1961,gigagrams,402.2165,Calculated data
55,2,Afghanistan,1718,Cereals excluding rice,7231,Emissions (CO2eq),1962,gigagrams,408.3269,Calculated data
56,2,Afghanistan,1718,Cereals excluding rice,7231,Emissions (CO2eq),1963,gigagrams,385.7396,Calculated data
57,2,Afghanistan,1718,Cereals excluding rice,7231,Emissions (CO2eq),1964,gigagrams,406.7923,Calculated data
58,2,Afghanistan,1718,Cereals excluding rice,7231,Emissions (CO2eq),1965,gigagrams,410.094,Calculated data


['gigagrams']


Table Production


Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Year,Unit,Value,Flag
540,2,Afghanistan,1017,"Meat, goat",5510,Production,1961,tonnes,12220.0,Calculated data
541,2,Afghanistan,1017,"Meat, goat",5510,Production,1962,tonnes,11375.0,Calculated data
542,2,Afghanistan,1017,"Meat, goat",5510,Production,1963,tonnes,10530.0,Calculated data
543,2,Afghanistan,1017,"Meat, goat",5510,Production,1964,tonnes,9750.0,Calculated data
544,2,Afghanistan,1017,"Meat, goat",5510,Production,1965,tonnes,11375.0,Calculated data


['tonnes']


## Trade data