In [3]:
import pandas as pd

In [4]:
# Load the per-country, per-year summary table (population, meat and crop columns).
df = pd.read_csv(filepath_or_buffer='../_data/summary_with_pop.csv')

In [5]:
# Inspect the raw column labels; the meat and crop columns carry a unit suffix in parentheses.
df.columns

Index(['country', 'year', 'population', 'beef (kg/capita)', 'pig (kg/capita)',
       'poultry (kg/capita)', 'sheep (kg/capita)', 'maize (1000 tonnes)',
       'rice (1000 tonnes)', 'soy (1000 tonnes)', 'wheat (1000 tonnes)'],
      dtype='object')

In [6]:
# Keep only the text before the first space, e.g. 'beef (kg/capita)' -> 'beef'.
cols = [label.partition(' ')[0] for label in df.columns]

In [7]:
# Replace the column labels with the shortened names (relabels df in place).
df.columns = cols

### Quantifying CO2 emissions

Data from: https://www.visualcapitalist.com/visualising-the-greenhouse-gas-impact-of-each-food/

Units: kg CO2 equivalents per kg of product

In [8]:
# Emission factors in kg CO2-equivalents per kg of product, keyed by the
# shortened column names. Built directly as a Series so factors can be
# looked up by label.
emissions = pd.Series({
    # meats
    'beef': 60,
    'sheep': 24,
    'pig': 7,
    'poultry': 6,
    # crops
    'maize': 1,
    'rice': 4,
    'soy': 0.9,
    'wheat': 1.4,
})

In [9]:
# Column groups: meat columns (kg/capita) and crop columns (1000 tonnes).
MEATS = [
    'beef',
    'pig',
    'poultry',
    'sheep',
]
PLANTS = [
    'maize',
    'rice',
    'soy',
    'wheat',
]

In [10]:
# Meat-only working frame: copy df first so later edits never touch it,
# then remove the crop columns.
emdf = df.copy().drop(columns=PLANTS)

In [11]:
# Country codes excluded from the emissions analysis.
drop_countries = ['KAZ']
emdf = emdf[~emdf['country'].isin(drop_countries)]

In [12]:
# Emissions per meat = per-capita consumption * population * emission factor.
for meat in MEATS:
    emdf[f'{meat}_emissions'] = emdf[meat] * emdf['population'] * emissions[meat]

In [12]:
# for plant in PLANTS:
#     emdf[plant + '_emissions'] = emdf[plant] * emissions.loc[plant]

In [13]:
# emdf['total'] = emdf[MEATS + PLANTS].sum(axis=1)
# Row-wise totals across the meat columns: per-capita consumption and emissions.
emission_columns = [meat_name + '_emissions' for meat_name in MEATS]
emdf['total_meat_consumption'] = emdf.loc[:, MEATS].sum(axis='columns')
emdf['total_meat_emissions'] = emdf.loc[:, emission_columns].sum(axis='columns')

Final units:

 - `population`: millions
 - meat consumption: kg/capita
 - emissions: 1000 tonnes CO2 equivalents (kg/capita × millions of people × kg CO2e per kg of product = 10^6 kg CO2e)

In [17]:
# Lookup from three-letter country code to display name.
# Codes follow ISO 3166-1 alpha-3, plus the aggregate code 'WLD' for the world total.
# Note the easily confused pair: 'IDN' is Indonesia and 'IND' is India.
countries = pd.Series({
    'ARG': 'Argentina',
    'AUS': 'Australia',
    'BRA': 'Brazil',
    'CAN': 'Canada',
    'CHL': 'Chile',
    'CHN': 'China',
    'COL': 'Colombia',
    'EGY': 'Egypt',
    'ETH': 'Ethiopia',
    'IDN': 'Indonesia',
    'IND': 'India',
    'IRN': 'Iran',
    'ISR': 'Israel',
    'JPN': 'Japan',
    'KAZ': 'Kazakhstan',
    'KOR': 'South Korea',
    'MEX': 'Mexico',
    'MYS': 'Malaysia',
    'NGA': 'Nigeria',
    'NZL': 'New Zealand',
    'PAK': 'Pakistan',
    'PER': 'Peru',
    'PHL': 'Philippines',
    'PRY': 'Paraguay',
    'RUS': 'Russia',
    'SAU': 'Saudi Arabia',
    'THA': 'Thailand',
    'TUR': 'Turkey',
    'UKR': 'Ukraine',
    'USA': 'United States',
    'VNM': 'Vietnam',
    'WLD': 'World',
    'ZAF': 'South Africa',
})

In [24]:
# Region lookup keyed by three-letter country code. The mapping is written
# region-first so each grouping can be read at a glance, then flattened and
# sorted by code. 'KAZ' is deliberately absent (it is excluded from the data).
_codes_by_region = {
    'Latin America': ['ARG', 'BRA', 'CHL', 'COL', 'MEX', 'PER', 'PRY'],
    'Oceania': ['AUS', 'NZL'],
    'North America': ['CAN', 'USA'],
    'East Asia': ['CHN', 'JPN', 'KOR'],
    'Africa': ['EGY', 'ETH', 'NGA', 'ZAF'],
    'South Asia': ['IDN', 'IND', 'MYS', 'PAK', 'PHL', 'THA', 'VNM'],
    'Middle East': ['IRN', 'ISR', 'SAU', 'TUR'],
    'East Europe': ['RUS', 'UKR'],
    'World': ['WLD'],
}

region = pd.Series({
    code: region_name
    for region_name, codes in _codes_by_region.items()
    for code in codes
}).sort_index()

In [25]:
# Attach display name and region by looking up each row's country code;
# a code missing from a lookup yields NaN.
emdf['country_name'] = emdf['country'].map(countries)
emdf['region'] = emdf['country'].map(region)

In [26]:
# Show the assembled frame; the rendered output is truncated to the first and last rows.
emdf

Unnamed: 0,country,year,population,beef,pig,poultry,sheep,beef_emissions,pig_emissions,poultry_emissions,sheep_emissions,total_meat_consumption,total_meat_emissions,country_name,region
0,ARG,1990,32.580854,55.559,3.674,8.660,2.282,108609.580043,837.914403,1692.901174,1784.388212,70.175,112924.783832,Argentina,Latin America
1,ARG,1991,33.028546,51.817,3.612,8.880,2.347,102686.410085,835.093757,1759.760931,1860.431939,66.656,107141.696712,Argentina,Latin America
2,ARG,1992,33.475005,50.980,4.487,10.027,1.894,102393.345294,1051.416432,2013.923251,1521.639827,67.388,106980.324804,Argentina,Latin America
3,ARG,1993,33.917440,51.678,4.851,15.442,1.869,105167.127859,1151.734510,3142.518651,1521.400689,73.840,110982.781709,Argentina,Latin America
4,ARG,1994,34.353066,48.594,4.837,18.392,2.374,100161.173352,1163.160462,3790.929539,1957.300288,74.197,107072.563642,Argentina,Latin America
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
952,ZAF,2014,54.166787,12.585,3.365,34.106,3.273,40901.340864,1275.898668,11084.474625,4254.909452,53.329,57516.623608,South Africa,Africa
953,ZAF,2015,55.055626,13.099,3.523,33.924,2.999,43270.418698,1357.726793,11206.242339,3962.683737,53.545,59797.071567,South Africa,Africa
954,ZAF,2016,55.946872,13.132,3.454,33.950,2.957,44081.659386,1352.683471,11396.377826,3970.437612,53.493,60801.158296,South Africa,Africa
955,ZAF,2017,56.837474,12.067,3.388,33.371,2.680,41151.467925,1347.957533,11380.340069,3655.786328,51.506,57535.551856,South Africa,Africa


In [27]:
# Persist the emissions table without the row index.
emdf.to_csv(
    path_or_buf='../_data/emissions.csv',
    index=False,
)

In [30]:
# Summary statistics of population over all rows except the 'WLD' aggregate.
emdf.loc[emdf['country'].ne('WLD'), 'population'].describe()

count     899.000000
mean      151.740338
std       289.833600
min         3.398000
25%        29.925212
50%        63.136000
75%       127.381444
max      1415.045928
Name: population, dtype: float64