# FAO

Find out which source of fat people eat the most of in each country.

In [None]:
import datapackage
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import numpy as np
import pandas as pd

%matplotlib inline

# Default size (width, height in inches) for every figure in this notebook.
plt.rcParams.update({"figure.figsize": (20, 10)})

## Load in the data

In [None]:
# FAOSTAT country table; its 'ISO3 Code' column is joined onto the items later.
countries = pd.read_csv(filepath_or_buffer='data/FAOSTAT_data_10-25-2019.csv')

In [None]:
# Load the FAOSTAT fat-items table, then list the distinct item names
# alphabetically to help decide how to aggregate them.
items = pd.read_csv(filepath_or_buffer='data/FAOSTAT_ct_fat_items.csv')
sorted(pd.unique(items['Item']))

In [None]:
# Keep only the columns the analysis needs. Take an explicit copy so that
# adding a column to `items` afterwards modifies an independent frame rather
# than a slice of the raw table (which triggers SettingWithCopyWarning).
items = items[['Country Code', 'Country', 'Year', 'Item', 'Value']].copy()

## Aggregate items

1. Set up a dictionary which maps items to new names
2. Sum over the aggregated items
3. For each country code, country & year pick the item with the maximum value

In [None]:
# Maps a raw FAOSTAT item name to the broader category it is summed into.
# Items that are not listed here keep their own name.
# Each key appears exactly once (a repeated key in a dict literal silently
# overrides the earlier entry).
aggregate = {
    # Dairy
    'Butter, Ghee': 'Dairy',
    'Cream': 'Dairy',
    'Milk - Excluding Butter': 'Dairy',
    # Other oilcrops
    'Oilcrops Oil, Other': 'Oilcrops, Other',
    'Cottonseed Oil': 'Oilcrops, Other',
    'Oilcrops, Other': 'Oilcrops, Other',
    'Maize Germ Oil': 'Oilcrops, Other',
    # Groundnuts
    'Groundnut Oil': 'Groundnuts',
    'Groundnuts (Shelled Eq)': 'Groundnuts',
    # Cereals
    'Maize and products': 'Cereals',
    'Wheat and products': 'Cereals',
    'Rice (Milled Equivalent)': 'Cereals',
    'Barley and products': 'Cereals',
    'Millet and products': 'Cereals',
    'Cereals, Other': 'Cereals',
    'Oats': 'Cereals',
}

In [None]:
# Relabel each item with its aggregate category; items without an entry in
# `aggregate` keep their original name.
items['Item_Dairy'] = items['Item'].apply(lambda name: aggregate.get(name, name))

In [None]:
# Total the values per country, year and aggregated item.
# Only 'Value' is summed: the leftover text column 'Item' is not a quantity,
# and summing it would (depending on the pandas version) either be silently
# dropped or produce concatenated strings.
items = (
    items.groupby(by=['Country Code', 'Country', 'Year', 'Item_Dairy'])['Value']
    .sum()
    .reset_index()
)

In [None]:
# Boolean mask: True where a row's value equals the maximum of its
# (country code, country, year) group. Tied rows are all kept.
# The aggregation is given by name ('max') because passing the Python builtin
# `max` to transform is deprecated in recent pandas releases.
idx = items.groupby(
    by=['Country Code', 'Country', 'Year'])['Value'].transform('max') == items['Value']
max_items = items[idx]

In [None]:
# Display the rows holding the maximum value per country and year.
max_items

In [None]:
# Give every item that tops at least one group an integer code, numbered in
# order of first appearance.
item_names = max_items['Item_Dairy'].unique()
mapping = dict(zip(item_names, range(len(item_names))))
# Reserve one more code for missing values (NaN) unless one already exists.
mapping.setdefault(np.nan, len(mapping))

# Join with countries to get the 3-letter (ISO3) country code

1. Set country code as the index for both dataframes
2. Join on country code to obtain ISO3 country code
3. Import country outlines. Download country outlines from https://datahub.io/core/geo-countries#resource-geo-countries_zip
4. Join items with outlines on ISO3 index

In [None]:
# Re-key the country table by 'Country Code' so it can be joined on its index.
# The previous index is first moved into an ordinary column.
countries = countries.reset_index()
countries = countries.set_index('Country Code')

In [None]:
# Re-key the item table by 'Country Code' to match the country table.
# The previous index is first moved into an ordinary column.
max_items = max_items.reset_index()
max_items = max_items.set_index('Country Code')

In [None]:
# Attach the 'ISO3 Code' column by matching on the index (left join, so every
# item row is kept).
max_items = max_items.join(other=countries[['ISO3 Code']], how='left')

In [None]:
# Switch the index to the ISO3 code, the key used to join with the outlines.
max_items = max_items.set_index(keys='ISO3 Code')

In [None]:
# Open the downloaded geo-countries archive as a data package.
package = datapackage.Package('data/geo-countries_zip.zip')

In [None]:
# Take the `source` of the package's 'countries' resource.
# NOTE(review): presumably this is the path of the outlines file, as the
# variable name suggests — confirm against the datapackage documentation.
countries_path = package.get_resource('countries').source

In [None]:
# Read the country outlines into a GeoDataFrame.
world = gpd.GeoDataFrame.from_file(countries_path)

In [None]:
# Index the outlines by their 'ISO_A3' column so they can be joined on it.
world = world.set_index(keys='ISO_A3')

In [None]:
# Left join on the index: every outline is kept, and outlines without a
# matching item row get missing values.
max_items = world.join(other=max_items, how='left')

In [None]:
# add an integer column to signify fat source
max_items['coding'] = max_items['Item_Dairy'].apply(lambda x: mapping[x])
max_items.sample(5)

# Plot the map

1. Set the colour scheme

In [None]:
# Sixteen hex colours for the map; the last entry is white.
colours = [
    '#a6cee3', '#1f78b4', '#b2df8a', '#33a02c',
    '#fb9a99', '#e31a1c', '#fdbf6f', '#ff7f00',
    '#cab2d6', '#6a3d9a', '#ffff99', '#b15928',
    # the first three colours are used a second time
    '#a6cee3', '#1f78b4', '#b2df8a',
    '#ffffff',
]

# Build the colour map from (position, colour) anchors, with the colours
# spaced evenly between 0 and 1.
cmap = LinearSegmentedColormap.from_list(
    'mycmap', list(zip(np.linspace(0, 1, len(colours)), colours)))

In [None]:
# plt.subplots already returns the Axes it created, so there is no need to
# fetch it again with plt.gca().
fig, ax = plt.subplots(1, 1)

# Colour each country by its integer fat-source code.
max_items.plot(
    ax=ax,
    column='coding',
    cmap=cmap,
    categorical=True,
    legend=True,
    edgecolor='#000000',  # colour of the country outlines
    legend_kwds={'loc': 'lower left'},
)

def replace_legend_items(legend, mapping):
    """Relabel legend entries in place.

    Args:
        legend: Object with a ``texts`` sequence whose items provide
            ``get_text()`` and ``set_text()`` (e.g. a matplotlib legend).
        mapping: Dict from the currently shown value to its new label. Keys
            are compared by their ``str()`` form, because legend labels are
            text.

    Each label is replaced at most once, so a new label that happens to look
    like another key is not replaced a second time. Labels without a matching
    key are left untouched.
    """
    # Build the text lookup once; on colliding str() forms the first key wins.
    labels = {}
    for key, label in mapping.items():
        labels.setdefault(str(key), label)

    for txt in legend.texts:
        current = txt.get_text()
        if current in labels:
            txt.set_text(labels[current])

# Invert item -> code into code -> item so the numeric legend labels can be
# swapped for the item names.
legend_dict = dict(zip(mapping.values(), mapping.keys()))

replace_legend_items(ax.get_legend(), legend_dict)

# Show the map in the notebook, then write it to a PDF file.
plt.show()
fig.savefig('fats.pdf')

# Export data to a CSV file

In [None]:
# For every fat source, collect the countries it was selected for into one
# comma-separated string. The join result is already a string, so no extra
# "%s" formatting is needed.
summary = (
    max_items[['Country', 'Item_Dairy']]
    .groupby('Item_Dairy')['Country']
    .apply(lambda names: ', '.join(names))
)

In [None]:
# Write one row per fat source (item name plus its country list), without
# the numeric row index.
summary.reset_index().to_csv(path_or_buf='sources_of_fat.csv', index=False)