In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

import os

# Global matplotlib style; it only affects matplotlib/seaborn figures, not the plotly charts.
plt.style.use('ggplot')

In [2]:
# Move into the data folder only once: re-running this cell after the working
# directory has already changed would otherwise raise FileNotFoundError.
if os.path.basename(os.getcwd()) != 'dataframes_pkl':
    os.chdir('dataframes_pkl')

In [3]:
# Load the two pre-built tables from pickle files in the current working directory.
# NOTE(review): unpickling can execute arbitrary code - only load files from a trusted source.
df = pd.read_pickle('df_global_format.pkl')
coord_df = pd.read_pickle('df_coordenadas.pkl')

In [4]:
# Replace the short 'United Kingdom' label with the long form on every matching row.
uk_index = df.index[df['Area'] == 'United Kingdom']
df.loc[uk_index, 'Area'] = 'United Kingdom of Great Britain and Northern Ireland'

In [5]:
# Preview the first rows to check the column layout.
df.head()

Unnamed: 0,Area Code,Area,Partner Country Code,Partner Countries,Item Code,Item,Element Code,Element,Year,Unit,Value
0,2,Afghanistan,4,Algeria,230,"Cashew nuts, shelled",5910,Export Quantity,2016,tonnes,3.0
1,2,Afghanistan,4,Algeria,230,"Cashew nuts, shelled",5922,Export Value,2016,1000 US$,23.0
2,2,Afghanistan,9,Argentina,56,Maize,5610,Import Quantity,2017,tonnes,130.0
3,2,Afghanistan,9,Argentina,56,Maize,5622,Import Value,2017,1000 US$,68.0
4,2,Afghanistan,9,Argentina,1058,"Meat, chicken",5610,Import Quantity,2014,tonnes,974.0


In [6]:
# List every distinct 'Element' label; the plotting functions below filter on these exact strings.
df['Element'].unique()

array(['Export Quantity', 'Export Value', 'Import Quantity',
       'Import Value', 'Production', 'Loss', 'Processed',
       'Food supply quantity (tonnes)', 'Feed', 'Seed',
       'Other uses (non-food)', 'Tourist consumption',
       'Emissions intensity', 'Emissions (CO2eq)',
       'Total Population - Both sexes', 'Rural population',
       'Urban population', 'Export Transport Emissions Quantity',
       'Import Transport Emissions Quantity',
       'Import Production Emissions Quantity'], dtype=object)

In [7]:
# List every distinct 'Item' label; these are the values accepted as `alimento` below.
df['Item'].unique()

array(['Cashew nuts, shelled', 'Maize', 'Meat, chicken',
       'Milk, whole dried', 'Chick peas',
       'Meat, cattle, boneless (beef & veal)', 'Meat, sheep', 'Nuts nes',
       'Nuts, prepared (exc. groundnuts)', 'Cheese, processed',
       'Cheese, whole cow milk', 'Chocolate products nes',
       'Lemons and limes', 'Walnuts, shelled',
       'Buttermilk, curdled, acidified milk', 'Milk, whole condensed',
       'Milk, whole fresh cow', 'Rice, paddy', 'Brazil nuts, shelled',
       'Cashew nuts, with shell', 'Grapes', 'Groundnuts, shelled',
       'Milk, whole evaporated', 'Wine', 'Apples',
       'Cabbages and other brassicas', 'Oranges', 'Strawberries',
       'Tomatoes', 'Coffee, roasted', 'Eggs, hen, in shell',
       'Groundnuts, prepared', 'Meat, beef, preparations', 'Meat, pig',
       'Peas, green', 'Bananas', 'Milk, skimmed dried', 'Rice, milled',
       'Soybeans', 'Sweet potatoes', 'Coffee, green', 'Butter, cow milk',
       'Oats', 'Potatoes', 'Coffee, extracts',
     

In [8]:
def reparto_suministros(df, pais, alimento, año = None, ratio_pop = False):
    """Show a pie chart with the production / import / export split of one item in one country.

    Only rows where both 'Area' and 'Partner Countries' equal `pais` are used.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with the columns 'Area', 'Partner Countries', 'Item',
        'Element', 'Year', 'Unit' and 'Value'. It is not modified.
    pais : str
        Country to plot.
    alimento : str
        Value of the 'Item' column to plot.
    año : int, optional
        Year to plot. When omitted (or falsy) the values are summed over every year.
    ratio_pop : bool, default False
        Divide every value by the 'Total Population - Both sexes' value of `pais`:
        the value of `año` when a year is given, otherwise the mean over all years.

    Raises
    ------
    ValueError
        If `ratio_pop` is True and `df` has no population row for the selection.
    """
    elements = ['Production', 'Import Quantity', 'Export Quantity']

    seleccion = (
        (df['Area'] == pais)
        & (df['Partner Countries'] == pais)
        & (df['Item'] == alimento)
        & (df['Element'].isin(elements))
    )
    graph_df = df[seleccion]

    if año:
        graph_df = graph_df[graph_df['Year'] == año]
    else:
        # No `axis` keyword: GroupBy.agg forwards extra keyword arguments to the
        # aggregation functions, and neither the lambda nor 'sum' accepts one.
        graph_df = (
            graph_df
            .groupby(['Area', 'Item', 'Element'])
            .agg({'Unit': lambda x: x.mode()[0], 'Value': 'sum'})
            .reset_index()
        )

    if ratio_pop:
        pop_df = df[(df['Element'] == 'Total Population - Both sexes') & (df['Area'] == pais)]
        if año:
            pop_df = pop_df[pop_df['Year'] == año]
        if pop_df.empty:
            raise ValueError(f'No population data for {pais}' + (f' in {año}' if año else ''))

        # Single year: take that year's figure. All years: average the population.
        population = pop_df['Value'].iloc[0] if año else pop_df['Value'].mean()

        # assign() returns a new frame, so the caller's df (and any slice of it) is never written to.
        graph_df = graph_df.assign(Value = graph_df['Value'] / population)

    title = f'{alimento} supply distribution in {pais} during {año}' if año else f'{alimento} supply distribution in {pais} during {df["Year"].min()}-{df["Year"].max()}'
    fig = px.pie(data_frame = graph_df, names = 'Element', values = 'Value', title = title)
    fig.update_traces(textposition = 'inside', textinfo = 'percent+label')
    fig.show()


In [9]:
# Example: 2015 supply split for 'Meat, cattle' in the Netherlands, with values divided by population.
reparto_suministros(df, 'Netherlands', 'Meat, cattle', año = 2015, ratio_pop = True)

In [10]:
def sankey(df, pais, alimento, año = None):
    """Show a Sankey diagram of how one item enters and leaves a country's supply.

    'Import Quantity' and 'Production' are drawn as flows into a central 'Supply'
    node; every other selected element is drawn as a flow out of it.
    Only rows where both 'Area' and 'Partner Countries' equal `pais` are used.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with the columns 'Area', 'Partner Countries', 'Item',
        'Element', 'Year', 'Unit' and 'Value'. It is not modified.
    pais : str
        Country to plot.
    alimento : str
        Value of the 'Item' column to plot.
    año : int, optional
        Year to plot. When omitted (or falsy) the values are summed over every year.
    """
    elements = ['Export Quantity', 'Import Quantity', 'Production', 'Loss', 'Processed',
                'Food supply quantity (tonnes)', 'Feed', 'Seed', 'Other uses (non-food)']
    # Elements that feed the 'Supply' node; all remaining elements drain it.
    sources = ['Import Quantity', 'Production']

    seleccion = (
        (df['Area'] == pais)
        & (df['Partner Countries'] == pais)
        & (df['Item'] == alimento)
        & (df['Element'].isin(elements))
    )
    graph_df = df[seleccion]

    if año:
        graph_df = graph_df[graph_df['Year'] == año]
    else:
        # No `axis` keyword: GroupBy.agg forwards extra keyword arguments to the
        # aggregation functions, and neither the lambda nor 'sum' accepts one.
        graph_df = (
            graph_df
            .groupby(['Area', 'Item', 'Element'])
            .agg({'Unit': lambda x: x.mode()[0], 'Value': 'sum'})
            .reset_index()
        )

    # One node per element present in the data, plus the central 'Supply' node (last position).
    labels = list(graph_df['Element'].unique()) + ['Supply']
    node_by_label = {label: position for position, label in enumerate(labels)}
    supply_node = node_by_label['Supply']

    own_node = graph_df['Element'].map(node_by_label)
    flows_in = graph_df['Element'].isin(sources)

    # assign() builds a new frame instead of writing columns into a filtered slice of df.
    graph_df = graph_df.assign(
        Source = own_node.where(flows_in, supply_node),
        Target = own_node.where(~flows_in, supply_node),
    )

    title = f'{alimento} supply flow in {pais} during {año}' if año else f'{alimento} supply flow in {pais} during {df["Year"].min()}-{df["Year"].max()}'

    fig = go.Figure(data = [go.Sankey(
        arrangement = "snap",
        valueformat = ".2s",
        valuesuffix = "Tonnes",
        node = dict(
            pad = 15,
            thickness = 15,
            line = dict(color = "black", width = 0.5),
            label = labels,
        ),
        link = dict(
            source = graph_df['Source'],
            target = graph_df['Target'],
            value = graph_df['Value'],
        ))])
    fig.update_layout(title = title)
    fig.show()

In [11]:
# Example: potato supply flow for the Netherlands, summed over every year in df.
sankey(df, 'Netherlands', 'Potatoes')

In [12]:
def regplot_emissions(df, alimento, año = None):
    """Scatter production against CO2 emissions per country, with one overall OLS trendline.

    Only rows where 'Area' equals 'Partner Countries' are used. Each point is one
    'Area'; its coordinates are the 'Production' and 'Emissions (CO2eq)' values.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with the columns 'Area', 'Partner Countries', 'Item',
        'Element', 'Year', 'Unit' and 'Value'. It is not modified.
    alimento : str
        Value of the 'Item' column to plot.
    año : int, optional
        Year to plot. When omitted (or falsy) the values are summed over every year.

    Raises
    ------
    ValueError
        If the selection lacks 'Production' or 'Emissions (CO2eq)' rows.
    """
    # NOTE(review): the axis units below are taken from the original labels - confirm against the 'Unit' column.
    axis_names = {
        'Emissions (CO2eq)': 'CO2 Emissions [gigagrams]',
        'Production': 'Production [tonnes]',
    }
    elements = list(axis_names)

    seleccion = (
        (df['Element'].isin(elements))
        & (df['Item'] == alimento)
        & (df['Area'] == df['Partner Countries'])
    )
    graph_df = df[seleccion]

    if año:
        graph_df = graph_df[graph_df['Year'] == año]
    else:
        # No `axis` keyword: GroupBy.agg forwards extra keyword arguments to the
        # aggregation functions, and neither the lambda nor 'sum' accepts one.
        graph_df = (
            graph_df
            .groupby(['Area', 'Item', 'Element'])
            .agg({'Unit': lambda x: x.mode()[0], 'Value': 'sum'})
            .reset_index()
        )

    graph_df = graph_df.pivot_table(values = 'Value', index = 'Area', columns = ['Element'])

    missing = [element for element in elements if element not in graph_df.columns]
    if missing:
        raise ValueError(f'No {", ".join(missing)} data available for {alimento}')

    # Rename by key rather than by position so the labels cannot be swapped
    # if the pivot ever orders its columns differently.
    graph_df = graph_df[elements].rename(columns = axis_names).rename_axis(columns = None)

    title = f'CO2 emissions derived from {alimento} production during {año}' if año else f'CO2 emissions derived from {alimento} production during {df["Year"].min()}-{df["Year"].max()}'
    fig = px.scatter(
        data_frame = graph_df,
        x = 'Production [tonnes]',
        y = 'CO2 Emissions [gigagrams]',
        trendline = 'ols',
        trendline_color_override = '#fd8585',
        trendline_scope = 'overall',
        hover_name = graph_df.index,
        color = graph_df.index,
        title = title)
    fig.show()

In [13]:
# Example: production vs. CO2 emissions for 'Rice, paddy' in 2016, one point per country.
regplot_emissions(df, 'Rice, paddy', año = 2016)

In [14]:
def emissions_barplot(df, paises, alimento, range_y = (0, 560000)):
    """Show an animated (one frame per year) stacked bar chart of emissions per country.

    The bars stack the 'Emissions (CO2eq)', 'Import Transport Emissions Quantity'
    and 'Import Production Emissions Quantity' rows of `alimento` for each country.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with the columns 'Area', 'Item', 'Element', 'Year'
        and 'Value'. It is not modified.
    paises : list of str
        Countries ('Area' values) to compare.
    alimento : str
        Value of the 'Item' column to plot.
    range_y : sequence of two numbers or None, default (0, 560000)
        Fixed y-axis range shared by every frame. The default keeps the range this
        function has always used; pass None to size the axis to the tallest
        stacked bar found in the selected data.
    """
    elements = ['Emissions (CO2eq)','Import Transport Emissions Quantity', 'Import Production Emissions Quantity']
    seleccion = (df['Area'].isin(paises)) & (df['Element'].isin(elements)) & (df['Item'] == alimento)

    # Plotly Express orders animation frames by first appearance in the data,
    # so sort by year to make the slider run chronologically.
    graph_df = df[seleccion].sort_values('Year', kind = 'stable')

    if range_y is None:
        # A fixed range is still needed so the axis does not rescale between frames.
        tallest = graph_df.groupby(['Year', 'Area'])['Value'].sum().max()
        range_y = [0, tallest] if pd.notna(tallest) else None
    else:
        range_y = list(range_y)

    title = f'{alimento} emissions by country and year'
    fig = px.bar(graph_df, x="Area", y="Value", color="Element", animation_frame="Year", range_y = range_y, title = title)
    fig.show()

In [15]:
# Example: animated potato emissions bars comparing Spain and Italy.
emissions_barplot(df, ['Spain', 'Italy'], 'Potatoes')

In [16]:
# Sum of every 'Import Production Emissions Quantity' value recorded for Italy (all items, partners and years).
df.loc[
    (df['Area'] == 'Italy') & (df['Element'] == 'Import Production Emissions Quantity'),
    'Value',
].sum()

266640123.94919893

In [31]:
def mapa_intercambios(df, pais, alimento, tipo_intercambio = 'import', año = None, incluir_produccion = False):
    """Show a world map coloured by the quantity of one item traded between `pais` and each partner.

    Rows with a zero 'Value' and trade rows whose partner is `pais` itself are
    left out. `pais` is painted black on top of the map.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with the columns 'Area', 'Partner Countries', 'Item',
        'Element', 'Year', 'Unit' and 'Value'. It is not modified.
    pais : str
        Reporting country ('Area').
    alimento : str
        Value of the 'Item' column to plot.
    tipo_intercambio : str, default 'import'
        'export' selects 'Export Quantity'; any other value selects 'Import Quantity'.
    año : int, optional
        Year to plot. When omitted (or falsy) the values are summed over every year.
    incluir_produccion : bool, default False
        Also keep the 'Production' rows of `pais` in the plotted data.
    """
    elementos = ['Export Quantity'] if tipo_intercambio == 'export' else ['Import Quantity']
    elemento_intercambio = elementos[0]

    if incluir_produccion:
        elementos.append('Production')

    seleccion = (df['Area'] == pais) & (df['Item'] == alimento) & (df['Element'].isin(elementos))
    # Trade rows whose partner is the reporting country itself are excluded from the map.
    es_autointercambio = (df['Area'] == df['Partner Countries']) & (df['Element'] == elemento_intercambio)

    # One boolean mask instead of in-place drops by index label: nothing is mutated,
    # and the result does not depend on the index labels being unique.
    graph_df = df[seleccion & ~es_autointercambio & (df['Value'] != 0)]

    if año:
        graph_df = graph_df[graph_df['Year'] == año]
    else:
        # No `axis` keyword: GroupBy.agg forwards extra keyword arguments to the
        # aggregation functions, and neither the lambda nor 'sum' accepts one.
        graph_df = (
            graph_df
            .groupby(['Area', 'Partner Countries', 'Item', 'Element'])
            .agg({'Unit': lambda x: x.mode()[0], 'Value': 'sum'})
            .reset_index()
        )

    title = f'{alimento} {tipo_intercambio} quantity in {pais} during {año}' if año else f'{alimento} {tipo_intercambio} quantity in {pais} during {df["Year"].min()}-{df["Year"].max()}'

    fig = px.choropleth(graph_df, locations = 'Partner Countries', locationmode = 'country names', color = 'Value', color_continuous_scale = px.colors.sequential.YlOrBr, title = title, projection = 'equirectangular', labels = {'Value': 'Tonnes'})

    # Second trace: paint the reporting country in a single flat colour.
    color_pais = 'Black'
    fig.add_traces(go.Choropleth(locations=[pais],
                            locationmode = 'country names',
                            z = [1],
                            colorscale = [[0, color_pais],[1, color_pais]],
                            colorbar=None,
                            showscale = False))

    fig.update_layout(
        autosize=False,
        margin = dict(l=10, r=5, b=10, t=45, pad=4, autoexpand=True),
        width=900)
    fig.show()

In [18]:
# Example: map of 'Rice, paddy' exports reported by 'China, mainland' per partner country, all years summed.
mapa_intercambios(df, 'China, mainland', 'Rice, paddy', tipo_intercambio = 'export', incluir_produccion = False)

In [29]:
def mapa_emisiones_import(df, pais, alimento, tipo_emisiones = 'total', año = None):
    """Show a world map coloured by the emissions attributed to `pais` importing one item from each partner.

    Rows with a zero 'Value' are left out. `pais` is painted black on top of the map.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with the columns 'Area', 'Partner Countries', 'Item',
        'Element', 'Year', 'Unit' and 'Value'. It is not modified.
    pais : str
        Importing country ('Area').
    alimento : str
        Value of the 'Item' column to plot.
    tipo_emisiones : {'total', 'production', 'transport'}, default 'total'
        'production' uses 'Import Production Emissions Quantity', 'transport' uses
        'Import Transport Emissions Quantity', 'total' adds both together.
    año : int, optional
        Year to plot. When omitted (or falsy) the values are summed over every year.

    Raises
    ------
    ValueError
        If `tipo_emisiones` is not one of the three accepted values.
    """
    elementos_por_tipo = {
        'production': ['Import Production Emissions Quantity'],
        'transport': ['Import Transport Emissions Quantity'],
        'total': ['Import Production Emissions Quantity', 'Import Transport Emissions Quantity'],
    }
    if tipo_emisiones not in elementos_por_tipo:
        # Without this check an unknown value skipped the element filter entirely
        # and every element of the item was mapped as if it were emissions.
        raise ValueError(f"tipo_emisiones must be one of {sorted(elementos_por_tipo)}, got {tipo_emisiones!r}")

    seleccion = (
        (df['Area'] == pais)
        & (df['Item'] == alimento)
        & (df['Element'].isin(elementos_por_tipo[tipo_emisiones]))
        & (df['Value'] != 0)
    )
    graph_df = df[seleccion]

    if año:
        graph_df = graph_df[graph_df['Year'] == año]

    # 'total' always collapses the two elements into one value per partner;
    # the single-element modes only need collapsing when several years are combined.
    claves = ['Area', 'Partner Countries', 'Item']
    if tipo_emisiones != 'total':
        claves.append('Element')

    if tipo_emisiones == 'total' or not año:
        # No `axis` keyword: GroupBy.agg forwards extra keyword arguments to the
        # aggregation functions, and neither the lambda nor 'sum' accepts one.
        graph_df = (
            graph_df
            .groupby(claves)
            .agg({'Unit': lambda x: x.mode()[0], 'Value': 'sum'})
            .reset_index()
        )

    title = f'{alimento} {tipo_emisiones} emissions quantity due to importations by {pais} during {año}' if año else f'{alimento} {tipo_emisiones} emissions quantity due to importations by {pais} during {df["Year"].min()}-{df["Year"].max()}'

    fig = px.choropleth(graph_df, locations = 'Partner Countries', locationmode = 'country names', color = 'Value', color_continuous_scale = px.colors.sequential.YlOrBr, title = title, projection = 'equirectangular', labels = {'Value': 'Tonnes CO2'})

    # Second trace: paint the importing country in a single flat colour.
    color_pais = 'Black'
    fig.add_traces(go.Choropleth(locations=[pais],
                            locationmode = 'country names',
                            z = [1],
                            colorscale = [[0, color_pais],[1, color_pais]],
                            colorbar=None,
                            showscale = False))

    fig.update_layout(
        autosize=False,
        margin = dict(l=10, r=5, b=10, t=35, pad=4, autoexpand=True),
        width=900)
    fig.show()

In [30]:
# Example: map of total (production + transport) emissions from Canada's potato imports, all years summed.
mapa_emisiones_import(df, 'Canada', 'Potatoes', tipo_emisiones = 'total', año = None)

In [27]:
def mapa_general(df, alimento, valor = 'emissions', año = None, ratio_pop = False):
    """Show a world map coloured by each country's emissions or production of one item.

    Rows with a zero 'Value' are left out.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with the columns 'Area', 'Partner Countries', 'Item',
        'Element', 'Year', 'Unit' and 'Value'. It is not modified.
    alimento : str
        Value of the 'Item' column to plot.
    valor : {'emissions', 'production'}, default 'emissions'
        'emissions' maps the 'Emissions (CO2eq)' element, 'production' maps 'Production'.
    año : int, optional
        Year to plot. When omitted (or falsy) the values are summed over every year.
    ratio_pop : bool, default False
        Divide each country's value by its 'Total Population - Both sexes' value
        (that year's value, or the mean over all years when `año` is omitted).
        Emission values are multiplied by 1000 before dividing. Countries with
        no population row are left off the map.

    Raises
    ------
    ValueError
        If `valor` is not 'emissions' or 'production'.
    """
    elemento_por_valor = {'emissions': 'Emissions (CO2eq)', 'production': 'Production'}
    if valor not in elemento_por_valor:
        # Previously an unknown value surfaced later as an UnboundLocalError.
        raise ValueError(f"valor must be one of {sorted(elemento_por_valor)}, got {valor!r}")
    elemento = elemento_por_valor[valor]

    seleccion = (df['Item'] == alimento) & (df['Element'] == elemento) & (df['Value'] != 0)
    graph_df = df[seleccion]

    if año:
        graph_df = graph_df[graph_df['Year'] == año]
    else:
        # No `axis` keyword: GroupBy.agg forwards extra keyword arguments to the
        # aggregation functions, and neither the lambda nor 'sum' accepts one.
        graph_df = (
            graph_df
            .groupby(['Area', 'Partner Countries', 'Item', 'Element'])
            .agg({'Unit': lambda x: x.mode()[0], 'Value': 'sum'})
            .reset_index()
        )

    if ratio_pop:
        pop_df = df[df['Element'] == 'Total Population - Both sexes']
        if año:
            pop_df = pop_df[pop_df['Year'] == año]

        # One population figure per country: the selected year's value, or the mean over all years.
        poblacion = pop_df.groupby('Area')['Value'].mean()

        # NOTE(review): presumably converts gigagrams of CO2 to tonnes - confirm the emissions unit.
        factor = 1 if valor == 'production' else 1000

        # Vectorised lookup; a country without population data yields NaN and is dropped
        # instead of aborting the whole map with a KeyError.
        graph_df = graph_df.assign(
            Value = graph_df['Value'] * factor / graph_df['Area'].map(poblacion)
        ).dropna(subset = ['Value'])

        labels = {'Value': 'Tfood / 1000p'} if valor == 'production' else {'Value': 'TCO2 / 1000p'}
    else:
        # Values are not population-normalised here, so label the colour bar with the
        # data's own unit rather than a per-1000-people unit.
        unidades = graph_df['Unit'].dropna().unique()
        labels = {'Value': unidades[0] if len(unidades) == 1 else 'Value'}

    title = f'{alimento} {valor} quantity in the world during {año}' if año else f'{alimento} {valor} quantity in the world during {df["Year"].min()}-{df["Year"].max()}'

    fig = px.choropleth(graph_df, locations = 'Area', locationmode = 'country names', color = 'Value', color_continuous_scale = px.colors.sequential.YlOrBr, title = title, projection = 'equirectangular', labels = labels)

    fig.update_layout(
        autosize=False,
        margin = dict(l=10, r=5, b=10, t=45, pad=4, autoexpand=True),
        width=900)
    fig.show()

In [32]:
# Example: world map of 'Rice, paddy' emissions divided by population, all years combined.
mapa_general(df, 'Rice, paddy', valor = 'emissions', ratio_pop = True)