In [35]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

# Apply matplotlib's 'ggplot' style sheet to every matplotlib figure drawn after this cell.
plt.style.use('ggplot')

In [3]:
# Work from the folder that holds the pickled dataframes.
# The guard keeps this cell re-runnable: a second bare chdir would look for a
# nested 'dataframes_pkl' folder inside the first one and fail.
if os.path.basename(os.getcwd()) != 'dataframes_pkl':
    os.chdir('dataframes_pkl')

In [4]:
# Load the dataframe used by every cell below. The path is relative, so it is
# resolved against the current working directory (changed by the os.chdir cell above).
# NOTE(review): unpickling can execute arbitrary code; only load pickle files from a trusted source.
df = pd.read_pickle('df_global_format.pkl')

In [5]:
# Preview the first rows of the loaded frame to check its columns.
df.head()

Unnamed: 0,Area Code,Area,Partner Country Code,Partner Countries,Item Code,Item,Element Code,Element,Year,Unit,Value
0,2,Afghanistan,4,Algeria,230,"Cashew nuts, shelled",5910,Export Quantity,2016,tonnes,3.0
1,2,Afghanistan,4,Algeria,230,"Cashew nuts, shelled",5922,Export Value,2016,1000 US$,23.0
2,2,Afghanistan,9,Argentina,56,Maize,5610,Import Quantity,2017,tonnes,130.0
3,2,Afghanistan,9,Argentina,56,Maize,5622,Import Value,2017,1000 US$,68.0
4,2,Afghanistan,9,Argentina,1058,"Meat, chicken",5610,Import Quantity,2014,tonnes,974.0


In [6]:
# Distinct values of the 'Element' column; the plotting functions below filter on these names.
df['Element'].unique()

array(['Export Quantity', 'Export Value', 'Import Quantity',
       'Import Value', 'Production', 'Loss', 'Processed',
       'Food supply quantity (tonnes)', 'Feed', 'Seed',
       'Other uses (non-food)', 'Tourist consumption',
       'Emissions intensity', 'Emissions (CO2eq)',
       'Total Population - Both sexes', 'Rural population',
       'Urban population', 'Export Transport Emissions Quantity',
       'Import Transport Emissions Quantity',
       'Import Production Emissions Quantity'], dtype=object)

In [7]:
# Distinct values of the 'Item' column, i.e. the names accepted as the `alimento` argument below.
df['Item'].unique()

array(['Cashew nuts, shelled', 'Maize', 'Meat, chicken',
       'Milk, whole dried', 'Chick peas',
       'Meat, cattle, boneless (beef & veal)', 'Meat, sheep', 'Nuts nes',
       'Nuts, prepared (exc. groundnuts)', 'Cheese, processed',
       'Cheese, whole cow milk', 'Chocolate products nes',
       'Lemons and limes', 'Walnuts, shelled',
       'Buttermilk, curdled, acidified milk', 'Milk, whole condensed',
       'Milk, whole fresh cow', 'Rice, paddy', 'Brazil nuts, shelled',
       'Cashew nuts, with shell', 'Grapes', 'Groundnuts, shelled',
       'Milk, whole evaporated', 'Wine', 'Apples',
       'Cabbages and other brassicas', 'Oranges', 'Strawberries',
       'Tomatoes', 'Coffee, roasted', 'Eggs, hen, in shell',
       'Groundnuts, prepared', 'Meat, beef, preparations', 'Meat, pig',
       'Peas, green', 'Bananas', 'Milk, skimmed dried', 'Rice, milled',
       'Soybeans', 'Sweet potatoes', 'Coffee, green', 'Butter, cow milk',
       'Oats', 'Potatoes', 'Coffee, extracts',
     

In [33]:
def reparto_suministros(df, pais, alimento, año = None, ratio_pop = False):
    """Show a pie chart of how a country's supply of one food item is split.

    The slices are the 'Production', 'Import Quantity' and 'Export Quantity'
    elements, taken from the rows of ``df`` where both 'Area' and
    'Partner Countries' equal ``pais`` and 'Item' equals ``alimento``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'Area', 'Partner Countries', 'Item',
        'Element', 'Year', 'Unit' and 'Value'. It is never modified.
    pais : str
        Country to plot (matched against 'Area' and 'Partner Countries').
    alimento : str
        Food item to plot (matched against 'Item').
    año : optional
        Year to plot. When None, the values of each element are summed over
        all the years found for the selection.
    ratio_pop : bool, default False
        When True, every value is divided by the country's
        'Total Population - Both sexes' value: the one for ``año``, or the
        mean over all years when ``año`` is None.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.

    Raises
    ------
    ValueError
        If ``ratio_pop`` is True and ``df`` holds no population row for the
        requested country (and year).
    """
    elements = ['Production', 'Import Quantity', 'Export Quantity']
    # Explicit None test instead of truthiness, so only "no year given" aggregates.
    single_year = año is not None

    selected = df[
        (df['Area'] == pais)
        & (df['Partner Countries'] == pais)
        & (df['Item'] == alimento)
        & (df['Element'].isin(elements))
    ]

    if single_year:
        graph_df = selected[selected['Year'] == año]
        period = f'{año}'
    else:
        # The title reports the years that were actually summed; fall back to
        # the span of the whole frame only when nothing matched the selection.
        years = selected['Year'] if not selected.empty else df['Year']
        period = f'{years.min()}-{years.max()}'
        # No `axis` keyword here: it is not an aggregation argument of groupby().agg().
        graph_df = (
            selected
            .groupby(['Area', 'Item', 'Element'])
            .agg({'Unit': lambda x: x.mode()[0], 'Value': 'sum'})
            .reset_index()
        )

    if ratio_pop:
        pop_df = df[(df['Element'] == 'Total Population - Both sexes') & (df['Area'] == pais)]
        if single_year:
            pop_df = pop_df[pop_df['Year'] == año]

        if pop_df.empty:
            raise ValueError(
                f'No "Total Population - Both sexes" data for {pais}'
                + (f' in {año}' if single_year else '')
            )

        # Positional access: label-based .loc would return a Series instead of
        # a scalar if the index label of the first row were duplicated.
        population = pop_df['Value'].iloc[0] if single_year else pop_df['Value'].mean()

        # assign() builds a new frame, so the filtered slice of the caller's
        # data is never written to (no SettingWithCopyWarning).
        graph_df = graph_df.assign(Value = graph_df['Value'] / population)

    title = f'{alimento} supply distribution in {pais} during {period}'
    fig = px.pie(data_frame = graph_df, names = 'Element', values = 'Value', title = title)
    fig.update_traces(textposition = 'inside', textinfo = 'percent+label')
    fig.show()


In [34]:
# Pie chart for 'Flour, cereals' in Spain: no year is passed, so all years are
# aggregated, and ratio_pop = True divides the values by Spain's population.
reparto_suministros(df, 'Spain', 'Flour, cereals', ratio_pop = True)

In [50]:
def sankey(df, pais, alimento, año = None):
    """Show a Sankey diagram of the flows of one food item in a country.

    'Import Quantity' and 'Production' flow into a central 'Supply' node;
    'Supply' flows out to every other selected element ('Export Quantity',
    'Loss', 'Processed', 'Food supply quantity (tonnes)', 'Feed', 'Seed',
    'Other uses (non-food)'). Only rows of ``df`` where both 'Area' and
    'Partner Countries' equal ``pais`` and 'Item' equals ``alimento`` are used.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'Area', 'Partner Countries', 'Item',
        'Element', 'Year', 'Unit' and 'Value'. It is never modified.
    pais : str
        Country to plot.
    alimento : str
        Food item to plot.
    año : optional
        Year to plot. When None, the values of each element are summed over
        all the years found for the selection.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    elements = [
        'Export Quantity', 'Import Quantity', 'Production', 'Loss', 'Processed',
        'Food supply quantity (tonnes)', 'Feed', 'Seed', 'Other uses (non-food)',
    ]
    # Elements that feed the 'Supply' node; every other element is fed by it.
    sources = ['Import Quantity', 'Production']

    graph_df = df[
        (df['Area'] == pais)
        & (df['Partner Countries'] == pais)
        & (df['Item'] == alimento)
        & (df['Element'].isin(elements))
    ]

    # Explicit None test instead of truthiness, so only "no year given" aggregates.
    if año is not None:
        graph_df = graph_df[graph_df['Year'] == año]
    else:
        # No `axis` keyword here: it is not an aggregation argument of groupby().agg().
        graph_df = (
            graph_df
            .groupby(['Area', 'Item', 'Element'])
            .agg({'Unit': lambda x: x.mode()[0], 'Value': 'sum'})
            .reset_index()
        )

    # One node per element present in the data, plus the central 'Supply' node.
    labels = list(graph_df['Element'].unique()) + ['Supply']
    node_of = {label: position for position, label in enumerate(labels)}
    supply = node_of['Supply']
    color = 'blue'

    # Link endpoints are built as plain lists rather than as new columns, so
    # the filtered slice of the caller's frame is never written to
    # (no SettingWithCopyWarning).
    link_source = [node_of[element] if element in sources else supply for element in graph_df['Element']]
    link_target = [supply if element in sources else node_of[element] for element in graph_df['Element']]

    fig = go.Figure(data=[go.Sankey(
        valueformat = ".0f",
        valuesuffix = "Tonnes",
        # Define nodes
        node = dict(
            pad = 15,
            thickness = 15,
            line = dict(color = "black", width = 0.5),
            label = labels,
            color = color
        ),
        # Define links: one per row of the selection
        link = dict(
            source = link_source,
            target = link_target,
            value = graph_df['Value'],
        ))])
    fig.show()

In [52]:
# Sankey diagram for 'Flour, cereals' in Spain; no year is passed, so all years are aggregated.
sankey(df, 'Spain', 'Flour, cereals')