# Create multliayer Food groups 
It is a copy of Multilayer food_groups.ipynb but that I am going to mess-up to tranform into a code that I can apply to make trajectories of a country across the years

In [34]:
import pandas as pd
import numpy as np
import time
import networkx as nx
import geopandas as gpd # pip installed
import matplotlib.pyplot as plt 
import pickle

In [103]:
def Edge_overlap_w(data,weight):
    """Calculate the weighted edge overlap for each 'origin_country_ISO' in a DataFrame.

    In weighted multilayer networks the edge overlap is the sum of all the weights of all out-links in each node.  

    Parameters:
    - data (pd.DataFrame): Input DataFrame containing at least the 'origin_country_ISO' and 'value' columns.

    Returns:
    pd.DataFrame: DataFrame with edge overlap calculated for each 'origin_country_ISO'.
    The output DataFrame has one column named 'overlap' representing the calculated edge overlap weights.

    Example:
    >>> df_result = Edge_overlap_w(input_data)
    """
    if weight==True:
        overlap_w= data.groupby(['origin_country_ISO']).apply(lambda group : group.value.sum())
    else:
        overlap_w= data.groupby(['origin_country_ISO']).apply(lambda group : len(group))

    overlap_w= pd.DataFrame(overlap_w.sort_values(ascending=False), columns=['o_i'])
    return overlap_w

def Node_strength(data, group_class):
    """ Calculate the node relevance in each layer of the multilayer network. 
    In a weighted directed network the country strenght is the sum of the magnitude exported by that country in that layer.

    Parameters:
    - data (pd.DataFrame): Input DataFrame containing trade data for a single year. The dataframe must contain 
    the columns: 'item', 'unit', 'origin_country_ISO', and 'value'.

    Returns:
    pd.DataFrame: DataFrame with country strength for each combination of 'item' and 'unit', sorted from highest to lowest. 
    Columns include 'item', 'unit', 'origin_country_ISO', and the calculated 'value' for node strength.

    Example:
    >>> df_result = Node_strength_w(input_data)
    """
    
    if (group_class==True):
        str_i_l = data.groupby([group_class,'unit']).apply(lambda group: group.loc[:,['origin_country_ISO','value']].
                                                        pivot_table(index='origin_country_ISO', aggfunc='sum').
                                                        sort_values(by = 'value',ascending=False))
        strength_i_l= str_i_l.reset_index(level=[group_class,'unit','origin_country_ISO']).rename(columns={'value':'str_i_l'})

    else:
        str_i_l = data.groupby([group_class,'unit']).apply(lambda group: group.loc[:,['origin_country_ISO','value']].
                                                        pivot_table(index='origin_country_ISO', aggfunc=lambda x: len(x.unique())).
                                                        sort_values(by = 'value',ascending=False))
        strength_i_l= str_i_l.reset_index(level=[group_class,'unit','origin_country_ISO']).rename(columns={'value':'str_i_l'})

    return strength_i_l

def Participation_coeff (data, overlap, group_class,direction, weight=True):

    if weight == True:
        w_f = '_w'
        flag_overlap = 'overl'
    else:
        w_f = ''
        flag_overlap = 'deg'

    #Define elements for part_coeff
    L = len(data[group_class].unique())
    
    s_i_l = Node_strength(data,group_class)

    # Prepare data
    #data_for_Pc = pd.merge(s_i_l, o_i, left_on= 'origin_country_ISO',right_index=True, how='left')
    data_for_Pc = pd.merge(s_i_l, overlap.loc[:,['country',direction+'_'+flag_overlap]], left_on= 'origin_country_ISO',right_on='country', how='left')

    # Contibution of each layer to total exports:
    data_for_Pc['sum_layers'] = (data_for_Pc['str_i_l']/data_for_Pc[direction+'_'+flag_overlap])**2
    
    sum_layers= data_for_Pc.groupby(['origin_country_ISO']).apply(lambda group: group.sum_layers.sum())

    particip_coeff = (pd.DataFrame((L/(L-1))*(1- sum_layers),columns=['pc'+w_f+'_'+direction]).
                      sort_values(by='pc'+w_f+'_'+direction,ascending=False))
    return particip_coeff 


def Agregated_network (edge_data,node_info, group_class):
    
    # General network 
    pos_dict = node_info.loc[:,['ISO','pos','name']].set_index('ISO').to_dict(orient='index')
    pos_dict

    edge_dict =dict(zip(zip(edge_data['origin_country_ISO'], edge_data['destin_country_ISO'], edge_data[group_class]), edge_data['value']))
    edge_dict

    # Create network
    G=nx.MultiDiGraph()

    G.add_nodes_from(pos_dict)
    nx.set_node_attributes(G,pos_dict)

    G.add_edges_from(edge_dict.keys())
    nx.set_edge_attributes(G, edge_dict, 'weight')
    nx.set_edge_attributes(G, edge_dict.keys(), '<attribute_name>')
    return G
        

def Make_dict_years (data_in, country_metadata, group_class, year_check):
    """ 
    Iterate everything to filter data per year, get network and network stats. 
    The output should be a the dataframe with the value of overlap and zscore for each country the year being explored. 

    This will be saved in a dictionary. 
    """

    print(year_check)
    
    # Load data 
    data_filt = data_in.loc[(data_in.unit =='1000 US$') & (data_in.year == year_check) & (data_in.value > 0) ,:].copy()
    # Create agregated multilayer network 
    G = Agregated_network(data_filt, country_metadata, group_class)

    def Extract_metrics (G,direction):

        # Degree estimations and overlap 
        if direction == 'in':
            degree= pd.DataFrame([i for i in G.in_degree()],columns=['country','in_deg']).sort_values(by='in_deg',ascending=False)
            overlap= pd.DataFrame([i for i in G.in_degree(weight='weight')],columns=['country','in_overl']).sort_values(by='in_overl',ascending=False)
        else:
            degree= pd.DataFrame([i for i in G.out_degree()],columns=['country','out_deg']).sort_values(by='out_deg',ascending=False)
            overlap= pd.DataFrame([i for i in G.out_degree(weight='weight')],columns=['country','out_overl']).sort_values(by='out_overl',ascending=False)
        
        metrics = pd.merge(degree,overlap, on='country', copy=False)
        return metrics
    
    def Z_score(metrics, direction):
        metrics['z_'+direction+'_deg'] = (metrics[direction +'_deg'] - metrics[direction +'_deg'].mean())/metrics[direction +'_deg'].std()
        metrics['z_'+direction+'_overl'] = (metrics[direction +'_overl'] - metrics[direction +'_overl'].mean())/metrics[direction +'_overl'].std()
        return metrics

    # Participation coefficient: exports
    def Particip_coeff_direction(data_filt,metrics_join, direction):
        partic_coeff_out = Participation_coeff(data_filt, metrics_join, group_class, direction, weight= False)
        partic_coeff_w_out = Participation_coeff(data_filt, metrics_join, group_class, direction, weight= True)
        particip_coeff = pd.merge(partic_coeff_out, partic_coeff_w_out, left_index=True, right_index=True, how='left')
        return particip_coeff


    # Extract metrics
    metrics_in = Extract_metrics(G, direction='in')
    metrics_out = Extract_metrics(G, direction='out')

    metrics_join= pd.merge(metrics_in,metrics_out, on='country', copy=False)

    metrics_join = Z_score(metrics_join, direction='in')
    metrics_join = Z_score(metrics_join, direction='out')
    
    #metrics_join= pd.merge(metrics_join,z_score_in, on='country', copy=False)
    #metrics_join= pd.merge(metrics_join,z_score_out, on='country', copy=False)
    
    partic_coeff_in = Particip_coeff_direction(data_filt,metrics_join,direction='in')
    partic_coeff_out = Particip_coeff_direction(data_filt,metrics_join,direction='out')

    # Merge participation and degree dataframes:
    deg_particip = pd.merge(metrics_join, partic_coeff_in, right_index=True, left_on='country',how='left')
    deg_particip = pd.merge(deg_particip, partic_coeff_out, left_on='country',right_index=True,how='left',copy=False)
    print(deg_particip)

    
    # deg_particip = pd.merge(deg_particip, partic_coeff_w, left_on='country',right_index=True,how='left',copy=False)
    return deg_particip

# need to do dynamic
def Country_map_static (dict_results,country_list ='All', year_list= 'All', direction='out', weight= True, plt_flag=True):
    
    # Define values for 'All' flags: country
    if country_list == 'All':
        country_list = list(dict_results[list(dict_results.keys())[0]].country)
        title_country = 'All countries'
    else: 
        title_country  = country_list
    
    # Define values for 'All' flags: year
    if year_list == 'All':
        year_list = list(dict_results.keys())
        title_year  = 'All years'
    else: 
        title_year  = year_list[0]
    
    # Define values for 'All' flags: year
    if weight == True:
        w_f = '_w'
        overl_flag = 'overl'
    else: 
        w_f = ''
        overl_flag = 'deg'
    
    if direction == 'out':
        flag_dir = 'exports'
    else: 
        flag_dir = 'imports'

    # Define figure characteristics 
    if plt_flag == True :
        maxY_plot = max([max(dict_results[y].z_out_overl) for y in year_list])#list years before
        minY_plot = min([min(dict_results[y].z_out_overl) for y in year_list])#year_list

        fig_phase, ax = plt.subplots(figsize = (6,5))
        ax.axvline(x=1/3, color='grey', linestyle= '--', zorder =1)
        ax.axvline(x=2/3, color='grey', linestyle= '--', zorder = 1)
        ax.axhline(y=2, color='grey', linestyle= '--', zorder = 1)
        ax.set_xlim (0,1)
        ax.set_xlim (0,1)
        ax.set_ylim((minY_plot-1,maxY_plot+1))

        #
        ax.set_ylabel('Z-score weighted overlap')
        ax.set_xlabel('Participation coefficient (p)')
        #ax.set_title ('Map with weighted overlap: '+ str(title_country) +' countries' +' & '+ str(title_year))
        ax.set_title ('Relevance in global food trade '+flag_dir+': '+ str(title_country) +' & '+ str(title_year))

    #Get coordinates of each country & map them (if plot true)
    coords_dict = dict()
    for country in country_list:
        coords = pd.DataFrame()
        for y in year_list:
            to_add = dict_results[y].loc[(dict_results[y]['country']==country),('pc_'+w_f+'_'+direction,'z_'+direction+'_'+overl_flag)] 
            coords = pd.concat([coords,to_add])
        coords.index = year_list

        coords_dict[country] = coords
        if (plt_flag ==True):
            # Plot scatter with trajectories.
            ax.scatter(coords.pc_w_out.iloc[2:-2], coords.z_out_overl.iloc[2:-2],zorder = 2,s=20,label= coords.index)
            ax.plot(coords.pc_w_out, coords.z_out_overl,zorder = 2,label= coords.index)
            ax.scatter(coords.pc_w_out.iloc[0], coords.z_out_overl.iloc[0],zorder = 2,label= coords.index[0],color = 'black')
            ax.scatter(coords.pc_w_out.iloc[-1], coords.z_out_overl.iloc[-1],zorder = 2,label= coords.index[-1],color = 'steelblue')
            #ax.annotate(country,(coords.pc_w_out.iloc[-1], coords.z_out_overl.iloc[-1]),zorder = 2,label= coords.index[-1],color = 'steelblue') # get country name

    #
    fig_phase.savefig('../Plots/map_countries_export.pdf',format ='pdf',dpi=300)   
    plt.show()
    print(country)
    return coords_dict


In [93]:
# Parameter definition
direction = 'in'
group_class = 'Food_group' # item
weight= True 

parameters = {'direction': direction ,'group_class': group_class, 'weight': weight}

#data_og = pd.read_pickle('../Data/Data_year_groups_12.pkl')

country_metadata = pd.read_pickle('../Data/Country_info.pkl')

data_og = pd.read_pickle('../Data/Data_food_groups.pkl')


In [94]:
parameters['group_class']

'Food_group'

Iterate estimation for all countries and years. It is saved as a dictionary with a single pandas datafarame containing all outputs. 

In [104]:
list_years = data_og.year.unique()
'''
# Save data per year:
dict_results = dict()
for i in list_years: 
    dict_aux = Make_dict_years(data_og, country_metadata, parameters['group_class'], i)
    dict_results[i] = dict_aux.copy()
'''
dict_results=dict()
dict_results = dict((i, Make_dict_years(data_og,country_metadata, parameters['group_class'], i)) for i in list_years)
dict_results

w_flag = ('w' if weight == True else '')
# Save data of country overlap. 
#pickle.dump(dict_results, open('../Data/Country_stats_'+parameters['group_class']+'.pkl', 'wb'))


1986
    country  in_deg    in_overl  out_deg   out_overl  z_in_deg  z_in_overl  \
0        US    1256  20510581.0     1696  20059598.0  4.734288    8.574072   
1        GB    1115  11945483.0     1860   6087896.0  4.106796    4.863597   
2        FR    1052  10359669.0     1922  15024671.0  3.826426    4.176609   
3        NL    1052   8580858.0     1919   9988993.0  3.826426    3.406012   
4     BE-LU     899   6119419.0     1584   6904595.0  3.145530    2.339696   
..      ...     ...         ...      ...         ...       ...         ...   
214      AZ       0         0.0        0         0.0 -0.855291   -0.311290   
215      FM       0         0.0        0         0.0 -0.855291   -0.311290   
216   UM-71       0         0.0        0         0.0 -0.855291   -0.311290   
217   UM-67       0         0.0        0         0.0 -0.855291   -0.311290   
218      GS       0         0.0        0         0.0 -0.855291   -0.311290   

     z_out_deg  z_out_overl     pc_in  pc_w_in    pc_out  

In [102]:
dict_results[1986]

Unnamed: 0,country,in_deg,in_overl,out_deg,out_overl,z_in_deg,z_in_overl,z_out_deg,z_out_overl,pc_out,pc_w_out
0,US,1256,20510581.0,1696,20059598.0,4.734288,8.574072,4.271017,9.132746,0.996626,1.05
1,GB,1115,11945483.0,1860,6087896.0,4.106796,4.863597,4.736797,2.535372,1.006174,1.05
2,FR,1052,10359669.0,1922,15024671.0,3.826426,4.176609,4.912885,6.755276,1.003967,1.05
3,NL,1052,8580858.0,1919,9988993.0,3.826426,3.406012,4.904365,4.377452,1.002282,1.05
4,BE-LU,899,6119419.0,1584,6904595.0,3.145530,2.339696,3.952923,2.921014,1.002913,1.05
...,...,...,...,...,...,...,...,...,...,...,...
214,AZ,0,0.0,0,0.0,-0.855291,-0.311290,-0.545836,-0.339305,,
215,FM,0,0.0,0,0.0,-0.855291,-0.311290,-0.545836,-0.339305,,
216,UM-71,0,0.0,0,0.0,-0.855291,-0.311290,-0.545836,-0.339305,,
217,UM-67,0,0.0,0,0.0,-0.855291,-0.311290,-0.545836,-0.339305,,


In [30]:
# Coords dict 
coords_dict = Country_map_static(dict_results,country_list='All',year_list=[2019], direction = 'out',weight=True, plt_flag = True)

#pickle.dump(coords_dict, open('../Data/country_coords.pkl', 'wb'))

AttributeError: 'DataFrame' object has no attribute 'z_out_overl'

In [32]:
dict_results[1986]

Unnamed: 0,country,out_overl,in_overl,out_deg,in_deg,z_in_deg,z_in_overl,pc_in,pc_w_in
0,US,20059598.0,20510581.0,1696,1256,4.734288,8.574072,0.952681,0.831093
1,FR,15024671.0,10359669.0,1922,1052,3.826426,4.176609,0.896347,0.710356
2,NL,9988993.0,8580858.0,1919,1052,3.826426,3.406012,0.891219,0.853678
3,BE-LU,6904595.0,6119419.0,1584,899,3.145530,2.339696,0.903817,0.846971
4,GB,6087896.0,11945483.0,1860,1115,4.106796,4.863597,0.928041,1.011601
...,...,...,...,...,...,...,...,...,...
214,BA,0.0,0.0,0,0,-0.855291,-0.311290,,
215,CT,0.0,0.0,0,0,-0.855291,-0.311290,,
216,SI,0.0,0.0,0,0,-0.855291,-0.311290,,
217,FM,0.0,0.0,0,0,-0.855291,-0.311290,,


In [None]:

# Example show area
coords =dict_results[2019]
coords.loc[(coords.pc_w_out>=2/3) & (coords.z_out_overl>=2),:]
