# Create multliayer products 

In [1]:
import pandas as pd
import numpy as np
import time
import networkx as nx
import geopandas as gpd # pip installed
import matplotlib.pyplot as plt 

In [34]:
def Edge_overlap_w(data):
    """Calculate the weighted edge overlap for each 'origin_country_ISO' in a DataFrame.

    In weighted multilayer networks the edge overlap is the sum of all the weights of all out-links in each node.  

    Parameters:
    - data (pd.DataFrame): Input DataFrame containing at least the 'origin_country_ISO' and 'value' columns.

    Returns:
    pd.DataFrame: DataFrame with edge overlap calculated for each 'origin_country_ISO'.
    The output DataFrame has one column named 'overlap' representing the calculated edge overlap weights.

    Example:
    >>> df_result = Edge_overlap_w(input_data)
    """

    overlap_w= data.groupby(['origin_country_ISO']).apply(lambda group : len(group.value))

    overlap_w= pd.DataFrame(overlap_w.sort_values(ascending=False),columns=['o_i'])
    return overlap_w

def Node_strength_w(data):
    """ Calculate the node relevance in each layer of the multilayer network. 
    In a weighted directed network the country strenght is the sum of the magnitude exported by that country in that layer.

    Parameters:
    - data (pd.DataFrame): Input DataFrame containing trade data for a single year. The dataframe must contain 
    the columns: 'item', 'unit', 'origin_country_ISO', and 'value'.

    Returns:
    pd.DataFrame: DataFrame with country strength for each combination of 'item' and 'unit', sorted from highest to lowest. 
    Columns include 'item', 'unit', 'origin_country_ISO', and the calculated 'value' for node strength.

    Example:
    >>> df_result = Node_strength_w(input_data)
    """

    str_i_l = data.groupby(['item','unit']).apply(lambda group: group.loc[:,['origin_country_ISO','value']].
                                                        pivot_table(index='origin_country_ISO', aggfunc='sum').
                                                        sort_values(by = 'value',ascending=False))
    # Reset multi-index:
    strength_i_l= str_i_l.reset_index(level=['item','unit','origin_country_ISO']).rename(columns={'value':'str_i_l'})

    return strength_i_l    

def Node_strength(data, weight = True):
    """ Calculate the node relevance in each layer of the multilayer network. 
    In a weighted directed network the country strenght is the sum of the magnitude exported by that country in that layer.

    Parameters:
    - data (pd.DataFrame): Input DataFrame containing trade data for a single year. The dataframe must contain 
    the columns: 'item', 'unit', 'origin_country_ISO', and 'value'.

    Returns:
    pd.DataFrame: DataFrame with country strength for each combination of 'item' and 'unit', sorted from highest to lowest. 
    Columns include 'item', 'unit', 'origin_country_ISO', and the calculated 'value' for node strength.

    Example:
    >>> df_result = Node_strength_w(input_data)
    """
    
    if (weight==True):
        str_i_l = data.groupby(['item','unit']).apply(lambda group: group.loc[:,['origin_country_ISO','value']].
                                                        pivot_table(index='origin_country_ISO', aggfunc='sum').
                                                        sort_values(by = 'value',ascending=False))
        strength_i_l= str_i_l.reset_index(level=['item','unit','origin_country_ISO']).rename(columns={'value':'str_i_l'})

    else:
        str_i_l = data.groupby(['item','unit']).apply(lambda group: group.loc[:,['origin_country_ISO','value']].
                                                        pivot_table(index='origin_country_ISO', aggfunc=lambda x: len(x.unique())).
                                                        sort_values(by = 'value',ascending=False))
        strength_i_l= str_i_l.reset_index(level=['item','unit','origin_country_ISO']).rename(columns={'value':'str_i_l'})

    return strength_i_l
    
def Participation_coeff (data, overlap, direction, weight=True):

    if (weight == True):
        w_f = '_w'
        flag_overlap = 'overlap'
    else:
        w_f = ''
        flag_overlap = 'deg'

    #Define elements for part_coeff
    L = len(data.item.unique())
    
    s_i_l = Node_strength(data,weight)

    # Prepare data
    #data_for_Pc = pd.merge(s_i_l, o_i, left_on= 'origin_country_ISO',right_index=True, how='left')
    data_for_Pc = pd.merge(s_i_l, overlap.loc[:,['country',direction+'_'+flag_overlap]], left_on= 'origin_country_ISO',right_on='country', how='left')

    # Contibution of each layer to total exports:
    data_for_Pc['sum_layers'] = (data_for_Pc['str_i_l']/data_for_Pc[direction+'_'+flag_overlap])**2
    
    sum_layers= data_for_Pc.groupby(['origin_country_ISO']).apply(lambda group: group.sum_layers.sum())

    particip_coeff = (pd.DataFrame((L/(L-1))*(1- sum_layers),columns=['pc'+w_f+'_'+direction]).
                      sort_values(by='pc'+w_f+'_'+direction,ascending=False))
    return particip_coeff 

def Agregated_network (edge_data,node_info):

    # General network 
    pos_dict = node_info.loc[:,['ISO','pos','name']].set_index('ISO').to_dict(orient='index')
    pos_dict

    edge_dict =dict(zip(zip(edge_data['origin_country_ISO'], edge_data['destin_country_ISO'], edge_data['item']), edge_data['value']))
    edge_dict

    # Create network
    G=nx.MultiDiGraph()

    G.add_nodes_from(pos_dict)
    nx.set_node_attributes(G,pos_dict)

    G.add_edges_from(edge_dict.keys())
    nx.set_edge_attributes(G, edge_dict, 'weight')
    nx.set_edge_attributes(G, edge_dict.keys(), '<attribute_name>')
    return G
        

TypeError: MultiGraph.get_edge_data() missing 2 required positional arguments: 'u' and 'v'

In [39]:
str_i_l = data.groupby(['item','unit']).apply(lambda group: group.loc[:,['origin_country_ISO','value']].
                                                        pivot_table(index='origin_country_ISO', aggfunc=lambda x: len(x.unique())).
                                                        sort_values(by = 'value',ascending=False))
strength_i_l= str_i_l.reset_index(level=['item','unit','origin_country_ISO']).rename(columns={'value':'str_i_l'})
data_for_Pc = pd.merge(strength_i_l, overlap.loc[:,['country','out_deg']], left_on= 'origin_country_ISO',right_on='country', how='left')
data_for_Pc

Unnamed: 0,item,unit,origin_country_ISO,str_i_l,country,out_deg
0,"Almonds, in shell",1000 US$,US,102,US,25230
1,"Almonds, in shell",1000 US$,ES,48,ES,22742
2,"Almonds, in shell",1000 US$,AE,45,AE,14946
3,"Almonds, in shell",1000 US$,NL,34,NL,26074
4,"Almonds, in shell",1000 US$,AU,33,AU,10170
...,...,...,...,...,...,...
44634,hydrogenated oils and fats,1000 US$,AG,1,AG,249
44635,hydrogenated oils and fats,1000 US$,SL,1,SL,216
44636,hydrogenated oils and fats,1000 US$,KI,1,KI,21
44637,hydrogenated oils and fats,1000 US$,SO,1,SO,215


In [46]:
data.groupby(['origin_country_ISO']).apply(lambda group : len(group.value))


origin_country_ISO
AE    14946
AF     1259
AG      249
AL     1251
AM     1421
      ...  
WS      313
YE      607
ZA    11542
ZM     1003
ZW      913
Length: 198, dtype: int64

In [45]:
data.groupby('origin_country_ISO').apply(lambda group : len(group.destin_country_ISO))

origin_country_ISO
AE    14946
AF     1259
AG      249
AL     1251
AM     1421
      ...  
WS      313
YE      607
ZA    11542
ZM     1003
ZW      913
Length: 198, dtype: int64

In [4]:
year_check='2019-2021'
data = pd.read_pickle('../Data/Data_year_groups_12.pkl')
data = data.loc[(data.unit =='1000 US$') & (data.year == year_check) & (data.value > 0) ,:]

country_metadata = pd.read_pickle('../Data/Country_info.pkl')

Create single layer network with the total trade and find its degree distribution. 

In [20]:
# Create agregated multinetwork 
direction = 'out'
G = Agregated_network(data, country_metadata)

# Out-degree:not weighted
out_degree= pd.DataFrame([i for i in G.out_degree()],columns=['country','out_deg']).sort_values(by='out_deg',ascending=False)
in_degree= pd.DataFrame([i for i in G.in_degree()],columns=['country','in_deg']).sort_values(by='in_deg',ascending=False)

# Overlap: sum of weights (out_degree)
overlap= pd.DataFrame([i for i in G.out_degree(weight='weight')],columns=['country',direction+'_overlap']).sort_values(by=direction+'_overlap',ascending=False)

overlap= pd.merge(overlap,out_degree,on='country', copy=False)
overlap= pd.merge(overlap,in_degree,on='country', copy=False)

overlap3 = Edge_overlap_w(data)
overlap3

Unnamed: 0_level_0,o_i
origin_country_ISO,Unnamed: 1_level_1
NL,26074
FR,25704
US,25230
DE,23998
IT,22873
...,...
KI,21
NU,17
NR,14
FM,13


In [12]:
# out_degree= pd.DataFrame([i for i in G.out_degree(weight='weight')],columns=['country','out_deg'])
# out_degree
out_degree['z_out_deg'] = (out_degree.out_deg - out_degree.out_deg.mean())/out_degree.out_deg.std()

# Participation coefficient: 
#overlap = Edge_overlap(data)
overlap['z_overl'] = (overlap.out_overlap - overlap.out_overlap.mean()) / overlap.out_overlap.std()

partic_coeff_w = Participation_coeff(data,overlap,'out',weight=True)

partic_coeff = Participation_coeff(data,out_degree,'out',weight=False)

partic_coeff_all= pd.concat([partic_coeff,partic_coeff_w],axis = 1)

# Get participation and degree 
deg_particip = pd.merge(partic_coeff_all, overlap, left_index=True, right_on='country',how='left')
deg_particip = pd.merge(deg_particip, out_degree, on='country',how='left')
deg_particip

AttributeError: 'int' object has no attribute 'sort_values'

In [23]:
partic_coeff = Participation_coeff(data,out_degree,'out',weight=False)
partic_coeff

KeyError: 'Level origin_country_ISO not found'

In [None]:
fig_phase,ax = plt.subplots(figsize = (6,5))
ax.axvline(x=1/3, color='grey', linestyle='--',zorder =1)
ax.axvline(x=2/3, color='grey', linestyle='--',zorder = 1)
ax.axhline(y=2, color='grey', linestyle='--',zorder = 1)
ax.scatter(deg_particip.part_coeff, deg_particip.z_overl,zorder = 2,label= deg_particip.country)
ax.set_xlim (0,1)
ax.set_xlabel('Participation coefficient (p)')
ax.set_title ('Map with weighted overlap')

ax.set_ylabel('Z-score overlap')

# Multilayer- Hubs:
bool_multihub= (deg_particip.part_coeff>=0.6) & (deg_particip.z_overl>=2)
a = deg_particip.loc[bool_multihub,:].sort_values(by='z_overl',ascending=False)
a

In [None]:
fig_phase,ax = plt.subplots(figsize = (6,5))
ax.axvline(x=1/3, color='grey', linestyle='--',zorder =1)
ax.axvline(x=2/3, color='grey', linestyle='--',zorder = 1)
ax.scatter(deg_particip.partic_coeff, deg_particip.out_deg,zorder = 2)
ax.set_xlim (0,1)
ax.set_xlabel('Participation coefficient (p)')
ax.set_title ('Map with average degree')
ax.set_ylabel('Av.degree')

# Old codes

In [None]:
''' 
#Old Code

def Edge_overlap_w(data):
    """Calculate the weighted edge overlap for each 'origin_country_ISO' in a DataFrame.

    In weighted multilayer networks the edge overlap is the sum of all the weights of all out-links in each node.  

    Parameters:
    - data (pd.DataFrame): Input DataFrame containing at least the 'origin_country_ISO' and 'value' columns.

    Returns:
    pd.DataFrame: DataFrame with edge overlap calculated for each 'origin_country_ISO'.
    The output DataFrame has one column named 'overlap' representing the calculated edge overlap weights.

    Example:
    >>> df_result = Edge_overlap_w(input_data)
    """

    overlap_w= data.groupby(['origin_country_ISO']).apply(lambda group : group.value.sum())

    overlap_w= pd.DataFrame(overlap_w.sort_values(ascending=False),columns=['o_i'])
    return overlap_w

def Edge_overlap(data):
    """Calculate the (non-weighted) edge overlap for each 'origin_country_ISO' in a DataFrame.

    In weighted multilayer networks the edge overlap is the sum of all the weights of all out-links in each node.  

    Parameters:
    - data (pd.DataFrame): Input DataFrame containing at least the 'origin_country_ISO' and 'value' columns.

    Returns:
    pd.DataFrame: DataFrame with edge overlap calculated for each 'origin_country_ISO'.
    The output DataFrame has one column named 'overlap' representing the calculated edge overlap weights.

    Example:
    >>> df_result = Edge_overlap_w(input_data)
    """

    overlap= data.groupby(['origin_country_ISO']).apply(lambda group : len(group))

    overlap= pd.DataFrame(overlap.sort_values(ascending=False),columns=['o_i'])
    return overlap
'''

In [None]:
'''
# Overlap? Check for individual products. 
year= '2019-2021'# 2018
unit = '1000 US$'
products = ['Palm oil','Apples','Cane sugar, non-centrifugal']
key_wanted = [tuple([product, unit]) for product in products]
print(key_wanted[0])
data_g = data.groupby(['item','unit']) 
data_net=data_g.get_group(key_wanted[0]) #
data_net
str_layer=data_net.loc[:,['origin_country_ISO','value']].pivot_table(index='origin_country_ISO', aggfunc='sum').sort_values(by = 'value',ascending=False)


test_strength = data.groupby(['item','unit']).apply(lambda group: group.loc[:,['origin_country_ISO','value']].pivot_table(index='origin_country_ISO', aggfunc='sum').sort_values(by = 'value',ascending=False))
test_strength_og = test_strength.reset_index(level=['item','unit','origin_country_ISO'])

strength_all = test_strength_og.groupby(['origin_country_ISO']).apply(lambda group: group.loc[:,['origin_country_ISO','value']].pivot_table(index='origin_country_ISO', aggfunc='sum').sort_values(by = 'value',ascending=False))

merged_str = pd.merge(test_strength_og, str_overlap, left_on= 'origin_country_ISO',right_index=True, how='left')

# Layer per layer strength: 
merged_str['contribution'] = (merged_str['value']/merged_str['total_val'])**2

sum_particip = merged_str.groupby(['origin_country_ISO']).apply(lambda group: group.loc[:,'value'].pivot_table(index='origin_country_ISO', aggfunc='sum').sort_values(by = 'value',ascending=False))

sum_test= merged_str.groupby(['origin_country_ISO']).apply(lambda group: group.contribution.sum())
L= len(data.item.unique())
partic_coeff = (L/(L-1))*(1- sum_test)


print(merged_str)

data_g = data.groupby(['item','unit']) 
data_net=data_g.get_group(key_wanted) #

edges_multi_all


overlap= pd.DataFrame(np.zeros([len(country_metadata.ISO), len(country_metadata.ISO)]),
             columns= country_metadata['ISO'], index= country_metadata.ISO)
'''