In [4]:
#-------------------------------------------------------
# Importar paquetes a utilizar
#-------------------------------------------------------
import pandas as pd
import numpy as np 
import networkx as nx
import scipy.stats as stats



#-------------------------------------------------------
# Load the Gephi exports for every network.
# Each city has one CSV describing its nodes (folder Gephi_stats)
# and one CSV describing its edges (folder Gephi_edges).
#-------------------------------------------------------
def _read_nodes(city):
    """Read the node-statistics CSV exported from Gephi for `city`."""
    return pd.read_csv('Gephi_stats/Gephi ' + city + ' Stats.csv')


def _read_edges(city):
    """Read the edge-list CSV exported from Gephi for `city`."""
    return pd.read_csv('Gephi_edges/Gephi ' + city + ' Edges.csv')


# Node tables (capital cities are marked in the trailing comment)
ags_nd = _read_nodes('AGS')
caba_nd = _read_nodes('CABA')            # capital
cdmx_nd = _read_nodes('CDMX')            # capital
gdl_nd = _read_nodes('GDL')
hgo_nd = _read_nodes('Hidalgo')
mad_nd = _read_nodes('Madrid')           # capital
mtv_nd = _read_nodes('Montevideo')       # capital
oax_nd = _read_nodes('Oaxaca')
sao_nd = _read_nodes('Sao Paulo')        # capital
scl_nd = _read_nodes('SCL')              # capital

# Edge tables (same city order as above)
ags_ed = _read_edges('AGS')
caba_ed = _read_edges('CABA')            # capital
cdmx_ed = _read_edges('CDMX')            # capital
gdl_ed = _read_edges('GDL')
hgo_ed = _read_edges('Hidalgo')
mad_ed = _read_edges('Madrid')           # capital
mtv_ed = _read_edges('Montevideo')       # capital
oax_ed = _read_edges('Oaxaca')
sao_ed = _read_edges('Sao Paulo')        # capital
scl_ed = _read_edges('SCL')              # capital

In [5]:
def armar_grafo(nodes,edges,rol_str,weight_str):
    '''
    Build a directed NetworkX graph from a node table and an edge table.

    In:
    - nodes       pandas DataFrame of nodes; must contain an 'Id' column and
                  the columns named by rol_str and weight_str
    - edges       pandas DataFrame of edges; must contain 'Source', 'Target'
                  and 'Weight' columns
    - rol_str     name of the column of `nodes` that holds each actor's role
    - weight_str  name of the column of `nodes` that holds each node's weight

    Out:
    A networkx.DiGraph G. Every edge carries a 'Weight' attribute and every
    node found in `nodes` carries the attributes 'rol' and 'weight'.

    Raises:
    KeyError if any of the required columns is missing.

    NOTE(review): the graph is created from the edge list only, so a node that
    is listed in `nodes` but takes part in no edge is presumably not added to
    G -- confirm this is intended.
    '''

    # Directed graph built from the edge table; 'Weight' is copied onto each edge.
    G=nx.from_pandas_edgelist(edges,'Source','Target',edge_attr=["Weight"],create_using=nx.DiGraph())

    # One row per node id. If an id is repeated, the first row wins, which is
    # what the previous per-id filtering (`.values[0]`) selected as well.
    # Building the lookup once avoids scanning the whole table for every node.
    first_rows=nodes.drop_duplicates(subset='Id',keep='first')
    node_ids=first_rows['Id']

    # Node id -> role. Iterating a Series yields plain Python scalars, which
    # keeps the attributes easy to serialise (e.g. to GraphML).
    rol=dict(zip(node_ids,first_rows[rol_str]))
    nx.set_node_attributes(G,rol,'rol')

    # Node id -> node weight.
    weight=dict(zip(node_ids,first_rows[weight_str]))
    nx.set_node_attributes(G,weight,'weight')

    return G

In [6]:
#--------------------------------------------------------------
# Build one graph per city from the node/edge tables loaded above.
# The role column has a different name in each export, and Montevideo
# stores the node weight under 'node size' instead of 'weight'.
#--------------------------------------------------------------

ags_G = armar_grafo(nodes=ags_nd, edges=ags_ed, rol_str='role', weight_str='weight')
caba_G = armar_grafo(nodes=caba_nd, edges=caba_ed, rol_str='type', weight_str='weight')
cdmx_G = armar_grafo(nodes=cdmx_nd, edges=cdmx_ed, rol_str='rol estimado', weight_str='weight')
gdl_G = armar_grafo(nodes=gdl_nd, edges=gdl_ed, rol_str='type', weight_str='weight')
hgo_G = armar_grafo(nodes=hgo_nd, edges=hgo_ed, rol_str='type', weight_str='weight')
mad_G = armar_grafo(nodes=mad_nd, edges=mad_ed, rol_str='rol estimado', weight_str='weight')
mtv_G = armar_grafo(nodes=mtv_nd, edges=mtv_ed, rol_str='rol estimado', weight_str='node size')
oax_G = armar_grafo(nodes=oax_nd, edges=oax_ed, rol_str='rol', weight_str='weight')
sao_G = armar_grafo(nodes=sao_nd, edges=sao_ed, rol_str='rol estimado', weight_str='weight')
scl_G = armar_grafo(nodes=scl_nd, edges=scl_ed, rol_str='type', weight_str='weight')


In [72]:
#---------------------------------------------------------------------------------------------------------------------
# Role mixing matrix of every capital: entry (i,j) is the fraction of edges that go
# from a node with role i to a node with role j.
#---------------------------------------------------------------------------------------------------------------------

# Role name -> matrix row/column, for the exports labelled in Spanish ...
rol_map_esp = {
    'Generador de conocimiento': 0,
    'Habilitador': 1,
    'Promotor': 2,
    'Vinculador': 3,
    'Articulador': 4,
    'Comunidad': 5,
}
# ... and for the export labelled in English.
rol_map_ing = {
    'Knowledge Generator': 0,
    'Enabler': 1,
    'Promoter': 2,
    'Linker': 3,
    'Articulator': 4,
    'Community': 5,
}

capitals = {
    'CABA': caba_G,
    'CDMX': cdmx_G,
    'Santiago de Chile': scl_G,
    'Montevideo': mtv_G,
    'Madrid': mad_G,
    'Sao Paulo': sao_G,
}


for capital_str, capital_G in capitals.items():
    # Montevideo is the only capital handled with the English role names.
    role_mapping = rol_map_ing if capital_str == 'Montevideo' else rol_map_esp

    print('Matriz de artibuto Rol para la ciudad de '+capital_str)
    print('')
    print(nx.attribute_mixing_matrix(capital_G, 'rol', mapping=role_mapping))
    print('')
    print('')

        
        

Matriz de artibuto Rol para la ciudad de CABA

[[0.01508621 0.0387931  0.00862069 0.01724138 0.04741379 0.        ]
 [0.05603448 0.31896552 0.01077586 0.06896552 0.12715517 0.01939655]
 [0.         0.         0.         0.         0.         0.        ]
 [0.00431034 0.05603448 0.00431034 0.03017241 0.0237069  0.01077586]
 [0.01724138 0.05603448 0.         0.01508621 0.03232759 0.        ]
 [0.00215517 0.01508621 0.         0.         0.00431034 0.        ]]


Matriz de artibuto Rol para la ciudad de CDMX

[[0.00398406 0.02390438 0.00199203 0.00199203 0.01195219 0.        ]
 [0.03784861 0.51394422 0.02390438 0.06772908 0.10956175 0.02788845]
 [0.00199203 0.02191235 0.00398406 0.         0.         0.        ]
 [0.00199203 0.02988048 0.         0.00199203 0.00398406 0.00199203]
 [0.00199203 0.01992032 0.         0.00199203 0.00796813 0.00398406]
 [0.         0.05179283 0.0059761  0.0059761  0.00398406 0.00398406]]


Matriz de artibuto Rol para la ciudad de Santiago de Chile

[[0.        

In [7]:
#------------------------------------------
# Core and crust subgraphs of the capitals
#------------------------------------------
#    For every capital network we extract the 3-core and the 3-crust with
#    NetworkX and export each subgraph as a .graphml file.
#    NOTE(review): the original notes describe core nodes as "total degree at
#    least 3" and crust nodes as "total degree below 3". NetworkX defines both
#    through the core number, and its k_crust appears to keep nodes whose core
#    number is <= k, so nodes with core number exactly 3 may show up in both
#    the core and the crust -- confirm that k=3 is the intended crust level.

# --- Core subgraphs ---
caba_core = nx.k_core(caba_G, k=3)
cdmx_core = nx.k_core(cdmx_G, k=3)
scl_core = nx.k_core(scl_G, k=3)
mad_core = nx.k_core(mad_G, k=3)
mtv_core = nx.k_core(mtv_G, k=3)
sao_core = nx.k_core(sao_G, k=3)

# --- Core GraphML files ---
for file_label, core_graph in [('CABA', caba_core), ('CDMX', cdmx_core),
                               ('Scl', scl_core), ('Mad', mad_core),
                               ('Mtv', mtv_core), ('Sao', sao_core)]:
    nx.write_graphml(core_graph, 'Gephi_core/' + file_label + ' core graph.graphml')

# --- Crust subgraphs ---
caba_crust = nx.k_crust(caba_G, k=3)
cdmx_crust = nx.k_crust(cdmx_G, k=3)
scl_crust = nx.k_crust(scl_G, k=3)
mad_crust = nx.k_crust(mad_G, k=3)
mtv_crust = nx.k_crust(mtv_G, k=3)
sao_crust = nx.k_crust(sao_G, k=3)

# --- Crust GraphML files ---
for file_label, crust_graph in [('CABA', caba_crust), ('CDMX', cdmx_crust),
                                ('Scl', scl_crust), ('Mad', mad_crust),
                                ('Mtv', mtv_crust), ('Sao', sao_crust)]:
    nx.write_graphml(crust_graph, 'Gephi_crust/' + file_label + ' crust graph.graphml')

In [77]:
#------------------------------------------------
#  Core Attribute Mixing Matrices
#------------------------------------------------
#     For each of the core subgraphs obtained above, compute a matrix whose
#     entry (i,j) is the fraction of edges that go from a node with role i
#     to a node with role j.


# Role name -> matrix row/column, for the graphs labelled in Spanish ...
rol_map_esp={'Generador de conocimiento':0,'Habilitador':1,'Promotor':2,
             'Vinculador':3,'Articulador':4,'Comunidad':5}

# ... and for the graph labelled in English.
rol_map_ing={'Knowledge Generator':0,'Enabler':1,'Promoter':2,
             'Linker':3,'Articulator':4,'Community':5}

capitals_cores={'CABA': caba_core, 'CDMX':cdmx_core, 'Santiago de Chile': scl_core, 
                'Montevideo': mtv_core, 'Madrid': mad_core, 'Sao Paulo': sao_core}


# Iterate over capitals_cores itself, so this cell does not rely on a
# dictionary that is defined in a different cell.
for capital_str, core_G in capitals_cores.items():
    # Montevideo is the only capital handled with the English role names.
    role_mapping = rol_map_ing if capital_str=='Montevideo' else rol_map_esp

    print('Matriz de artibuto Rol para el núcleo de la ciudad de '+capital_str)
    print('')
    print(nx.attribute_mixing_matrix(core_G,'rol',mapping=role_mapping))
    print('')
    print('')


Matriz de artibuto Rol para el núcleo de la ciudad de CABA

[[0.004 0.028 0.004 0.016 0.06  0.   ]
 [0.032 0.276 0.008 0.08  0.192 0.   ]
 [0.    0.    0.    0.    0.    0.   ]
 [0.    0.068 0.    0.024 0.032 0.004]
 [0.016 0.084 0.    0.016 0.044 0.   ]
 [0.004 0.004 0.    0.    0.004 0.   ]]


Matriz de artibuto Rol para el núcleo de la ciudad de CDMX

[[0.         0.04694836 0.         0.         0.02816901 0.        ]
 [0.03755869 0.49765258 0.         0.05633803 0.14084507 0.03286385]
 [0.         0.         0.         0.         0.         0.        ]
 [0.         0.03286385 0.         0.         0.00469484 0.        ]
 [0.00469484 0.02816901 0.         0.00469484 0.00938967 0.00469484]
 [0.         0.05633803 0.         0.00469484 0.00469484 0.00469484]]


Matriz de artibuto Rol para el núcleo de la ciudad de Santiago de Chile

[[0.         0.         0.         0.         0.         0.        ]
 [0.         0.35       0.         0.075      0.16666667 0.00833333]
 [0.         0.

In [8]:
#----------------------------
# Global measures
#----------------------------
#     For each capital we build a pandas Series holding the column-wise mean
#     of its node table, i.e. the average over all nodes of every numeric
#     per-node statistic exported from Gephi. Columns seen in the exports:
#           - timeset
#           - mentions
#           - avg strength                    mean node strength
#           - weight                          mean node weight
#           - indegree                        mean in-degree
#           - outdegree                       mean out-degree
#           - Degree                          mean of in + out degree
#           - weighted indegree               mean in-degree weighted by edge intensity
#           - weighted outdegree              mean out-degree weighted by edge intensity
#           - Weighted Degree                 mean weighted in + out degree
#           - Eccentricity                    mean eccentricity
#           - closnesscentrality
#           - harmonicclosnesscentrality
#           - betweenesscentrality
#           - modularity_class
#           - Authority                       mean authority score (HITS algorithm)
#           - Hub                             mean hub score (HITS algorithm)
#           - componentnumber                 presumably a per-node component id, so its mean is not a component count -- TODO confirm
#           - strongcompnum                   presumably a per-node strongly-connected-component id -- TODO confirm
#           - clustering                      mean clustering coefficient
#           - triangles
#           - eigencentrality                 mean eigenvector centrality
#     Not every export contains every column.

capitals_stats={'CABA': caba_nd, 'CDMX':cdmx_nd, 'Santiago de Chile': scl_nd, 
                'Montevideo': mtv_nd, 'Madrid': mad_nd, 'Sao Paulo': sao_nd}


# numeric_only=True skips the text columns explicitly; recent pandas versions
# raise a TypeError instead of silently dropping them. The loop variable is
# named node_stats so it does not shadow the imported `stats` module alias.
averages={city: node_stats.mean(numeric_only=True)
          for city,node_stats in capitals_stats.items()}
averages['Sao Paulo']

timeset                              NaN
mentions                        1.800926
avg strength                    3.430669
weight                          1.253704
node size                            NaN
indegree                        1.685185
outdegree                       1.685185
Degree                          3.370370
weighted indegree               5.847222
weighted outdegree              5.847222
Weighted Degree                11.694444
Eccentricity                    6.726852
closnesscentrality              0.238598
harmonicclosnesscentrality      0.266719
betweenesscentrality          357.333333
modularity_class                5.564815
componentnumber                 0.000000
strongcompnum                  92.027778
clustering                      0.129928
Authority                       0.039103
Hub                             0.039103
triangles                       0.916667
eigencentrality                 0.144046
dtype: float64

In [13]:
# Trial run on a single city (CDMX): turn its Series of averages into a
# one-row DataFrame and list the columns that remain after dropping the
# non-informative ones.

city = 'CDMX'
avg = averages[city]

# Series -> single-row frame; 'timeset' must exist, 'type' is dropped only if present.
datafr = (
    avg.to_frame().T
    .drop(columns=['timeset'])
    .drop(columns=['type'], errors='ignore')
)

all_columns = datafr.columns.tolist()
print(all_columns)
print('\n\n')
datafr

['mentions', 'avg strength', 'weight', 'indegree', 'outdegree', 'Degree', 'weighted indegree', 'weighted outdegree', 'Weighted Degree', 'clustering', 'Eccentricity', 'closnesscentrality', 'harmonicclosnesscentrality', 'betweenesscentrality', 'modularity_class', 'componentnumber', 'strongcompnum', 'triangles', 'eigencentrality']





Unnamed: 0,mentions,avg strength,weight,indegree,outdegree,Degree,weighted indegree,weighted outdegree,Weighted Degree,clustering,Eccentricity,closnesscentrality,harmonicclosnesscentrality,betweenesscentrality,modularity_class,componentnumber,strongcompnum,triangles,eigencentrality
0,,3.477388,1.388629,1.829431,1.829431,3.658863,6.481605,6.481605,12.963211,0.07594,5.622074,0.267067,0.28932,420.227425,4.270903,0.0,127.060201,0.862876,0.104015


In [15]:
# Country of each capital, used to label the rows of the summary table.
countries = {
    'Montevideo': 'Uruguay',
    'CABA': 'Argentina',
    'CDMX': 'México',
    'Madrid': 'España',
    'Sao Paulo': 'Brasil',
    'Santiago de Chile': 'Chile',
}


list_concat = []
for city, avg in averages.items():
    # Series of averages -> single-row frame, without the attributes that
    # are not meaningful as averages ('type' is dropped only if present).
    datafr = avg.to_frame().T.drop(columns=['timeset', 'componentnumber'])
    datafr = datafr.drop(columns=['type'], errors='ignore')

    # Prepend the identifying columns: 'Ciudad' ends up first, 'País' second.
    datafr.insert(loc=0, column='País', value=[countries[city]], allow_duplicates=True)
    datafr.insert(loc=0, column='Ciudad', value=[city], allow_duplicates=True)

    # Collect the row; all rows are concatenated once after the loop.
    list_concat.append(datafr)

df_concat = pd.concat(list_concat, ignore_index=True)
df_concat

Unnamed: 0,Ciudad,País,mentions,avg strength,weight,indegree,outdegree,Degree,weighted indegree,weighted outdegree,...,modularity_class,Authority,Hub,strongcompnum,clustering,triangles,eigencentrality,ego,pageranks,node size
0,CABA,Argentina,2.285088,3.244633,1.481579,2.192982,2.192982,4.385965,7.008772,7.008772,...,5.850877,0.036946,0.036946,96.763158,0.148791,2.434211,0.093111,,,
1,CDMX,México,,3.477388,1.388629,1.829431,1.829431,3.658863,6.481605,6.481605,...,4.270903,,,127.060201,0.07594,0.862876,0.104015,,,
2,Santiago de Chile,Chile,2.097436,3.417211,1.462564,1.974359,1.974359,3.948718,6.882051,6.882051,...,3.907692,0.040609,0.040609,,0.134037,2.153846,0.074438,0.133333,0.00392,
3,Montevideo,Uruguay,4.065657,3.227742,,3.873737,3.873737,7.747475,12.969697,12.969697,...,2.666667,,,67.89899,0.207747,11.060606,0.10779,,,2.749495
4,Madrid,España,1.958159,3.518387,1.362343,1.895397,1.895397,3.790795,6.430962,6.430962,...,5.050209,,,89.757322,0.12051,1.330544,0.065103,,,
5,Sao Paulo,Brasil,1.800926,3.430669,1.253704,1.685185,1.685185,3.37037,5.847222,5.847222,...,5.564815,0.039103,0.039103,92.027778,0.129928,0.916667,0.144046,,,


In [158]:
# Number of missing values per column of the summary table.
df_concat.isna().sum()


Ciudad                        0
País                          0
mentions                      1
avg strength                  0
weight                        1
indegree                      0
outdegree                     0
Degree                        0
weighted indegree             0
weighted outdegree            0
Weighted Degree               0
Eccentricity                  0
closnesscentrality            0
harmonicclosnesscentrality    0
betweenesscentrality          0
modularity_class              0
Authority                     3
Hub                           3
strongcompnum                 1
clustering                    0
triangles                     0
eigencentrality               0
ego                           5
pageranks                     5
node size                     5
dtype: int64

In [34]:
# Remove the averages that the reference page does not describe as a global
# property of the graph.
non_global_columns = [
    'closnesscentrality',
    'harmonicclosnesscentrality',
    'modularity_class',
    'triangles',
    'eigencentrality',
    'pageranks',
    'ego',
    'betweenesscentrality',
    'Hub',
]

# A single drop keeps the table consistent (all listed columns go or none do),
# and errors='ignore' lets this cell be re-run after the columns are gone.
df_concat = df_concat.drop(columns=non_global_columns, errors='ignore')


Unnamed: 0,Ciudad,País,mentions,avg strength,weight,indegree,outdegree,Degree,weighted indegree,weighted outdegree,Weighted Degree,Eccentricity,Authority,strongcompnum,clustering,node size
0,CABA,Argentina,2.285088,3.244633,1.481579,2.192982,2.192982,4.385965,7.008772,7.008772,14.017544,4.798246,0.036946,96.763158,0.148791,
1,CDMX,México,,3.477388,1.388629,1.829431,1.829431,3.658863,6.481605,6.481605,12.963211,5.622074,,127.060201,0.07594,
2,Santiago de Chile,Chile,2.097436,3.417211,1.462564,1.974359,1.974359,3.948718,6.882051,6.882051,13.764103,5.041026,0.040609,,0.134037,
3,Montevideo,Uruguay,4.065657,3.227742,,3.873737,3.873737,7.747475,12.969697,12.969697,25.939394,4.479798,,67.89899,0.207747,2.749495
4,Madrid,España,1.958159,3.518387,1.362343,1.895397,1.895397,3.790795,6.430962,6.430962,12.861925,6.079498,,89.757322,0.12051,
5,Sao Paulo,Brasil,1.800926,3.430669,1.253704,1.685185,1.685185,3.37037,5.847222,5.847222,11.694444,6.726852,0.039103,92.027778,0.129928,


In [41]:
# Montevideo's export stores the node weight under 'node size', so copy that
# average into 'weight'. The row is selected by city name rather than by
# position, and the write goes through .loc: chained indexing
# (df[col][row] = ...) may assign to a temporary copy and leave df_concat
# unchanged.
if 'node size' in df_concat.columns:
    is_montevideo = df_concat['Ciudad'] == 'Montevideo'
    df_concat.loc[is_montevideo, 'weight'] = df_concat.loc[is_montevideo, 'node size']

# Drop the helper column and the remaining non-global averages.
# errors='ignore' keeps the cell re-runnable once the columns are gone.
df_concat = df_concat.drop(columns=['node size', 'strongcompnum', 'Authority'], errors='ignore')

In [42]:
# Display the per-city summary table after the column clean-up above.
df_concat

Unnamed: 0,Ciudad,País,mentions,avg strength,weight,indegree,outdegree,Degree,weighted indegree,weighted outdegree,Weighted Degree,Eccentricity,clustering
0,CABA,Argentina,2.285088,3.244633,1.481579,2.192982,2.192982,4.385965,7.008772,7.008772,14.017544,4.798246,0.148791
1,CDMX,México,,3.477388,1.388629,1.829431,1.829431,3.658863,6.481605,6.481605,12.963211,5.622074,0.07594
2,Santiago de Chile,Chile,2.097436,3.417211,1.462564,1.974359,1.974359,3.948718,6.882051,6.882051,13.764103,5.041026,0.134037
3,Montevideo,Uruguay,4.065657,3.227742,2.749495,3.873737,3.873737,7.747475,12.969697,12.969697,25.939394,4.479798,0.207747
4,Madrid,España,1.958159,3.518387,1.362343,1.895397,1.895397,3.790795,6.430962,6.430962,12.861925,6.079498,0.12051
5,Sao Paulo,Brasil,1.800926,3.430669,1.253704,1.685185,1.685185,3.37037,5.847222,5.847222,11.694444,6.726852,0.129928


In [None]:
"""Global characteristics we may compute:
Diameter
Radius
Average path length
Transitivity
Global Efficiency
Modularity
Assortativity Coefficient
Small Worldness"""

