# iGED : Global Systems Dynamics Initiative

El objetivo principal de este documento es analizar las métricas globales en las siguientes redes capitales

  *  Ciudad Autónoma de Buenos Aires (CABA)
  *  Ciudad de México (CDMX)
  *  Santiago de Chile (SCL)
  *  Montevideo (MTV)
  *  Madrid (MAD)
  *  Sao Paulo (SAO)

y obtener un tidy DataFrame, para continuar analizando los datos obtenidos, por ejemplo obteniendo correlaciones entre parejas de métricas.

Respecto al notebook anterior, utilizamos la documentación de NetworkX: https://www.nas.ewi.tudelft.nl/people/Piet/papers/TUDreport20111111_MetricList.pdf

así como el siguiente recurso: https://www.nas.ewi.tudelft.nl/people/Piet/papers/TUDreport20111111_MetricList.pdf

para saber con cuáles métricas globales seguir estudiando nuestros ecosistemas.

In [1]:
#-------------------------------------------------------
# Importar paquetes a utilizar
#-------------------------------------------------------
import pandas as pd
import numpy as np 
import networkx as nx
import scipy.stats as stats

import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import math

#-------------------------------------------------------
# Archivo CSV describiendo los nodos de cada red
#-------------------------------------------------------
ags_nd=pd.read_csv('../Gephi_stats/Gephi AGS Stats.csv')             
caba_nd=pd.read_csv('../Gephi_stats/Gephi CABA Stats.csv')           #capital
cdmx_nd=pd.read_csv('../Gephi_stats/Gephi CDMX Stats.csv')           #capital
gdl_nd=pd.read_csv('../Gephi_stats/Gephi GDL Stats.csv') 
hgo_nd=pd.read_csv('../Gephi_stats/Gephi Hidalgo Stats.csv')
mad_nd=pd.read_csv('../Gephi_stats/Gephi Madrid Stats.csv')          #capital
mtv_nd=pd.read_csv('../Gephi_stats/Gephi Montevideo Stats.csv')      #capital
oax_nd=pd.read_csv('../Gephi_stats/Gephi Oaxaca Stats.csv')
sao_nd=pd.read_csv('../Gephi_stats/Gephi Sao Paulo Stats.csv')       #capital
scl_nd=pd.read_csv('../Gephi_stats/Gephi SCL Stats.csv')             #capital
#new
val_nd=pd.read_csv('../Gephi_stats/Gephi Valencia Stats.csv')


#-------------------------------------------------------
# Archivo CSV describiendo las aristas de cada red
#-------------------------------------------------------
ags_ed=pd.read_csv('../Gephi_edges/Gephi AGS Edges.csv')
caba_ed=pd.read_csv('../Gephi_edges/Gephi CABA Edges.csv')           #capital
cdmx_ed=pd.read_csv('../Gephi_edges/Gephi CDMX Edges.csv')           #capital
gdl_ed=pd.read_csv('../Gephi_edges/Gephi GDL Edges.csv')
hgo_ed=pd.read_csv('../Gephi_edges/Gephi Hidalgo Edges.csv')
mad_ed=pd.read_csv('../Gephi_edges/Gephi Madrid Edges.csv')          #capital
mtv_ed=pd.read_csv('../Gephi_edges/Gephi Montevideo Edges.csv')      #capital
oax_ed=pd.read_csv('../Gephi_edges/Gephi Oaxaca Edges.csv')
sao_ed=pd.read_csv('../Gephi_edges/Gephi Sao Paulo Edges.csv')       #capital
scl_ed=pd.read_csv('../Gephi_edges/Gephi SCL Edges.csv')             #capital
#new
val_ed=pd.read_csv('../Gephi_edges/Gephi Valencia Edges.csv')

In [2]:
def armar_grafo(nodes,edges,rol_str,weight_str):
    '''
    Función con la cual, a partir de una lista de nodos y conexiones, forma un grafo con NetworkX
    
    In:
    - nodes       lista de nodos
    - edges       lista de aristas
    - rol_str     un nombre para el parámetro que describe el rol de un actor
    - weight_str  un nombre para el parámetro que describe el peso de las aristas
    
    Out
    Un DiGrafo NetworkX llamado G.
    '''
    
    #crea un grafo dirigido a partir de la lista edges
    G=nx.from_pandas_edgelist(edges,'Source','Target',edge_attr=["Weight"],create_using=nx.DiGraph())
    #G=nx.from_pandas_edgelist(edges,'Source','Target',edge_attr=["Weight"],create_using=nx.MultiDiGraph())

    #rol es un diccionario que manda cada id de un nodo a el atributo correspondiente a rol
    rol = {nid: nodes[nodes['Id']==nid][rol_str].values[0] for nid in nodes['Id']}
    nx.set_node_attributes(G,rol,'rol')
    
    #weight es un diccionario que manda cada id de un nodo a el atributo correspondiente al peso de nodo
    weight = {nid: nodes[nodes['Id']==nid][weight_str].values[0] for nid in nodes['Id']}
    nx.set_node_attributes(G,weight,'weight')
    
    G.remove_edges_from(nx.selfloop_edges(G))

    
    return G

In [3]:
#--------------------------------------------------------------
#Armar grafos a partir de cada uno de los CSV que descargamos
#--------------------------------------------------------------

ags_G=armar_grafo(ags_nd,ags_ed,'role','weight')
caba_G=armar_grafo(caba_nd,caba_ed,'type','weight')
cdmx_G=armar_grafo(cdmx_nd,cdmx_ed,'rol estimado','weight')
gdl_G=armar_grafo(gdl_nd,gdl_ed,'type','weight')
hgo_G=armar_grafo(hgo_nd,hgo_ed,'type','weight')
mad_G=armar_grafo(mad_nd,mad_ed,'rol estimado','weight')
mtv_G=armar_grafo(mtv_nd,mtv_ed,'rol estimado','node size')
oax_G=armar_grafo(oax_nd,oax_ed,'rol','weight')
sao_G=armar_grafo(sao_nd,sao_ed,'rol estimado','weight')
scl_G=armar_grafo(scl_nd,scl_ed,'type','weight')
#new city!
val_G=armar_grafo(val_nd,val_ed, 'type','weight')

In [4]:
#----------------------------
# Medidas Globales
#----------------------------
#     Ahora obtenemos, para cada una de las capitales, una serie 
#     pandas.core.series.Series, utilizando la función mean(), 
#     con la que obtenemos los promedios de cada columna. 


city_stats={'CABA': caba_nd, 'CDMX':cdmx_nd, 'Santiago': scl_nd, 
            'Montevideo': mtv_nd, 'Madrid': mad_nd, 'Sao Paulo': sao_nd, 
            'AGS' : ags_nd, 'GDL' : gdl_nd, 'Pachuca':hgo_nd, 'Oaxaca':oax_nd,
           'Valencia' : val_nd}


averages={city: stats.mean() for city,stats in city_stats.items()}
averages['Valencia']

timeset                              NaN
mentions                        3.627778
avg strength                    3.539277
weight                          2.611111
indegree                        3.494444
outdegree                       3.494444
Degree                          6.988889
weighted indegree              12.511111
weighted outdegree             12.511111
Weighted Degree                25.022222
Eccentricity                    4.227778
closnesscentrality              0.340280
harmonicclosnesscentrality      0.369486
betweenesscentrality          180.311111
modularity_class                3.711111
clustering                      0.216784
triangles                      12.366667
eigencentrality                 0.154186
dtype: float64

In [5]:
cities={'Montevideo':mtv_G, 'CABA':caba_G, 'CDMX':cdmx_G,
          'Madrid':mad_G, 'Sao Paulo': sao_G, 'Santiago': scl_G, 
          'AGS': ags_G, 'GDL': gdl_G, 'Pachuca':hgo_G, 'Oaxaca':oax_G,
          'Valencia' : val_G}

In [6]:
countries={'Montevideo':'Uruguay', 'CABA':'Argentina', 'CDMX':'México',
          'Madrid':'España', 'Sao Paulo': 'Brasil', 'Santiago': 'Chile', 
          'AGS': 'México', 'GDL': 'México', 'Pachuca':'México', 'Oaxaca':'México',
          'Valencia': 'España'}

list_concat=[]
for city,avg in averages.items():
    datafr=avg.copy().to_frame().T
    
    #borramos del set de columnas a aquellos atributos que no son significantes 
    if 'timeset' in datafr.columns:
        datafr.drop(['timeset'], axis=1, inplace=True)
        
    if 'componentnumber' in datafr.columns:
        datafr.drop(['componentnumber'], axis=1, inplace=True)
        
    if 'type' in datafr.columns:
        datafr.drop(['type'], axis=1, inplace=True)
    
    #añadimos datafr a la lista de dataframes que vamos a concatenar
    list_concat.append(datafr)
    
    #añadimos el atributo que corresponde a el nombre de ciudad y país
    datafr.insert(0, 'País', [countries[city]], True) 
    datafr.insert(0, 'Ciudad', [city], True) 

df_concat=pd.concat(list_concat, ignore_index=True)
df_concat

Unnamed: 0,Ciudad,País,mentions,avg strength,weight,indegree,outdegree,Degree,weighted indegree,weighted outdegree,...,triangles,eigencentrality,ego,pageranks,node size,# gephi,participation,id interno,evaluador,avg. strength
0,CABA,Argentina,2.285088,3.244633,1.481579,2.192982,2.192982,4.385965,7.008772,7.008772,...,2.434211,0.093111,,,,,,,,
1,CDMX,México,,3.477388,1.388629,1.829431,1.829431,3.658863,6.481605,6.481605,...,0.862876,0.104015,,,,,,,,
2,Santiago,Chile,2.097436,3.417211,1.462564,1.974359,1.974359,3.948718,6.882051,6.882051,...,2.153846,0.074438,0.133333,0.00392,,,,,,
3,Montevideo,Uruguay,4.065657,3.227742,,3.873737,3.873737,7.747475,12.969697,12.969697,...,11.060606,0.10779,,,2.749495,,,,,
4,Madrid,España,1.958159,3.518387,1.362343,1.895397,1.895397,3.790795,6.430962,6.430962,...,1.330544,0.065103,,,,,,,,
5,Sao Paulo,Brasil,1.800926,3.430669,1.253704,1.685185,1.685185,3.37037,5.847222,5.847222,...,0.916667,0.144046,,,,,,,,
6,AGS,México,,,1.797917,2.427083,2.427083,4.854167,8.510417,8.510417,...,2.125,0.127227,,,,,,,,
7,GDL,México,,,1.855615,2.534759,2.534759,5.069519,8.834225,8.834225,...,2.871658,0.103678,,,,95.86631,,,,
8,Pachuca,México,2.168,3.570259,1.5632,2.032,2.032,4.064,6.976,6.976,...,1.584,0.115455,,,,,1.0,,,
9,Oaxaca,México,2.438462,,1.883221,2.187919,2.187919,4.375839,8.684564,8.684564,...,1.704698,0.096509,,,,,,75.0,0.261745,3.906913


In [7]:
df_concat.isnull().sum()

Ciudad                         0
País                           0
mentions                       3
avg strength                   3
weight                         1
indegree                       0
outdegree                      0
Degree                         0
weighted indegree              0
weighted outdegree             0
Weighted Degree                0
Eccentricity                   0
closnesscentrality             0
harmonicclosnesscentrality     0
betweenesscentrality           0
modularity_class               0
Authority                      4
Hub                            4
strongcompnum                  4
clustering                     0
triangles                      0
eigencentrality                0
ego                           10
pageranks                     10
node size                     10
# gephi                       10
participation                 10
id interno                    10
evaluador                     10
avg. strength                 10
dtype: int

In [8]:
#Quitar promedios que en la pagina no mencionan que describen una propiedad global del grafo

df_concat=df_concat.drop('closnesscentrality',axis=1)
df_concat=df_concat.drop('harmonicclosnesscentrality',axis=1)
df_concat=df_concat.drop('modularity_class',axis=1)
df_concat=df_concat.drop('triangles',axis=1)
df_concat=df_concat.drop('eigencentrality',axis=1)
df_concat=df_concat.drop('pageranks',axis=1)
df_concat=df_concat.drop('ego',axis=1)
df_concat=df_concat.drop('betweenesscentrality',axis=1)
df_concat=df_concat.drop('Hub',axis=1)

In [9]:
#Quitar promedios que son redundantes, ya que al hacer el análisis presentaron correlación de 1

df_concat=df_concat.drop('indegree',axis=1)
df_concat=df_concat.drop('outdegree',axis=1)
df_concat=df_concat.drop('weighted indegree',axis=1)
df_concat=df_concat.drop('weighted outdegree',axis=1)

In [10]:
df_concat['weight'][3]=df_concat['node size'][3]
df_concat['avg strength'][9]=df_concat['avg. strength'][9]

df_concat=df_concat.drop('node size',axis=1)
df_concat=df_concat.drop('strongcompnum',axis=1)
df_concat=df_concat.drop('Authority',axis=1)
df_concat=df_concat.drop('mentions',axis=1)
df_concat=df_concat.drop('# gephi',axis=1)
df_concat=df_concat.drop('participation',axis=1)
df_concat=df_concat.drop('id interno',axis=1)
df_concat=df_concat.drop('evaluador',axis=1)
df_concat=df_concat.drop('avg. strength',axis=1)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [11]:
df_concat

Unnamed: 0,Ciudad,País,avg strength,weight,Degree,Weighted Degree,Eccentricity,clustering
0,CABA,Argentina,3.244633,1.481579,4.385965,14.017544,4.798246,0.148791
1,CDMX,México,3.477388,1.388629,3.658863,12.963211,5.622074,0.07594
2,Santiago,Chile,3.417211,1.462564,3.948718,13.764103,5.041026,0.134037
3,Montevideo,Uruguay,3.227742,2.749495,7.747475,25.939394,4.479798,0.207747
4,Madrid,España,3.518387,1.362343,3.790795,12.861925,6.079498,0.12051
5,Sao Paulo,Brasil,3.430669,1.253704,3.37037,11.694444,6.726852,0.129928
6,AGS,México,,1.797917,4.854167,17.020833,4.510417,0.195189
7,GDL,México,,1.855615,5.069519,17.668449,4.705882,0.169277
8,Pachuca,México,3.570259,1.5632,4.064,13.952,4.496,0.082521
9,Oaxaca,México,3.906913,1.883221,4.375839,17.369128,4.456376,0.121452


In [12]:
'''
#------------------
# Small Worldness
#------------------


print('----------------------------------------')
print('Small Worldness en Grafos no dirigidos')
print('----------------------------------------')

small_worldness_dict = dict()
for city, graph in cities.items():
    try:
        sw = nx.algorithms.smallworld.sigma(graph.to_undirected(),niter=1,nrand=2)
        print(city+str(': ')+str(sw))
        print('')
        small_worldness_dict[city] = sw
    except:
        print(city+str(': ')+str(float('inf')))
        print('') 

#    Por motivos de tiempo y de consistencia de nuestros datos 
#    (esta medida se computa a partir de una red al azar, entonces hay variabilidad),
#    decidimos tomar una medida de Small worldness, 
#    registrarlo en una tabla y luego hacer el input manualmente. 

cityorder = ['CABA', 'CDMX', 'Santiago de Chile', 'Montevideo', 'Madrid',
             'Sao Paulo', 'AGS', 'GDL', 'Pachuca', 'Oaxaca']

small_worldness = []
for city in cityorder:
    small_worldness.append(small_worldness_dict[city])
    
    
    
PRINT STATEMENT
----------------------------------------
Small Worldness en Grafos no dirigidos
----------------------------------------
Montevideo: 1.1975877796443828

CABA: 1.0064805035612747

CDMX: 0.6149725449451627

Madrid: 0.930158535287913

Sao Paulo: 1.0712083183936894

Santiago de Chile: 1.0518599813264755

AGS: 1.1820418932080727

GDL: 1.0093135224066523

Pachuca: 0.852531189607657

Oaxaca: 1.1570584178934744


*****
After addition of Valencia, we computed its small worldness and obtained the following value: 1.1371687539619906
which we will add to the list below.
'''
    
small_worldness = [1.0064805035612747,
 0.6149725449451627,
 1.0518599813264755,
 1.1975877796443828,
 0.930158535287913,
 1.0712083183936894,
 1.1820418932080727,
 1.0093135224066523,
 0.852531189607657,
 1.1570584178934744,
 1.1371687539619906]


In [13]:
#--------------------------------
# Central point of Dominance
#--------------------------------


def central_point_dominance(G):
    betwennesses = nx.betweenness_centrality(G)
    b_max = max(betwennesses.values())
    N = len(betwennesses.keys())
    count = 0
    for i, b_i in betwennesses.items():
        count += ( b_max - b_i )/(N-1)
    return count

print('-------------------------------------------------')
print('Central point of dominance en grafos no dirigidos')
print('-------------------------------------------------')

cpds_dict=dict()
cpds = []

for city, graph in cities.items():
    cpd=central_point_dominance(graph.to_undirected())
    print(city+str(': ')+str(cpd))
    cpds_dict[city]=cpd
    print('')


cityorder = ['CABA', 'CDMX', 'Santiago', 'Montevideo', 'Madrid',
             'Sao Paulo', 'AGS', 'GDL', 'Pachuca', 'Oaxaca', 'Valencia']
for city in cityorder:
    cpds.append(cpds_dict[city])
    
cpds

-------------------------------------------------
Central point of dominance en grafos no dirigidos
-------------------------------------------------
Montevideo: 0.17867930592282757

CABA: 0.2513630404294009

CDMX: 0.2021890740713214

Madrid: 0.18370152799003858

Sao Paulo: 0.23686599553638013

Santiago: 0.5254322122333539

AGS: 0.3142364331530787

GDL: 0.28728968984229414

Pachuca: 0.40131699870933374

Oaxaca: 0.43529510932786014

Valencia: 0.11723984748201702



[0.2513630404294009,
 0.2021890740713214,
 0.5254322122333539,
 0.17867930592282757,
 0.18370152799003858,
 0.23686599553638013,
 0.3142364331530787,
 0.28728968984229414,
 0.40131699870933374,
 0.43529510932786014,
 0.11723984748201702]

In [14]:
#--------------------------------
# Spectral Radius
#--------------------------------
#     the smaller the spectral radius rho, the higher the probability of 'virus infection' tau, 
#     the more difficult it is for an idea to spread in the innovative network, 
#     namely, the less efficient

import numpy.linalg

def spectral_radius(G):
    L = nx.normalized_laplacian_matrix(G)
    e = numpy.linalg.eigvals(L.A)
    e_abs = [abs(x) for x in e]
    return max(e_abs)

print('-------------------------------------------------')
print('Radio Espectral en grafos no dirigidos')
print('-------------------------------------------------')

spectral_radii_dict = dict()
spectral_radii = []

for city, graph in cities.items():
    sr=spectral_radius(graph.to_undirected())
    spectral_radii_dict[city]=sr
    print(city+str(': ')+str(sr))
    print('')

for city in cityorder:
    spectral_radii.append(spectral_radii_dict[city])

spectral_radii

-------------------------------------------------
Radio Espectral en grafos no dirigidos
-------------------------------------------------
Montevideo: 1.7775046620196777

CABA: 1.8679496727413827

CDMX: 1.909881341485432

Madrid: 1.9196432998754678

Sao Paulo: 1.982719811731104

Santiago: 1.8899687417091213

AGS: 1.8683279531213095

GDL: 1.8036667595524662

Pachuca: 1.8600470761301018

Oaxaca: 1.8460954598656303

Valencia: 1.797818724319187



[1.8679496727413827,
 1.909881341485432,
 1.8899687417091213,
 1.7775046620196777,
 1.9196432998754678,
 1.982719811731104,
 1.8683279531213095,
 1.8036667595524662,
 1.8600470761301018,
 1.8460954598656303,
 1.797818724319187]

In [15]:
#----------
# Helpers
#----------
#Functions that will be used to compute 
#  *  core ratio
#  *  rich club coefficient
#  *  modularity

def core_ratio(G):
    return len(nx.k_core(G,k=2).nodes())/len(G.nodes())

def rich_club_coeffs(G):
    t_ok = True
    t = 0
    d = dict()
    while t_ok:
        nodes_large_degree=[]
        for x in G.nodes():
            if G.degree(x)>t:
                nodes_large_degree.append(x)
        core = G.subgraph(nodes_large_degree)
        edges_core = len(core.edges())
        nodes_core = len(core.nodes())
        if nodes_core<=1:
            t_ok = False
            break
        d[t] = (2*edges_core)/(nodes_core*(nodes_core-1))
        t += 1 
    return d        

def max_rich_club(G):
    #rich_club_1=nx.algorithms.rich_club_coefficient(G, normalized=False)
    rich_club=rich_club_coeffs(G)
    max_i=0
    
    for i in range(len(rich_club)):
        if rich_club[i]>rich_club[max_i]:
            max_i=i
    #print(max_i)
    return rich_club[max_i]

def modularity(G):
    return nx.algorithms.community.quality.performance(G,nx.algorithms.community.modularity_max.greedy_modularity_communities(G))

In [16]:

#las columnas que vamos a aregar al DataFrame
diameter = [0]*len(cities)
radius = [0]*len(cities)
avg_shortest_path_length = [0]*len(cities)
transitivity = [0]*len(cities)
global_efficiency = [0]*len(cities)
modularities = [0]*len(cities)
rich_club_coefficient = [0]*len(cities)
core_rate= [0]*len(cities)


for city, graph in cities.items():
    print(city)
    city_index=df_concat.index[df_concat['Ciudad']==city].tolist()[0]
    
    #conseguimos cada parámetro para esta ciudad
    undirected=nx.to_undirected(graph)
    diameter[city_index] =                     nx.diameter(undirected)
    radius[city_index] =                       nx.radius(undirected)
    avg_shortest_path_length[city_index] =     nx.average_shortest_path_length(undirected)
    transitivity[city_index] =                 nx.transitivity(undirected)
    global_efficiency[city_index] =            nx.global_efficiency(undirected)
    #small_worldness precomputado
    modularities[city_index] =                 modularity(undirected)
    rich_club_coefficient[city_index] =        max_rich_club(undirected)  
    core_rate[city_index]=                     core_ratio(undirected)


    
    
df_concat['Diámetro'] = diameter
df_concat['Radio'] = radius
df_concat['Camino más corto promedio'] = avg_shortest_path_length
df_concat['Transitividad'] = transitivity
df_concat['Eficiencia Global'] = global_efficiency
df_concat['Small Worldness'] = small_worldness
df_concat['Rich Club Coefficient'] = rich_club_coefficient
df_concat['Core Ratio'] = core_rate
df_concat['Central Point Dominance'] = [round(elem,3) for elem in cpds]
df_concat['Spectral radius'] = [round(elem,3) for elem in spectral_radii]
#df_concat['Modularidad'] = [round(elem,3) for elem in modularities]
df_concat['Modularidad'] = [0.356, 0.527, 0.623, 0.606, 0.682, 0.535, 0.523, 0.511, 0.525, 0.533, 0.37]


df_concat

Montevideo
CABA
CDMX
Madrid
Sao Paulo
Santiago
AGS
GDL
Pachuca
Oaxaca
Valencia


Unnamed: 0,Ciudad,País,avg strength,weight,Degree,Weighted Degree,Eccentricity,clustering,Diámetro,Radio,Camino más corto promedio,Transitividad,Eficiencia Global,Small Worldness,Rich Club Coefficient,Core Ratio,Central Point Dominance,Spectral radius,Modularidad
0,CABA,Argentina,3.244633,1.481579,4.385965,14.017544,4.798246,0.148791,6,3,3.355669,0.107392,0.325773,1.006481,0.866667,0.495614,0.251,1.868,0.356
1,CDMX,México,3.477388,1.388629,3.658863,12.963211,5.622074,0.07594,7,4,3.820318,0.05,0.28932,0.614973,0.361111,0.421405,0.202,1.91,0.527
2,Santiago,Chile,3.417211,1.462564,3.948718,13.764103,5.041026,0.134037,6,3,3.227544,0.100744,0.338889,1.05186,1.0,0.430769,0.525,1.89,0.623
3,Montevideo,Uruguay,3.227742,2.749495,7.747475,25.939394,4.479798,0.207747,6,3,3.081116,0.224523,0.360837,1.197588,1.0,0.565657,0.179,1.778,0.606
4,Madrid,España,3.518387,1.362343,3.790795,12.861925,6.079498,0.12051,8,5,3.783517,0.081917,0.294653,0.930159,1.0,0.439331,0.184,1.92,0.682
5,Sao Paulo,Brasil,3.430669,1.253704,3.37037,11.694444,6.726852,0.129928,8,4,4.324031,0.078571,0.266719,1.071208,0.222222,0.421296,0.237,1.983,0.535
6,AGS,México,,1.797917,4.854167,17.020833,4.510417,0.195189,5,3,3.120614,0.14011,0.363556,1.182042,1.0,0.59375,0.314,1.868,0.523
7,GDL,México,,1.855615,5.069519,17.668449,4.705882,0.169277,6,3,3.208499,0.124392,0.342444,1.009314,1.0,0.540107,0.287,1.804,0.511
8,Pachuca,México,3.570259,1.5632,4.064,13.952,4.496,0.082521,6,3,3.162323,0.095514,0.351108,0.852531,0.5,0.384,0.401,1.86,0.525
9,Oaxaca,México,3.906913,1.883221,4.375839,17.369128,4.456376,0.121452,5,3,3.320697,0.107699,0.33385,1.157058,1.0,0.516779,0.435,1.846,0.533


In [17]:
#Cambiar los nombres de los atributos del DataFrame
#algunos nombres de columnas no fueron modificados para facilitar legibilidad
df_concat.rename(columns={'Eccentricity': 'Excentricidad', 'clustering':'Clustering'}, inplace=True)
df_concat

Unnamed: 0,Ciudad,País,avg strength,weight,Degree,Weighted Degree,Excentricidad,Clustering,Diámetro,Radio,Camino más corto promedio,Transitividad,Eficiencia Global,Small Worldness,Rich Club Coefficient,Core Ratio,Central Point Dominance,Spectral radius,Modularidad
0,CABA,Argentina,3.244633,1.481579,4.385965,14.017544,4.798246,0.148791,6,3,3.355669,0.107392,0.325773,1.006481,0.866667,0.495614,0.251,1.868,0.356
1,CDMX,México,3.477388,1.388629,3.658863,12.963211,5.622074,0.07594,7,4,3.820318,0.05,0.28932,0.614973,0.361111,0.421405,0.202,1.91,0.527
2,Santiago,Chile,3.417211,1.462564,3.948718,13.764103,5.041026,0.134037,6,3,3.227544,0.100744,0.338889,1.05186,1.0,0.430769,0.525,1.89,0.623
3,Montevideo,Uruguay,3.227742,2.749495,7.747475,25.939394,4.479798,0.207747,6,3,3.081116,0.224523,0.360837,1.197588,1.0,0.565657,0.179,1.778,0.606
4,Madrid,España,3.518387,1.362343,3.790795,12.861925,6.079498,0.12051,8,5,3.783517,0.081917,0.294653,0.930159,1.0,0.439331,0.184,1.92,0.682
5,Sao Paulo,Brasil,3.430669,1.253704,3.37037,11.694444,6.726852,0.129928,8,4,4.324031,0.078571,0.266719,1.071208,0.222222,0.421296,0.237,1.983,0.535
6,AGS,México,,1.797917,4.854167,17.020833,4.510417,0.195189,5,3,3.120614,0.14011,0.363556,1.182042,1.0,0.59375,0.314,1.868,0.523
7,GDL,México,,1.855615,5.069519,17.668449,4.705882,0.169277,6,3,3.208499,0.124392,0.342444,1.009314,1.0,0.540107,0.287,1.804,0.511
8,Pachuca,México,3.570259,1.5632,4.064,13.952,4.496,0.082521,6,3,3.162323,0.095514,0.351108,0.852531,0.5,0.384,0.401,1.86,0.525
9,Oaxaca,México,3.906913,1.883221,4.375839,17.369128,4.456376,0.121452,5,3,3.320697,0.107699,0.33385,1.157058,1.0,0.516779,0.435,1.846,0.533


In [18]:
from sklearn.impute import KNNImputer
imputer = KNNImputer(n_neighbors=3)

df_numbers = df_concat.copy()
df_numbers=df_numbers.drop('Ciudad',axis=1)
df_numbers=df_numbers.drop('País',axis=1)


var = imputer.fit_transform(df_numbers)
df_numbers = pd.DataFrame(var)

df_numbers.columns = ['avg strength', 'weight', 'Degree', 'Weighted Degree', 'Excentricidad',
                       'Clustering', 'Diámetro', 'Radio', 'Camino más corto promedio', 'Transitividad',
                       'Eficiencia Global', 'Small Worldness', 'Rich Club Coefficient', 'Core Ratio',
                       'Central Point Dominance', 'Spectral radius','Modularidad']

df_concat['avg strength'] = df_numbers['avg strength']
df_concat


Unnamed: 0,Ciudad,País,avg strength,weight,Degree,Weighted Degree,Excentricidad,Clustering,Diámetro,Radio,Camino más corto promedio,Transitividad,Eficiencia Global,Small Worldness,Rich Club Coefficient,Core Ratio,Central Point Dominance,Spectral radius,Modularidad
0,CABA,Argentina,3.244633,1.481579,4.385965,14.017544,4.798246,0.148791,6,3,3.355669,0.107392,0.325773,1.006481,0.866667,0.495614,0.251,1.868,0.356
1,CDMX,México,3.477388,1.388629,3.658863,12.963211,5.622074,0.07594,7,4,3.820318,0.05,0.28932,0.614973,0.361111,0.421405,0.202,1.91,0.527
2,Santiago,Chile,3.417211,1.462564,3.948718,13.764103,5.041026,0.134037,6,3,3.227544,0.100744,0.338889,1.05186,1.0,0.430769,0.525,1.89,0.623
3,Montevideo,Uruguay,3.227742,2.749495,7.747475,25.939394,4.479798,0.207747,6,3,3.081116,0.224523,0.360837,1.197588,1.0,0.565657,0.179,1.778,0.606
4,Madrid,España,3.518387,1.362343,3.790795,12.861925,6.079498,0.12051,8,5,3.783517,0.081917,0.294653,0.930159,1.0,0.439331,0.184,1.92,0.682
5,Sao Paulo,Brasil,3.430669,1.253704,3.37037,11.694444,6.726852,0.129928,8,4,4.324031,0.078571,0.266719,1.071208,0.222222,0.421296,0.237,1.983,0.535
6,AGS,México,3.573935,1.797917,4.854167,17.020833,4.510417,0.195189,5,3,3.120614,0.14011,0.363556,1.182042,1.0,0.59375,0.314,1.868,0.523
7,GDL,México,3.573935,1.855615,5.069519,17.668449,4.705882,0.169277,6,3,3.208499,0.124392,0.342444,1.009314,1.0,0.540107,0.287,1.804,0.511
8,Pachuca,México,3.570259,1.5632,4.064,13.952,4.496,0.082521,6,3,3.162323,0.095514,0.351108,0.852531,0.5,0.384,0.401,1.86,0.525
9,Oaxaca,México,3.906913,1.883221,4.375839,17.369128,4.456376,0.121452,5,3,3.320697,0.107699,0.33385,1.157058,1.0,0.516779,0.435,1.846,0.533


In [19]:
colab_info =     pd.read_csv('../Índice de colaboración.csv')
muestra =        pd.read_csv('../Data_answers_evaluators/Evaluadores ecosistemas.xlsx - Muestra.csv')             


list_cities = ['AGS', 'CABA', 'CDMX', 'GDL', 'Pachuca',
                  'Madrid', 'Montevideo', 'Oaxaca', 'Sao Paulo', 'Santiago', 'Valencia']

colab_info

Unnamed: 0.1,Unnamed: 0,Madrid,Madrid SurveyGizmo,Madrid Typeform,CDMX,Santiago,CABA,Sao Paulo,Montevideo,Valencia,Oaxaca,GDL,Pachuca,AGS
0,Muestra,38.0,16.0,24.0,51.0,30.0,36.0,34.0,59.0,37.0,36.0,32.0,21.0,19.0
1,Nodes,239.0,120.0,156.0,299.0,195.0,228.0,216.0,198.0,180.0,149.0,187.0,125.0,96.0
2,Edges,453.0,166.0,205.0,547.0,385.0,500.0,364.0,767.0,629.0,326.0,474.0,254.0,233.0
3,Promedio colabs por Ego,12.13,10.13,8.63,12.33,13.04,13.48,10.38,13.4,15.05,6.39,10.81,9.52,8.21
4,Average Degree,3.8,2.76,2.62,3.6,3.9,4.3,3.3,7.6,6.92,4.36,5.06,4.06,4.84
5,Network Diameter,8.0,3.0,5.0,7.0,6.0,6.0,8.0,6.0,5.0,5.0,6.0,6.0,5.0
6,Graph Density,0.015,0.012,0.008,0.012,0.019,0.019,0.015,0.036,0.037,0.028,0.025,0.032,0.047
7,Modularity,0.606,0.64,0.655,0.623,0.535,0.527,0.682,0.356,0.37,0.533,0.511,0.525,0.523
8,Avg Clustering Coefficient (total undirected),0.269,0.143,0.112,0.18,0.315,0.306,0.299,0.367,0.379,0.245,0.317,0.215,0.347
9,1/2 * avgcolabs * (clust + (log(mod^2)*-1)),4.27,2.69,2.07,3.64,5.6,5.81,3.28,8.47,9.35,2.53,4.87,3.69,3.74


In [20]:

def colaborativity_original(df, index, city):
    '''
    Original Collaborativity Formula during summer 2020 for project GED
    Computes the colaborativity of a network G based on its measures, as shown below. 
    
    Input:
    - datadict : dictionary mapping Metric to Metric(G), for some real or synthetic graph G
    Returns:
    - Value between (technically) 0 and 3.5, describing the collaborativity of a graph, or network
    
    '''
    Avg_colabs = list(colab_info[city])[3]
    print(Avg_colabs)
    #Avg_colabs = np.mean(list(cities_info[city]['Colabs']))
    Clust = list(df['Clustering'])[index]
    Mod = list(df['Modularidad'])[index]
    return (1/2) * Avg_colabs * (Clust + (np.log(Mod**2)*-1))


def colaborativity_formula_1(df, index):
    '''
    First Proposed Collaborativity Formula in GED summer 2020 participation in the project
    Computes the colaborativity of a network G based on its measures, as shown below. 
    
    Input:
    - datadict : dictionary mapping Metric to Metric(G), for some real or synthetic graph G
    Returns:
    - Value between (technically) 0 and 3.5, describing the collaborativity of a graph, or network
    
    '''
    Efi = list(df['Eficiencia Global'])[index]
    Tran = list(df['Transitividad'])[index]
    Mod = list(df['Modularidad'])[index]
    Core = list(df['Core Ratio'])[index]
    return Efi + Tran + 1 - (Mod + Core)/2 




def colaborativity_formula_2(df, index):
    '''
    Second Proposed Collaborativity Formula in GED summer 2020 participation in the project
    Computes the colaborativity of a network G based on its measures, as shown below. 
    
    Input:
    - df : DataFrame showing metrics of graph G
    - index : of city in the dataframe
    Returns:
    - Value describing the collaborativity of a graph, or network
    
    '''
    Efi = list(df['Eficiencia Global'])[index]
    Tran = list(df['Transitividad'])[index]
    Exc = list(df['Excentricidad'])[index]
    Mod = list(df['Modularidad'])[index]
    return Efi + Tran + (1/Exc) - Mod




def colaborativity_formula_10(df, index):
    efi = list(df['Eficiencia Global'])[index]
    comunicacion = efi

    clus = list(df['Clustering'])[index]
    future = clus

    mod= list(df['Modularidad'])[index]
    preparacion = mod

    return (efi * clus * (1+math.cos(math.pi*mod)) /2 )**(1/3)



def colaborativity_formula_11(df, index):
    efi = list(df['Eficiencia Global'])[index]
    comunicacion = efi

    tran = list(df['Transitividad'])[index]
    future = tran

    mod= list(df['Modularidad'])[index]
    preparacion = mod

    return (efi * tran * (1+math.cos(math.pi*mod)) /2 )**(1/3)

In [21]:

#las columnas que vamos a aregar al DataFrame
collab_0 = [0]*len(cities)
collab_1 = [0]*len(cities)
collab_2 = [0]*len(cities)
collab_10 = [0]*len(cities)
collab_11 = [0]*len(cities)


for city, graph in cities.items():
    city_index=df_concat.index[df_concat['Ciudad']==city].tolist()[0]
    
    #conseguimos cada parámetro para esta ciudad
    undirected=nx.to_undirected(graph)
    
    collab_0[city_index] =                     colaborativity_original(df_concat, city_index, city)
    collab_1[city_index] =                     colaborativity_formula_1(df_concat, city_index)
    collab_2[city_index] =                     colaborativity_formula_2(df_concat, city_index)
    collab_10[city_index] =                     colaborativity_formula_10(df_concat, city_index)
    collab_11[city_index] =                     colaborativity_formula_11(df_concat, city_index)
    

    
    
df_concat['Colaboratividad Original'] = collab_0
df_concat['Colaboratividad 1'] = collab_1
df_concat['Colaboratividad 2'] = collab_2
df_concat['Colaboratividad 10'] = collab_10
df_concat['Colaboratividad 11'] = collab_11

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
df_concat

13.4
13.48
12.33
12.13
10.38
13.04
8.21
10.81
9.52
6.39
15.05


Unnamed: 0,Ciudad,País,avg strength,weight,Degree,Weighted Degree,Excentricidad,Clustering,Diámetro,Radio,Camino más corto promedio,Transitividad,Eficiencia Global,Small Worldness,Rich Club Coefficient,Core Ratio,Central Point Dominance,Spectral radius,Modularidad,Colaboratividad Original,Colaboratividad 1,Colaboratividad 2,Colaboratividad 10,Colaboratividad 11
0,CABA,Argentina,3.244633,1.481579,4.385965,14.017544,4.798246,0.148791,6,3,3.355669,0.107392,0.325773,1.006481,0.866667,0.495614,0.251,1.868,0.356,14.925325,1.007357,0.285574,0.326576,0.292942
1,CDMX,México,3.477388,1.388629,3.658863,12.963211,5.622074,0.07594,7,4,3.820318,0.05,0.28932,0.614973,0.361111,0.421405,0.202,1.91,0.527,8.36621,0.865118,-0.009809,0.215836,0.187769
2,Santiago,Chile,3.417211,1.462564,3.948718,13.764103,5.041026,0.134037,6,3,3.227544,0.100744,0.338889,1.05186,1.0,0.430769,0.525,1.89,0.623,7.044562,0.912748,0.015005,0.241885,0.219925
3,Montevideo,Uruguay,3.227742,2.749495,7.747475,25.939394,4.479798,0.207747,6,3,3.081116,0.224523,0.360837,1.197588,1.0,0.565657,0.179,1.778,0.606,8.103631,0.999532,0.202584,0.293292,0.300984
4,Madrid,España,3.518387,1.362343,3.790795,12.861925,6.079498,0.12051,8,5,3.783517,0.081917,0.294653,0.930159,1.0,0.439331,0.184,1.92,0.682,5.373353,0.815905,-0.140943,0.201218,0.176923
5,Sao Paulo,Brasil,3.430669,1.253704,3.37037,11.694444,6.726852,0.129928,8,4,4.324031,0.078571,0.266719,1.071208,0.222222,0.421296,0.237,1.983,0.535,7.166895,0.867142,-0.041052,0.248933,0.210508
6,AGS,México,3.573935,1.797917,4.854167,17.020833,4.510417,0.195189,5,3,3.120614,0.14011,0.363556,1.182042,1.0,0.59375,0.314,1.868,0.523,6.122757,0.945291,0.202375,0.320493,0.286961
7,GDL,México,3.573935,1.855615,5.069519,17.668449,4.705882,0.169277,6,3,3.208499,0.124392,0.342444,1.009314,1.0,0.540107,0.287,1.804,0.511,8.17262,0.941283,0.168336,0.303596,0.273964
8,Pachuca,México,3.570259,1.5632,4.064,13.952,4.496,0.082521,6,3,3.162323,0.095514,0.351108,0.852531,0.5,0.384,0.401,1.86,0.525,6.527077,0.992121,0.144041,0.237226,0.249075
9,Oaxaca,México,3.906913,1.883221,4.375839,17.369128,4.456376,0.121452,5,3,3.320697,0.107699,0.33385,1.157058,1.0,0.516779,0.435,1.846,0.533,4.408844,0.91666,0.132947,0.262923,0.252598


In [22]:
df_concat.to_csv('Tidy_DataFrame.csv',index=False)

In [27]:
df_only_colabs = df_concat[['Ciudad', 'Colaboratividad Original', 'Colaboratividad 1',
                          'Colaboratividad 2', 'Colaboratividad 10', 'Colaboratividad 11']]

df_only_colabs.to_csv('Tidy_Colab_Formulas.csv', index=False)