# iGED : Global Systems Dynamics Initiative

El objetivo principal de este documento es analizar las métricas globales en las siguientes redes capitales

  *  Ciudad Autónoma de Buenos Aires (CABA)
  *  Ciudad de México (CDMX)
  *  Santiago de Chile (SCL)
  *  Montevideo (MTV)
  *  Madrid (MAD)
  *  Sao Paulo (SAO)

y obtener un tidy DataFrame, para continuar analizando los datos obtenidos, por ejemplo obteniendo correlaciones entre parejas de métricas.

Respecto al notebook anterior, utilizamos la documentación de NetworkX: https://www.nas.ewi.tudelft.nl/people/Piet/papers/TUDreport20111111_MetricList.pdf

así como el siguiente recurso: https://www.nas.ewi.tudelft.nl/people/Piet/papers/TUDreport20111111_MetricList.pdf

para saber con cuáles métricas globales seguir estudiando nuestros ecosistemas.

In [1]:
#-------------------------------------------------------
# Importar paquetes a utilizar
#-------------------------------------------------------
import pandas as pd
import numpy as np 
import networkx as nx
import scipy.stats as stats

import matplotlib.pyplot as plt
import seaborn as sns; sns.set()


#in this dictionary we collect the information for the 200 random graphs we generated
G={'Nuevas_Conexiones':[],'Numero_Respuestas':[], 'Rango_Respondientes':[], 'Respondientes':[]}

for key in G.keys():
    for i in range(200):
        G[key].append(nx.read_graphml('Random_Graphs_'+key+'/Random_Graph_'+key+str(i)+'.graphml'))


In [2]:
#--------------------------------
# Central point of Dominance
#--------------------------------
# How centralized a graph is with respect to the metric betweenness centrality.

def central_point_dominance(G):
    betwennesses = nx.betweenness_centrality(G)
    b_max = max(betwennesses.values())
    N = len(betwennesses.keys())
    count = 0
    for i, b_i in betwennesses.items():
        count += ( b_max - b_i )/(N-1)
    return count

In [3]:
#--------------------------------
# Spectral Radius
#--------------------------------
#     the smaller the spectral radius rho, the higher the probability of 'virus infection' tau, 
#     the more difficult it is for an idea to spread in the innovative network, 
#     namely, the less efficient

import numpy.linalg

def spectral_radius(G):
    L = nx.normalized_laplacian_matrix(G)
    e = numpy.linalg.eigvals(L.A)
    e_abs = [abs(x) for x in e]
    return max(e_abs)

In [4]:
def core_ratio(G):
    return len(nx.k_core(G,k=2).nodes())/len(G.nodes())

In [5]:
def rich_club_coeffs(G):
    t_ok = True
    t = 0
    d = dict()
    while t_ok:
        nodes_large_degree=[]
        for x in G.nodes():
            if G.degree(x)>t:
                nodes_large_degree.append(x)
        core = G.subgraph(nodes_large_degree)
        edges_core = len(core.edges())
        nodes_core = len(core.nodes())
        if nodes_core<=1:
            t_ok = False
            break
        d[t] = (2*edges_core)/(nodes_core*(nodes_core-1))
        t += 1 
    return d        

def max_rich_club(G):
    #rich_club_1=nx.algorithms.rich_club_coefficient(G, normalized=False)
    rich_club=rich_club_coeffs(G)
    max_i=0
    for i in range(len(rich_club)):
        if rich_club[i]>rich_club[max_i]:
            max_i=i
        else:
            return rich_club[max_i]

In [6]:
def eccentricity(G):
    excentricidades=nx.algorithms.distance_measures.eccentricity(G)
    excentricidad=0
    for i in excentricidades.keys():
        excentricidad+=excentricidades[i]
    return excentricidad/len(excentricidades.keys())

In [7]:
def avg_responses(G):
    avg=0
    respondents=0
    for node in G.nodes:
        try:
            avg+= node['responses']
            respondents+=0
        except:
            pass
    if respondents==0:
        return 0
    return avg/respondents

In [16]:
def degree(G):
    return 2*len(G.edges())/len(G.nodes())

In [20]:
def modularity(G):
    nx.algorithms.community.quality.performance(G,nx.algorithms.community.modularity_max.greedy_modularity_communities(G))

## Creacion del Dataframe

El siguiente bloque crea todas las metricas de la grafica que usaremos para estudiarlas.

In [23]:
metric_function_map = {'Degree': degree,
            'Excentricidad': eccentricity,
            'Diametro': nx.diameter,
            'Radio':nx.radius,
            'Camino más corto promedio':nx.average_shortest_path_length,
            'Transitividad':nx.transitivity,
            'Eficiencia Global':nx.global_efficiency,
            'Rich Club Coefficient':max_rich_club,
            'Core Ratio':core_ratio,
            'Central Point Dominance':central_point_dominance,
            'Spectral radius':spectral_radius,
            'Modularidad':modularity
            'Average Collaborations': responses}

def measures(G):
    D = dict()
    for metric, function in metric_function_map.items():
        D[metric] = [function(nx.to_undirected(G))]
    return D

def embed(D1, D2):
    for key, value in D1.items():
        D2[key].append(value)
    return D2

def df_colaboraciones_test(keys,n=200):
    """
    keys es una lista de los atributos que queremos leer
    """

    dfs={}

    for key in keys:

        for index in range(n):
            graph=G[key][index]
            if index==0:
                dict_meas=measures(graph)
            new=measures(graph)
            for metric in metric_function_map.keys():
                dict_meas[metric]+=new[metric]

        dfs[key]=pd.DataFrame()
        for metric in metric_function_map.keys():
            dfs[key][metric]=dict_meas[metric]

    return dfs

In [9]:
"""
def df_colaboraciones_test(keys):
    """
    #keys es una lista de los atributos que queremos leer
    """

    n=200

    degree = {}
    diameter = {}
    radius = {}
    avg_shortest_path_length = {}
    transitivity = {}
    global_efficiency = {}
    modularity = {}
    rich_club_coefficient = {}
    core_rate= {}
    central_pt = {}
    spectral_radii = {}
    excentricidad = {}
    modularidad = {}
    responses = {}

    dfs={}

    for key in keys:

        degree[key] = n*[0]
        diameter[key] = n*[0]
        radius[key] = n*[0]
        avg_shortest_path_length[key] = n*[0]
        transitivity[key] = n*[0]
        global_efficiency[key] = n*[0]
        modularity[key] = n*[0]
        rich_club_coefficient[key] = n*[0]
        core_rate[key]= n*[0]
        central_pt[key] = n*[0]
        spectral_radii[key] = n*[0]
        excentricidad[key] = n*[0]
        modularidad[key] = n*[0]
        responses[key] = n*[0]

        for index in range(n):
            graph=G[key][index]
        
            #conseguimos cada parámetro para esta ciudad
            degree[key][index]=                        2*len(graph.edges())/len(graph.nodes())
            diameter[key][index] =                     nx.diameter(graph)
            radius[key][index] =                       nx.radius(graph)
            avg_shortest_path_length[key][index] =     nx.average_shortest_path_length(graph)
            transitivity[key][index] =                 nx.transitivity(graph)
            global_efficiency[key][index] =            nx.global_efficiency(graph)
            #modularidad precomputada
            rich_club_coefficient[key][index] =        max_rich_club(graph)  
            core_rate[key][index]=                     core_ratio(graph)
            central_pt[key][index]=                    central_point_dominance(graph)
            spectral_radii[key][index]=                spectral_radius(graph)
            excentricidad[key][index]=                 eccentricity(graph)
            modularidad[key][index]=                   nx.algorithms.community.quality.performance(graph,nx.algorithms.community.modularity_max.greedy_modularity_communities(graph))
            responses[key][index]=                     avg_responses(graph)

        dfs[key]=pd.DataFrame()

        dfs[key]['Eficiencia Global'] = global_efficiency[key]
        dfs[key]['Average Responses'] = responses[key]
        dfs[key]['Degree']= degree[key]
        dfs[key]['Transitividad'] = transitivity[key]
        dfs[key]['Modularidad'] = modularidad[key]
        dfs[key]['Excentricidad']= excentricidad[key]
        dfs[key]['Radio'] = radius[key]
        dfs[key]['Rich Club Coefficient'] = rich_club_coefficient[key]
        dfs[key]['Core Ratio'] = core_rate[key]
        dfs[key]['Central Point Dominance'] = [round(central_pt[key][index],3) for index in range(n)]
        dfs[key]['Spectral radius'] = [round(spectral_radii[key][index],3) for index in range(n)]
        dfs[key]['Camino más corto promedio'] = avg_shortest_path_length[key]
        dfs[key]['Diámetro'] = diameter[key]

    return dfs
"""

IndentationError: unexpected indent (<ipython-input-9-7811093adf6c>, line 5)

## Colaboracion

Empezamos analizando las metricas que ya hemos calculado y luego usaremos esta informacion para crear una medida de la colaboracion.

In [30]:
df_colaboraciones_test(['Nuevas_Conexiones'])['Nuevas_Conexiones'].to_csv('Random_Nuevas_Conexiones.csv')

Unnamed: 0,Degree,Excentricidad,Diametro,Radio,Camino más corto promedio,Transitividad,Eficiencia Global,Rich Club Coefficient,Core Ratio,Central Point Dominance,Spectral radius,Modularidad
0,3.378571,5.4,6,4,3.861623,0.030827,0.285958,0.01211,0.503571,0.148803,1.913123,


In [11]:
#---------------------------------------------
#Preparar escalas y herramientas para el analisis.
#---------------------------------------------

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

X={}
normalized={}
pca={}
prin_comp={}

for key in G.keys():
    X[key]=dfs[key].to_numpy()

    normalized[key]= StandardScaler().fit_transform(X[key])
    pca[key]=PCA(n_components=5)
    prin_comp[key]=pca.fit_transform(normalized[key])

    print(pca.components_)

NameError: name 'dfs' is not defined

In [12]:
from matplotlib import pyplot as plt
for key in G.keys():
    print(key)
    plt.scatter(prin_comp[key][:,0], prin_comp[key][:,1],color='b')

Nuevas_Conexiones


KeyError: 'Nuevas_Conexiones'

In [13]:
"""Formulas de Colaboracion"""

def colaborativity_original(df):
    '''
    Original Collaborativity Formula during summer 2020 for project GED.
    Computes the colaborativity of a network G based on its measures, as shown below. 
    
    Input:
    - df : dataframe mapping each city to its graph metrics
    Returns:
    - Value describing the collaborativity of a graph, or network
    '''
    Avg_colabs =df['Average collabs']
    Clust = df['Clustering']
    Mod = df['Modularidad']
    return (1/2) * Avg_colabs * (Clust + (np.log10(Mod**2)*-1))


def colaborativity_formula_1(df):
    Efi = df['Eficiencia Global']
    Tran = df['Transitividad']
    Mod = df['Modularidad']
    Core = df['Core Ratio']
    return Efi + Tran + 1 - (Mod + Core)/2 


def colaborativity_formula_2(df):
    Efi = df['Eficiencia Global']
    Tran = df['Transitividad']
    Exc = df['Excentricidad']
    Mod = df['Modularidad']
    return Efi + Tran + (1/Exc) - Mod


def colaborativity_formula_3(df):
    efi = df['Eficiencia Global']   #comunicacion
    clus = df['Clustering']         #robustez
    mod = df['Modularidad']          #preparación para el futuro 
    return (efi * clus * (1+np.cos(math.pi*mod)) /2 )**(1/3)


def colaborativity_formula_4(df):
    efi = df['Eficiencia Global']    #comunicacion 
    tran = df['Transitividad']       #robustez
    mod = df['Modularidad']           #preparación para el futuro 
    return (efi * tran * (1+np.cos(math.pi*mod)) /2 )**(1/3)


def colaborativity_formula_5(df):
    efi = df['Eficiencia Global']    #comunicacion 
    tran = df['Transitividad']       #robustez
    core = df['Core Ratio']           #preparación para el futuro 
    return efi + tran + 1 - core 


def colaborativity_formula_6(df):
    efi = df['Eficiencia Global']    #comunicacion 
    tran = df['Transitividad']       #robustez
    exc = df['Excentricidad']        #preparación para el futuro 
    return (efi * tran * (np.sin(math.pi/exc)) )**(1/3)


# N e w    i d e a s    f o r   f o r m u l a s #

def colaborativity_formula_7(df):
    avg_deg = df['Average collabs']  #1 cantidad  
    efi = df['Eficiencia Global']    #2 calidad   comunicacion 
    tran = df['Transitividad']       #            robustez
    exc = df['Excentricidad']        #preparación para el futuro 
    return 0.5*(np.log10(avg_deg+1)/np.log10(26))  +  0.5*quadratic([efi , tran , (np.sin(math.pi/exc))])


def colaborativity_formula_8(df):
    avg_deg = df['Average collabs']  #1 cantidad  
    efi = df['Eficiencia Global']    #2 calidad   comunicacion 
    tran = df['Transitividad']       #            robustez
    mod = df['Modularidad']        #preparación para el futuro 
    return 0.5*(np.log10(avg_deg+1)/np.log10(26)) + 0.5*quadratic([efi , tran , 0.5*(1+np.cos(math.pi*mod)) ])

In [14]:
#---------------------------------------
# Fórmula para colaboratividad
#---------------------------------------

colaborativity_formulas={}
for i in range(9):
    colaborativity_formulas[i]=eval('colaborativity_formula_'+str(i))

for key in G.keys():
    for i in range(9):

        colab={}

        colaboration_results=colaborativity_formulas[i](df[key])
        colab[key] = {colaboration_results[x]:x for x in range(200)}
        order_colab=list(colab[key].keys())
        order_colab.sort()
        ratio=0
        print(key, i)
        for i in range(40):
            if colab[order_colab[i]]<101:
                ratio+=1
        ratio/40

NameError: name 'colaborativity_formula_0' is not defined

In [15]:
for i in range(200):
    print(colab[order_colab[i]])

NameError: name 'colab' is not defined