# Part 1: Random Graph generators

In [82]:
import numpy as np 
import pandas as pd
import networkx as nx
import random

In [83]:
def _random_graph_attempt(prob_resp, prob_nuevo):
    """Run the survey-like growth process once and return the graph, whatever its size."""
    G = nx.Graph()
    G.add_node("0")
    pending = ["0"]          # nodes in creation order; `cursor` marks the next one to process
    cursor = 0
    max_index = 0            # largest numeric label handed out so far
    max_respondents = random.randint(20, 30)   # cap on how many nodes may act as respondents
    respondents = 0

    def link_to_older(current, how_many):
        # Connect `current` to uniformly chosen nodes created before it.
        # Node "0" has no older node, so nothing is added for it (the previous
        # expression `randint(0, k) - 1` produced a phantom node "-1" instead).
        newest_older = int(current) - 1
        if newest_older < 0:
            return
        for _ in range(how_many):
            G.add_edge(current, str(random.randint(0, newest_older)))

    while cursor < len(pending):
        current = pending[cursor]
        cursor += 1
        is_respondent = (random.random() < prob_resp and respondents < max_respondents) or current == "0"
        if is_respondent:
            respondents += 1
            n = random.randint(6, 21)
            # Split the n reported collaborations into brand-new and already-existing partners.
            new_n = sum(random.random() < prob_nuevo for _ in range(n))
            for offset in range(1, new_n + 1):
                new_node = str(max_index + offset)
                pending.append(new_node)
                G.add_edge(current, new_node)
            max_index += new_n
            link_to_older(current, n - new_n)
        elif random.random() < 0.25:
            # Non-respondents occasionally get a few links to older nodes.
            link_to_older(current, random.randint(1, 6))
    return G


def random_graph(prob_resp,prob_nuevo, min_nodes=150, max_nodes=400, max_attempts=1000):
    """
    Generate a random undirected graph imitating the way the entrepreneurship
    networks were collected.

    Nodes are processed in creation order, starting from node "0" (always a
    respondent). A respondent reports between 6 and 21 collaborations; each one
    goes to a brand-new node with probability ``prob_nuevo`` and otherwise to a
    node created earlier. A non-respondent gets, with probability 0.25, between
    1 and 6 links to nodes created earlier.

    Parameters
    ----------
    prob_resp : float
        Probability that a processed node is a survey respondent.
    prob_nuevo : float
        Probability that a respondent's collaboration is with a new node.
    min_nodes, max_nodes : int, optional
        Inclusive range of accepted graph sizes; other graphs are discarded.
    max_attempts : int, optional
        Number of graphs to try before giving up.

    Returns
    -------
    networkx.Graph
        Graph whose node labels are the strings "0", "1", ...

    Raises
    ------
    RuntimeError
        If no graph of an accepted size is produced within ``max_attempts``.
    """
    # Iterate instead of recursing so that unlucky parameters cannot exhaust the stack.
    for _ in range(max_attempts):
        G = _random_graph_attempt(prob_resp, prob_nuevo)
        if min_nodes <= G.number_of_nodes() <= max_nodes:
            return G
    raise RuntimeError(
        "random_graph: no graph with between %d and %d nodes after %d attempts"
        % (min_nodes, max_nodes, max_attempts)
    )
    
#for i in range(100):
#    nx.write_graphml(random_graph(0.25,0.55+i/400),'Random_Graphs/Random_Graph_'+str(11+i)+'.graphml')
#for i in range(100):
#    nx.write_graphml(random_graph(0.25,0.55-i/400),'Random_Graphs/Random_Graph_'+str(111+i)+'.graphml')

In [84]:
from numpy.random import choice

def are_adjacent(u,v,G):
    """Return True when v is a node of G and appears among the neighbors of u, else False."""
    return v in G.nodes() and v in G.neighbors(u)

def random_graph_2(n_evals, prob_out, prob_new):
    '''
    Generate a random directed collaboration graph for a questionnaire answered
    by ``n_evals`` evaluators, each of whom may report at most 25 collaborations.

    Evaluators are nodes 0 .. n_evals-1. The number of collaborations reported by
    each evaluator is drawn from a normal distribution (mean 12.3, std 4),
    truncated to an integer and clamped to the range 0..25. Every reported
    collaboration becomes an edge evaluator -> partner, where the partner is
    either another evaluator or an organization outside the evaluator list.
    Existing partners are chosen with probability proportional to (degree + 1).

    Input:
     - n_evals  : number of evaluators responding questionnaire
     - prob_out : probability that a mentioned collaboration is outside of the network of evaluators
     - prob_new : probability that an outside collaboration is with an organization not mentioned before

    Returns:
     - networkx.DiGraph with integer node labels
    '''
    G = nx.DiGraph()

    # Degree bookkeeping, used as preferential-attachment weights.
    inside = {i: 0 for i in range(n_evals)}    # evaluators
    outside = {}                               # organizations mentioned by evaluators
    G.add_nodes_from(inside)
    next_index = n_evals                       # label for the next outside node

    def pick_existing(pool, source):
        # Draw a node of `pool` other than `source`, weighted by degree + 1.
        # If the first draw is already a successor of `source`, draw once more
        # (the second draw is accepted as is). Returns None when nothing is eligible.
        candidates = [vertex for vertex in pool if vertex != source]
        if not candidates:
            return None
        weights = [pool[vertex] + 1 for vertex in candidates]
        total = sum(weights)
        probabilities = [w / total for w in weights]
        to = np.random.choice(candidates, size=1, p=probabilities)[0]
        if G.has_edge(source, to):
            to = np.random.choice(candidates, size=1, p=probabilities)[0]
        return int(to)

    # evaluator |--> number of reported collaborations, clamped to the documented 0..25
    sample_num_evaluations = np.random.normal(loc=12.3, scale=4, size=n_evals)
    list_degrees_evaluators = [min(max(int(x), 0), 25) for x in sample_num_evaluations]

    for i in range(n_evals):
        for _ in range(list_degrees_evaluators[i]):
            if random.random() < prob_out:
                # Partner outside the evaluator list: a new organization when none
                # exists yet, or with probability prob_new; otherwise an existing one.
                if not outside or random.random() < prob_new:
                    to = next_index
                    next_index += 1
                    outside[to] = 1
                else:
                    to = pick_existing(outside, i)
                    outside[to] += 1
            else:
                # Partner is another evaluator; with a single evaluator there is none.
                to = pick_existing(inside, i)
                if to is None:
                    continue
                inside[to] += 1
            G.add_edge(i, to)
            inside[i] += 1
    return G


# Number of evaluators surveyed in each city.
num_evaluators = {'Aguascalientes':19, 'Buenos Aires':31, 'Ciudad de México':36, 'Guadalajara':32, 'Hidalgo':19,
                  'Madrid':37, 'Montevideo':48, 'Oaxaca':36, 'Sao Paulo':28, 'Santiago de Chile':25}
list_cities = list(num_evaluators)

# One second-type random graph per city, written next to the notebook as GraphML.
for city in list_cities:
    G = random_graph_2(num_evaluators[city], 0.2, 0.5)
    output_path = 'Random_Graphs_Second_Type_Corrected/Random_Graph_'+city+'.graphml'
    nx.write_graphml(G, output_path)


In [85]:
# Summary sheet: one column per city, rows as displayed below this cell.
muestra = pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - Muestra.csv')

# One sheet of evaluator answers per city.
ags_info = pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - AGS.csv')
caba_info = pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - CABA.csv')
cdmx_info = pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - CDMX.csv')
gdl_info = pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - GDL.csv')
hgo_info = pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - HGO.csv')
mad_info = pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - MAD.csv')
mtv_info = pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - MVD.csv')
oax_info = pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - OAX.csv')
sao_info = pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - SAO.csv')
scl_info = pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - SCL.csv')

# In these four sheets the collaborations column is read as 'Unnamed: 1'; give it the
# name 'Colabs' that the rest of the notebook looks up.
for _sheet in (ags_info, gdl_info, hgo_info, oax_info):
    _sheet.rename(columns={'Unnamed: 1':'Colabs'}, inplace=True)

# City labels as they appear in the column headers of `muestra`.
list_cities = ['AGS', 'CABA', 'CDMX', 'GDL', 'Pachuca',
                  'Madrid', 'Montevideo', 'Oaxaca', 'Sao Paulo', 'Santiago']
cities_info = dict(zip(list_cities,
                       (ags_info, caba_info, cdmx_info, gdl_info, hgo_info,
                        mad_info, mtv_info, oax_info, sao_info, scl_info)))

muestra

Unnamed: 0.1,Unnamed: 0,Madrid,Madrid SG,Madrid Typeform,CDMX,Santiago,CABA,Sao Paulo,Montevideo,Oaxaca,GDL,Pachuca,AGS
0,Muestra,38,16,24,51,30,36,34,59,36,32,21,19
1,Nodes,239,120,156,299,195,228,216,198,149,187,125,96
2,Edges,453,166,205,547,385,500,364,767,326,474,254,233
3,Promedio colabs. por participante,12.13,10.13,8.63,12.33,13.04,13.48,10.38,13.4,6.39,10.81,9.52,8.21
4,% Muestra,15.90%,13.33%,15.38%,17.06%,15.38%,15.79%,15.74%,29.80%,24.16%,17.11%,16.80%,19.79%


In [86]:
'''Third Random Graph Generator'''

def eliminate_small_components(G, min_size=20):
    '''
    Remove, in place, every node that belongs to a connected component with
    fewer than ``min_size`` nodes (connectivity ignores edge direction).

    This does not by itself guarantee a connected result: if two or more
    components of at least ``min_size`` nodes remain, all of them are kept and
    the marker '****' is printed.

    Input:
     - G : networkx graph (directed or undirected); modified in place
     - min_size : smallest component size that is kept
    Returns:
     - the same graph object G
    '''
    # Compute the components once instead of once per node, and materialize
    # them before mutating G.
    components = list(nx.connected_components(nx.to_undirected(G)))
    to_remove = [node
                 for component in components
                 if len(component) < min_size
                 for node in component]
    G.remove_nodes_from(to_remove)

    if nx.number_connected_components(nx.to_undirected(G))>1:
        print('****')
    return G



def random_graph_3(infos, city, prob_out, prob_new, sample=None):
    '''
    Simulate the survey responses of one city as a random directed graph.

    Nodes 0 .. n_responded-1 are the organizations that answered the survey. The
    first n_evals of them (one per entry of the city's 'Colabs' column) are
    evaluators; the remaining responding organizations start in the "outside"
    group, together with every organization created during the simulation.

    Phase 1: each evaluator i adds as many edges i -> partner as its 'Colabs' entry.
    Phase 2: every responding organization adds 2 more edges (its key organizations).

    In both phases a partner is outside the evaluator group with probability
    ``prob_out``; an outside partner is a brand-new node with probability
    ``prob_new``. Existing partners are drawn with probability proportional to
    (degree + 1). When there is no eligible outside partner a new node is
    created; when there is no eligible evaluator the edge is skipped.

    Finally, small connected components are dropped by eliminate_small_components.

    Input:
     - infos : dictionary mapping from city to a table with a 'Colabs' column
     - city : name of city to be simulated
     - prob_out : probability that a mentioned collaboration is outside of the network of evaluators
     - prob_new : probability that an outside collaboration is a newly mentioned org
     - sample : table with one column per city whose first row is the number of
                responding organizations; defaults to the notebook-level `muestra`

    Returns:
     - networkx.DiGraph with integer node labels
    '''
    if sample is None:
        sample = muestra

    n_responded = int(sample[city].iloc[0])
    reported = list(infos[city]['Colabs'])      # collaborations reported by each evaluator
    n_evals = len(reported)

    G = nx.DiGraph()

    # Degree bookkeeping, used as preferential-attachment weights.
    inside = {}     # evaluators
    outside = {}    # every other organization
    for i in range(n_responded):
        G.add_node(i)
        if i < n_evals:
            inside[i] = 0
        else:
            outside[i] = 0
    next_index = n_responded                    # label for the next created node

    def add_outside_node():
        # Create a new outside organization that starts with degree 1.
        nonlocal next_index
        to = next_index
        next_index += 1
        G.add_node(to)
        outside[to] = 1
        return to

    def pick_existing(pool, source):
        # Draw a node of `pool` other than `source`, weighted by degree + 1.
        # If the first draw is already a successor of `source`, draw once more
        # (the second draw is accepted as is). Returns None when nothing is eligible.
        candidates = [vertex for vertex in pool if vertex != source]
        if not candidates:
            return None
        weights = [pool[vertex] + 1 for vertex in candidates]
        total = sum(weights)
        probabilities = [w / total for w in weights]
        to = np.random.choice(candidates, size=1, p=probabilities)[0]
        if G.has_edge(source, to):
            to = np.random.choice(candidates, size=1, p=probabilities)[0]
        return int(to)

    # Phase 1: collaborations reported by each evaluator.
    for i in list(inside):
        for _ in range(int(reported[i])):
            if random.random() < prob_out:
                if not outside or random.random() < prob_new:
                    to = add_outside_node()
                else:
                    to = pick_existing(outside, i)
                    outside[to] += 1
            else:
                to = pick_existing(inside, i)
                if to is None:
                    continue
                inside[to] += 1
            G.add_edge(i, to)
            inside[i] += 1

    # Phase 2: every responding organization also names 2 key organizations.
    for i in range(n_responded):
        own = outside if i in outside else inside   # table holding i's own degree
        for _ in range(2):
            if random.random() < prob_out:
                to = None
                if random.random() >= prob_new:
                    to = pick_existing(outside, i)
                    if to is not None:
                        outside[to] += 1
                if to is None:
                    to = add_outside_node()
            else:
                to = pick_existing(inside, i)
                if to is None:
                    continue
                inside[to] += 1
            G.add_edge(i, to)
            own[i] += 1

    return eliminate_small_components(G)
                
            
# One third-type random graph per city, written next to the notebook as GraphML.
for city in list_cities:
    G = random_graph_3(cities_info, city, 0.8, 0.6)
    output_path = 'Random_Graphs_Third_Type_Corrected/Random_Graph_'+city+'.graphml'
    nx.write_graphml(G, output_path)

# Part 2: Helper functions

This part defines the helper functions used in the rest of the notebook: four kinds of mean (quadratic, arithmetic, geometric and harmonic) and the graph metrics we compute for an ecosystem network.

In [87]:
#-----------------------
# Averages
#-----------------------

def quadratic(L):
    """Quadratic mean (root mean square) of the values in L."""
    mean_of_squares = sum(value**2 for value in L) / len(L)
    return mean_of_squares ** 0.5
    
def arithmetic(L):
    """Arithmetic mean of the values in L."""
    total = sum(L)
    count = len(L)
    return total / count

def geometric(L):
    """Geometric mean: the len(L)-th root of the product of the values in L."""
    product = 1
    for value in L:
        product = product * value
    exponent = 1 / len(L)
    return product ** exponent

def harmonic(L):
    """Harmonic mean: number of values divided by the sum of their reciprocals."""
    return len(L) / sum(1/value for value in L)


# Lookup from the name of a mean to the function computing it.
mean_map = dict([
    ('quadratic', quadratic),
    ('arithmetic', arithmetic),
    ('geometric', geometric),
    ('harmonic', harmonic),
])

In [93]:
#-----------------------
# Graph metrics!
#-----------------------

def degree(G):
    """Mean of G.degree(node) over all nodes of G."""
    per_node = [G.degree(node) for node in G.nodes()]
    return np.mean(per_node)

def eccentricity(U):
    """Mean eccentricity over all nodes of U."""
    per_node = nx.eccentricity(U)
    return np.mean(list(per_node.values()))

def clustering(U):
    """Mean clustering coefficient over all nodes of U."""
    per_node = nx.clustering(U)
    return np.mean(list(per_node.values()))

def diameter(U):
    """Return nx.diameter(U)."""
    return nx.diameter(U)

def radius(U):
    """Return nx.radius(U)."""
    return nx.radius(U)

def avg_shortest_path_length(U):
    """Return nx.average_shortest_path_length(U)."""
    return nx.average_shortest_path_length(U)

def transitivity(U):
    """Return nx.transitivity(U)."""
    return nx.transitivity(U)

def global_efficiency(U):
    """Return nx.global_efficiency(U)."""
    return nx.global_efficiency(U)

def small_worldness(U):
    """Return the small-world coefficient sigma of U, computed with niter=1 and nrand=2."""
    return nx.algorithms.smallworld.sigma(U,niter=1,nrand=2)

def rich_club_coeffs(G):
    """
    Edge density of the subgraph induced by the nodes of degree > t, for t = 0, 1, 2, ...

    Returns a dict t -> 2*E_t / (N_t * (N_t - 1)), where N_t and E_t are the
    node and edge counts of that subgraph. The dict stops at the first t whose
    subgraph has at most one node (that t is not included).
    """
    coefficients = {}
    threshold = 0
    while True:
        rich_nodes = [node for node in G.nodes() if G.degree(node) > threshold]
        core = G.subgraph(rich_nodes)
        n_edges = len(core.edges())
        n_nodes = len(core.nodes())
        if n_nodes <= 1:
            return coefficients
        coefficients[threshold] = (2*n_edges)/(n_nodes*(n_nodes-1))
        threshold += 1

def max_rich_club(G):
    """
    Largest coefficient returned by rich_club_coeffs(G).

    Returns 0.0 when rich_club_coeffs yields no coefficient at all (fewer than
    two nodes with positive degree); indexing the empty dict used to raise KeyError.
    """
    rich_club = rich_club_coeffs(G)
    return max(rich_club.values(), default=0.0)

def core_ratio(G):
    """Fraction of the nodes of G that belong to its 2-core."""
    two_core = nx.k_core(G, k=2)
    return len(two_core.nodes()) / len(G.nodes())
        
def central_point_dominance(G):
    """
    Central point dominance of G: sum over nodes of (b_max - b_i) / (N - 1),
    where b_i is the betweenness centrality of node i, b_max the largest one
    and N the number of nodes.

    Returns 0.0 for graphs with fewer than two nodes, where the quantity would
    otherwise divide by zero (N = 1) or take the maximum of nothing (N = 0).
    """
    betweenness = nx.betweenness_centrality(G)
    N = len(betweenness)
    if N <= 1:
        return 0.0
    b_max = max(betweenness.values())
    return sum((b_max - b_i) / (N - 1) for b_i in betweenness.values())


def spectral_radius(G):
    """
    Largest absolute eigenvalue of the normalized Laplacian of G.

    The sparse Laplacian is densified with ``toarray()``; the ``.A`` shortcut is
    not available on current SciPy sparse arrays. The normalized Laplacian of an
    undirected graph is symmetric, so the symmetric eigen-solver is used, which
    returns real eigenvalues.
    """
    laplacian = nx.normalized_laplacian_matrix(G).toarray()
    eigenvalues = np.linalg.eigvalsh(laplacian)
    return np.max(np.abs(eigenvalues))

def modularity(G):
    """
    Quality score of the partition found by greedy modularity maximization.

    Despite the name, the value returned is the *performance* of that partition
    (as computed by networkx), not its modularity. ``quality.performance`` was
    removed in networkx 3, so ``partition_quality`` is used when available; it
    returns (coverage, performance).
    """
    communities = nx.algorithms.community.modularity_max.greedy_modularity_communities(G)
    quality = nx.algorithms.community.quality
    if hasattr(quality, 'partition_quality'):
        return quality.partition_quality(G, communities)[1]
    return quality.performance(G, communities)



        
# Metric name (as used for the column headers) -> function computing it on an
# undirected graph. The insertion order is the order in which `measures` evaluates them.
metric_function_map = dict([
    ('Degree', degree),
    ('Excentricidad', eccentricity),
    ('Clustering', clustering),
    ('Diámetro', diameter),
    ('Radio', radius),
    ('Camino más corto promedio', avg_shortest_path_length),
    ('Transitividad', transitivity),
    ('Eficiencia Global', global_efficiency),
    ('Rich Club Coefficient', max_rich_club),
    ('Core Ratio', core_ratio),
    ('Central Point Dominance', central_point_dominance),
    ('Spectral radius', spectral_radius),
    ('Modularidad', modularity),
])


def measures(G):
    """Evaluate every metric of metric_function_map on the undirected version of G.

    Returns a dict metric name -> value.
    """
    return {metric: function(nx.to_undirected(G))
            for metric, function in metric_function_map.items()}

def embed(D1, D2):
    """Append each value of D1 to the list stored under the same key in D2.

    D2 is modified in place and returned; a key of D1 missing from D2 raises KeyError.
    """
    for key in D1:
        D2[key].append(D1[key])
    return D2


# Part 3: Colaborativity functions

In [89]:
def colaborativity(G, set_comunication, set_preparation, set_resilience, meantype):
    '''
    Computes the colaborativity of a network G from three groups ("pillars") of metrics.

    Each pillar score is the arithmetic mean of its metrics, evaluated on the
    undirected version of G; the result is the chosen mean of the three scores.

    - G : networkx graph
    - set_comunication : names (keys of metric_function_map) of the metrics measuring the communication pillar
    - set_preparation : names of the metrics measuring the preparation-for-future-collabs pillar
    - set_resilience : names of the metrics measuring the resilience pillar
    - meantype : key of mean_map ('quadratic', 'arithmetic', 'geometric' or 'harmonic')
    '''
    U = nx.to_undirected(G)

    def pillar_score(metric_names):
        return arithmetic([metric_function_map[metric](U) for metric in metric_names])

    # The first pillar must come from the `set_comunication` argument; the body
    # used to read a differently spelled module-level name instead.
    comm = pillar_score(set_comunication)
    prep = pillar_score(set_preparation)
    resi = pillar_score(set_resilience)

    return mean_map[meantype]([comm, prep, resi])


# Example: colaborativity of one synthetic GDL network, one metric per pillar.
G = random_graph_3(cities_info, 'GDL', 0.8, 0.6)
set_communication, set_preparation, set_resilience = (
    {'Eficiencia Global'},
    {'Rich Club Coefficient'},
    {'Transitividad'},
)
meantype = 'arithmetic'

print(colaborativity(G, set_communication, set_preparation, set_resilience, meantype))

0.2381281213064996


In [90]:
def colaborativity_from_dataframe(G, index, df, set_comunication, set_preparation, set_resilience, meantype):
    '''
    Computes the colaborativity of a network from metric values that were already
    computed and stored in a table, instead of evaluating the metrics on a graph.

    Each pillar score is the arithmetic mean of the stored values of its metrics;
    the result is the chosen mean of the three scores.

    - G : unused; kept so that existing calls keep working
    - index : position of the network's row within each column of df
    - df : DataFrame or dict of lists, indexed by metric name
    - set_comunication : names of the metrics measuring the communication pillar
    - set_preparation : names of the metrics measuring the preparation-for-future-collabs pillar
    - set_resilience : names of the metrics measuring the resilience pillar
    - meantype : key of mean_map ('quadratic', 'arithmetic', 'geometric' or 'harmonic')
    '''
    def pillar_score(metric_names):
        return arithmetic([list(df[metric])[index] for metric in metric_names])

    # The first pillar must come from the `set_comunication` argument; the body
    # used to read a differently spelled module-level name instead.
    comm = pillar_score(set_comunication)
    prep = pillar_score(set_preparation)
    resi = pillar_score(set_resilience)

    return mean_map[meantype]([comm, prep, resi])

In [91]:
def colaborativity_original(datadict):
    '''
    Original collaborativity formula (project GED, summer 2020):

        (Degree + Clustering - Modularidad) / 3

    Input:
    - datadict : dictionary mapping metric name to its value for some real or synthetic graph
    Returns:
    - the value of the formula above
    '''
    average_degree = datadict['Degree']
    clustering_coefficient = datadict['Clustering']
    modularity_score = datadict['Modularidad']
    numerator = average_degree + clustering_coefficient - modularity_score
    return numerator / 3



def colaborativity_formula_1(datadict):
    '''
    First proposed collaborativity formula (project GED, summer 2020):

        Eficiencia Global + Transitividad + 1 - (Modularidad + Core Ratio) / 2

    Input:
    - datadict : dictionary mapping metric name to its value for some real or synthetic graph
    Returns:
    - the value of the formula above
    '''
    efficiency = datadict['Eficiencia Global']
    transitivity_score = datadict['Transitividad']
    modularity_score = datadict['Modularidad']
    core = datadict['Core Ratio']
    reward = efficiency + transitivity_score + 1
    penalty = (modularity_score + core) / 2
    return reward - penalty




def colaborativity_formula_2(datadict):
    '''
    Second proposed collaborativity formula (project GED, summer 2020):

        Eficiencia Global + Transitividad + 1 / Excentricidad - Modularidad

    Input:
    - datadict : dictionary mapping metric name to its value for some real or synthetic graph
    Returns:
    - the value of the formula above
    '''
    efficiency = datadict['Eficiencia Global']
    transitivity_score = datadict['Transitividad']
    mean_eccentricity = datadict['Excentricidad']
    modularity_score = datadict['Modularidad']
    return efficiency + transitivity_score + 1/mean_eccentricity - modularity_score




# Part 4: Monte Carlo Simulations

In [92]:
#----------------------
# Monte Carlo
#----------------------

# Metric table used for the "real" rows; rows are selected by the 'Ciudad' column.
tidydata = pd.read_csv('Olga/Tidy_DataFrame.csv')


def monte_carlo_for_city(infos, city, prob_out, prob_new, num_trials, infocolab1, infocolab2):
    """
    Compare the real network of `city` with `num_trials` synthetic ones.

    Each trial generates a graph with random_graph_3 and records a fixed set of
    metrics plus two colaborativity scores. The returned table has one row per
    trial ('G0', 'G1', ...), a 'Mean' row averaging the trials, and a
    '<city> real' row read from the module-level `tidydata` table.

    Parameters
    ----------
    infos, city, prob_out, prob_new :
        Passed through to random_graph_3.
    num_trials : int
        Number of synthetic graphs.
    infocolab1, infocolab2 : sequence
        [communication metrics, preparation metrics, resilience metrics, mean type],
        passed positionally to colaborativity_from_dataframe. Metric names must be
        columns of the table built here (trial rows) and of `tidydata` (real row).

    Returns
    -------
    pandas.DataFrame
    """
    # (output column, column of `tidydata`, function of the directed graph G and its undirected version U)
    metrics = [
        # NOTE(review): Degree is taken on the directed graph, as before, whereas
        # `measures` uses the undirected one; confirm which is intended.
        ('Degree', 'Degree', lambda G, U: degree(G)),
        ('Eccentricity', 'Excentricidad', lambda G, U: eccentricity(U)),
        ('Transitividad', 'Transitividad', lambda G, U: transitivity(U)),
        ('Eficiencia Global', 'Eficiencia Global', lambda G, U: global_efficiency(U)),
        ('Rich Club Coefficient', 'Rich Club Coefficient', lambda G, U: max_rich_club(U)),
        ('Core Ratio', 'Core Ratio', lambda G, U: core_ratio(U)),
        ('Central Point Dominance', 'Central Point Dominance', lambda G, U: central_point_dominance(U)),
        ('Modularidad', 'Modularidad', lambda G, U: modularity(U)),
    ]
    collab_specs = [('Colaboratividad 1', infocolab1), ('Colaboratividad 2', infocolab2)]
    columns = ['Name'] + [column for column, _, _ in metrics] + [column for column, _ in collab_specs]
    data = {column: [] for column in columns}

    # Synthetic trials. The colaborativity scores read the metric values that
    # were just appended, at position i of each column.
    G = None
    for i in range(num_trials):
        G = random_graph_3(infos, city, prob_out, prob_new)
        U = nx.to_undirected(G)
        data['Name'].append('G'+str(i))
        for column, _, compute in metrics:
            data[column].append(compute(G, U))
        for column, spec in collab_specs:
            data[column].append(colaborativity_from_dataframe(G, i, data, spec[0], spec[1], spec[2], spec[3]))

    # Mean over the trials, column by column.
    data['Name'].append('Mean')
    for column in columns[1:]:
        data[column].append(np.mean(data[column]))

    # Real data.
    citydata = tidydata.loc[tidydata['Ciudad'] == city]
    if G is None:
        # colaborativity_from_dataframe requires a graph argument even though the
        # scores come from the table; only build one when no trial produced it.
        G = random_graph_3(infos, city, prob_out, prob_new)
    data['Name'].append(city+' real')
    for column, tidy_column, _ in metrics:
        data[column].append(list(citydata[tidy_column])[0])
    for column, spec in collab_specs:
        data[column].append(colaborativity_from_dataframe(G, 0, citydata, spec[0], spec[1], spec[2], spec[3]))

    return pd.DataFrame(data, columns=columns)



# Each spec is [communication metrics, preparation metrics, resilience metrics, mean type];
# monte_carlo_for_city passes the four entries positionally to colaborativity_from_dataframe.
infocolab1=[{'Eficiencia Global'}, {'Rich Club Coefficient'}, {'Transitividad'}, 'arithmetic']
infocolab2=[{'Eficiencia Global'}, {'Core Ratio'}, {'Modularidad', 'Transitividad'}, 'quadratic']
#monte_carlo_for_city(cities_info, 'Montevideo', 0.8, 0.6, 20, infocolab1, infocolab2)




In [95]:
#----------------------------
# Monte Carlo Cooler Version
#----------------------------

# Same file as read in the previous Monte Carlo cell; reloaded here so this cell does not depend on it.
tidydata = pd.read_csv('Olga/Tidy_DataFrame.csv')

def monte_carlo_2(infos, city, prob_out, prob_new, num_trials):
    """
    Compare the real network of `city` with `num_trials` synthetic ones, using
    every metric registered in metric_function_map and the three closed-form
    colaborativity formulas.

    The returned table has one row per trial ('G0', 'G1', ...), a 'Mean' row
    averaging the trials, and a '<city> real' row read from the module-level
    `tidydata` table.

    Parameters
    ----------
    infos, city, prob_out, prob_new :
        Passed through to random_graph_3.
    num_trials : int
        Number of synthetic graphs.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    IndexError
        If `city` has no row in `tidydata`; raised before any trial is run.
    """
    formulas = [('Colaboratividad Original', colaborativity_original),
                ('Colaboratividad 1', colaborativity_formula_1),
                ('Colaboratividad 2', colaborativity_formula_2)]
    metric_names = list(metric_function_map.keys())
    columns = ['Name'] + metric_names + [name for name, _ in formulas]

    # Look the city up once, and fail before spending time on the simulations.
    city_rows = tidydata.loc[tidydata['Ciudad'] == city]
    if len(city_rows) == 0:
        raise IndexError("city %r not found in tidydata['Ciudad']" % (city,))
    citymeasures = {metric: list(city_rows[metric])[0] for metric in metric_names}

    data = {column: [] for column in columns}

    def add_row(name, row_measures):
        # Append one row: its metrics, its name and the three formula values.
        embed(row_measures, data)
        data['Name'].append(name)
        for column, formula in formulas:
            data[column].append(formula(row_measures))

    #Samples
    for i in range(num_trials):
        print(i)
        G = random_graph_3(infos,city,prob_out, prob_new)
        add_row('G'+str(i), measures(G))

    #Means
    for column in columns[1:]:
        data[column].append(np.mean(data[column]))
    data['Name'].append('Mean')

    #Real Data
    add_row(city+' real', citymeasures)

    return pd.DataFrame(data, columns=columns)



#let's try to test this function
#monte_carlo_2(cities_info, 'Montevideo', 0.8, 0.6, 2)




In [73]:
# 50 synthetic networks per city; one CSV of trials, mean and real values for each.
for city in cities_info:
    print(city)
    df = monte_carlo_2(cities_info, city, 0.8, 0.6, 50)
    output_path = 'Data_Cities_wrt_50Random/Trials_'+city+'.csv'
    df.to_csv(output_path, index=False)


AGS
CABA
CDMX
GDL
Pachuca
Madrid
Montevideo
Oaxaca
Sao Paulo
Santiago


In [44]:
# Ad-hoc check of one stored value. `city` is not assigned in this cell: it holds
# whatever value an earlier loop left behind.
list(tidydata.loc[tidydata['Ciudad'] == city]['Clustering'])

[0.1340367487179487]

In [23]:
# Inspect one saved table of trials.
# NOTE(review): the batch loop in this notebook writes to 'Data_Cities_wrt_50Random/',
# while this reads from 'Data_Cities_wrt_Random/' — confirm which run produced this file.
dataframe = pd.read_csv('Data_Cities_wrt_Random/Trials_Oaxaca.csv')
dataframe

Unnamed: 0,Name,Degree,Eccentricity,Transitividad,Eficiencia Global,Rich Club Coefficient,Core Ratio,Central Point Dominance,Modularidad,Colaboratividad 1,Colaboratividad 2
0,G0,3.313953,5.534884,0.045918,0.30369,0.392857,0.465116,0.165912,0.914049,0.247489,0.423849
1,G1,3.333333,5.615819,0.076225,0.302647,1.0,0.457627,0.252322,0.90357,0.459624,0.424664
2,G2,3.48538,5.549708,0.043716,0.312697,1.0,0.508772,0.15383,0.901823,0.452138,0.43975
3,G3,3.296089,5.502793,0.041096,0.306304,1.0,0.469274,0.253046,0.907539,0.449133,0.423878
4,G4,3.54491,5.646707,0.043847,0.312569,1.0,0.538922,0.155563,0.902027,0.452139,0.451592
5,G5,3.247191,5.786517,0.069638,0.303498,0.4,0.460674,0.258516,0.917476,0.257712,0.427368
6,G6,3.18232,5.944751,0.069195,0.288027,0.333333,0.458564,0.17585,0.914058,0.230185,0.42227
7,G7,3.213115,6.42623,0.058496,0.299679,1.0,0.486339,0.263079,0.924218,0.452725,0.435034
8,G8,3.0,6.70202,0.041475,0.275461,1.0,0.454545,0.200645,0.925088,0.438979,0.41475
9,G9,3.410405,5.583815,0.068713,0.306378,0.333333,0.473988,0.150561,0.892324,0.236141,0.427953
