In [5]:
import numpy as np 
import pandas as pd
import networkx as nx
import random

def random_graph(prob_resp,prob_nuevo):
    """
    prob_resp float    define la probabilidad de que al tomar un nodo este haya sido un respondiente de la encuesta
    prob_nuevo float   define la probabilidad de que un respondiente tenga conexiones con nodos nuevos

    Esta funcion genera graficas aleatorias basadas en el metodo con el que se crearon las networks de emprendimiento
    """
    G=nx.Graph()
    queue=["0"]
    max_index=0
    max_respondents=random.randint(20,30)             #Indicador que nos dice cuantos respondientes puede haber
    respondents=0
    while len(queue)!=0:
        if (random.random()<prob_resp and respondents<max_respondents) or queue[0]=="0":      #Crear conexiones para un respondiente
            respondents+=1
            n=random.randint(6,21)
            new_n=0
            old_n=0
            for i in range(n):                                              #Calcular cuantas conexiones son con nodos nuevos y cuantos con ya existentesz
                if random.random()<prob_nuevo:
                    new_n+=1
                else:
                    old_n+=1
            for i in range(new_n):
                new_node=str(i+max_index+1)
                queue.append(new_node)
                G.add_node(new_node)
                G.add_edge(queue[0],new_node)
            for i in range(old_n):
                node=str(random.randint(0,int(queue[0]))-1)
                G.add_edge(queue[0],node)
            max_index+=new_n
            queue.pop(0)
        else:                                                                #Crear conexiones para los no respondientes
            if random.random()<0.25:
                n=random.randint(1,6)
                for i in range(n):
                    node=str(random.randint(0,int(queue[0]))-1)
                    G.add_edge(queue[0],node)
            queue.pop(0)
    if len(G.nodes())>150 and len(G.nodes())<400:
        return G
    else:
        return random_graph(prob_resp,prob_nuevo)

for i in range(25):
    G = random_graph(0.25,0.55+i/1000)
    nx.write_graphml(G,'Random_Graphs/Random_Graph_'+str(11+i)+'.graphml')
for i in range(25):
    G = random_graph(0.25,0.55-i/1000)
    nx.write_graphml(G,'Random_Graphs/Random_Graph_'+str(36+i)+'.graphml')

In [24]:
def random_graph_2(n_evals, prob_out):
    '''
    Generator of a random graph, given that n
    Given a specific number of respondents of the questionnaire, and that each of them could have provided 25 responses maximum
    
    Input:
     - n_evals  : number of evaluators responding questionnaire
     - prob_out : probability that a mentionned collaboration is outside of the network of evaluators
    '''
    G=nx.DiGraph()
    setedges=set()
    outside = [n_evals]
    last_index = n_evals
    for i in range(n_evals):        #add all evaluators to the graph
        G.add_node(i)
        
    # for each evaluator we determine how many collaborations are going to be reported, 
    # from 0 to 25 according to a normal distribution
    sample_num_evaluations = np.random.normal(loc=12.3, scale=4, size=n_evals)
    n_evaluated_list = [int(x) for x in sample_num_evaluations]
    
    for i in range(len(n_evaluated_list)):
        for j in range(n_evaluated_list[i]):
            if random.random()<prob_out:
                #the edge we are going to add is not within the evaluators' list
                to = random.choice(outside)
                while (i,to) in setedges:
                    to = random.choice(outside)
                G.add_edge(i,to)
                setedges.add((i,to))
                if to == outside[-1]:
                    last_index += 1
                    outside.append(last_index)
            else:
                to= random.choice(range(n_evals))
                while (i,to) in setedges:
                    to = random.choice(range(n_evals))
                G.add_edge(i,to)
                setedges.add((i,to))
                
    #print(n_evaluated_list)
    #print('')
    #print(G.nodes())
    #print('')
    #print(G.edges())
    #print('')
    #print([len(list(G.neighbors(x))) for x in G.nodes()])
    return G      
#random_graph_2(30,0.9)   


list_cities = ['Aguascalientes', 'Buenos Aires', 'Ciudad de México', 'Guadalajara', 'Hidalgo',
                  'Madrid', 'Montevideo', 'Oaxaca', 'Sao Paulo', 'Santiago de Chile']
num_evaluators = {'Aguascalientes':19, 'Buenos Aires':31, 'Ciudad de México':36, 'Guadalajara':32, 'Hidalgo':19,
                  'Madrid':37, 'Montevideo':48, 'Oaxaca':36, 'Sao Paulo':28, 'Santiago de Chile':25}
for city in list_cities:
    G= random_graph_2(num_evaluators[city],0.8)
    nx.write_graphml(G,'Random_Graphs_Second_Type/Random_Graph_'+city+'.graphml')


In [115]:
muestra = pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - Muestra.csv')             


ags_info=pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - AGS.csv')             
caba_info=pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - CABA.csv')             
cdmx_info=pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - CDMX.csv')             
gdl_info=pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - GDL.csv')             
hgo_info=pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - HGO.csv')             
mad_info=pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - MAD.csv')             
mtv_info=pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - MVD.csv')             
oax_info=pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - OAX.csv')             
sao_info=pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - SAO.csv')             
scl_info=pd.read_csv('Data_answers_evaluators/Evaluadores ecosistemas.xlsx - SCL.csv')  

ags_info.rename(columns={'Unnamed: 1':'Colabs'}, inplace=True)
gdl_info.rename(columns={'Unnamed: 1':'Colabs'}, inplace=True)
hgo_info.rename(columns={'Unnamed: 1':'Colabs'}, inplace=True)
oax_info.rename(columns={'Unnamed: 1':'Colabs'}, inplace=True)


list_cities = ['AGS', 'CABA', 'CDMX', 'GDL', 'Pachuca',
                  'Madrid', 'Montevideo', 'Oaxaca', 'Sao Paulo', 'Santiago']
cities_info = {'AGS': ags_info, 
              'CABA' : caba_info,
              'CDMX' : cdmx_info,
              'GDL' : gdl_info,
              'Pachuca' : hgo_info,
              'Madrid' : mad_info,
              'Montevideo' : mtv_info,
              'Oaxaca' : oax_info,
              'Sao Paulo' : sao_info,
              'Santiago' : scl_info}

In [116]:
#(muestra['GDL'][0])
muestra
#gdl_info['Colabs'][31]
#sum(list(gdl_info['Colabs']))

Unnamed: 0.1,Unnamed: 0,Madrid,Madrid SG,Madrid Typeform,CDMX,Santiago,CABA,Sao Paulo,Montevideo,Oaxaca,GDL,Pachuca,AGS
0,Muestra,38,16,24,51,30,36,34,59,36,32,21,19
1,Nodes,239,120,156,299,195,228,216,198,149,187,125,96
2,Edges,453,166,205,547,385,500,364,767,326,474,254,233
3,Promedio colabs. por participante,12.13,10.13,8.63,12.33,13.04,13.48,10.38,13.4,6.39,10.81,9.52,8.21
4,% Muestra,15.90%,13.33%,15.38%,17.06%,15.38%,15.79%,15.74%,29.80%,24.16%,17.11%,16.80%,19.79%


In [117]:
list_cities_2 = ['AGS', 'CABA', 'CDMX', 'GDL', 'Pachuca',
                  'Madrid', 'Montevideo', 'Oaxaca', 'Sao Paulo', 'Santiago']
cities_info_2 = {'AGS': ags_info, 
              'CABA' : caba_info,
              'CDMX' : cdmx_info,
              'GDL' : gdl_info,
              'Pachuca' : hgo_info,
              'Madrid' : mad_info,
              'Montevideo' : mtv_info,
              'Oaxaca' : oax_info,
              'Sao Paulo' : sao_info,
              'Santiago' : scl_info}

for city in list_cities_2:
    print(city)
    M = int(muestra[city][0])
    L = len(list(cities_info_2[city]['Colabs']))
    S = sum(list(cities_info_2[city]['Colabs']))
    
    print( M )
    print( L )
    print( S )
    print( M*2 + S )
    print('')

AGS
19
19
156
194

CABA
36
31
418
490

CDMX
51
36
444
546

GDL
32
32
346
410

Pachuca
21
21
200
242

Madrid
38
37
369
445

Montevideo
59
48
643
761

Oaxaca
36
36
230
302

Sao Paulo
34
29
301
369

Santiago
30
25
326
386



In [118]:
def random_graph_3(infos, city, prob_out):
    '''
    With this function, we are simulating the responses we obtained from each of the surveyed ecosystems/cities. 
    The information we have is, for each evaluator, how many 
    '''
    G=nx.DiGraph()
    
    num_answered = int(muestra[city][0])
    num_evaluators = len(list(infos[city]['Colabs']))
    num_nodes = int(muestra[city][1])
    num_edges = int(muestra[city][2])
    
    last_index = num_evaluators
    outside = [num_evaluators]
    setedges = set()
    
    for i in range(num_nodes):                          #add as many nodes as there could be added
        G.add_node(i)
    for i in range(num_evaluators):                      #for each node, as many colabs as indicated
        print(i, city) 
        deg_i = infos[city]['Colabs'][i]
        for j in range(deg_i):
            
            
            if random.random()<prob_out:                 #edge to add is not in the evaluators' list
                to = random.choice(outside)
                while (i,to) in setedges:
                    to = random.choice(outside)
                G.add_edge(i,to)
                setedges.add((i,to))
                if to == outside[-1]:
                    last_index += 1
                    outside.append(last_index)
                    
            else:
                to= random.choice(range(num_evaluators))
                while (i,to) in setedges:
                    to = random.choice(range(num_evaluators))
                G.add_edge(i,to)
                setedges.add((i,to))
                
    remaining_nodes = list(range(num_evaluators+1, num_answered))
    remaining_edges = num_edges - len(setedges)
    #remaining_nodes=list(range(num_evaluators+1, num_nodes))
    #nodes_only_profile = random.sample(remaining_nodes, remaining_edges//2)
    
    for i in range(num_answered):
        for j in range(2):

            if random.random()<prob_out:                 #edge to add is not in the evaluators' list
                to = random.choice(outside)
                while (i,to) in setedges:
                    to = random.choice(outside)
                G.add_edge(i,to)
                setedges.add((i,to))
                if to == outside[-1]:
                    last_index += 1
                    outside.append(last_index)
                    
            else:
                to= random.choice(range(num_evaluators))
                while (i,to) in setedges:
                    to = random.choice(range(num_evaluators))
                G.add_edge(i,to)
                setedges.add((i,to))        
    return G
                
            
for city in list_cities:
    G= random_graph_3(cities_info,city,0.8)
    nx.write_graphml(G,'Random_Graphs_Third_Type/Random_Graph_'+city+'.graphml')

0 AGS
1 AGS
2 AGS
3 AGS
4 AGS
5 AGS
6 AGS
7 AGS
8 AGS
9 AGS
10 AGS
11 AGS
12 AGS
13 AGS
14 AGS
15 AGS
16 AGS
17 AGS
18 AGS
0 CABA
1 CABA
2 CABA
3 CABA
4 CABA
5 CABA
6 CABA
7 CABA
8 CABA
9 CABA
10 CABA
11 CABA
12 CABA
13 CABA
14 CABA
15 CABA
16 CABA
17 CABA
18 CABA
19 CABA
20 CABA
21 CABA
22 CABA
23 CABA
24 CABA
25 CABA
26 CABA
27 CABA
28 CABA
29 CABA
30 CABA
0 CDMX
1 CDMX
2 CDMX
3 CDMX
4 CDMX
5 CDMX
6 CDMX
7 CDMX
8 CDMX
9 CDMX
10 CDMX
11 CDMX
12 CDMX
13 CDMX
14 CDMX
15 CDMX
16 CDMX
17 CDMX
18 CDMX
19 CDMX
20 CDMX
21 CDMX
22 CDMX
23 CDMX
24 CDMX
25 CDMX
26 CDMX
27 CDMX
28 CDMX
29 CDMX
30 CDMX
31 CDMX
32 CDMX
33 CDMX
34 CDMX
35 CDMX
0 GDL
1 GDL
2 GDL
3 GDL
4 GDL
5 GDL
6 GDL
7 GDL
8 GDL
9 GDL
10 GDL
11 GDL
12 GDL
13 GDL
14 GDL
15 GDL
16 GDL
17 GDL
18 GDL
19 GDL
20 GDL
21 GDL
22 GDL
23 GDL
24 GDL
25 GDL
26 GDL
27 GDL
28 GDL
29 GDL
30 GDL
31 GDL
0 Pachuca
1 Pachuca
2 Pachuca
3 Pachuca
4 Pachuca
5 Pachuca
6 Pachuca
7 Pachuca
8 Pachuca
9 Pachuca
10 Pachuca
11 Pachuca
12 Pachuca
13 Pachuca
14 