In [1]:
import os
import networkx as nx
import numpy as np     
import pandas as pd
from help_functions_misc import *

In [2]:
# Folder (relative to the current working directory) that all CSV files in this
# notebook are written to and read from. The trailing slash is needed because
# file names are appended with plain string concatenation.
path = f"{os.getcwd()}/Data/"

In [3]:
### Simulation with Holme-Kim graphs
def run_experiment_HK(min_n, max_n, step_n, nr_epochs, p_blue_values, p_values):
    """Run the majority-illusion simulation on Holme-Kim powerlaw cluster graphs.

    For every number of nodes ``n`` in ``range(min_n, max_n + 1, step_n)``, every
    ``m`` in ``[int(n/10), int(n/2), int(9*n/10)]``, every ``p`` in ``p_values``
    and every ``p_blue`` in ``p_blue_values``, ``nr_epochs`` graphs are generated
    with ``nx.powerlaw_cluster_graph(n, m, p)``. Each graph is coloured with
    ``color_randomly`` / ``color_edges`` and evaluated with ``homopily`` and
    ``check_illusion_and_MSE`` (helpers imported from ``help_functions_misc``);
    in addition several structural measures of the graph are recorded.

    Parameters
    ----------
    min_n, max_n, step_n : int
        Define the graph sizes ``range(min_n, max_n + 1, step_n)``.
    nr_epochs : int
        Number of graphs generated per parameter combination.
    p_blue_values : iterable
        Values handed to ``color_randomly`` as ``p_blue``.
    p_values : iterable
        Values for ``p``, the probability of adding a triangle after adding a
        random edge.

    Returns
    -------
    pandas.DataFrame
        One row per generated graph. The same frame is written to
        ``path + 'minn{min_n}_maxn{max_n}_step{step_n}_epochs{nr_epochs}_HK.csv'``
        with ``;`` as separator.

    Notes
    -----
    * ``m`` (number of random edges added for each new node) is derived from
      ``n`` inside the loop because it has to satisfy ``1 <= m <= n``.
      NOTE(review): for ``n < 10`` the smallest value ``int(n/10)`` is 0, which
      ``nx.powerlaw_cluster_graph`` is documented to reject - confirm that such
      sizes are never requested.
    * ``avg_path_length`` is the string ``'NA'`` for disconnected graphs and
      ``EV_centr`` is ``'NA'`` when eigenvector centrality cannot be computed.
    * Relies on the module-level variable ``path`` for the output folder.
    """
    graph_type = 'HK'
    filename = f'minn{min_n}_maxn{max_n}_step{step_n}_epochs{nr_epochs}_{graph_type}.csv'

    # Make sure the output folder exists *before* the (potentially very long)
    # simulation starts, so a missing folder cannot cost us the results.
    out_file = path + filename
    out_dir = os.path.dirname(out_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    columns = ['n', 'm', 'p', 'p_blue', 'proportion_blue_global', 'mi', 'wmi',
               'nr_nodes_str_ill', 'nr_nodes_weak_ill', 'MSE', 'MSE_no_illusion',
               'MSE_any_illusion', 'MSE_strict_illusion',
               'sum_squared_error_no_illusion', 'sum_squared_error_any_illusion',
               'sum_squared_error_strict_illusion', 'deg_assort_coef', 'deg_seq',
               'avg_path_length', 'CC', 'EV_centr', 'close_centr', 'between_centr',
               'frac_largest_comp', 'probability_mixed_edge',
               'actual_fraction_mixed_edges']

    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end; concatenating a one-row frame per graph copies the whole table
    # on every iteration.
    records = []

    for n in range(min_n, max_n + 1, step_n):
        print(f'n: {n}')
        # m is 10, 50 and 90 percent of n (truncated to an integer).
        m_values = [int(x) for x in (n/10, n/2, 9*n/10)]
        for m in m_values:
            for p in p_values:
                for p_blue in p_blue_values:
                    for _ in range(nr_epochs):
                        G = nx.powerlaw_cluster_graph(n, m, p)
                        (nr_red_nodes, nr_blue_nodes) = color_randomly(G, p_blue)
                        (nr_edges, nr_mixed_edges) = color_edges(G)
                        (probability_mixed_edge, actual_fraction_mixed_edges) = homopily(
                            n, nr_red_nodes, nr_blue_nodes, nr_edges, nr_mixed_edges)
                        (has_strict_illusion, has_weak_illusion, nr_strict_illusion,
                         nr_any_illusion, proportion_blue_global, MSE, MSE_no_illusion,
                         MSE_any_illusion, MSE_strict_illusion,
                         sum_squared_error_no_illusion, sum_squared_error_any_illusion,
                         sum_squared_error_strict_illusion) = check_illusion_and_MSE(G)

                        ###### Other measures of G:
                        deg_assort_coef = nx.degree_assortativity_coefficient(G)
                        deg_seq = [degree for _node, degree in G.degree()]
                        # The average shortest path length is only defined for
                        # connected graphs (networkx raises otherwise).
                        if nx.is_connected(G):
                            avg_path_length = nx.average_shortest_path_length(G)
                        else:
                            avg_path_length = 'NA'
                        CC = nx.transitivity(G)  # Clustering coefficient

                        # Centrality measures (one value per node):
                        try:
                            EV_centr = list(nx.eigenvector_centrality(G, max_iter=500).values())
                        except Exception:
                            # Best effort: e.g. the power iteration may not
                            # converge. `Exception` (instead of a bare except)
                            # keeps KeyboardInterrupt/SystemExit working.
                            print('EV_centrality failed. Use NA instead.')
                            EV_centr = 'NA'

                        close_centr = list(nx.closeness_centrality(G).values())
                        between_centr = list(nx.betweenness_centrality(G).values())

                        # Fraction of nodes in the largest connected component:
                        largest_component = max(nx.connected_components(G), key=len)
                        frac_largest_comp = len(largest_component) / G.number_of_nodes()

                        records.append({
                            'n': n,
                            'm': m,
                            'p': p,
                            'p_blue': p_blue,
                            'proportion_blue_global': proportion_blue_global,
                            'mi': has_strict_illusion,
                            'wmi': has_weak_illusion,
                            'nr_nodes_str_ill': nr_strict_illusion,
                            'nr_nodes_weak_ill': nr_any_illusion,
                            'MSE': MSE,
                            'MSE_no_illusion': MSE_no_illusion,
                            'MSE_any_illusion': MSE_any_illusion,
                            'MSE_strict_illusion': MSE_strict_illusion,
                            'sum_squared_error_no_illusion': sum_squared_error_no_illusion,
                            'sum_squared_error_any_illusion': sum_squared_error_any_illusion,
                            'sum_squared_error_strict_illusion': sum_squared_error_strict_illusion,
                            'deg_assort_coef': deg_assort_coef,
                            'deg_seq': deg_seq,
                            'avg_path_length': avg_path_length,
                            'CC': CC,
                            'EV_centr': EV_centr,
                            'close_centr': close_centr,
                            'between_centr': between_centr,
                            'frac_largest_comp': frac_largest_comp,
                            'probability_mixed_edge': probability_mixed_edge,
                            'actual_fraction_mixed_edges': actual_fraction_mixed_edges,
                        })

    # `columns=` fixes the column order and yields a correctly labelled empty
    # frame when no graph was generated at all.
    data = pd.DataFrame(records, columns=columns)
    data.to_csv(out_file, header=True, sep=';')
    return data

In [4]:
# Test run: the full parameter grid, but only two graphs per combination.
min_n, max_n, step_n = 20, 100, 20
nr_epochs = 2

# np.arange accumulates floating-point noise (e.g. 0.30000000000000004);
# rounding to two decimals gives clean grid values such as 0.3.
p_blue_values = np.around(np.arange(0.1, 0.5+0.01, 0.1), 2)
p_values = np.around(np.arange(0, 1+0.01, 0.2), 2)

HK_data = run_experiment_HK(
    min_n=min_n,
    max_n=max_n,
    step_n=step_n,
    nr_epochs=nr_epochs,
    p_blue_values=p_blue_values,
    p_values=p_values,
)

n: 20
n: 40
n: 60
n: 80
n: 100


In [4]:
# Even sized graphs: n = 20, 40, ..., 100 with 1000 graphs per parameter combination.
min_n, max_n, step_n = 20, 100, 20
nr_epochs = 1000

# Round to two decimals to remove the floating-point noise left by np.arange.
p_blue_values = np.around(np.arange(0.1, 0.5+0.01, 0.1), 2)
p_values = np.around(np.arange(0, 1+0.01, 0.2), 2)

HK_data = run_experiment_HK(
    min_n=min_n,
    max_n=max_n,
    step_n=step_n,
    nr_epochs=nr_epochs,
    p_blue_values=p_blue_values,
    p_values=p_values,
)

n: 20
n: 40
n: 60
n: 80
n: 100


In [None]:
# Odd sized graphs: n = 21, 41, ..., 101 with 1000 graphs per parameter combination.
min_n, max_n, step_n = 21, 101, 20
nr_epochs = 1000

# Round to two decimals to remove the floating-point noise left by np.arange.
p_blue_values = np.around(np.arange(0.1, 0.5+0.01, 0.1), 2)
p_values = np.around(np.arange(0, 1+0.01, 0.2), 2)

HK_data = run_experiment_HK(
    min_n=min_n,
    max_n=max_n,
    step_n=step_n,
    nr_epochs=nr_epochs,
    p_blue_values=p_blue_values,
    p_values=p_values,
)

n: 21
n: 41
n: 61
n: 81
n: 101


In [6]:
# Even sized graphs: read the raw simulation output, let
# calculate_aggregated_values add the global centrality values, and save the
# result under a shorter file name.
filenameHK_even_read = 'minn20_maxn100_step20_epochs1000_HK.csv'
filenameHK_even_write = 'HK_data_even.csv'
HK_data_even = calculate_aggregated_values(
    pd.read_csv(path + filenameHK_even_read, index_col=0, sep=';')
)
HK_data_even.to_csv(path + filenameHK_even_write, sep=';', header=True)

# Odd sized graphs: identical procedure.
filenameHK_odd_read = 'minn21_maxn101_step20_epochs1000_HK.csv'
filenameHK_odd_write = 'HK_data_odd.csv'
HK_data_odd = calculate_aggregated_values(
    pd.read_csv(path + filenameHK_odd_read, index_col=0, sep=';')
)
HK_data_odd.to_csv(path + filenameHK_odd_write, sep=';', header=True)

In [None]:
# The Data folder now contains 'HK_data_even.csv' and 'HK_data_odd.csv', which are processed further in Data_preprocessing.