In [1]:
import numpy as np
import pandas as pd
import networkx as nx

from polygraphs.analysis import Processor

class StructureProcessor(Processor):
    def __init__(self, path):
        super().__init__(path)
        # Add the columns when processor is initialised
        self.add(self.length(), self.edges(), self.clustering(), self.diameter())

    def length(self):
        """Determine the length (=size) of the graph for all sims"""
        length_list = [len(graph) for graph in self.graphs]
        self.dataframe["length"] = length_list  
        
    def edges(self):
        """Use NetworkX to count number of edges in graph for all sims"""
        edges_list = [nx.number_of_edges(graph) for graph in self.graphs]
        self.dataframe["edges"] = edges_list

    def clustering(self):
        """Use NetworkX to calculate the average clustering coefficient in graph for all sims"""
        clustering_list = [nx.average_clustering(graph) for graph in self.graphs]
        self.dataframe["avg clustering"] = clustering_list

    def density(self):
        """Use NetworkX to calculate the density of the graph for all sims"""
        density_list = [nx.density(graph) for graph in self.graphs]
        self.dataframe["density"] = density_list
    
    def diameter(self):
        """Use NetworkX to calculate the diameter of the graph for all sims"""
        diameter_list = [nx.diameter(graph) for graph in self.graphs]
        self.dataframe["diameter"] = diameter_list       

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
x = StructureProcessor("~/polygraphs-cache/results/2024-06-11/")
x.get()

Unnamed: 0,bin_file_path,hd5_file_path,config_json_path,trials,network_size,network_kind,op,epsilon,steps,duration,action,undefined,converged,polarized,uid,length,edges,avg clustering,diameter
0,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,complete,BalaGoyalOp,0.01,49.0,0.063611,B,False,True,False,bf72c08ae0e84b5785b90c2ee324114a,16,120,1.0,1
1,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,complete,BalaGoyalOp,0.01,73.0,0.053102,B,False,True,False,bf72c08ae0e84b5785b90c2ee324114a,16,120,1.0,1
2,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,complete,BalaGoyalOp,0.01,107.0,0.077257,B,False,True,False,bf72c08ae0e84b5785b90c2ee324114a,16,120,1.0,1
3,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,complete,BalaGoyalOp,0.01,64.0,0.048188,B,False,True,False,bf72c08ae0e84b5785b90c2ee324114a,16,120,1.0,1
4,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,complete,BalaGoyalOp,0.01,93.0,0.067554,B,False,True,False,bf72c08ae0e84b5785b90c2ee324114a,16,120,1.0,1


In [4]:
x.get().shape

(5, 19)

In [5]:
x.get().count()

bin_file_path       5
hd5_file_path       5
config_json_path    5
trials              5
network_size        5
network_kind        5
op                  5
epsilon             5
steps               5
duration            5
action              5
undefined           5
converged           5
polarized           5
uid                 5
length              5
edges               5
avg clustering      5
diameter            5
dtype: int64

In [6]:
x.get().to_csv('structure_index.csv')

In [11]:
import numpy as np
import pandas as pd
import networkx as nx

from polygraphs.analysis import Processor

class StructureProcessor(Processor):
    def __init__(self, path):
        super().__init__(path)
        # Add the columns when processor is initialized
        self.add(self.length(), self.edges(), self.clustering(), self.diameter(),
                 self.shortest_path_stats(), self.betweenness(), self.closeness(),
                 self.degree(), self.eigenvector(), self.local_clustering())

    def length(self):
        """Determine the length (=size) of the graph for all sims"""
        length_list = [len(graph) for graph in self.graphs]
        self.dataframe["length"] = length_list  

    def edges(self):
        """Use NetworkX to count the number of edges in the graph for all sims"""
        edges_list = [nx.number_of_edges(graph) for graph in self.graphs]
        self.dataframe["edges"] = edges_list

    def clustering(self):
        """Use NetworkX to calculate the average clustering coefficient in the graph for all sims"""
        clustering_list = [nx.average_clustering(graph) for graph in self.graphs]
        self.dataframe["avg_clustering"] = clustering_list

    def density(self):
        """Use NetworkX to calculate the density of the graph for all sims"""
        density_list = [nx.density(graph) for graph in self.graphs]
        self.dataframe["density"] = density_list

    def diameter(self):
        """Use NetworkX to calculate the diameter of the graph for all sims"""
        diameter_list = []
        for graph in self.graphs:
            if nx.is_connected(graph):
                diameter_list.append(nx.diameter(graph))
            else:
                diameter_list.append(float('inf'))  # or some other placeholder value
        self.dataframe["diameter"] = diameter_list       

    def shortest_path_stats(self):
        """Calculate shortest path statistics for all sims"""
        avg_shortest_path_list = []
        max_shortest_path_list = []
        for graph in self.graphs:
            if nx.is_connected(graph):
                lengths = dict(nx.all_pairs_shortest_path_length(graph))
                avg_shortest_path = np.mean([length for target_dict in lengths.values() for length in target_dict.values()])
                max_shortest_path = max([length for target_dict in lengths.values() for length in target_dict.values()])
            else:
                components = list(nx.connected_components(graph))
                avg_lengths = []
                max_lengths = []
                for component in components:
                    subgraph = graph.subgraph(component)
                    lengths = dict(nx.all_pairs_shortest_path_length(subgraph))
                    avg_lengths.extend([length for target_dict in lengths.values() for length in target_dict.values()])
                    max_lengths.append(max([length for target_dict in lengths.values() for length in target_dict.values()]))
                avg_shortest_path = np.mean(avg_lengths)
                max_shortest_path = max(max_lengths)
            avg_shortest_path_list.append(avg_shortest_path)
            max_shortest_path_list.append(max_shortest_path)
        self.dataframe["avg_shortest_path"] = avg_shortest_path_list
        self.dataframe["max_shortest_path"] = max_shortest_path_list

    def betweenness(self):
        """Calculate betweenness centrality for all sims"""
        betweenness_list = []
        for graph in self.graphs:
            if nx.is_connected(graph):
                betweenness = np.mean(list(nx.betweenness_centrality(graph).values()))
            else:
                components = list(nx.connected_components(graph))
                betweenness = np.mean([bc for component in components for bc in nx.betweenness_centrality(graph.subgraph(component)).values()])
            betweenness_list.append(betweenness)
        self.dataframe["avg_betweenness"] = betweenness_list

    def closeness(self):
        """Calculate closeness centrality for all sims"""
        closeness_list = []
        for graph in self.graphs:
            if nx.is_connected(graph):
                closeness = np.mean(list(nx.closeness_centrality(graph).values()))
            else:
                components = list(nx.connected_components(graph))
                closeness = np.mean([cc for component in components for cc in nx.closeness_centrality(graph.subgraph(component)).values()])
            closeness_list.append(closeness)
        self.dataframe["avg_closeness"] = closeness_list

    def degree(self):
        """Calculate degree centrality for all sims"""
        degree_list = [np.mean(list(nx.degree_centrality(graph).values())) for graph in self.graphs]
        self.dataframe["avg_degree"] = degree_list

    def eigenvector(self):
        """Calculate eigenvector centrality for all sims"""
        eigenvector_list = []
        for graph in self.graphs:
            try:
                eigenvector = np.mean(list(nx.eigenvector_centrality_numpy(graph).values()))
            except nx.NetworkXException:  # Handle non-convergence
                eigenvector = float('nan')
            eigenvector_list.append(eigenvector)
        self.dataframe["avg_eigenvector"] = eigenvector_list

    def local_clustering(self):
        """Calculate the average local clustering coefficient for all sims"""
        local_clustering_list = [np.mean(list(nx.clustering(graph).values())) for graph in self.graphs]
        self.dataframe["avg_local_clustering"] = local_clustering_list

x = StructureProcessor("~/polygraphs-cache/results/2024-06-11/")
x.get()


Unnamed: 0,bin_file_path,hd5_file_path,config_json_path,trials,network_size,network_kind,op,epsilon,steps,duration,...,edges,avg_clustering,diameter,avg_shortest_path,max_shortest_path,avg_betweenness,avg_closeness,avg_degree,avg_eigenvector,avg_local_clustering
0,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,snap,BalaGoyalOp,0.01,100.0,2.311805,...,13838,0.109399,inf,3.053545,8,0.001083,0.335061,0.007679,0.013221,0.109399
1,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,snap,BalaGoyalOp,0.01,100.0,2.275685,...,13838,0.109399,inf,3.053545,8,0.001083,0.335061,0.007679,0.013221,0.109399
2,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,snap,BalaGoyalOp,0.01,100.0,2.150561,...,13838,0.109399,inf,3.053545,8,0.001083,0.335061,0.007679,0.013221,0.109399
3,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,snap,BalaGoyalOp,0.01,100.0,2.215644,...,13838,0.109399,inf,3.053545,8,0.001083,0.335061,0.007679,0.013221,0.109399
4,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,snap,BalaGoyalOp,0.01,100.0,2.317939,...,13838,0.109399,inf,3.053545,8,0.001083,0.335061,0.007679,0.013221,0.109399
5,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,snap,BalaGoyalOp,0.01,5.0,0.095188,...,13838,0.109399,inf,3.053545,8,0.001083,0.335061,0.007679,0.013221,0.109399
6,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,snap,BalaGoyalOp,0.01,5.0,0.100212,...,13838,0.109399,inf,3.053545,8,0.001083,0.335061,0.007679,0.013221,0.109399
7,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,snap,BalaGoyalOp,0.01,5.0,0.086888,...,13838,0.109399,inf,3.053545,8,0.001083,0.335061,0.007679,0.013221,0.109399
8,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,snap,BalaGoyalOp,0.01,5.0,0.091391,...,13838,0.109399,inf,3.053545,8,0.001083,0.335061,0.007679,0.013221,0.109399
9,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,/Users/prudhvivuda/polygraphs-cache/results/20...,10.0,16.0,snap,BalaGoyalOp,0.01,5.0,0.087474,...,13838,0.109399,inf,3.053545,8,0.001083,0.335061,0.007679,0.013221,0.109399


In [12]:
x.get().to_csv('structure_index.csv')