# Analyse network centrality

This notebook reads the graph data generated by the co-occurrence reading and calculates centrality using different measures. The results are then stored in CSV files in two formats: per chapter, and per name for a whole book.

## User variables

In [1]:
# Directory that holds the .gexf graph files (trailing slash required,
# because file names are appended to it by plain string concatenation)
path = "c:/Frederik/Afstuderen/Results/test07/"

# Names of the networkx centrality functions to run.
# Other names this notebook has special handling or support for:
# 'betweenness_centrality', 'closeness_centrality', 'degree_centrality'
centrality_measures = [
    'eigenvector_centrality',
    'katz_centrality',
]

# Output directory for the exported files: an "analysis" folder below `path`
exportpath = path + "analysis/"

## Import and open files

In [2]:
import networkx as nx
import os
import operator
import re
import csv
import collections
import community

# Make sure the export directory exists. exist_ok=True replaces the
# separate exists() check, so there is no window between checking and
# creating, and re-running the cell is harmless.
os.makedirs(exportpath, exist_ok=True)

graphs = []       # [graph name, graph] for every imported .gexf file
chapterlist = []  # chapter numbers parsed from the imported file names
bookgraph = []    # [graph name, graph] of the whole-book graph; stays empty if none is found

for file in os.listdir(path):
    if not file.endswith(".gexf"):
        continue

    # Only files named Bible.gexf or Bible_<name>.gexf are imported;
    # group(2) is the <name> part (None for a plain Bible.gexf).
    match = re.search(r'Bible(_([\w]+))?\.gexf', file)
    if match is None:
        continue

    graphs.append([match.group(2), nx.read_gexf(path+file, relabel=True)])
    print("Imported {}".format(file))

    chaptermatch = re.search(r'_(\d+)', file)
    # A name made up of letters/underscores only (no digits) marks a book graph.
    bookmatch = re.search(r'Bible_([A-Za-z_]+)\.gexf', file)

    if bookmatch is not None:
        print('Using {} as bookgraph, so adding community stuff there'.format(match.group(0)))
        # The file is read a second time, so the book graph is a separate
        # object from the one stored in `graphs`. If several files qualify,
        # the last one listed wins.
        bookgraph = [match.group(2), nx.read_gexf(path+file, relabel=True)]
    elif chaptermatch is not None:
        chapterlist.append(int(chaptermatch.group(1)))

Imported Bible_Genesis_13.gexf


## Calculate centrality

In [3]:
# centralities[measure][graph name] -> {node: score}; a missing entry
# defaults to 0, which marks "could not be calculated" for later cells.
centralities = collections.defaultdict(lambda: collections.defaultdict(lambda:0))
# centralities_sorted[measure] -> list of [graph name, [(node, score), ...]]
# with the nodes ordered by descending score.
centralities_sorted = collections.defaultdict(lambda:[])

for centrality_measure_name in centrality_measures:
    # Look the function up by name on the networkx module.
    centrality_measure = getattr(nx, centrality_measure_name)

    # The keyword that carries the edge weight differs per measure, and it
    # does not depend on the graph, so it is chosen once per measure.
    if centrality_measure_name == 'closeness_centrality':
        measure_kwargs = {'distance': "weight"}
    elif centrality_measure_name == 'degree_centrality':
        measure_kwargs = {}
    else:
        measure_kwargs = {'weight': "weight"}

    for graphname, graph in graphs:
        try:
            centrality = centrality_measure(graph, **measure_kwargs)

            centralities[centrality_measure_name][graphname] = centrality
            centrality_sorted = sorted(centrality.items(), key=operator.itemgetter(1), reverse=True)
            centralities_sorted[centrality_measure_name].append([graphname, centrality_sorted])
        except nx.NetworkXException:
            # NetworkXException is the base class of NetworkXError and, in
            # newer NetworkX releases, of PowerIterationFailedConvergence.
            # Catching only NetworkXError would let a convergence failure
            # abort the whole cell on those releases.
            print('Failed to converge on graph {}'.format(graphname))
        except ZeroDivisionError:
            print("Division by zero on graph {}".format(graphname))

    print("Calculated {}".format(centrality_measure_name))

Calculated eigenvector_centrality
Calculated katz_centrality


## Calculate network properties

In [4]:
# Table of per-graph properties; the first row is the header.
network_properties = []
header = ['graph', 'density', 'avg_clust']
header.extend('centralization_' + name for name in centrality_measures)
network_properties.append(header)

for graphname, graph in graphs:
    network_properties_row = [
        graphname,
        # density of this graph
        round(nx.density(graph), 5),
        # average clustering coefficient of this graph
        round(nx.average_clustering(graph), 5),
    ]

    # Freeman centralization for every centrality measure for this graph.
    # Exactly one cell is appended per measure, even when the value cannot
    # be computed, so every row stays aligned with the header. None is
    # written by csv.writer as an empty field.
    N = graph.order()
    for centrality_measure_name in centrality_measures:
        node_scores = centralities[centrality_measure_name].get(graphname)
        if not node_scores:
            # No usable centrality for this graph: it was never stored
            # (calculation failed earlier), is the 0 default, or is empty.
            network_properties_row.append(None)
            continue

        all_cen = list(node_scores.values())
        try:
            this_centralization = round(float((N*max(all_cen) - sum(all_cen)))/(N-1)**2,5)
        except ZeroDivisionError:
            # (N-1)**2 is zero for a graph with exactly one node
            print('Failed to calculate Freeman centralization for graph {}'.format(graphname))
            this_centralization = None

        network_properties_row.append(this_centralization)

    network_properties.append(network_properties_row)

print("Calculated graph density, average clustering coefficient and Freeman centralization")

Calculated graph density, average clustering coefficient and Freeman centralization


## Export

### GEXF

In [5]:
## add 'community' attribute to book graph and export GEXF
# Skipped entirely when no book graph was found (bookgraph is an empty list).
if bookgraph:
    graphname, graph = bookgraph

    # Partition the nodes into communities: {node: community id}
    part = community.best_partition(graph)

    # Pass name/values by keyword: NetworkX 1.x takes (G, name, values)
    # while 2.x takes (G, values, name), so positional arguments only work
    # on one of the two. The keyword names are the same in both.
    nx.set_node_attributes(graph, name='community', values=part)

    # Graph built by community.induced_graph from the partition
    graphcommunity = community.induced_graph(part, graph)

    nx.write_gexf(graph, "{}Bible_{}_comattr.gexf".format(exportpath, graphname))
    nx.write_gexf(graphcommunity, "{}Bible_{}_comnodes.gexf".format(exportpath, graphname))

### CSV

In [6]:
for centrality_measure_name, centrality_dict in centralities_sorted.items():
    # centrality_dict is a list of [graph name, [(node name, score), ...]]

    # Create graph-table: one row per (graph, name, rounded score)
    with open("{}pergraph_{}.csv".format(exportpath, centrality_measure_name), mode="w", newline="\n") as table:
        writer = csv.writer(table)
        for graph_label, ranked_nodes in centrality_dict:
            for node_name, score in ranked_nodes:
                writer.writerow((graph_label, node_name, round(score, 5)))

        print("Succesfully exported pergraph_{}.csv".format(centrality_measure_name))

    # Create graph over time-table: one row per name, one column per chapter.
    # Without any chapter graphs there are no columns, so the table is
    # skipped explicitly instead of relying on a swallowed ValueError
    # from max() on an empty list.
    if not chapterlist:
        continue
    last_chapter = max(chapterlist)

    # Row layout: [name, chapter 1, ..., chapter last_chapter, total].
    # Rows are kept in the order in which names are first seen; the dict
    # gives direct access to a name's row instead of rescanning every graph
    # for every name.
    names_centralities = []
    row_of_name = {}

    for graph_label, ranked_nodes in centrality_dict:
        # The graph name is None for a file without a name suffix; treat it
        # like any other name without a chapter number instead of letting
        # re.search raise TypeError.
        chapter_match = re.search(r'_(\d+)', graph_label or '')
        chapternr = int(chapter_match.group(1)) if chapter_match else None

        for node_name, score in ranked_nodes:
            row = row_of_name.get(node_name)
            if row is None:
                # Every name gets a row, including names that only occur in
                # graphs without a chapter number.
                row = [node_name] + [0] * last_chapter
                row_of_name[node_name] = row
                names_centralities.append(row)

            # Scores of 0.001 or less are left at 0 in the table.
            if chapternr is not None and score > 0.001:
                row[chapternr] = round(score, 5)

    # Final column: sum of the chapter values of the row.
    for row in names_centralities:
        row.append(round(sum(row[1:]), 5))

    with open("{}pername_{}.csv".format(exportpath, centrality_measure_name), mode="w", newline="\n") as table:
        csv.writer(table).writerows(names_centralities)

        print("Succesfully exported pername_{}.csv".format(centrality_measure_name))
    
# Write the network properties table (header row first) to its own CSV file
properties_csv = "{}network_properties.csv".format(exportpath)
with open(properties_csv, mode="w", newline="\n") as table:
    csv.writer(table).writerows(network_properties)

    print("Succesfully exported network_properties.csv")

Succesfully exported pergraph_katz_centrality.csv
Succesfully exported pername_katz_centrality.csv
Succesfully exported pergraph_eigenvector_centrality.csv
Succesfully exported pername_eigenvector_centrality.csv
Succesfully exported network_properties.csv
