In [45]:
import networkx as nx
import numpy as np
import joblib
import h5py

import matplotlib.pyplot as plt
from matplotlib import pylab

from src.interact_with_matrices import find_degree_for_word, open_object
from src.matrix_reduce import reduce_dimension_adjacency_matrix

from itertools import islice
import operator

# Utils

In [46]:
def take(n, iterable):
    """Return the first ``n`` items of ``iterable`` as a list.

    If the iterable yields fewer than ``n`` items, all of them are returned.
    """
    head = islice(iterable, n)
    return [item for item in head]

# Loading data

In [47]:
# Load the degree and adjacency matrices through the project helper `open_object`.
# NOTE(review): presumably array-like objects read from the HDF5 files — confirm
# against `src.interact_with_matrices.open_object`.
degree_matrix = open_object('data/degree_matrix.hdf5')
adjacency_matrix = open_object('data/adjacency_matrix.hdf5')

In [48]:
# Load `keys` from its pickle file.
# NOTE(review): presumably the labels (words) matching the matrix rows — confirm.
# joblib.load unpickles its input, which can execute arbitrary code: only point
# this at files you trust.
with open('data/keys.pkl', 'rb') as infile:
    keys = joblib.load(infile)

In [49]:
# Reducing the adjacency and degree matrices
#
# The three inputs are rebound to the helper's outputs, so this cell is not
# idempotent: re-running it without first re-running the loading cells passes
# the already-reduced objects back in.
# NOTE(review): the meaning of `threshold=20` is defined by
# `reduce_dimension_adjacency_matrix` — confirm before tuning it.

adjacency_matrix, degree_matrix, keys = reduce_dimension_adjacency_matrix(
    adjacency_matrix,
    degree_matrix,
    keys, 
    threshold=20
)

In [50]:
# Build the graph from the reduced adjacency matrix; node i corresponds to
# row/column i of the matrix.
# `nx.from_numpy_matrix` was removed in NetworkX 3.0, so use the supported
# `nx.from_numpy_array` constructor (available since NetworkX 2.0).
G = nx.from_numpy_array(adjacency_matrix)

# Centrality indicators

In [15]:
def print_k_best_closeness_centrality(G, keys, k=20):
    """Return the ``k`` nodes of ``G`` with the highest closeness centrality.

    Despite the ``print_`` prefix, nothing is printed: the ranking is returned
    and the caller decides how to display it.

    Args:
        G: graph handed to ``nx.closeness_centrality``.
        keys: accepted for signature compatibility; not used by this function.
        k: maximum number of entries to return.

    Returns:
        A list of at most ``k`` ``(node, score)`` tuples in descending score
        order. Nodes with equal scores keep the order in which NetworkX
        reported them, because the sort is stable.
    """
    scores = nx.closeness_centrality(G)
    # Rank the (node, score) pairs directly; rebuilding a dict just to iterate
    # it again is unnecessary and would depend on dict insertion ordering.
    ranked = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)
    return take(k, ranked)

In [25]:
def print_k_best_betweenness_centrality(G, keys, k=20):
    """Return the ``k`` nodes of ``G`` with the highest betweenness centrality.

    Despite the ``print_`` prefix, nothing is printed: the ranking is returned
    and the caller decides how to display it.

    Args:
        G: graph handed to ``nx.betweenness_centrality``.
        keys: accepted for signature compatibility; not used by this function.
        k: maximum number of entries to return.

    Returns:
        A list of at most ``k`` ``(node, score)`` tuples in descending score
        order. Nodes with equal scores keep the order in which NetworkX
        reported them, because the sort is stable.
    """
    scores = nx.betweenness_centrality(G)
    # Rank the (node, score) pairs directly; rebuilding a dict just to iterate
    # it again is unnecessary and would depend on dict insertion ordering.
    ranked = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)
    return take(k, ranked)

In [26]:
def print_k_best_eigenvector_centrality(G, keys, k=20):
    """Return the ``k`` nodes of ``G`` with the highest eigenvector centrality.

    Despite the ``print_`` prefix, nothing is printed: the ranking is returned
    and the caller decides how to display it.

    Args:
        G: graph handed to ``nx.eigenvector_centrality``.
        keys: accepted for signature compatibility; not used by this function.
        k: maximum number of entries to return.

    Returns:
        A list of at most ``k`` ``(node, score)`` tuples in descending score
        order. Nodes with equal scores keep the order in which NetworkX
        reported them, because the sort is stable.
    """
    scores = nx.eigenvector_centrality(G)
    # Rank the (node, score) pairs directly; rebuilding a dict just to iterate
    # it again is unnecessary and would depend on dict insertion ordering.
    ranked = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)
    return take(k, ranked)

In [29]:
# Top-20 nodes by closeness centrality, shown one "(node, score)" pair per line.
close_cent_nodes = print_k_best_closeness_centrality(G, keys, k=20)

for k, v in close_cent_nodes:
    print(f'({k}, {v})')

(176, 0.584870848708487)
(10, 0.46824224519940916)
(299, 0.4574314574314574)
(23, 0.4502840909090909)
(201, 0.44647887323943664)
(76, 0.44089012517385257)
(199, 0.4366391184573003)
(271, 0.4360385144429161)
(96, 0.4348422496570645)
(247, 0.43424657534246575)
(129, 0.4324693042291951)
(287, 0.4318801089918256)
(4, 0.42953929539295393)
(53, 0.4249329758713137)
(121, 0.4249329758713137)
(316, 0.4249329758713137)
(278, 0.42379679144385024)
(283, 0.42379679144385024)
(147, 0.4204244031830239)
(9, 0.4198675496688742)


In [30]:
# Top-20 nodes by betweenness centrality, shown one "(node, score)" pair per line.
between_cent_nodes = print_k_best_betweenness_centrality(G, keys, k=20)

for k, v in between_cent_nodes:
    print(f'({k}, {v})')

(176, 0.38883204130611837)
(299, 0.0882971637840703)
(199, 0.08430351507241142)
(10, 0.038395317382919826)
(2, 0.030042601590659114)
(23, 0.029507060233284352)
(201, 0.02934940756678783)
(76, 0.02514412921452543)
(271, 0.02275671317492574)
(278, 0.01786401888073828)
(160, 0.017241648598005112)
(297, 0.016831261769043017)
(129, 0.016399495007260245)
(316, 0.015736464340184338)
(121, 0.0155637854514391)
(4, 0.015251712874460598)
(161, 0.015170642069287327)
(206, 0.014430435136124542)
(209, 0.013821878498383715)
(283, 0.013755783871937046)


In [31]:
# Top-20 nodes by eigenvector centrality, shown one "(node, score)" pair per line.
eigenvec_cent_nodes = print_k_best_eigenvector_centrality(G, keys, k=20)

for k, v in eigenvec_cent_nodes:
    print(f'({k}, {v})')

(176, 0.511774587695703)
(10, 0.16557993159249368)
(199, 0.14766693334015732)
(299, 0.14148444039039879)
(201, 0.128584901721062)
(23, 0.12402405902657021)
(271, 0.12048305522505039)
(76, 0.1129770337124415)
(4, 0.10798059583010292)
(287, 0.10226664454878999)
(2, 0.09850237924510949)
(9, 0.09542842213115661)
(96, 0.09492990274437757)
(283, 0.09453385732407907)
(247, 0.09443559619264895)
(161, 0.09357816989766249)
(206, 0.09275015183981494)
(316, 0.09153638774554693)
(215, 0.09094001303238099)
(121, 0.08937839065976765)


# Combining the centrality indicators

In [32]:
# Doing a mere intersection: keep only the nodes that appear in all three
# top-k rankings (the scores themselves are ignored here).

a = {entry[0] for entry in close_cent_nodes}
b = {entry[0] for entry in between_cent_nodes}
c = {entry[0] for entry in eigenvec_cent_nodes}

d = a.intersection(b.intersection(c))

print(d)

{4, 199, 201, 10, 299, 76, 271, 176, 23, 121, 283, 316}


In [None]:
# TODO: average all the indicators and rank the keys by that combined score