In [17]:
import networkx as nx
import numpy as np
import joblib
import h5py

import matplotlib.pyplot as plt
from matplotlib import pylab

from src.interact_with_matrices import find_degree_for_word, open_object
from src.matrix_reduce import reduce_dimension_adjacency_matrix

from itertools import islice
import operator

# Utils

In [18]:
def take(n, iterable):
    """Return the first ``n`` items of ``iterable`` as a list.

    Fewer than ``n`` items are returned when ``iterable`` is exhausted first.
    """
    head = islice(iterable, n)
    return [item for item in head]

# Loading data

In [19]:
# Load the degree and adjacency matrices from the HDF5 files under data/.
# NOTE(review): `open_object` is a project helper (src.interact_with_matrices);
# presumably it returns the stored matrix as an in-memory array — confirm.
degree_matrix = open_object('data/degree_matrix.hdf5')
adjacency_matrix = open_object('data/adjacency_matrix.hdf5')

In [20]:
# Load `keys`, which the cells below index by graph node id to get a printable
# label for each node.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code — only load files from a trusted source.
with open('data/keys.pkl', 'rb') as infile:
    keys = joblib.load(infile)

In [21]:
# Optional step: reducing the adjacency and degree matrices.
#
# Disabled by default. The call is kept as `#` comments rather than inside a
# bare triple-quoted string: a string literal that ends a cell is a real
# expression, so Jupyter echoes its repr as the cell output.
#
# NOTE(review): when enabled, this rebinds `adjacency_matrix`, `degree_matrix`
# and `keys`; re-run the loading cells before running it a second time.
#
# adjacency_matrix, degree_matrix, keys = reduce_dimension_adjacency_matrix(
#     adjacency_matrix,
#     degree_matrix,
#     keys,
#     threshold=20
# )

In [22]:
# Build the (undirected) graph described by the adjacency matrix.
# `nx.from_numpy_matrix` was deprecated and then removed in NetworkX 3.0;
# `nx.from_numpy_array` is the supported replacement. `np.asarray` makes sure
# it receives a plain ndarray whatever array type `open_object` returned.
G = nx.from_numpy_array(np.asarray(adjacency_matrix))

# Centrality indicators

In [23]:
def print_k_best_closeness_centrality(G, keys, k=20):
    """Return the ``k`` nodes of ``G`` with the highest closeness centrality.

    Despite its name, this function prints nothing; the caller is expected to
    format the returned ranking.

    Parameters
    ----------
    G : graph handed to ``nx.closeness_centrality``.
    keys : not used by this function (accepted for a uniform signature).
    k : maximum number of entries to return.

    Returns
    -------
    list of ``(node, centrality)`` tuples, highest centrality first.
    """
    scores = nx.closeness_centrality(G)

    # Stable descending sort: nodes with equal scores keep their original order.
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)

    return take(k, ranked)

In [24]:
def print_k_best_betweenness_centrality(G, keys, k=20):
    """Return the ``k`` nodes of ``G`` with the highest betweenness centrality.

    Despite its name, this function prints nothing; the caller is expected to
    format the returned ranking.

    Parameters
    ----------
    G : graph handed to ``nx.betweenness_centrality``.
    keys : not used by this function (accepted for a uniform signature).
    k : maximum number of entries to return.

    Returns
    -------
    list of ``(node, centrality)`` tuples, highest centrality first.
    """
    scores = nx.betweenness_centrality(G)

    # Stable descending sort: nodes with equal scores keep their original order.
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)

    return take(k, ranked)

In [25]:
def print_k_best_eigenvector_centrality(G, keys, k=20):
    """Return the ``k`` nodes of ``G`` with the highest eigenvector centrality.

    Despite its name, this function prints nothing; the caller is expected to
    format the returned ranking.

    Parameters
    ----------
    G : graph handed to ``nx.eigenvector_centrality``.
    keys : not used by this function (accepted for a uniform signature).
    k : maximum number of entries to return.

    Returns
    -------
    list of ``(node, centrality)`` tuples, highest centrality first.
    """
    scores = nx.eigenvector_centrality(G)

    # Stable descending sort: nodes with equal scores keep their original order.
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)

    return take(k, ranked)

In [26]:
# Rank the nodes by closeness centrality and keep the 20 best.
close_cent_nodes = print_k_best_closeness_centrality(G, keys, k=20)

# Each entry is a (node, score) pair; `keys[node]` is the printable label.
for k, v in close_cent_nodes:
    print(f'({keys[k]}, {v})')

(n, 0.5470173755920197)
(p, 0.44839420874436065)
(v, 0.4433897356017553)
(see, 0.42814271193706077)
(form, 0.4192546559161498)
(anything, 0.4100030424470604)
(state, 0.40803600305157817)
(containing, 0.40529732258274415)
(letter, 0.40418506085410344)
(related, 0.4020858176295316)
(english, 0.40187174292295386)
(part, 0.40152435728175445)
(causing, 0.398161994537802)
(person, 0.3980045352728912)
(body, 0.3968014725430612)
(power, 0.3951396943567796)
(used, 0.394159582843362)
(character, 0.3938767731498471)
(manner, 0.3935943689975431)
(end, 0.3926474289069709)


In [27]:
# Rank the nodes by betweenness centrality and keep the 20 best.
between_cent_nodes = print_k_best_betweenness_centrality(G, keys, k=20)

# Each entry is a (node, score) pair; `keys[node]` is the printable label.
for k, v in between_cent_nodes:
    print(f'({keys[k]}, {v})')

(n, 0.4469560240882281)
(p, 0.16171930574967508)
(v, 0.11177089694052826)
(act, 0.03508281739800355)
(see, 0.03018025077134157)
(state, 0.01976216903880438)
(manner, 0.017470388635059505)
(used, 0.015319738272230527)
(alt, 0.011388456486478672)
(person, 0.010443728896795349)
(part, 0.010386266003625385)
(body, 0.009688271079798605)
(place, 0.009579428846419717)
(anything, 0.00854725770098287)
(form, 0.00846051310237848)
(also, 0.008306346572046725)
(power, 0.007077232129002096)
(especially, 0.006702493591832771)
(time, 0.006021392439364339)
(hence, 0.005545552686699481)


In [28]:
# Rank the nodes by eigenvector centrality and keep the 20 best.
eigenvec_cent_nodes = print_k_best_eigenvector_centrality(G, keys, k=20)

# Each entry is a (node, score) pair; `keys[node]` is the printable label.
for k, v in eigenvec_cent_nodes:
    print(f'({keys[k]}, {v})')

(n, 0.6289562071552716)
(p, 0.18732729509249527)
(state, 0.06446402009869591)
(v, 0.06159723517257305)
(act, 0.058099085074507964)
(see, 0.05564488527419995)
(anything, 0.04820806583066752)
(used, 0.04668566752855766)
(part, 0.046255163734152685)
(person, 0.0459451278063671)
(body, 0.04232165000766931)
(also, 0.03990977039380512)
(form, 0.03776278561572398)
(especially, 0.034455654082746874)
(thing, 0.03425462590914799)
(place, 0.03080390431477959)
(substance, 0.030444475656371005)
(power, 0.03029032713400835)
(end, 0.03026241886037142)
(something, 0.029835947492966263)


# Combining the centrality indicators

In [30]:
# Plain intersection: keep the nodes that appear in the top list of all three
# centrality measures.

# Node ids (first element of every (node, score) pair) for each measure.
a = {pair[0] for pair in close_cent_nodes}
b = {pair[0] for pair in between_cent_nodes}
c = {pair[0] for pair in eigenvec_cent_nodes}

# Intersect b with c first, then with a.
d = a & (b & c)

# Sets are unordered, so the labels are printed in arbitrary order.
print([keys[i] for i in d])

['body', 'p', 'state', 'form', 'person', 'anything', 'used', 'part', 'power', 'see', 'v', 'n']
