In [1]:
import networkx as nx
import numpy as np
import joblib
import h5py

import matplotlib.pyplot as plt
from matplotlib import pylab

from src.interact_with_matrices import find_degree_for_word, open_object
from src.matrix_reduce import reduce_dimension_adjacency_matrix

from itertools import islice
import operator

# Utils

In [2]:
def take(n, iterable):
    """Return the first `n` items of `iterable` as a list.

    If `iterable` yields fewer than `n` items, all of them are returned.
    """
    head = islice(iterable, n)
    return [item for item in head]

# Loading data

In [3]:
# Load the two matrices with the project helper `open_object`
# (imported from src.interact_with_matrices).
# `adjacency_matrix` is later turned into the graph `G`.
degree_matrix = open_object('data/degree_matrix.hdf5')
adjacency_matrix = open_object('data/adjacency_matrix.hdf5')

In [4]:
# Load `keys`; later cells index it by graph node id (`keys[k]`) to get the
# label that is printed for each node.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load `keys.pkl` files that come from a trusted source.
with open('data/keys.pkl', 'rb') as infile:
    keys = joblib.load(infile)

In [5]:
# Reducing the adjacency and degree matrices (currently DISABLED).
#
# The call below is wrapped in a bare string literal, so it is never executed:
# the matrices and `keys` loaded above are used unreduced. Because the string
# is the last expression of the cell, its repr is echoed as the cell output.
# To enable the reduction, remove the surrounding triple quotes.

"""
adjacency_matrix, degree_matrix, keys = reduce_dimension_adjacency_matrix(
    adjacency_matrix,
    degree_matrix,
    keys, 
    threshold=20
)
"""

'\nadjacency_matrix, degree_matrix, keys = reduce_dimension_adjacency_matrix(\n    adjacency_matrix,\n    degree_matrix,\n    keys, \n    threshold=20\n)\n'

In [6]:
# Build the graph from the adjacency matrix.
# `nx.from_numpy_matrix` was deprecated in NetworkX 2.x and removed in 3.0, so
# the original call raises AttributeError on a current install.
# `nx.from_numpy_array` is the documented replacement.
# NOTE(review): assumes `adjacency_matrix` is a square 2-D NumPy array -- confirm
# against what `open_object` returns.
G = nx.from_numpy_array(adjacency_matrix)

# Centrality indicators

In [7]:
def print_k_best_closeness_centrality(G,
                                      keys,
                                      k=20):
    """Return the `k` nodes of `G` with the highest closeness centrality.

    Despite its name, this function prints nothing; the caller is expected to
    format the returned ranking.

    Parameters
    ----------
    G : graph passed to `nx.closeness_centrality`.
    keys : not used by this function; kept in the signature for callers.
    k : number of top-ranked entries to return (default 20).

    Returns
    -------
    list of `(node, score)` tuples sorted by descending score. Nodes with equal
    scores keep the order in which `nx.closeness_centrality` reported them.
    """
    scores = nx.closeness_centrality(G)
    # Node ids are unique, so sorting the (node, score) pairs directly gives
    # the ranking; the sort is stable, so ties keep their original order.
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    return list(islice(ranked, k))

In [8]:
def print_k_best_betweenness_centrality(G,
                                        keys,
                                        k=20):
    """Return the `k` nodes of `G` with the highest betweenness centrality.

    Despite its name, this function prints nothing; the caller is expected to
    format the returned ranking.

    Parameters
    ----------
    G : graph passed to `nx.betweenness_centrality`.
    keys : not used by this function; kept in the signature for callers.
    k : number of top-ranked entries to return (default 20).

    Returns
    -------
    list of `(node, score)` tuples sorted by descending score. Nodes with equal
    scores keep the order in which `nx.betweenness_centrality` reported them.
    """
    scores = nx.betweenness_centrality(G)
    # Node ids are unique, so sorting the (node, score) pairs directly gives
    # the ranking; the sort is stable, so ties keep their original order.
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    return list(islice(ranked, k))

In [9]:
def print_k_best_eigenvector_centrality(G,
                                        keys,
                                        k=20):
    """Return the `k` nodes of `G` with the highest eigenvector centrality.

    Despite its name, this function prints nothing; the caller is expected to
    format the returned ranking.

    Parameters
    ----------
    G : graph passed to `nx.eigenvector_centrality`.
    keys : not used by this function; kept in the signature for callers.
    k : number of top-ranked entries to return (default 20).

    Returns
    -------
    list of `(node, score)` tuples sorted by descending score. Nodes with equal
    scores keep the order in which `nx.eigenvector_centrality` reported them.
    """
    scores = nx.eigenvector_centrality(G)
    # Node ids are unique, so sorting the (node, score) pairs directly gives
    # the ranking; the sort is stable, so ties keep their original order.
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    return list(islice(ranked, k))

In [10]:
# Top 40 nodes by closeness centrality, as a list of (node, score) tuples.
# Despite the `print_` prefix, the helper only returns the ranking.
close_cent_nodes = print_k_best_closeness_centrality(
    G,
    keys,
    k=40
)

# Print one "(label, score)" line per node: `k` is the node id, used to look
# up its label in `keys`; `v` is the score, shown with two decimals.
for k, v in close_cent_nodes:
    print(f'({keys[k]}, {v:.2f})')

(act, 0.37)
(use, 0.37)
(anything, 0.36)
(state, 0.36)
(part, 0.36)
(person, 0.35)
(place, 0.35)
(thing, 0.35)
(body, 0.35)
(make, 0.34)
(see, 0.34)
(power, 0.34)
(action, 0.34)
(form, 0.34)
(end, 0.34)
(purpose, 0.33)
(from, 0.33)
(animal, 0.33)
(cause, 0.33)
(something, 0.33)
(life, 0.33)
(give, 0.33)
(house, 0.33)
(motion, 0.33)
(object, 0.33)
(system, 0.33)
(substance, 0.33)
(kind, 0.33)
(statute, 0.33)
(hence, 0.33)
(time, 0.32)
(physical, 0.32)
(machine, 0.32)
(composition, 0.32)
(natural, 0.32)
(consist, 0.32)
(authority, 0.32)
(man, 0.32)
(passage, 0.32)
(manner, 0.32)


In [11]:
# Top 40 nodes by betweenness centrality, as a list of (node, score) tuples.
# Despite the `print_` prefix, the helper only returns the ranking.
between_cent_nodes = print_k_best_betweenness_centrality(
    G,
    keys,
    k=40
)

# Print one "(label, score)" line per node: `k` is the node id, used to look
# up its label in `keys`; `v` is the score, shown with two decimals.
for k, v in between_cent_nodes:
    print(f'({keys[k]}, {v:.2f})')

(act, 0.08)
(use, 0.05)
(see, 0.05)
(state, 0.04)
(part, 0.04)
(person, 0.03)
(place, 0.03)
(make, 0.03)
(anything, 0.03)
(body, 0.02)
(thing, 0.02)
(manner, 0.02)
(alt, 0.02)
(form, 0.02)
(time, 0.01)
(give, 0.01)
(animal, 0.01)
(power, 0.01)
(hence, 0.01)
(end, 0.01)
(small, 0.01)
(something, 0.01)
(house, 0.01)
(cause, 0.01)
(purpose, 0.01)
(quality, 0.01)
(kind, 0.01)
(man, 0.01)
(good, 0.01)
(action, 0.01)
(life, 0.01)
(consist, 0.01)
(name, 0.01)
(word, 0.01)
(large, 0.01)
(substance, 0.01)
(sense, 0.01)
(object, 0.01)
(authority, 0.01)
(order, 0.01)


In [12]:
# Top 40 nodes by eigenvector centrality, as a list of (node, score) tuples.
# Despite the `print_` prefix, the helper only returns the ranking.
eigenvec_cent_nodes = print_k_best_eigenvector_centrality(
    G,
    keys,
    k=40
)

# Print one "(label, score)" line per node: `k` is the node id, used to look
# up its label in `keys`; `v` is the score, shown with two decimals.
for k, v in eigenvec_cent_nodes:
    print(f'({keys[k]}, {v:.2f})')

(act, 0.35)
(state, 0.23)
(use, 0.16)
(part, 0.12)
(anything, 0.12)
(person, 0.12)
(body, 0.12)
(thing, 0.11)
(place, 0.10)
(power, 0.09)
(make, 0.08)
(form, 0.08)
(action, 0.07)
(process, 0.07)
(give, 0.07)
(quality, 0.07)
(condition, 0.06)
(law, 0.06)
(motion, 0.06)
(animal, 0.06)
(authority, 0.06)
(cause, 0.06)
(purpose, 0.06)
(see, 0.06)
(substance, 0.05)
(end, 0.05)
(life, 0.05)
(time, 0.05)
(object, 0.05)
(system, 0.05)
(change, 0.05)
(manner, 0.05)
(knowledge, 0.05)
(something, 0.05)
(from, 0.05)
(machine, 0.05)
(physical, 0.05)
(mind, 0.05)
(consist, 0.04)
(strength, 0.04)


# Combining the centrality indicators

In [13]:
# Plain set intersection: keep only the nodes that appear in all three
# top-40 rankings (closeness, betweenness, eigenvector). Scores are ignored.

# Node ids (first element of each (node, score) tuple) of every ranking.
a = set([x[0] for x in close_cent_nodes])
b = set([x[0] for x in between_cent_nodes])
c = set([x[0] for x in eigenvec_cent_nodes])

# Nodes common to the three rankings.
d = a.intersection(b.intersection(c))

# Labels of the common nodes. The order is the iteration order of the set `d`,
# not a ranking by any of the centrality scores.
print([keys[i] for i in d])

['consist', 'something', 'life', 'purpose', 'make', 'cause', 'see', 'anything', 'object', 'manner', 'body', 'end', 'person', 'thing', 'act', 'action', 'authority', 'form', 'substance', 'use', 'part', 'power', 'state', 'give', 'place', 'time', 'animal']
