In [24]:
import pandas as pd
import networkx as nx
from networkx.algorithms import community
from IPython.display import display

In [25]:
def load_network(path='thrones-network.csv'):
    """Load the thrones network from an edge-list CSV.

    The CSV is expected to provide the columns ``Node A``, ``Node B`` and
    ``Weight``; each row becomes one edge carrying its ``Weight`` attribute.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the edge-list CSV. Defaults to ``'thrones-network.csv'``.

    Returns
    -------
    networkx.Graph
        A mutable, undirected graph built from the edge list.
    """
    tn_edges = pd.read_csv(path)
    # from_pandas_edgelist already builds an undirected nx.Graph by default.
    # Passing it through nx.to_undirected() would hand back a frozen,
    # read-only view that later pruning steps cannot modify.
    return nx.from_pandas_edgelist(tn_edges, source='Node A', target='Node B', edge_attr='Weight')

In [26]:
def preprocess(tn, min_weight=7):  # Q1
    """Prune weak edges, then drop every node left without any edge.

    The graph is modified in place; the same object is returned for
    convenience.

    Parameters
    ----------
    tn : networkx.Graph
        Mutable graph whose edges all carry a ``Weight`` attribute.
    min_weight : number, optional
        Edges with ``Weight`` strictly below this value are removed.
        Defaults to 7.

    Returns
    -------
    networkx.Graph
        ``tn`` itself, after pruning.

    Raises
    ------
    KeyError
        If an edge has no ``Weight`` attribute.
    """
    # Collect the edges first: the graph must not change while it is iterated.
    weak_edges = [
        (u, v)
        for u, v, attrs in tn.edges(data=True)
        if attrs['Weight'] < min_weight
    ]
    tn.remove_edges_from(weak_edges)
    # Materialise the isolates before removing them, for the same reason.
    isolated = list(nx.isolates(tn))
    tn.remove_nodes_from(isolated)
    return tn

In [27]:
def graph_attr(tn):
    """Print structural statistics of graph ``tn``.

    Reports, in order: per-node clustering, average clustering, density,
    diameter and average shortest path length. Nothing is returned.

    NOTE(review): diameter and average shortest path length presumably
    require a connected graph -- confirm for inputs that may be split
    into several components.
    """
    # Every statistic is evaluated before anything is printed, so a failure
    # in any of them leaves no partial report behind.
    report = [
        ('clustering for all: ', nx.clustering(tn)),
        ('average clustering: ', nx.average_clustering(tn)),
        ('density: ', nx.density(tn)),
        ('diameter: ', nx.diameter(tn)),
        ('avg path length: ', nx.average_shortest_path_length(tn)),
    ]
    for label, value in report:
        print(label, value)

In [18]:
# Build the working graph: load the edge list, copy it into a fresh nx.Graph,
# prune it with preprocess(), then print its structural statistics.
f = load_network()
a = nx.Graph(f)  # separate graph object, so pruning does not touch `f`
tn = preprocess(a)  # preprocess() returns its (modified) argument, so `tn` is `a`
graph_attr(tn)

clustering for all:  {'Aemon': 1.0, 'Grenn': 0.6666666666666666, 'Samwell': 0.2545454545454545, 'Aerys': 1.0, 'Jaime': 0.20588235294117646, 'Robert': 0.6, 'Tyrion': 0.17647058823529413, 'Tywin': 0.37777777777777777, 'Alliser': 1.0, 'Mance': 0.3111111111111111, 'Oberyn': 1.0, 'Arya': 0.2564102564102564, 'Anguy': 0, 'Beric': 0.6666666666666666, 'Bran': 0.3181818181818182, 'Brynden': 1.0, 'Cersei': 0.4642857142857143, 'Gendry': 1.0, 'Gregor': 0.10714285714285714, 'Joffrey': 0.3333333333333333, 'Jon': 0.14210526315789473, 'Rickon': 0.7333333333333333, 'Sandor': 0.4, 'Thoros': 0.6666666666666666, 'Loras': 0.5333333333333333, 'Belwas': 0.5, 'Barristan': 1.0, 'Illyrio': 0, 'Hodor': 1.0, 'Jojen': 0.8333333333333334, 'Meera': 0.8333333333333334, 'Nan': 0, 'Theon': 0.8333333333333334, 'Brienne': 0.6666666666666666, 'Bronn': 1.0, 'Podrick': 0.6666666666666666, 'Lothar': 1.0, 'Walder': 0.6666666666666666, 'Catelyn': 0.26666666666666666, 'Edmure': 0.4, 'Hoster': 1.0, 'Jeyne': 1.0, 'Lysa': 0.3, 'Pet

In [28]:
def centrality(tn):
    """Compute four centrality measures for every node of ``tn``.

    Returns
    -------
    tuple
        ``(degree, eigenvector, closeness, betweenness)`` -- the results of
        the corresponding networkx centrality functions, in that order.
    """
    measures = (
        nx.degree_centrality,
        nx.eigenvector_centrality,
        nx.closeness_centrality,
        nx.betweenness_centrality,
    )
    return tuple(measure(tn) for measure in measures)

In [29]:
def top_10(degree, eigen, closeness, between):
    """Print and return the ten highest-scoring nodes of each centrality.

    Each argument is a mapping ``node -> score``. For every measure the ten
    largest scores are kept (ties keep their original relative order) and
    printed as a one-column DataFrame.

    Returns
    -------
    tuple of dict
        ``(between, closeness, eigen, degree)`` top-ten mappings. Note that
        this is the reverse of the argument order.
    """
    def _leaders(scores):
        # Highest score first; sorted() is stable, so ties are not reshuffled.
        ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
        return dict(ranked[:10])

    inputs = [
        ('top ten degree centrality', degree),
        ('top ten eigen centrality', eigen),
        ('top ten closeness centrality', closeness),
        ('top ten between centrality', between),
    ]
    # Build all rankings, then all frames, and only then print anything.
    winners = [_leaders(scores) for _, scores in inputs]
    frames = [pd.DataFrame.from_dict(best, orient='index') for best in winners]
    for (label, _), frame in zip(inputs, frames):
        print(label, frame)

    top_degree, top_eigen, top_closeness, top_between = winners
    return top_between, top_closeness, top_eigen, top_degree

In [30]:
def correlation(between, closeness, eigen, degree):
    """Print the nodes that appear in all four centrality rankings.

    Parameters
    ----------
    between, closeness, eigen, degree : mapping
        Rankings keyed by node; only the keys are used.

    The common nodes are printed as a sorted list so the output is identical
    on every run. Nothing is returned.
    """
    shared = set(between) & set(closeness) & set(eigen) & set(degree)
    # Set iteration order for strings changes between interpreter sessions,
    # so sort before printing; key=str keeps mixed-type node labels sortable.
    print('correlation between centrality measures: ', sorted(shared, key=str))


In [20]:
# Compute the four centrality measures, reduce each to its ten highest-scoring
# nodes, then print the nodes common to all four rankings.
deg, eig, close, bet = centrality(tn)
# top_10 returns (between, closeness, eigen, degree) -- the reverse of the
# order in which its arguments are passed.
top_10_be, top_10_cl, top_10_ei, top_10_de = top_10(deg, eig, close, bet)
correlation(top_10_be, top_10_cl, top_10_ei, top_10_de)

top ten degree centrality                  0
Jon       0.219780
Tyrion    0.197802
Robb      0.197802
Jaime     0.186813
Sansa     0.186813
Arya      0.142857
Daenerys  0.142857
Bran      0.131868
Joffrey   0.131868
Samwell   0.120879
top ten eigen centrality                 0
Robb     0.343685
Sansa    0.322489
Jaime    0.312683
Tyrion   0.292689
Joffrey  0.262783
Arya     0.236090
Tywin    0.219292
Bran     0.201478
Jon      0.198358
Cersei   0.189814


top ten closeness centrality                 0
Arya     0.402655
Robb     0.395652
Tyrion   0.382353
Sansa    0.372951
Tywin    0.371429
Jon      0.365462
Gregor   0.364000
Jaime    0.362550
Bran     0.362550
Joffrey  0.359684
top ten between centrality                  0
Gregor    0.313272
Elia      0.278388
Jon       0.271808
Rhaegar   0.263248
Daenerys  0.245299
Arya      0.238963
Robb      0.229551
Tyrion    0.182055
Sansa     0.137770
Jaime     0.116188
correlation between centrality measures:  ['Jaime', 'Tyrion', 'Sansa', 'Arya', 'Robb', 'Jon']


In [31]:
def find_communities(tn, levels=3):
    """Display the first ``levels`` Girvan-Newman partitions of ``tn``.

    For each partition a one-column DataFrame is displayed, indexed by node,
    whose value is the index of the community the node belongs to.
    Communities are numbered in the order the algorithm yields them and
    nodes are listed in sorted order within each community.

    Parameters
    ----------
    tn : networkx.Graph
        Graph to partition.
    levels : int, optional
        Maximum number of successive partitions to show. Defaults to 3.
        Fewer are shown if the algorithm yields fewer.
    """
    communities_generator = community.girvan_newman(tn)
    # zip() stops after `levels` partitions, or earlier if the generator is
    # exhausted, instead of letting a bare next() raise StopIteration.
    for i, partition in zip(range(levels), communities_generator):
        membership = {
            node: index
            for index, members in enumerate(sorted(c) for c in partition)
            for node in members
        }
        final_df = pd.DataFrame.from_dict(membership, orient='index')
        print('Partition '+str(i))
        display(final_df)

In [32]:
# Show the first three Girvan-Newman partitions of the pruned network.
find_communities(tn)

Partition 0


Unnamed: 0,0
Aemon,0
Aerys,0
Alliser,0
Anguy,0
Arya,0
Beric,0
Bran,0
Brienne,0
Bronn,0
Brynden,0


Partition 1


Unnamed: 0,0
Aemon,0
Alliser,0
Bran,0
Craster,0
Dalla,0
Davos,0
Eddison,0
Gilly,0
Grenn,0
Hodor,0


Partition 2


Unnamed: 0,0
Aemon,0
Alliser,0
Bran,0
Craster,0
Dalla,0
Davos,0
Eddison,0
Gilly,0
Grenn,0
Hodor,0


In [None]:
def jaccard_lp(tn):
    """Return the ten best link candidates by Jaccard coefficient.

    Every ``(u, v, score)`` triple yielded by ``nx.jaccard_coefficient(tn)``
    is considered (presumably all currently unconnected node pairs --
    confirm against the networkx documentation).

    Returns
    -------
    list of ((u, v), score)
        At most ten entries, highest score first; ties keep the order in
        which networkx produced them.
    """
    scores = {(u, v): p for u, v, p in nx.jaccard_coefficient(tn)}
    ranked = list(scores.items())
    # list.sort is stable, matching the tie behaviour of sorted().
    ranked.sort(key=lambda entry: entry[1], reverse=True)
    return ranked[:10]

def preferential_lp(tn):
    """Return the ten best link candidates by preferential attachment.

    Every ``(u, v, score)`` triple yielded by
    ``nx.preferential_attachment(tn)`` is considered (presumably all
    currently unconnected node pairs -- confirm against the networkx
    documentation).

    Returns
    -------
    list of ((u, v), score)
        At most ten entries, highest score first; ties keep the order in
        which networkx produced them.
    """
    def by_score(entry):
        # entry is ((u, v), score)
        return entry[1]

    scores = {}
    for u, v, p in nx.preferential_attachment(tn):
        scores[(u, v)] = p
    ranked = sorted(scores.items(), key=by_score, reverse=True)
    return ranked[:10]