### In this notebook we find the nodes with the highest degree and/or betweenness centrality in each community and manually investigate those nodes to get a better understanding of what that community is about.

In [1]:
import snap

from joblib import Parallel, delayed
from datetime import datetime

import ast, operator
from copy import deepcopy

import time, pandas as pd, pickle, json, networkx as nx, numpy as np
from networkx.readwrite import json_graph

In [2]:
# Read the node-link JSON inside a context manager so the file handle is
# closed as soon as parsing finishes (a bare open() leaves it dangling).
with open("../REST/static/networks/latest_tw_ntw.json") as network_file:
    data = json.load(network_file)

# Rebuild the directed graph from the parsed node-link structure.
graph = json_graph.node_link_graph(data, directed=True)

# The raw parsed JSON is no longer needed once the graph exists.
del data

In [3]:
# Peek at a single (node_id, attributes) pair to see which attributes are
# available, without first building a list of every node in the graph.
next(iter(graph.nodes(data=True)))

(396662786,
 {'betweenness': 0.0,
  'closeness_centrality': 0.0,
  'clustering_coefficient': 1.0,
  'community': 'foci',
  'degree': 2,
  'eigenvector_centrality': 2.4260626929564234e-24,
  'followers_count': 33.0,
  'friends_count': 284.0,
  'in_degree': 0,
  'lang': 'tr',
  'match_name': ' Sehir MBA',
  'name': 'nemasehir',
  'out_degree': 2,
  'pagerank': 0.00018450710615505045,
  'screen_name': 'nemasehir'})

In [5]:
# Group users by community:
# {community: {user_id: (betweenness_centrality, degree)}}
communities_dict = {}

for user_id, user_data in graph.nodes(data=True):
    # Fetch this community's user mapping, creating it on first sight.
    community_users = communities_dict.setdefault(user_data["community"], {})
    community_users[user_id] = (user_data["betweenness"], user_data["degree"])

### Getting top 5 users by betweenness in each community

In [38]:
def sort_com_by_users_metric(communities_dict, metric_idx, top_k=2):
    """Rank the users of every community by one of their metrics.

    communities_dict: {community: {user_id: (metric 0, metric 1, ...)}}
    metric_idx: position, inside each user's metric tuple, of the metric to rank by
    top_k: maximum number of users kept per community

    returns {community: [(user_id, (metric 0, metric 1, ...)), ...]} with at most
    top_k entries per community, highest metric value first,
    e.g. [(179250667, (0.005290261280310729, 17)), (231908931, (0.0, 4))]
    """
    def metric_of(user_item):
        # user_item is a (user_id, metrics_tuple) pair.
        return user_item[1][metric_idx]

    top_users_per_com = {}
    for comm, users_data in communities_dict.items():
        ranked_users = list(users_data.items())
        # Stable descending sort: ties keep their original insertion order.
        ranked_users.sort(key=metric_of, reverse=True)
        top_users_per_com[comm] = ranked_users[:top_k]
    return top_users_per_com

In [39]:
# Top 5 users of every community ranked by betweenness
# (index 0 of the (betweenness, degree) tuples in communities_dict).
sorted_coms_btw = sort_com_by_users_metric(communities_dict, metric_idx=0, top_k=5)
sorted_coms_btw['10']

[(179250667, (0.005290261280310729, 17)),
 (943572432, (0.0, 1)),
 (770506495, (0.0, 2)),
 (231908931, (0.0, 4)),
 (169209299, (0.0, 2))]

In [40]:
# Top 5 users of every community ranked by degree
# (index 1 of the (betweenness, degree) tuples in communities_dict).
# Stored under its own name so the degree ranking is not mislabelled as
# betweenness and does not overwrite the betweenness ranking.
sorted_coms_deg = sort_com_by_users_metric(communities_dict, 1, 5)
sorted_coms_deg['10']

[(179250667, (0.005290261280310729, 17)),
 (231908931, (0.0, 4)),
 (770506495, (0.0, 2)),
 (169209299, (0.0, 2)),
 (358763677, (0.0, 2))]

### Investigating SCC dominant communities

In [43]:
# Read the SCC node-link JSON inside a context manager so the file handle is
# closed as soon as parsing finishes (a bare open() leaves it dangling).
with open("../REST/static/networks/SCC_graph.json") as scc_file:
    scc_data = json.load(scc_file)

# Rebuild the directed SCC graph from the parsed node-link structure.
scc_graph = json_graph.node_link_graph(scc_data, directed=True)

# The raw parsed JSON is no longer needed once the graph exists.
del scc_data

In [57]:
def get_top_k_nodes_in_scc(communities_dict, scc_graph, metric_idx, top_k=2):
    """Map each qualifying SCC node to the top users of its biggest community.

    communities_dict: {community: {user_id: (metric 0, metric 1, ...)}}
    scc_graph: graph whose nodes carry 'degree' and 'biggest_community' attributes
    metric_idx: position of the metric, in each user's tuple, used for ranking
    top_k: maximum number of users kept per community

    returns {scc_id: [(user_id, (metric 0, metric 1, ...)), ...]} where the list
    is the top_k ranking of the community named by the node's
    'biggest_community' attribute. Only nodes whose 'degree' attribute is
    greater than 1 are included.
    """
    top_users_per_com = sort_com_by_users_metric(communities_dict, metric_idx, top_k)

    return {
        scc_id: top_users_per_com[scc_attrs['biggest_community']]
        for scc_id, scc_attrs in scc_graph.nodes(data=True)
        if scc_attrs['degree'] > 1
    }

In [60]:
# Positions of each metric inside the (betweenness, degree) tuples
# stored in communities_dict.
btw_index, deg_index = 0, 1

# Top 2 users, ranked by degree, for every qualifying SCC node.
influential_nodes_per_scc = get_top_k_nodes_in_scc(
    communities_dict, scc_graph, metric_idx=deg_index, top_k=2
)
influential_nodes_per_scc

{0: [(377901136, (0.025097580520380414, 81)),
  (334139315, (0.018701713785200642, 62))],
 1: [(344090521, (0.007097456024185624, 20)), (52171453, (0.0, 3))],
 2: [(139032492, (0.002963211342957341, 11)),
  (805514119, (0.0012636631231703853, 4))],
 3: [(995806488, (0.03718076845945561, 143)),
  (294182546, (4.5512783121060325e-06, 5))],
 8: [(162310009, (0.0, 71)), (344339030, (5.01818911596035e-05, 8))],
 124: [(281127185, (0.0, 18)), (447455430, (0.0, 2))],
 209: [(995806488, (0.03718076845945561, 143)),
  (294182546, (4.5512783121060325e-06, 5))],
 211: [(281127185, (0.0, 18)), (447455430, (0.0, 2))],
 225: [(253616057, (9.122626071305532e-05, 20)), (740721020, (0.0, 2))],
 236: [(179250667, (0.005290261280310729, 17)), (231908931, (0.0, 4))],
 251: [(618131962, (0.0379104467976198, 176)),
  (164596213, (0.02038580064030419, 14))],
 265: [(139032492, (0.002963211342957341, 11)),
  (805514119, (0.0012636631231703853, 4))],
 286: [(162310009, (0.0, 71)), (344339030, (5.01818911596035