In [6]:
import os, sys, email
import json
import time
import datetime
import ast

import numpy as np
import pandas as pd

import community
import networkx as nx

from tqdm import tqdm

In [7]:
# Notebook configuration: topic keyword, thresholds and the dataset location.
# NOTE(review): none of the *_THRESHOLD constants are referenced by the cells
# visible here — presumably they are consumed elsewhere; confirm or remove.
TOPIC_KEYWORD = 'email'
POSTS_THRESHOLD = 0 # 0 keeps everyone (no filtering by post count)
LINKS_THRESHOLD = 20
DEGREE_THRESHOLD = 5
HIGH_CONTRIBUTION_THRESHOLD = 60

# Disabled database access. Credentials and host were redacted from this
# comment: never commit secrets — read them from the environment instead.
# DB_CONNECT_STRING = 'mysql+pymysql://<user>:<password>@<host>:3306/nexus?charset=utf8mb4'

# engine = create_engine(DB_CONNECT_STRING, max_overflow=5)
# JSON file holding the 'nodes' and 'links' lists; it is read by
# CommunityStructure.set_nodes_with_links() and overwritten in place by
# CommunityStructure.export_dataset().
FILEPATH = './output/external-people-email.json'

In [8]:
class CommunityStructure:
    """Detect communities in a node/link network stored as JSON and tag the nodes.

    The JSON file at ``filepath`` must contain an object with a ``nodes`` list
    (dicts that carry at least a ``name`` key) and a ``links`` list (dicts that
    carry ``source`` and ``target`` keys).

    Intended call order::

        set_nodes_with_links() -> community_analysis()
            -> set_node_community_attr() -> export_dataset()
    """

    def __init__(self, keyword, filepath):
        """Create an empty structure bound to a topic keyword and a JSON file.

        Args:
            keyword: Topic keyword; stored on the instance as ``self.keyword``.
            filepath: Path of the JSON dataset to read (and, by default, to
                overwrite on export).
        """
        self.nodes = []
        self.links = []
        self.graph = nx.Graph(name='community-network')
        # Graph node -> community id, as returned by community.best_partition().
        # Initialised as a dict so its type matches what the analysis stores.
        self.partition = {}
        # Zero-padded community label -> number of nodes in that community.
        self.community_size = dict()
        self.keyword = keyword
        self.source_file_path = filepath

    @staticmethod
    def _community_label(community_id):
        """Return the community id rendered as a 6-character zero-padded string."""
        return str(community_id).zfill(6)

    def set_nodes_with_links(self):
        """Load ``self.nodes`` and ``self.links`` from the JSON source file.

        Returns without changing anything (and without printing) when the
        source file does not exist.

        Raises:
            KeyError: If the JSON object lacks a ``nodes`` or ``links`` key.
        """
        if not os.path.exists(self.source_file_path):
            return

        # Read explicitly as UTF-8: export_dataset() writes UTF-8 with
        # ensure_ascii=False, so the platform default encoding could fail to
        # read the file back when it contains non-ASCII text.
        with open(self.source_file_path, encoding='utf-8') as json_file:
            ds = json.load(json_file)

        self.nodes = ds['nodes']
        self.links = ds['links']

        print('nodes:', len(self.nodes), 'links:', len(self.links))

    def community_analysis(self, random_state=None):
        """Build the graph from the links and partition it into communities.

        Every link contributes one edge (``source`` -> ``target``) to
        ``self.graph``; the partition is computed with
        ``community.best_partition`` and the per-community node counts are
        stored in ``self.community_size``. Does nothing when there are no
        nodes or no links.

        Only nodes that appear in at least one link enter the graph, so nodes
        without links are absent from ``self.partition``.

        Args:
            random_state: Optional seed forwarded to
                ``community.best_partition`` for a reproducible partition.
                When ``None`` (default) the call is made exactly as before,
                without the argument.
        """
        if len(self.nodes) == 0 or len(self.links) == 0:
            return

        for item in self.links:
            self.graph.add_edge(item['source'], item['target'])

        if random_state is None:
            self.partition = community.best_partition(self.graph)
        else:
            self.partition = community.best_partition(
                self.graph, random_state=random_state
            )

        print('partition:', len(self.partition))

        # Rebuild the counts from scratch so that re-running the analysis
        # (e.g. re-executing the notebook cell) does not double every size.
        sizes = {}
        for community_id in self.partition.values():
            label = self._community_label(community_id)
            sizes[label] = sizes.get(label, 0) + 1
        self.community_size = sizes

        print('community:', len(self.community_size))

    def set_node_community_attr(self):
        """Annotate each partitioned node with ``community`` and ``community_size``.

        Nodes whose ``name`` is not in ``self.partition`` are left unchanged.
        Does nothing when nodes, links or the partition are empty.
        """
        if len(self.nodes) == 0 or len(self.links) == 0 or len(self.partition) == 0:
            return

        for node in self.nodes:
            name = node['name']
            if name in self.partition:
                label = self._community_label(self.partition[name])
                node['community'] = label
                node['community_size'] = self.community_size[label]

    def export_dataset(self, filepath=None):
        """Write the nodes and links as UTF-8 JSON and return them.

        Args:
            filepath: Optional output path. When ``None`` (default) the source
                file is overwritten in place, as before.

        Returns:
            dict: ``{"nodes": self.nodes, "links": self.links}``.
        """
        target_path = self.source_file_path if filepath is None else filepath
        result = {"nodes": self.nodes, "links": self.links}
        with open(target_path, 'w', encoding='utf-8') as json_file:
            json.dump(result, json_file, ensure_ascii=False)

        return result

In [9]:
# Run the community-detection pipeline on the configured dataset.
# The steps are order-dependent: load -> partition -> annotate -> export.
ds = CommunityStructure(TOPIC_KEYWORD, FILEPATH)

ds.set_nodes_with_links()      # load nodes/links from FILEPATH (no-op if the file is missing)
ds.community_analysis()        # build the graph and compute the community partition
ds.set_node_community_attr()   # add 'community' / 'community_size' to partitioned nodes
# NOTE: export_dataset() overwrites FILEPATH in place with the annotated data.
final_result = ds.export_dataset()


nodes: 150 links: 2003
partition: 140
community: 7


In [10]:
# Echo the exported dataset ({'nodes': [...], 'links': [...]}) as the cell output.
# NOTE(review): this renders every node and link; consider displaying only a
# slice or summary to keep the notebook output readable.
final_result

{'nodes': [{'name': 'dean-c',
   'username': 'dean-c',
   'displayname': 'dean-c',
   'boardarea': 'None',
   'functionalarea': 'None',
   'costcenter': 'None',
   'officelocation': 'None',
   'localinfo': 'None',
   'region': 'None',
   'city': 'None',
   'profile': 'None',
   'email': 'None',
   'mobile': 'None',
   'value': 59.54,
   'posts': 2429,
   'comments': 12914,
   'degree': 15.11,
   'betweenness': 6.22,
   'closeness': 67.8,
   'networktype': 'Soloists',
   'category': 'Soloists',
   'community': '000004',
   'community_size': 18},
  {'name': 'kitchen-l',
   'username': 'kitchen-l',
   'displayname': 'kitchen-l',
   'boardarea': 'None',
   'functionalarea': 'None',
   'costcenter': 'None',
   'officelocation': 'None',
   'localinfo': 'None',
   'region': 'None',
   'city': 'None',
   'profile': 'None',
   'email': 'None',
   'mobile': 'None',
   'value': 39.75,
   'posts': 5546,
   'comments': 23657,
   'degree': 52.37,
   'betweenness': 4.24,
   'closeness': 83.16,
   'ne