In [1]:
from sqlalchemy import create_engine, distinct
from sqlalchemy.orm import sessionmaker

from JamScrapy import config
from JamScrapy.preprocess.entity import Profile, People

import json

In [2]:
# Connect to the database named in the project config; max_overflow=5 is
# forwarded to SQLAlchemy's connection pool settings.
engine = create_engine(config.DB_CONNECT_STRING, max_overflow=5)

# Build the session factory first, then open the single session that the
# rest of the notebook queries through.
session_factory = sessionmaker(bind=engine)
session = session_factory()

# Load every Profile row into memory; later cells iterate over this list.
profiles = session.query(Profile).all()

# NOTE(review): the label says "posts" but the number printed is the count
# of Profile rows - confirm the intended wording before changing the string.
print('Total posts:', len(profiles))

Total posts: 1404


In [3]:
def add_node(dict, str, value):
    """Merge a flag into ``dict`` for every person named in a JSON string.

    Parameters:
        dict:  mapping of name -> flag value; updated in place.
        str:   JSON text encoding a list of objects that each have a
               ``'name'`` key. A falsy value (``None`` or ``''``) is ignored.
        value: flag to store for a new name, or to OR into the value
               already stored for a known name.
    """
    if not str:
        return

    for entry in json.loads(str):
        person = entry['name']
        if person in dict:
            # Known person: combine the new flag with the existing ones.
            dict[person] |= value
        else:
            dict[person] = value

def add_link(list, filters, str, source=None, target=None, ban=False,
             ban_limit=20, excluded=('Francis Lui',)):
    """Append graph edges for the people named in a JSON string.

    Parameters:
        list:      list of edge dicts (``{"source": ..., "target": ...}``);
                   extended in place.
        filters:   container of names allowed to appear as the linked person.
        str:       JSON text encoding a list of objects that each have a
                   ``'name'`` key. A falsy value (``None`` or ``''``) is ignored.
        source:    when not ``None``, each edge runs from ``source`` to the name.
        target:    used only when ``source`` is ``None``; each edge then runs
                   from the name to ``target``. If both are ``None`` nothing
                   is appended.
        ban:       when true, skip the whole JSON list if it holds
                   ``ban_limit`` or more entries.
        ban_limit: size at which a list is skipped when ``ban`` is set
                   (default 20, the previously hard-coded value).
        excluded:  names that never produce an edge even if they are in
                   ``filters`` (default keeps the previously hard-coded name).
    """
    if not str:
        return

    items = json.loads(str)

    # With ``ban`` set, oversized lists are dropped entirely rather than truncated.
    if ban and len(items) >= ban_limit:
        return

    for item in items:
        name = item['name']
        if name not in filters or name in excluded:
            continue
        if source is not None:
            list.append({"source": source, "target": name})
        elif target is not None:
            list.append({"source": name, "target": target})

def get_key(dict, value):
    """Return every key of ``dict`` whose stored value is >= ``value``.

    The result is a list in the mapping's iteration order; it is empty when
    no value reaches the threshold.
    """
    matching = []
    for key, stored in dict.items():
        if stored >= value:
            matching.append(key)
    return matching

# Bit flags stored as the values of the `people` dictionary
- 10000 - Creator / Participant
- 01000 - Manager
- 00010 - Report (direct report)
- 00100 - Follower
- 00001 - Following

In [4]:
# Count how many People rows with roletype 'creator' exist per display name.
creators = dict()
results = session.query(People).filter(People.roletype=='creator').all()
for p in results:
    creators[p.displayname] = creators.get(p.displayname, 0) + 1

print(len(creators))

# Names with at least two creator rows. Computed once, outside the loop, and
# held in a set so the per-profile membership test does not rescan a list.
frequent_creators = set(get_key(creators, 2))

# people maps a name to the OR of its role flags.
people = dict()
for p in profiles:
    if p.displayname in frequent_creators:
        # OR the creator flag in rather than assigning it: this person may
        # already carry relationship bits added while processing an earlier
        # profile, and a plain assignment would erase them.
        people[p.displayname] = people.get(p.displayname, 0) | 0b10000

    add_node(people, p.managers, 0b01000)
    add_node(people, p.reports, 0b00010)
    add_node(people, p.followers, 0b00100)
    add_node(people, p.following, 0b00001)

print(len(people))



1088
13425


In [5]:
# Classified by people relationship (disabled: this whole cell is commented out)
#nodes = []
#for k, v in people.items(): 
#    if v >= 0b11111:
#        nodes.append({"name": k,"value": v,"symbolSize": 12,"category": 0})
#    elif v >= 0b10111:
#        nodes.append({"name": k,"value": v,"symbolSize": 10,"category": 1})
#    elif v >= 0b10011:
#        nodes.append({"name": k,"value": v,"symbolSize": 8,"category": 2})
#    elif v >= 0b10001:
#        nodes.append({"name": k,"value": v,"symbolSize": 6,"category": 3})
#    elif v == 0b10000:
#        nodes.append({"name": k,"value": v,"symbolSize": 4,"category": 4})        

#print(len(nodes))

In [6]:
# Classify each person by their number of creator records (their "rank").
# Every tier is (minimum rank, symbolSize, category) and tiers are checked
# from highest to lowest. People with a rank of 1, or with no creator
# records at all, do not get a node.
tiers = (
    (20, 15, 0),
    (15, 12, 1),
    (10, 10, 2),
    (5, 8, 3),
    (2, 4, 4),
)

nodes = []
for person in people:
    if person not in creators:
        continue
    rank = int(creators[person])
    for minimum, size, category in tiers:
        if rank >= minimum:
            nodes.append({"name": person, "value": rank, "symbolSize": size, "category": category})
            break  # first matching tier wins

print(len(nodes))

477


In [7]:
# Only people with at least two creator records may appear as the linked
# person of an edge. A set is used because add_link tests membership once
# per name, and a list would be scanned linearly each time.
filters = set(get_key(creators, 2))

links = []
for p in profiles:
    # Managers and followers point at the profile owner; reports and
    # following point away from the owner.
    add_link(links, filters, p.managers, target=p.displayname)
    add_link(links, filters, p.reports, source=p.displayname)
    # ban=True makes add_link skip follower/following lists of 20 or more entries.
    add_link(links, filters, p.followers, target=p.displayname, ban=True)
    add_link(links, filters, p.following, source=p.displayname, ban=True)

print(len(links))

1022


In [8]:
# Write the graph nodes as UTF-8 JSON; ensure_ascii=False keeps non-ASCII
# names readable instead of \u-escaped.
with open("./nodes.json", mode='w', encoding='utf-8') as nodes_out:
    json.dump(nodes, nodes_out, ensure_ascii=False)

In [9]:
# Write the graph edges as UTF-8 JSON; ensure_ascii=False keeps non-ASCII
# names readable instead of \u-escaped.
with open("./links.json", mode='w', encoding='utf-8') as links_out:
    json.dump(links, links_out, ensure_ascii=False)