In [1]:
import Preprocess # Preprocess.py 필요!
# LDA.py 들어올 곳
import networkx as nx
import pyvis as pyv
import pickle as pkl

import numpy as np
import matplotlib.pyplot as plt

In [2]:
def network(plaintext, sep='HOTKEY123!@#', maxEdge=50, saveFilename='network',
            returnEnglishMorph=False,
            targetMorphs=['NNP','NNG'],
            morphWeight={'NNG':1,'NNP':10,'emj':0},skipList=['\n'],
            minlength=2,
            kwdWeightsFile = None):
    """Build a keyword co-occurrence network from raw text and save it as HTML.

    Pipeline: ``Preprocess.preprocess`` -> ``kwd_paring`` -> ``get_top_pair``
    -> ``darw_networkx_network`` -> ``pyvis_network_html``.

    Args:
        plaintext: Raw text, forwarded to ``Preprocess.preprocess``.
        sep: Separator string, forwarded to ``Preprocess.preprocess``.
        maxEdge: Maximum number of highest-weighted keyword pairs to keep.
        saveFilename: Output file name without the ``.html`` extension.
        returnEnglishMorph: Forwarded to ``Preprocess.preprocess``.
        targetMorphs: Morpheme tags, forwarded to ``Preprocess.preprocess``.
        morphWeight: Per-tag weight; a pair's weight is the product of the
            weights of its two tags.
        skipList: Keywords that are never paired.
        minlength: Posts with fewer tokens than this are ignored.
        kwdWeightsFile: Optional path to a pickle file holding a list of
            ``{keyword: weight}`` dicts, one dict per topic. When omitted,
            no topic information is attached to the nodes.

    Returns:
        None. The network is written to ``<saveFilename>.html``.
    """
    data = Preprocess.preprocess(plaintext=plaintext, sep=sep,
                                 targetMorphs=targetMorphs, returnMorph=True,
                                 returnEnglishMorph=returnEnglishMorph)

    # Defaults for the "no topic weights" case. Previously these names were
    # only bound inside the `if`, so the default call raised NameError.
    kwdWeights = []
    topicTitles = []

    if kwdWeightsFile:
        # NOTE(security): pickle.load can execute arbitrary code; only pass
        # files that come from a trusted source.
        with open(kwdWeightsFile, 'rb') as file:
            kwdWeights = pkl.load(file)

        # Normalise each topic's weights so they sum to 1. float64 is used
        # because float16 overflows once the sum exceeds 65504.
        for weights in kwdWeights:
            keys = list(weights.keys())
            values = np.array(list(weights.values()), dtype='float64')
            total = values.sum()
            if total:
                values = values / total
            for key, value in zip(keys, values):
                weights[key] = float(value)

        # A topic's title is its (up to) five heaviest keywords, each followed
        # by "_". The sort is stable, so ties keep their original order.
        for topics in kwdWeights:
            heaviest = sorted(topics.items(), key=lambda item: item[1], reverse=True)[:5]
            topicTitles.append("".join("%s_" % kwd for kwd, _ in heaviest))

    if not topicTitles:
        # The drawing step looks up topicTitles[topic-1]; keep one empty
        # placeholder so that lookup cannot fail when there are no topics.
        topicTitles = ['']

    kwdPair, kwdMorphs, kwdTopics = kwd_paring(data, skipList=skipList, morphWeight=morphWeight,
                                               minlength=minlength, kwdWeights=kwdWeights)
    topPair = get_top_pair(kwdPair, maxEdge=maxEdge)
    nxG = darw_networkx_network(topPair, kwdMorphs, kwdTopics, topicTitles)
    pyvis_network_html(nxG, name=saveFilename)

In [3]:
def _best_topic(kwd, kwdWeights):
    """Return the 1-based index of the topic that weights ``kwd`` highest.

    Returns 0 when ``kwdWeights`` is empty. A keyword missing from every
    topic has weight 0 everywhere and therefore resolves to topic 1, because
    the first topic already beats the initial -1 sentinel.
    """
    best_topic = 0
    best_weight = -1
    for topic_idx, topic_weights in enumerate(kwdWeights):
        weight = topic_weights.get(kwd, 0)
        if weight > best_weight:
            best_weight = weight
            best_topic = topic_idx + 1
    return best_topic


def kwd_paring(data, kwdWeights = None, skipList=['\n'], morphWeight={'NNG':1,'NNP':10,'emj':0}, minlength=2,):
    """Accumulate weighted keyword co-occurrences over a collection of posts.

    Args:
        data: Iterable of posts; each post is a sequence of hashable entries
            whose item 0 is the keyword and item 1 is its morpheme tag.
        kwdWeights: Optional list of ``{keyword: weight}`` dicts, one per
            topic. ``None`` is treated as "no topics".
        skipList: Keywords that are never paired.
        morphWeight: Per-tag weight. A pair contributes the product of its
            two tag weights; an unknown tag makes the contribution 0.
        minlength: Posts with fewer entries than this are skipped.

    Returns:
        A tuple ``(kwdPair, kwdMorphs, kwdTopics)``:
            kwdPair: ``{"kwdA<*>kwdB": accumulated weight}``; each unordered
                pair is stored under a single key.
            kwdMorphs: ``{keyword: {tag: number of posts containing it}}``.
            kwdTopics: ``{keyword: 1-based topic index, or 0 without topics}``.
    """
    topic_weights = kwdWeights if kwdWeights is not None else []

    kwdMorphs = dict()
    kwdPair = dict()
    kwdTopics = dict()

    for post in data:
        if len(post) < minlength:
            continue

        # De-duplicate while keeping first-appearance order so that the
        # result is reproducible between runs (set order is not).
        uniqueKwds = list(dict.fromkeys(post))

        # Count every unique (keyword, tag) once per post and resolve its
        # topic. Doing this for all entries up front fixes the old behaviour
        # where the last entry's count was reset to 1 and single-keyword
        # posts touched unbound or stale loop variables.
        for entry in uniqueKwds:
            kwd, morph = entry[0], entry[1]
            morph_counts = kwdMorphs.setdefault(kwd, dict())
            morph_counts[morph] = morph_counts.get(morph, 0) + 1
            if kwd not in kwdTopics:
                kwdTopics[kwd] = _best_topic(kwd, topic_weights)

        for idx1, first in enumerate(uniqueKwds):
            kwd1, morp1 = first[0], first[1]
            if len(kwd1) == 1 or kwd1 in skipList:
                continue

            for second in uniqueKwds[idx1 + 1:]:
                kwd2, morp2 = second[0], second[1]
                # Same keyword under a different tag is not a pair.
                if kwd1 == kwd2:
                    continue
                if len(kwd2) == 1 or kwd2 in skipList:
                    continue

                # The optional same-topic bonus stays disabled, as it was in
                # the original code.
                weight = morphWeight.get(morp1, 0) * morphWeight.get(morp2, 0)

                # Reuse the reversed key if the pair was first seen in the
                # other orientation, so each pair has exactly one entry.
                reverse_key = '%s<*>%s' % (kwd2, kwd1)
                forward_key = '%s<*>%s' % (kwd1, kwd2)
                if reverse_key in kwdPair:
                    kwdPair[reverse_key] += weight
                else:
                    kwdPair[forward_key] = kwdPair.get(forward_key, 0) + weight

    return kwdPair, kwdMorphs, kwdTopics

In [4]:
def get_top_pair(kwdPair, maxEdge=50):
    """Select the heaviest keyword pairs and return them as an adjacency dict.

    Args:
        kwdPair: ``{"kwdA<*>kwdB": weight}`` mapping.
        maxEdge: Maximum number of pairs to keep. If fewer pairs exist, all
            of them are returned (the previous version raised ValueError).

    Returns:
        ``{kwdA: {kwdB: weight}}`` for the selected pairs, inserted from the
        heaviest pair to the lightest. Pairs of equal weight keep the order
        they have in ``kwdPair``.
    """
    # sorted() is stable, also with reverse=True, so ties are resolved by
    # insertion order, as the repeated max()/index() scan used to do.
    ranked = sorted(kwdPair.items(), key=lambda item: item[1], reverse=True)

    topPair = dict()
    # max(..., 0) keeps a negative maxEdge meaning "no pairs", like range().
    for pair_key, weight in ranked[:max(maxEdge, 0)]:
        nodes = pair_key.split('<*>')
        node1, node2 = nodes[0], nodes[1]
        topPair.setdefault(node1, dict())[node2] = weight

    return topPair

In [5]:
def darw_networkx_network(topPair, kwdMorphs, kwdTopics, topicTitles):
    """Create a styled networkx graph from the selected keyword pairs.

    Args:
        topPair: ``{kwdA: {kwdB: weight}}`` adjacency mapping.
        kwdMorphs: ``{keyword: {tag: count}}``; the most frequent tag decides
            the node shape ('NNP' -> diamond, 'NNG' -> triangle).
        kwdTopics: ``{keyword: 1-based topic index}``; 0 means "no topic".
        topicTitles: Topic titles, where ``topicTitles[i]`` belongs to topic
            ``i + 1``.

    Returns:
        A ``networkx.Graph`` whose edges carry a ``weight`` scaled to the
        range [2, 5] and whose nodes carry ``size``, ``title`` and, where
        applicable, ``shape`` and ``color`` attributes.
    """
    # Use the real extremes of the weights instead of fixed sentinels, which
    # were wrong for weights above 100000.
    all_weights = [w for neighbours in topPair.values() for w in neighbours.values()]
    if all_weights:
        countmin, countmax = min(all_weights), max(all_weights)
    else:
        countmin = countmax = 0
    span = countmax - countmin

    G = nx.Graph(topPair)

    for node_a, node_b in G.edges():
        # The graph is undirected, so an edge may be reported in the opposite
        # orientation to the one stored in topPair.
        if node_a in topPair and node_b in topPair[node_a]:
            raw_weight = topPair[node_a][node_b]
        else:
            raw_weight = topPair[node_b][node_a]
        # When all weights are equal there is nothing to scale (this used to
        # divide by zero); use the middle of the range instead.
        ratio = (raw_weight - countmin) / span if span else 0.5
        G.edges[node_a, node_b]['weight'] = ratio * 3 + 2

    shape_by_morph = {'NNP': 'diamond', 'NNG': 'triangle'}
    color_by_topic = {1: "#ff6622", 2: "#22ff66", 3: "#6622ff"}

    for node in G.nodes():
        attrs = G.nodes[node]

        # Shape follows the keyword's most frequent tag; the first tag wins
        # when counts are tied.
        morph_counts = kwdMorphs.get(node, {})
        if morph_counts:
            dominant = max(morph_counts, key=morph_counts.get)
            if dominant in shape_by_morph:
                attrs['shape'] = shape_by_morph[dominant]

        # Node size grows in steps with the number of attached edges.
        degree = G.degree[node]
        if degree < 5:
            attrs['size'] = 10
        elif degree < 10:
            attrs['size'] = 20
        else:
            attrs['size'] = 30

        # Topic 0 or an out-of-range topic gets an empty title instead of
        # silently picking topicTitles[-1] or raising IndexError.
        topic = kwdTopics.get(node, 0)
        if isinstance(topic, int) and 1 <= topic <= len(topicTitles):
            topic_title = topicTitles[topic - 1]
        else:
            topic_title = ''
        attrs['title'] = "토픽 %s : %s" % (topic, topic_title)
        if topic in color_by_topic:
            attrs['color'] = color_by_topic[topic]

    return G

In [6]:
def pyvis_network_html(G, name='network'):
    """Export a networkx graph as a pyvis HTML page.

    Args:
        G: networkx graph to load into the pyvis network.
        name: Output file name without extension; the page is written to
            ``<name>.html``.
    """
    canvas_options = dict(
        height="1000px",
        width="100%",
        bgcolor="#222222",
        font_color="white",
        filter_menu=True,
    )
    net = pyv.network.Network(**canvas_options)

    # Request the button panel restricted to the 'physics' group before the
    # graph is loaded, in the same order as before.
    net.show_buttons(filter_=['physics'])
    net.from_nx(G)

    output_path = '%s.html' % name
    net.save_graph(output_path)