# 共起ネットワーク

## 前準備

必要なライブラリのインポート

In [None]:
import csv

import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

from multiprocessing import Pool

関数の定義

In [None]:
def dftodict(df):
    """Turn a two-column DataFrame into a dict keyed by its index values.

    Every row yielded by ``df.itertuples()`` is unpacked into exactly three
    items (index value, count, Jaccard coefficient), so ``df`` must have
    exactly two columns.

    Returns:
        dict: ``{index_value: (count, jaccard)}``.
    """
    result = {}
    for row in df.itertuples():
        pair, count, jaccard = row
        result[pair] = (count, jaccard)
    return result

In [None]:
def load(path):
    """Read a CSV file and return it in the dict form built by ``dftodict``.

    The first column of the file is discarded, and the remaining frame is
    indexed by the ``'用語1'`` / ``'用語2'`` column pair before conversion.
    """
    frame = pd.read_csv(path)
    # Drop the leading column by label.
    frame = frame.drop(columns=frame.columns[0])
    indexed = frame.set_index(['用語1', '用語2'])
    return dftodict(indexed)

データの読み込み

In [None]:
# Category table read from CSV and indexed by its 'name' column.
cas = (
    pd.read_csv('src/categories.csv')
    .set_index('name')
)

## プロット

In [None]:
def project(values, floor, ceil):
    """Linearly rescale ``values`` from ``[min, max]`` onto ``[floor, ceil]``.

    Args:
        values: Sized iterable of numbers (``len``, ``min``, ``max`` and
            iteration are all applied to it).
        floor: Output produced for the smallest input value.
        ceil: Output produced for the largest input value.

    Returns:
        list: One rescaled value per input, in iteration order; an empty
        list when ``values`` is empty.
    """
    if len(values) == 0:
        return []

    lowest = min(values)
    span_in = max(values) - lowest
    span_out = ceil - floor

    # All inputs equal: avoid dividing by zero by falling back to the
    # minimum itself, or to 1 when that minimum is zero as well.
    if span_in == 0:
        span_in = lowest if lowest != 0 else 1

    return [
        (item - lowest) / span_in * span_out + floor
        for item in values
    ]

In [None]:
def conet(jaccard_dict, output_path, min_count=4, edge_th=0.2):
    """Render a co-occurrence network from a Jaccard dictionary as SVG.

    NOTE(review): a later cell in this file defines another ``conet`` that
    takes an iterator of rows instead; whichever definition is executed last
    is the one that callers get.

    Args:
        jaccard_dict: Mapping ``(term1, term2) -> (count, coef)`` (the shape
            returned by ``load``).
        output_path: Destination handed to ``AGraph.draw``.
        min_count: Pairs whose count is below this value are skipped.
        edge_th: Pairs whose coefficient is below this value are skipped.
    """
    G = nx.Graph()

    for (term1, term2), (count, coef) in jaccard_dict.items():
        if count < min_count or coef < edge_th:
            continue

        # The coefficient is stored as 'penwidth' so that it is used both as
        # the PageRank weight below and as an attribute on the drawn edge.
        G.add_edge(term1, term2, penwidth=coef)

    pageranks = nx.pagerank(G, weight='penwidth')
    # Normalise PageRank scores to [0, 1] so they can index the colormap.
    node_colors = project(pageranks.values(), 0, 1)

    agraph = nx.nx_agraph.to_agraph(G)

    agraph.node_attr["style"] = "filled"
    agraph.node_attr["shape"] = "circle"

    for v, color in zip(pageranks, node_colors):
        rgba = plt.cm.terrain(color)
        # '02x' zero-pads every channel to two hex digits. The previous '2x'
        # padded with spaces, which broke the "#RRGGBBAA" string whenever a
        # channel was below 16.
        hex_channels = "".join(f"{int(x * 255):02x}" for x in rgba)
        agraph.get_node(v).attr['fillcolor'] = f"#{hex_channels}"

    # Layout engine is neato; "dot" and "fdp" are drop-in alternatives.
    agraph.draw(output_path, prog="neato", format="svg")

In [None]:
def conet(jaccard_iter, output_path, edge_th):
    """Render a co-occurrence network from an iterator of rows as SVG.

    Args:
        jaccard_iter: Iterable of 4-item rows ``(term1, term2, _, coef)``;
            the third item is ignored and ``coef`` is converted with
            ``float``.
        output_path: Destination handed to ``AGraph.draw``.
        edge_th: Rows whose coefficient is below this value are skipped.
    """
    G = nx.Graph()

    for term1, term2, _, coef in jaccard_iter:
        coef = float(coef)
        if coef < edge_th:
            continue

        # Workaround flagged by the original author as a pygraphviz bug:
        # prefix every '%' in a term with a backslash. The literal is spelled
        # '\\%' (escaped backslash) so it no longer relies on an invalid
        # escape sequence; the resulting string is unchanged.
        term1 = term1.replace('%', '\\%')
        term2 = term2.replace('%', '\\%')

        # The scaled coefficient is stored as 'penwidth' so that it is used
        # both as the PageRank weight and as an attribute on the drawn edge.
        G.add_edge(term1, term2, penwidth=coef * 10)

    pageranks = nx.pagerank(G, weight='penwidth')
    # Normalise PageRank scores to [0, 1] so they can index the colormap.
    node_colors = project(pageranks.values(), 0, 1)

    agraph = nx.nx_agraph.to_agraph(G)

    agraph.node_attr["style"] = "filled"
    agraph.node_attr["shape"] = "circle"
    agraph.edge_attr['len'] = 5

    for v, color in zip(pageranks, node_colors):
        rgba = plt.cm.terrain(color)
        # '02x' zero-pads every channel to two hex digits. The previous '2x'
        # padded with spaces, which broke the "#RRGGBBAA" string whenever a
        # channel was below 16.
        hex_channels = "".join(f"{int(x * 255):02x}" for x in rgba)
        agraph.get_node(v).attr['fillcolor'] = f"#{hex_channels}"

    agraph.draw(output_path, format="svg", prog="neato")

In [None]:
# Work list for the rendering loop: each entry is (filename, th). The pair is
# unpacked and passed to loadconet(filename, th), where `filename` selects
# dst/jaccard/<filename>.csv and `th` is forwarded as the edge threshold.
context = [
    ('借金', 0.5),
    ('交通事故', 0.5),
    ('離婚・男女問題', 0.5),
    ('相続', 0.5),
    ('労働', 0.5),
    ('債権回収', 0.5),
    ('医療', 0.5),
    ('消費者被害', 0.5),
    ('税務訴訟', 0.2),
    ('国際・外国人問題', 0.2),
    ('民事・その他', 0.2),
    ('インターネット', 0.5),
    ('犯罪・刑事事件', 0.5),
    ('不動産・建築', 0.5),
    ('企業法務', 0.5),
    ('行政事件', 0.5),
    ('民事紛争の解決手続き', 0.5),
    ('qus_title_terms', 0.2),
    ('qus_body_terms', 0.5),
    ('ans_body_terms', 0.5),
    ('all_terms', 0.5)
]

In [None]:
def loadconet(filename, th):
    """Render ``dst/jaccard/<filename>.csv`` to ``dst/conet/<filename>.svg``.

    The first line of the CSV is compared against the expected header. On a
    mismatch the user is prompted, and the function returns without drawing
    unless the answer is exactly ``'Y'``. The remaining rows are streamed to
    ``conet`` through ``csv.reader``.

    Args:
        filename: Base name (no directory, no extension) of the input CSV
            and of the output SVG.
        th: Edge threshold forwarded to ``conet`` as ``edge_th``.
    """
    input_path = f"dst/jaccard/{filename}.csv"
    output_path = f"dst/conet/{filename}.svg"

    # newline='' is the mode the csv module asks for when a file object is
    # given to csv.reader, so line endings inside quoted fields reach the
    # reader untouched. strip() below still removes the header's line ending.
    with open(input_path, 'r', encoding='utf8', newline='') as f:
        header = f.readline().strip()
        if header != '用語1,用語2,出現回数,Jaccard':
            if input('The header looks wrong! Continue? (Y/n): ') != 'Y':
                return

        # The header line has already been consumed, so the reader starts at
        # the first data row.
        conet(csv.reader(f), output_path, edge_th=th)

    print('Done:', filename)

In [None]:
# Render one SVG per (filename, threshold) entry in `context`.
for filename, th in context:
    loadconet(filename=filename, th=th)

In [None]:
# if __name__ == '__main__':
#     with Pool(3) as p:
#         p.starmap(loadconet, context)