In [1]:
import networkx as nx
from yfiles_jupyter_graphs import GraphWidget
from graphrag.index.operations.cluster_graph import cluster_graph
import pandas as pd

# attach community information from the communities dataframe to the yfiles-jupyter-graphs node dicts
def convert_entities_to_dicts(nodes, df, level=0):
    """Attach community information to yfiles-jupyter-graphs node dicts.

    For every node, the first row of ``df`` whose ``title`` equals the node's
    ``properties["label"]`` and whose ``level`` equals ``level`` is merged
    into the node's ``properties`` dict. Nodes without a matching row get
    ``community`` set to ``None`` and ``title`` set to their label.

    Args:
        nodes: Iterable of node dicts, each carrying a ``properties`` dict
            with a ``label`` key. The dicts are updated in place.
        df: DataFrame with at least ``title`` and ``level`` columns; every
            column of the matching row is copied into the node properties.
        level: Value of the ``level`` column to select rows from.

    Returns:
        The same ``nodes`` object that was passed in.
    """
    # The level filter does not depend on the node, so apply it only once
    # instead of re-scanning the whole dataframe for every node.
    level_df = df[df["level"] == level]
    for node in nodes:
        label = node["properties"]["label"]
        matches = level_df[level_df["title"] == label]
        if len(matches) > 0:
            # Only the first matching row is used.
            community_info = matches.iloc[0].to_dict()
        else:
            community_info = {"community": None, "title": label}
        node["properties"].update(community_info)
    return nodes

# map community to a color
def community_to_color(community):
    """Return the display color for a community.

    ``None`` maps to ``"lightgray"``. Any other value is converted with
    ``int()`` and used, modulo the palette length, as an index into a fixed
    palette of nine color names.
    """
    if community is None:
        return "lightgray"

    palette = (
        "crimson",
        "darkorange",
        "indigo",
        "cornflowerblue",
        "cyan",
        "teal",
        "green",
        "gold",
        "brown",
    )
    # Wrap around so that any integer community lands on a palette entry.
    slot = int(community) % len(palette)
    return palette[slot]


def edge_to_source_community(edge):
    """Get the community of the source node of an edge.

    Searches the nodes of the notebook-level widget ``w`` for the node whose
    ``id`` equals ``edge["start"]`` and returns its ``properties["community"]``.

    Args:
        edge: Edge dict with a ``start`` key holding the source node id.

    Returns:
        The source node's community, or ``None`` when no node matches the
        edge's start id or the node carries no community.
    """
    source_node = next(
        (entry for entry in w.nodes if entry["id"] == edge["start"]),
        None,
    )
    # next() falls back to None when no node matches; subscripting that would
    # raise TypeError, so report "no community" instead.
    if source_node is None:
        return None
    return source_node["properties"].get("community")


# Location of the GraphML file to visualize, derived from the dataset name
dataset = "rag"
filepath = f'./{dataset}/graph_chunk_entity_relation.graphml'

# Load the graph and create the GraphWidget instance
G = nx.read_graphml(filepath)
w = GraphWidget(graph=G)

# Run clustering
strategy = {
    "type": "leiden",
    "max_cluster_size": 10,  # maximum cluster size
    "use_lcc": True,         # use only the largest connected component
    "seed": 0xDEADBEEF,      # random seed
    "levels": None,          # use all levels
    "verbose": True          # show logs
}

communities = cluster_graph(G, strategy)
# Put the clustering result into a dataframe and explode "title" so each title
# entry gets its own row.
# NOTE(review): presumably "title" holds the list of node titles per community — confirm against cluster_graph
base_communities = pd.DataFrame(
  communities, columns=pd.Index(["level", "community", "parent", "title"])
).explode("title")
base_communities["community"] = base_communities["community"].astype(int)

w.directed = True
# Merge the level-0 community row (matched by title) into each node's properties
w.nodes = convert_entities_to_dicts(w.get_nodes(), base_communities, level=0)

# show title on the node
w.node_label_mapping = "label"
# color nodes by their community; edges take the color of their source node's community
w.node_color_mapping = lambda node: community_to_color(node["properties"]["community"])
w.edge_color_mapping = lambda edge: community_to_color(edge_to_source_community(edge))
# use weight for edge thickness
w.edge_thickness_factor_mapping = "weight"

# Display the result
w.circular_layout()
w.show()

GraphWidget(layout=Layout(height='800px', width='100%'))

In [125]:
# Spot-check: first node as the widget sees it
display(w.get_nodes()[0])
print("=" * 200)
# Community rows (all levels) whose title is the "HTML" entity
display(base_communities.loc[base_communities["title"].eq('"HTML"')])
print("=" * 200)
# Spot-check: first edge as the widget sees it
display(w.get_edges()[0])

{'id': 0,
 'properties': {'entity_type': '"CATEGORY"',
  'description': '"HTML は、Webページを作成するためのマークアップ言語です。Streamlitでは、HTMLを直接記述する必要がありません。"<SEP>"Webページの構造を作るためのマークアップ言語。Streamlitでは不要。"',
  'source_id': 'chunk-1d8cebb67957c49908664e642b8995c1<SEP>chunk-6c8e35d01b434c6d98c0dd5f33f7ca37',
  'label': '"HTML"',
  'level': 0,
  'community': 4,
  'parent': -1,
  'title': '"HTML"'},
 'color': 'cyan',
 'styles': {},
 'label': '"HTML"',
 'scale_factor': 1.0,
 'type': 'cyan',
 'size': (55.0, 55.0),
 'position': (0.0, 0.0)}



Unnamed: 0,level,community,parent,title
4,0,4,-1,"""HTML"""
20,1,20,4,"""HTML"""




{'id': 0,
 'start': 0,
 'end': 44,
 'properties': {'weight': 9.0,
  'description': '"Streamlitを使うことで、HTMLの知識がなくてもWebアプリを作成できる。"',
  'keywords': '"代替技術"',
  'source_id': 'chunk-6c8e35d01b434c6d98c0dd5f33f7ca37'},
 'color': 'cyan',
 'thickness_factor': 9.0,
 'directed': True,
 'styles': {},
 'label': ''}