In [1]:
from pyvis.network import Network
import pandas as pd
from collections import defaultdict

In [3]:
# Load the triplets table; later cells read its 'head', 'relation' and 'tail' columns.
df = pd.read_csv("triplets.csv")

# Initialise the pyvis graph: dark canvas, white labels, notebook mode enabled.
net = Network(
    height="800px",
    width="100%",
    bgcolor="#222222",
    font_color="white",
    notebook=True,
)
# Turn the physics simulation on for the layout.
net.toggle_physics(True)

# Count node frequencies: every appearance as a head or a tail adds one.
node_freq = defaultdict(int)
# Inferred node type (Paper / Conference / Keyword / Topic); "Unknown" when no
# recognised relation ever mentions the node.
node_type = defaultdict(lambda: "Unknown")

# Recognised relation -> (type of its head node, type of its tail node).
_relation_roles = {
    "published_in": ("Paper", "Conference"),
    "has_keyword": ("Paper", "Keyword"),
    "belongs_to": ("Keyword", "Topic"),
}

for _, triplet in df.iterrows():
    head, relation, tail = triplet['head'], triplet['relation'], triplet['tail']

    for endpoint in (head, tail):
        node_freq[endpoint] += 1

    roles = _relation_roles.get(relation)
    if roles is not None:
        # Head is assigned before tail; a later triplet overwrites an earlier type.
        node_type[head], node_type[tail] = roles

# Display colour for each node type; "Unknown" covers nodes with no inferred type.
color_map = dict(
    Paper="deepskyblue",
    Conference="orange",
    Keyword="lightgreen",
    Topic="violet",
    Unknown="gray",
)




In [4]:
def normalize_size(freq, min_size=10, max_size=40, freqs=None):
    """Linearly rescale a node frequency into the range [min_size, max_size].

    Args:
        freq: Frequency of the node being sized.
        min_size: Size given to the least frequent node.
        max_size: Size given to the most frequent node.
        freqs: Optional iterable of all frequencies that defines the scale.
            When omitted, the values of the notebook-level ``node_freq``
            mapping are used, which matches the original behaviour.

    Returns:
        The interpolated size. If the population is empty or every frequency
        is identical, the midpoint of ``min_size`` and ``max_size`` is
        returned, because no meaningful scale exists in those cases.
    """
    if freqs is None:
        freqs = node_freq.values()
    # Materialise once so one-shot iterators survive both min() and max().
    population = list(freqs)

    midpoint = (min_size + max_size) / 2
    if not population:
        # Nothing to scale against; avoid min()/max() raising on empty input.
        return midpoint

    min_freq = min(population)
    max_freq = max(population)
    if max_freq == min_freq:
        # Degenerate range: would otherwise divide by zero.
        return midpoint
    return min_size + (freq - min_freq) / (max_freq - min_freq) * (max_size - min_size)

In [5]:
# Add every node once (sized by frequency, coloured by inferred type), then
# connect each triplet's head and tail with an edge labelled by the relation.
added_nodes = set()
for _, row in df.iterrows():
    h, r, t = row['head'], row['relation'], row['tail']

    for node in [h, t]:
        if node not in added_nodes:
            size = normalize_size(node_freq[node])
            # Colour by the node type inferred from the relations; nodes with
            # no inferred (or an unmapped) type use the "Unknown" colour.
            color = color_map.get(node_type.get(node, "Unknown"), color_map["Unknown"])
            net.add_node(node, label=node, title=node, size=size, color=color)
            added_nodes.add(node)

    net.add_edge(h, t, label=r)

In [6]:
# Write the graph to an HTML file, then print a confirmation message.
output_html = "knowledge_graph_max_color.html"
net.show(output_html)
print("已生成")

knowledge_graph_max_color_max.html
已生成
