In [None]:
import pandas as pd
from collections import defaultdict 

# Read persona.csv from the working directory; the cells below refer to it as `df`.
df = pd.read_csv(filepath_or_buffer='persona.csv')

# Show the top five rows as the cell output to get a feel for the columns.
df.head(n=5)

In [None]:
# Split every tag cell into a list of keywords so that each keyword can become
# its own node. The ideographic comma "、" is treated like an ASCII comma.
def _split_tags(raw):
    """Return the list of raw tag strings for one cell of the tag column.

    Strings are split on "," after replacing "、" with ",". Any other value
    (a list left by an earlier run of this cell, or a missing value) is
    returned unchanged, which makes re-running the cell harmless.
    """
    if isinstance(raw, str):
        return raw.replace('、', ',').split(',')
    return raw


df['標籤'] = df['標籤'].map(_split_tags)

# Lookup tables in both directions: keyword -> ids and id -> keywords.
keyword_to_ids = defaultdict(list)
id_to_keywords = defaultdict(list)

for row_id, tags in zip(df['id'], df['標籤']):
    # Touch the entry first so an id without usable tags is still recorded.
    keywords_of_id = id_to_keywords[row_id]
    if not isinstance(tags, list):
        continue  # missing tag cell: nothing to split
    for tag in tags:
        keyword = tag.strip()
        if not keyword:
            continue  # empty piece, e.g. produced by a trailing separator
        # The original loop never called .append, leaving every list empty.
        keywords_of_id.append(keyword)
        keyword_to_ids[keyword].append(row_id)

In [None]:
# Inspect the id -> keywords mapping; as the last expression it becomes the cell output.
id_to_keywords


In [None]:
import networkx as nx
import matplotlib.pyplot as plt

# Build an undirected graph with two kinds of nodes: ids and keywords.
G = nx.Graph()

# Add nodes, tagging each with its kind. Ids are added first, so a name that is
# both an id and a keyword ends up with type='keyword'.
G.add_nodes_from(id_to_keywords, type='id')
G.add_nodes_from(keyword_to_ids, type='keyword')

# Add edges: connect every keyword to each id that carries it.
G.add_edges_from(
    (node_id, keyword)
    for keyword, node_ids in keyword_to_ids.items()
    for node_id in node_ids
)

# Draw the graph.
plt.figure(figsize=(20, 20))
# spring_layout starts from random positions; a fixed seed keeps the figure
# identical across "Restart & Run All".
pos = nx.spring_layout(G, k=1, iterations=40, seed=42)
# Materialise the key views as lists so nodelist is a plain sequence.
nx.draw_networkx_nodes(G, pos, nodelist=list(id_to_keywords), node_color='lightblue', label='id')
nx.draw_networkx_nodes(G, pos, nodelist=list(keyword_to_ids), node_color='lightgreen', label='keyword')
nx.draw_networkx_edges(G, pos, width=2.0, alpha=0.5)
# NOTE(review): labels rely on the 'SimSun' font being installed; confirm on the target machine.
nx.draw_networkx_labels(G, pos, font_size=16, font_family='SimSun')

plt.axis('off')
plt.legend(scatterpoints=1)
plt.show()


In [None]:
import networkx as nx
import matplotlib.pyplot as plt
from community import community_louvain

# Partition the graph into communities. random_state is fixed so that reruns
# produce the same node -> community assignment.
partition = community_louvain.best_partition(G, random_state=42)

# Community index of every node, used as the colour value when drawing.
community_colors = {node: partition[node] for node in G.nodes()}
values = [community_colors[node] for node in G.nodes()]

# Scale node size with degree; max(..., 1) keeps a degree-0 node visible
# instead of giving it size 0.
degree_by_node = dict(G.degree())
node_sizes = [100 * max(degree_by_node[node], 1) for node in G.nodes()]

# Fixed seed: the layout is otherwise different on every run.
pos = nx.spring_layout(G, k=0.15, iterations=40, seed=42)

plt.figure(figsize=(30, 30))
nx.draw_networkx_edges(G, pos, alpha=0.5)
nx.draw_networkx_nodes(G, pos, node_color=values, node_size=node_sizes, cmap=plt.cm.jet)
# NOTE(review): labels rely on the 'SimSun' font being installed; confirm on the target machine.
nx.draw_networkx_labels(G, pos, font_size=20, font_family='SimSun')

plt.axis('off')
plt.show()