# Social Network

node: 用戶（為了簡化整個網路，只保留並繪出 degree > 2 的節點）
edge: 由發文者指向被提及用戶（`mentionedUsers`，即回覆貼文或引用貼文的對象）的有向邊

In [6]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from tqdm import tqdm

# Load the user and tweet tables.
user_df = pd.read_csv('users.csv')
tweet_df = pd.read_csv('tweets.csv')

# Join tweets to their authors on userId so each tweet row carries the author's username.
merged_df = pd.merge(tweet_df, user_df, how="inner", on="userId")

# Directed graph: an edge points from a tweet's author to each user the tweet mentions.
G = nx.DiGraph()

# One node per user. Iterating the column directly avoids the per-row Series
# that DataFrame.iterrows() has to build.
for username in tqdm(user_df['username'], total=len(user_df), desc="Adding nodes"):
    G.add_node(username)

# Only tweets that actually mention someone can contribute edges.
mention_rows = merged_df.loc[merged_df['mentionedUsers'].notna(), ['username', 'mentionedUsers']]

# NOTE(review): assumes `mentionedUsers` is a ';'-separated string of usernames —
# confirm against the real column format and adjust the parsing if it differs.
for author, mentions in tqdm(
    zip(mention_rows['username'], mention_rows['mentionedUsers']),
    total=len(mention_rows),
    desc="Adding edges",
):
    for mentioned_user in mentions.split(';'):
        # Strip padding and skip empty tokens so a trailing ';' or "a; b"
        # does not create spurious nodes such as '' or ' b'.
        mentioned_user = mentioned_user.strip()
        if mentioned_user:
            G.add_edge(author, mentioned_user)


Adding nodes: 100%|█████████████████████████████████████████████████████████| 235660/235660 [00:08<00:00, 28077.89it/s]
Adding edges: 100%|███████████████████████████████████████████████████████| 2524900/2524900 [01:43<00:00, 24374.59it/s]


In [8]:
# Simplify the network: keep only nodes whose degree is greater than 2.
degree = dict(G.degree())
to_keep = [node for node, node_degree in degree.items() if node_degree > 2]
G2 = G.subgraph(to_keep)

# Optional preview using the Fruchterman-Reingold force-directed layout
# (left disabled, as in the original cell).
#pos = nx.spring_layout(G2, seed=42)
#plt.figure(figsize=(10, 10))

# Draw nodes and edges separately.
#nx.draw_networkx_nodes(G2, pos, node_size=20)
#nx.draw_networkx_edges(G2, pos, alpha=0.1)

#plt.axis("off")
#plt.show()

# Social Community

使用 Louvain 演算法進行社群偵測（community detection）。

In [None]:
import community as community_louvain

# Louvain operates on undirected graphs, so drop edge direction first.
G2_un = G2.to_undirected()

# Run the Louvain heuristic; the result maps each node to a community id.
# random_state pins the run so repeated executions give the same partition.
partition = community_louvain.best_partition(G2_un, random_state=42)

# Lay out and draw the graph that was actually partitioned (G2_un), not the
# full graph G: the partition only covers G2's nodes, and laying out every
# node of G is far more work than the simplified graph needs.
pos = nx.spring_layout(G2_un, seed=42)

# Keep nodes and their colours in one matching order.
community_nodes = list(partition)
community_ids = [partition[node] for node in community_nodes]

# One discrete colour per community. plt.get_cmap is used because `cm` was
# never imported in this notebook.
cmap = plt.get_cmap('viridis', max(community_ids) + 1)

plt.figure(figsize=(10, 10))
nx.draw_networkx_nodes(
    G2_un,
    pos,
    nodelist=community_nodes,
    node_size=100,
    cmap=cmap,
    node_color=community_ids,
)
nx.draw_networkx_edges(G2_un, pos, alpha=0.5)
plt.axis("off")
plt.show()

# Degree Centrality

In [None]:
# Degree centrality of every node in the full graph G.
degree_centrality = nx.degree_centrality(G)

# Histogram of the centrality values.
centrality_values = list(degree_centrality.values())
fig, ax = plt.subplots()
ax.hist(centrality_values)
ax.set_title('Degree Centrality')
plt.show()