### 2. 随机颜色（不考虑流动类型）

In [5]:
import pandas as pd
import matplotlib.colors as mcolors
import plotly.graph_objects as go

# 1. Paths (edit these to suit your setup)
data_path = './data.xlsx'        # Excel workbook to read
out_html = './my_sankey.html'    # HTML file the figure is written to

# 2. Load the data; the code below reads its 'source', 'target' and 'num' columns
df = pd.read_excel(data_path, engine='openpyxl')

# 3. Node list & name -> index mapping.
#    Sources are stacked on top of targets and de-duplicated, so every node
#    name gets exactly one integer index.
endpoint_names = pd.concat([df['source'], df['target']])
all_nodes = list(endpoint_names.unique())
node_map = dict(zip(all_nodes, range(len(all_nodes))))


# 5. The three parallel Sankey inputs: source index, target index, link value
src_col, dst_col = df['source'], df['target']
sources = src_col.map(node_map)
targets = dst_col.map(node_map)
values = df['num']

# 6. Gradient link colours (same as the template)
C0 = '#ff7f0e'   # colour at the 0.0 end of the gradient
mid = '#d3d3d3'  # colour at the midpoint
C1 = '#1f77b4'   # colour at the 1.0 end of the gradient
cmap = mcolors.LinearSegmentedColormap.from_list(name='cool', colors=[C0, mid, C1])
alpha = 0.825    # opacity used for link colours
def remove_prefix(keyword):
    """Return *keyword* without a leading 's_', 'p_' or 't_' tag.

    Only one two-character tag is stripped; strings that do not start with
    one of the three tags are returned unchanged.
    """
    if keyword.startswith(('s_', 'p_', 't_')):
        return keyword[2:]
    return keyword
def grad_color(val):
    """Sample the module-level ``cmap`` at *val* and format it as an rgba() string.

    The colour channels are scaled to 0-255 and truncated with ``int``; the
    colormap's own alpha is discarded in favour of the module-level ``alpha``.
    """
    red, green, blue, _unused_alpha = cmap(val)
    channels = [int(channel * 255) for channel in (red, green, blue)]
    return f'rgba({channels[0]},{channels[1]},{channels[2]},{alpha})'

# 7. One colour per link (random gradient samples as an example; swap in your own logic)
import random

random.seed(42)  # fixed seed so repeated runs produce the same colours
gradient_positions = [random.random() for _ in range(len(df))]
link_colors = [grad_color(position) for position in gradient_positions]

# 8. Node labels & colours: labels drop the s_/p_/t_ tag, every node is the same grey
node_labels = list(map(remove_prefix, all_nodes))
node_colors = ['rgba(128,128,128,0.75)' for _ in all_nodes]

# 9. Draw the diagram
node_style = dict(
    pad=15,
    thickness=30,
    line=dict(color='black', width=0),  # zero width: no visible node outline
    label=node_labels,
    color=node_colors,
)
link_style = dict(
    source=sources,
    target=targets,
    value=values,
    color=link_colors,
)
fig = go.Figure(go.Sankey(node=node_style, link=link_style))

# 10. Three column titles across the top (same as the template)
column_titles = ('Urban Science', 'Real-World Problems', 'Urban Technology')
column_positions = (0.025, 0.5, 0.975)  # paper-relative x coordinates
for text, x in zip(column_titles, column_positions):
    fig.add_annotation(
        text=text,
        x=x,
        y=1.0475,  # slightly above the plotting area
        xref='paper',
        yref='paper',
        showarrow=False,
        font=dict(size=45, color='black'),
    )

# 11. Layout: large canvas, serif font, fully transparent backgrounds
layout_options = dict(
    font=dict(family='Times New Roman', size=36, color='black'),
    width=2000,
    height=2000,
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
)
fig.update_layout(**layout_options)

# 12. Save & display
fig.write_html(out_html)
print('已生成', out_html)
fig.show()  # comment this out when running in an environment without a GUI

已生成 ./my_sankey.html


### 1. 按流动A占比划分颜色

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import pickle
from collections import defaultdict
import matplotlib.colors as mcolors
import plotly.graph_objects as go

# 1. Paths (edit these to suit your setup)
data_path = './data.xlsx'        # Excel workbook to read
out_html = './my_sankey.html'    # HTML file the figure is written to

# 2. Load the data
df = pd.read_excel(data_path, engine='openpyxl')

# Result dict: (source, target) -> {"NSF", "NSFC", "num"} integer counts.
# If the same (source, target) pair occurs on several rows, the last row wins.
flow_counts = {
    (src, dst): {"NSF": int(nsf), "NSFC": int(nsfc), "num": int(num)}
    for src, dst, nsf, nsfc, num in zip(
        df["source"], df["target"], df["NSF"], df["NSFC"], df["num"]
    )
}



# Gradient colours: C0 at the 0.0 end, middle_color at the midpoint, C1 at the 1.0 end
C0 = '#ff7f0e'
middle_color = "#d3d3d3"
C1 = '#1f77b4'

# Build a custom continuous (linear-segmented) colormap named "coolwarm_custom"
# from the three colours; later code samples it with values in 0..1 to get RGBA.
colors = [C0, middle_color, C1]
cmap_custom = mcolors.LinearSegmentedColormap.from_list(
    name="coolwarm_custom",
    colors=colors,
)

# De-duplicate the node names while keeping first-appearance order (all
# sources, then all targets). A plain set() would also de-duplicate, but its
# iteration order for strings can change from one interpreter run to the next,
# which would make the node indices (and the rendered node order) unstable.
keywords = list(dict.fromkeys(
    [kw1 for kw1, _ in flow_counts] + [kw2 for _, kw2 in flow_counts]
))
# Node name -> integer index expected by the Sankey source/target arrays
keyword_index = {kw: idx for idx, kw in enumerate(keywords)}

# Parallel per-link arrays for the Sankey trace
sources = []
targets = []
values = []
colors = []  # one rgba string per link, appended by the colouring loop below
# Per-node running totals of NSF / NSFC counts over every link touching the node
node_flow_counts = defaultdict(lambda: {"NSF": 0, "NSFC": 0})

for (kw1, kw2), count_dict in flow_counts.items():
    sources.append(keyword_index[kw1])
    targets.append(keyword_index[kw2])
    # The link width is the row's "num" total. Summing every entry of
    # count_dict would add "num" on top of "NSF" and "NSFC" and inflate the
    # flow, so read the total directly.
    total_count = count_dict["num"]
    values.append(total_count)

    # Both endpoints of the link accumulate its NSF / NSFC counts
    for endpoint in (kw1, kw2):
        node_flow_counts[endpoint]["NSF"] += count_dict["NSF"]
        node_flow_counts[endpoint]["NSFC"] += count_dict["NSFC"]

cmap = cmap_custom
alpha = 0.825

def get_gradient_color(val, alpha=alpha):
    """Sample the module-level ``cmap`` at *val* and return ``(r, g, b, alpha)``.

    The first three components come from the colormap unchanged; the fourth
    is the *alpha* argument, whose default is the module-level ``alpha``
    value at the time this function is defined.
    """
    sampled = cmap(val)
    red, green, blue = sampled[0], sampled[1], sampled[2]
    return (red, green, blue, alpha)

def remove_prefix(keyword):
    """Strip one leading 's_', 'p_' or 't_' tag from *keyword*, if present."""
    has_tag = keyword.startswith(('s_', 'p_', 't_'))
    return keyword[2:] if has_tag else keyword

def _nsf_share(keyword):
    """Return NSF / (NSF + NSFC) for the node's accumulated counts, or 0 when both are zero."""
    tally = node_flow_counts[keyword]
    total = tally["NSF"] + tally["NSFC"]
    return tally["NSF"] / total if total != 0 else 0


# Colour each link by the mean NSF share of its two endpoint nodes
for kw1, kw2 in flow_counts:
    avg_ratio = (_nsf_share(kw1) + _nsf_share(kw2)) / 2

    color_rgba = get_gradient_color(avg_ratio)
    # Colour channels are scaled to the 0-255 range; the alpha component is used as-is
    red, green, blue = (channel * 255 for channel in color_rgba[:3])
    colors.append(f'rgba({red}, {green}, {blue}, {color_rgba[3]})')

# Display labels (tag stripped) and a uniform grey colour, one entry per node
nodes_with_group = [remove_prefix(kw) for kw in keywords]
node_colors = ["rgba(128, 128, 128, 0.75)" for _ in keywords]



# Assemble the Sankey trace from the node and link specifications
node_spec = dict(
    pad=15,
    thickness=30,
    line=dict(color="black", width=0),  # zero width: no visible node outline
    label=nodes_with_group,
    color=node_colors,
)
link_spec = dict(source=sources, target=targets, value=values, color=colors)
fig = go.Figure(go.Sankey(node=node_spec, link=link_spec))

# Three column titles placed just above the plot, in paper-relative coordinates
for title_text, title_x in (
    ("Urban Science", 0.025),
    ("Real-World Problems", 0.5),
    ("Urban Technology", 0.975),
):
    fig.add_annotation(
        text=title_text,
        xref="paper", yref="paper",
        x=title_x, y=1.0475,
        showarrow=False,
        font=dict(size=45, color="black"),
    )

# Layout: hidden axis tick labels, large canvas, serif font, transparent backgrounds
hidden_ticks = dict(showticklabels=False)
layout_options = dict(
    xaxis=hidden_ticks,
    yaxis=dict(hidden_ticks),
    font=dict(family="Times New Roman", size=36, color="black"),
    width=2000,
    height=2000,
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
)
fig.update_layout(**layout_options)

# Write the figure to the HTML file named by out_html
fig.write_html(out_html)