In [None]:
# 不同资源分配策略，结构指标对比
import pandas as pd
import matplotlib.pyplot as plt
import os

# Workbook holding one row per allocation strategy and one column per grade.
file_path = r"C:\Users\Administrator\Desktop\network_generate1\distribution\A-test\distribution_results.xlsx"

# Destination of the rendered figure (an absolute path, independent of file_path).
output_path = r'c:\Users\Administrator\Desktop\network_generate1\plots\distribution_plot.pdf'

# Grade columns to plot; the same list drives validation and drawing so the
# two can never drift apart.
grades = ['grade=1', 'grade=2', 'grade=3']
required_columns = ['Strategy'] + grades
colors = ['#D2CBB0', '#ff7f0e', '#D80032']  # one colour per grade column

# Load the data; abort with a non-zero status if the workbook cannot be read.
try:
    df = pd.read_excel(file_path)
except Exception as e:
    print(f"读取文件出错: {e}")
    raise SystemExit(1) from e

# Validate the expected columns before touching matplotlib.
if not all(col in df.columns for col in required_columns):
    print("列名不匹配，请检查数据文件")
    print("现有列名:", df.columns.tolist())
    raise SystemExit(1)

# Create the figure and style the axes: white background, thin black frame.
plt.figure(figsize=(10, 6))
ax = plt.gca()
ax.set_facecolor('white')
for spine in ax.spines.values():
    spine.set_color('black')
    spine.set_linewidth(0.8)

# Grouped bar chart: each strategy gets one bar per grade, side by side.
strategies = df['Strategy']
bar_width = 0.25
x_pos = range(len(strategies))

for i, grade in enumerate(grades):
    ax.bar([x + i * bar_width for x in x_pos],
           df[grade],
           width=bar_width,
           label=grade.replace('grade=', 'Grade '),
           color=colors[i],
           edgecolor='black',
           linewidth=0.8)

# Put each tick under the centre of its bar group, whatever the number of grades.
group_center = bar_width * (len(grades) - 1) / 2
ax.set_xticks([x + group_center for x in x_pos])
ax.set_xticklabels(strategies, rotation=45, ha='right')
ax.set_ylabel('Lost patients(LP)', fontsize=12)

# No grid lines, frameless legend.
ax.grid(False)
ax.legend(frameon=False, fontsize=10)

plt.tight_layout()

# Make sure the target folder exists; savefig does not create it.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path, dpi=300, bbox_inches='tight', facecolor='white')

print(f"图表已保存至: {output_path}")
plt.show()

In [None]:
# 计算各级转诊的转诊量占比
import pandas as pd
import numpy as np
from pathlib import Path

def calculate_referral_ratios(edges_df, nodes_df):
    """
    Compute the share of referral volume for every (source grade, target grade) pair.

    Each edge's ``new_weight`` is attributed to the grade of its ``Source``
    node and the grade of its ``Target`` node, looked up in ``nodes_df``
    (columns ``Node`` and ``grade``).

    Returns a dict keyed '11', '12', ..., '33' (source grade followed by
    target grade) whose values are that pair's weight divided by the total
    ``new_weight`` of all edges. When the total is zero every value is 0.
    """
    grade_lookup = nodes_df[['Node', 'grade']]

    # Attach the grade of the source node, then the grade of the target node.
    with_source = edges_df.merge(
        grade_lookup, left_on='Source', right_on='Node', how='left'
    )
    with_source = with_source.rename(columns={'grade': 'source_grade'})

    flows = with_source.merge(
        grade_lookup, left_on='Target', right_on='Node', how='left'
    )
    flows = flows.rename(columns={'grade': 'target_grade'})

    grade_levels = (1, 2, 3)
    grade_pairs = [(src, dst) for src in grade_levels for dst in grade_levels]

    # The denominator covers every edge, including those with an unknown grade.
    grand_total = flows['new_weight'].sum()
    if grand_total == 0:
        return {f"{src}{dst}": 0 for src, dst in grade_pairs}

    weights = flows['new_weight']
    source_grades = flows['source_grade']
    target_grades = flows['target_grade']

    return {
        f"{src}{dst}":
            weights[(source_grades == src) & (target_grades == dst)].sum() / grand_total
        for src, dst in grade_pairs
    }

def process_all_simulations(base_dir=r"c:\Users\Administrator\Desktop\network_generate1\distribution\model_results(top5_outdegree)", num_simulations=10):
    """
    Average the referral-ratio matrix over all available simulation runs.

    Run ``k`` is read from the folder ``f"{base_dir}({k})"`` which must hold
    ``edges.csv`` and ``nodes.csv``; runs missing either file are reported
    and skipped.

    Returns:
        final_ratios: dict mapping '11'..'33' to the mean ratio over the runs read
        results_df: DataFrame with one row per run (ratio columns plus 'simulation')

    Raises:
        ValueError: if no run could be read.
    """
    ratio_keys = ['11', '12', '13', '21', '22', '23', '31', '32', '33']
    per_simulation = []

    for sim_num in range(1, num_simulations + 1):
        sim_dir = Path(f"{base_dir}({sim_num})")
        edges_file = sim_dir / "edges.csv"
        nodes_file = sim_dir / "nodes.csv"

        if not (edges_file.exists() and nodes_file.exists()):
            print(f"警告: 模拟{sim_num}数据不完整，跳过")
            continue

        # Ratios for this run, tagged with the run number.
        record = calculate_referral_ratios(
            pd.read_csv(edges_file),
            pd.read_csv(nodes_file),
        )
        record['simulation'] = sim_num
        per_simulation.append(record)

    if len(per_simulation) == 0:
        raise ValueError("未找到任何有效的模拟数据")

    results_df = pd.DataFrame(per_simulation)

    # Column-wise mean over the runs; the 'simulation' column is dropped afterwards.
    column_means = results_df.mean().to_dict()
    final_ratios = {}
    for name, avg in column_means.items():
        if name in ratio_keys:
            final_ratios[name] = avg

    return final_ratios, results_df

# Example usage
if __name__ == "__main__":
    # Aggregate the referral ratios over all simulation runs.
    final_ratios, detailed_results = process_all_simulations()

    # Report the number of runs actually read rather than assuming 10,
    # because incomplete runs are skipped.
    print("="*50)
    print(f"{len(detailed_results)}次模拟的平均转诊比例:")
    for key, value in final_ratios.items():
        print(f"{key}: {value:.4f} ({value*100:.2f}%)")

    print("\n" + "="*50)
    print("各次模拟详细结果:")
    print(detailed_results.to_string(index=False))

    # Save the results; keep the paths in variables so the confirmation
    # message always names the files that were really written.
    final_path = Path(r"c:\Users\Administrator\Desktop\network_generate1\distribution\B-test\final_referral_ratios4.csv")
    detailed_path = Path(r"c:\Users\Administrator\Desktop\network_generate1\distribution\B-test\detailed_referral_results4.csv")
    final_path.parent.mkdir(parents=True, exist_ok=True)
    detailed_path.parent.mkdir(parents=True, exist_ok=True)

    pd.DataFrame([final_ratios]).to_csv(final_path, index=False)
    detailed_results.to_csv(detailed_path, index=False)

    print("\n结果已保存为:")
    print(f"- {final_path}")
    print(f"- {detailed_path}")

In [None]:
# 计算转诊总数
import pandas as pd
from pathlib import Path

def calculate_total_referrals(edges_df):
    """Return the sum of the ``new_weight`` column of one run's edge table."""
    referral_weights = edges_df['new_weight']
    return referral_weights.sum()

def process_simulations(base_dir=r"c:\Users\Administrator\Desktop\network_generate1\distribution\model_results(0.01)", num_simulations=10):
    """
    Compute the total referral volume of every run and the mean over runs.

    Run ``k`` is read from ``f"{base_dir}({k})/edges.csv"``; runs whose file
    is missing are reported and skipped.

    Returns:
        average: mean of the per-run totals
        results_df: DataFrame with columns 'simulation' and 'total_referrals'

    Raises:
        ValueError: if no run could be read.
    """
    records = []

    for sim_num in range(1, num_simulations + 1):
        edges_file = Path(f"{base_dir}({sim_num})") / "edges.csv"

        if edges_file.exists():
            # Load the edge table and record this run's total.
            run_total = calculate_total_referrals(pd.read_csv(edges_file))
            records.append({'simulation': sim_num, 'total_referrals': run_total})
        else:
            print(f"警告: {edges_file} 不存在，跳过")

    if len(records) == 0:
        raise ValueError("未找到任何有效的模拟数据")

    results_df = pd.DataFrame(records)
    return results_df['total_referrals'].mean(), results_df

# Example usage
if __name__ == "__main__":
    avg_referrals, detailed_results = process_simulations()

    # Report the number of runs actually read rather than assuming 10,
    # because runs with a missing edges.csv are skipped.
    print("="*50)
    print(f"{len(detailed_results)}次模拟的平均总转诊患者数: {avg_referrals:.2f}")
    print("\n各次模拟结果:")
    print(detailed_results.to_string(index=False))

    # Writing the results to disk is currently disabled, so no "saved"
    # confirmation is printed. Uncomment the lines below to persist them.
    # detailed_results.to_csv("referral_totals.csv", index=False)
    # with open("average_referrals.txt", "w", encoding="utf-8") as f:
    #     f.write(f"平均总转诊患者数: {avg_referrals:.2f}")

In [None]:
# 处理所有模拟文件夹并计算每个grade的最终LP均值
import pandas as pd
import os
from pathlib import Path

def process_all_simulations(base_dir=r"c:\Users\Administrator\Desktop\network_generate1\distribution\model_results(top5_outdegree)",num_simulations=10):
    """
    Average the LP column per grade over all available simulation runs.

    Parameters:
        base_dir: path prefix of the run folders; run ``k`` is read from
            ``f"{base_dir}({k})/nodes.csv"``
        num_simulations: number of runs to look for (default 10)

    Returns:
        final_results: DataFrame with columns 'grade' and 'mean_LP', the mean
            LP of all node rows of that grade pooled across runs
        all_results: DataFrame with one row per run: 'simulation' plus one
            'grade_<g>' column holding that run's mean LP for grade g

    Raises:
        ValueError: if no nodes.csv could be read.
    """
    frames = []

    for sim_num in range(1, num_simulations + 1):
        nodes_file = Path(f"{base_dir}({sim_num})") / "nodes.csv"

        if nodes_file.exists():
            # Tag every node row with the run it came from.
            frames.append(pd.read_csv(nodes_file).assign(simulation=sim_num))
        else:
            print(f"警告: {nodes_file} 不存在，跳过")

    if len(frames) == 0:
        raise ValueError("未找到任何有效的模拟数据")

    pooled = pd.concat(frames)

    # Mean LP per grade over the pooled node rows of all runs.
    pooled_grade_means = pooled.groupby('grade')['LP'].mean()
    final_results = pooled_grade_means.rename('mean_LP').reset_index()

    # Mean LP per (run, grade), pivoted so that each grade becomes a column.
    per_run_means = pooled.groupby(['simulation', 'grade'])['LP'].mean()
    wide = per_run_means.unstack()
    all_results = wide.add_prefix('grade_').reset_index()

    return final_results, all_results

# Example usage
if __name__ == "__main__":
    # Aggregate the LP values over all simulation runs.
    final_means, detailed_results = process_all_simulations()

    # Report the number of runs actually read rather than assuming 10,
    # because runs with a missing nodes.csv are skipped.
    print("="*50)
    print(f"{len(detailed_results)}次模拟的最终LP均值（按grade分组）:")
    print(final_means.to_string(index=False))

    print("\n" + "="*50)
    print("各次模拟详细结果:")
    print(detailed_results.to_string(index=False))

    # Save the results; keep the paths in variables so the confirmation
    # message always names the files that were really written.
    final_path = Path(r"c:\Users\Administrator\Desktop\network_generate1\distribution\A-test\final_grade_LP_means4.csv")
    detailed_path = Path(r"c:\Users\Administrator\Desktop\network_generate1\distribution\A-test\detailed_simulation_results4.csv")
    final_path.parent.mkdir(parents=True, exist_ok=True)
    detailed_path.parent.mkdir(parents=True, exist_ok=True)

    final_means.to_csv(final_path, index=False)
    detailed_results.to_csv(detailed_path, index=False)

    print("\n结果已保存为:")
    print(f"- {final_path}")
    print(f"- {detailed_path}")

In [None]:
# T=500,模拟10次，先合并网络，再筛选保留前5%的连边，保持连边数与单次模拟结果一致
import pandas as pd
import os
from tqdm import tqdm

def _normalize_edge_columns(edges):
    """Map alternative edge column spellings onto the canonical names.

    ``source`` / ``target`` / ``weight_normalized`` become
    ``Source`` / ``Target`` / ``weight``. An alias is left untouched when the
    canonical column already exists, because renaming it would produce two
    columns with the same label.
    """
    aliases = {
        'source': 'Source',
        'target': 'Target',
        'weight_normalized': 'weight',
    }
    renames = {
        alias: canonical
        for alias, canonical in aliases.items()
        if alias in edges.columns and canonical not in edges.columns
    }
    return edges.rename(columns=renames)


def merge_networks_global_top5(base_path, sim_count=10, top_percent=5):
    """Merge several simulation runs into one network and keep the heaviest edges.

    Run ``i`` (1..sim_count) is read from ``<base_path>/T500(i)/nodes.csv``
    and ``<base_path>/T500(i)/edges.csv``.

    Nodes: the static columns (Node, grade, cure_rate, location) are taken
    from the first run; every other node column is averaged per ``Node``
    across the runs.

    Edges: rows are grouped by (Source, Target); ``weight`` is averaged over
    the runs in which the edge occurs, ``distance`` and ``preference`` keep
    their first value. Only the ``top_percent`` % heaviest merged edges are
    kept, where the percentage is taken of the edge count of run 1.

    Args:
        base_path: folder containing the ``T500(i)`` run folders.
        sim_count: number of runs to merge.
        top_percent: percentage of run 1's edge count to retain.

    Returns:
        (merged_nodes, final_edges) as DataFrames. Both are also written to
        ``<base_path>/merged_network_global_top5/{nodes,edges}.csv``.

    Raises:
        ValueError: if an edge file lacks a required column.
        KeyError: if a node file lacks one of the expected node columns.
    """
    # Prepare the output folder.
    output_dir = os.path.join(base_path, "merged_network_global_top5")
    os.makedirs(output_dir, exist_ok=True)
    print(f"结果将保存至: {output_dir}")

    required_edge_cols = ['Source', 'Target', 'weight', 'distance', 'preference']

    # Use run 1 to size the result and to validate the schema. The same
    # column normalisation as for the other runs is applied first, so a file
    # with alias column names is not rejected here and accepted below.
    sample_edges = _normalize_edge_columns(
        pd.read_csv(os.path.join(base_path, "T500(1)/edges.csv"))
    )
    for col in required_edge_cols:
        if col not in sample_edges.columns:
            raise ValueError(f"边数据必须包含列: {col}")
    original_edge_count = len(sample_edges)
    target_edge_count = int(original_edge_count * top_percent / 100)

    # ========== Nodes ==========
    print("\n正在合并节点数据...")
    static_cols = ['Node', 'grade', 'cure_rate', 'location']
    numeric_cols = [
        'visit_limit', 'VP_initial', 'initial_distance',
        'VP_referral', 'referral_distance', 'LP',
        'cure_LP', 'free_capacity'
    ]
    # Output column order: static attributes followed by the averaged ones.
    node_columns = static_cols + numeric_cols

    all_nodes = []
    for i in tqdm(range(1, sim_count+1), desc="加载节点"):
        nodes = pd.read_csv(os.path.join(base_path, f"T500({i})/nodes.csv"))
        all_nodes.append(nodes[node_columns])

    # Average per Node id, not per row position: adding the per-run Series
    # directly would align on the positional index and mix up nodes whenever
    # the row order differs between runs.
    dynamic_means = (
        pd.concat(all_nodes, ignore_index=True)
        .groupby('Node')[numeric_cols]
        .mean()
    )
    merged_nodes = all_nodes[0][static_cols].copy()
    merged_nodes = merged_nodes.join(dynamic_means, on='Node')

    # ========== Edges ==========
    print("\n正在合并边数据...")
    all_edges = []
    for i in tqdm(range(1, sim_count+1), desc="加载边数据"):
        edges = _normalize_edge_columns(
            pd.read_csv(os.path.join(base_path, f"T500({i})/edges.csv"))
        )

        missing_cols = set(required_edge_cols) - set(edges.columns)
        if missing_cols:
            raise ValueError(f"边数据{i}缺少列: {missing_cols}")

        all_edges.append(edges[required_edge_cols])

    global_edges = pd.concat(all_edges)

    # NOTE(review): 'mean' averages over the runs in which an edge occurs,
    # not over sim_count, so an edge seen in a single run is not down-weighted.
    merged_edges = global_edges.groupby(['Source', 'Target']).agg({
        'weight': 'mean',
        'distance': 'first',
        'preference': 'first'
    }).reset_index()

    # Keep the heaviest edges, in the original column order.
    final_edges = merged_edges.nlargest(target_edge_count, 'weight')
    final_edges = final_edges[required_edge_cols]

    # ========== Save ==========
    nodes_path = os.path.join(output_dir, "nodes.csv")
    edges_path = os.path.join(output_dir, "edges.csv")

    merged_nodes[node_columns].to_csv(nodes_path, index=False)
    final_edges.to_csv(edges_path, index=False)

    print(f"\n合并完成！")
    print(f"节点数: {len(merged_nodes)} | 保留边数: {len(final_edges)}/{original_edge_count}")
    print(f"节点文件: {nodes_path}")
    print(f"边文件: {edges_path}")

    return merged_nodes, final_edges

if __name__ == "__main__":
    base_path = r"c:\Users\Administrator\Desktop\network_generate\network_generate"
    merge_networks_global_top5(base_path)

In [None]:
# 稳定网络的拓扑分析，指标计算并作图
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

# ========== Step 1: read the node and edge tables ==========
node_file = r'c:\Users\Administrator\Desktop\network_generate1\T500\T500(11)\nodes.csv'
edge_file = r'c:\Users\Administrator\Desktop\network_generate1\T500\T500(11)\edges.csv'

nodes_df = pd.read_csv(node_file)
edges_df = pd.read_csv(edge_file)

# ========= Build the directed graph =========
G = nx.DiGraph()
node_attr_names = ['grade', 'VP_initial', 'initial_distance',
                   'VP_referral', 'referral_distance', 'LP', 'visit_limit']
for _, node_row in nodes_df.iterrows():
    G.add_node(node_row['Node'], **{name: node_row[name] for name in node_attr_names})

for _, edge_row in edges_df.iterrows():
    # Accept both capitalised and lower-case endpoint column names.
    src_key = 'Source' if 'Source' in edge_row else 'source'
    dst_key = 'Target' if 'Target' in edge_row else 'target'
    G.add_edge(edge_row[src_key], edge_row[dst_key],
               weight=edge_row['weight'], distance=edge_row['distance'])


# ========== Step 2: mean distance of each node's outgoing / incoming edges ==========
def _mean_edge_distance(edge_list):
    """Mean 'distance' attribute of the given edges, or 0 when there are none."""
    return np.mean([attrs['distance'] for _, _, attrs in edge_list]) if edge_list else 0


avg_out_dist = {n: _mean_edge_distance(list(G.out_edges(n, data=True))) for n in G.nodes()}
avg_in_dist = {n: _mean_edge_distance(list(G.in_edges(n, data=True))) for n in G.nodes()}

nx.set_node_attributes(G, avg_out_dist, 'avg_out_distance')
nx.set_node_attributes(G, avg_in_dist, 'avg_in_distance')


# ========== Step 3: per-grade averages of every indicator ==========
def safe_mean(values):
    """Mean of a list rounded to 2 decimals; 0.0 for an empty list."""
    return round(np.mean(values), 2) if values else 0.0


def _one_if_zero(value):
    """Replace a zero denominator by 1."""
    return value if value != 0 else 1


# The values collected for each grade below follow the order of '属性'.
results = {
    '属性': ['出度', '入度', '度', '初诊范围', '转诊范围', '损失患者数', '就诊容量', '加权出度', '加权入度', '出度平均距离', '入度平均距离'],
    'grade=1': [],
    'grade=2': [],
    'grade=3': []
}

for grade in [1, 2, 3]:
    column = f'grade={grade}'
    grade_nodes = [n for n in G.nodes() if G.nodes[n]['grade'] == grade]
    if not grade_nodes:
        # No node of this grade: fill the column with placeholders.
        results[column] = [None] * len(results['属性'])
        continue

    out_degrees = [G.out_degree(n) for n in grade_nodes]
    in_degrees = [G.in_degree(n) for n in grade_nodes]
    degrees = [G.degree(n) for n in grade_nodes]
    weighted_out = [sum(attrs['weight'] for _, _, attrs in G.out_edges(n, data=True)) for n in grade_nodes]
    weighted_in = [sum(attrs['weight'] for _, _, attrs in G.in_edges(n, data=True)) for n in grade_nodes]

    # Distance per patient; a zero patient count is replaced by 1.
    initial_areas = [
        G.nodes[n]['initial_distance'] / _one_if_zero(G.nodes[n]['VP_initial'])
        for n in grade_nodes
    ]
    referral_areas = [
        G.nodes[n]['referral_distance'] / _one_if_zero(G.nodes[n]['VP_referral'])
        for n in grade_nodes
    ]

    LPs = [G.nodes[n]['LP'] for n in grade_nodes]
    visit_limits = [G.nodes[n]['visit_limit'] for n in grade_nodes]
    avg_out_dists = [G.nodes[n]['avg_out_distance'] for n in grade_nodes]
    avg_in_dists = [G.nodes[n]['avg_in_distance'] for n in grade_nodes]

    results[column].extend([
        safe_mean(out_degrees),
        safe_mean(in_degrees),
        safe_mean(degrees),
        safe_mean(initial_areas),
        safe_mean(referral_areas),
        safe_mean(LPs),
        safe_mean(visit_limits),
        safe_mean(weighted_out)/500,  # weighted degrees are scaled down by 500
        safe_mean(weighted_in)/500,
        safe_mean(avg_out_dists),
        safe_mean(avg_in_dists),
    ])
# ========== Plotting ==========

plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Indicators to plot, in display order, mapped to their English titles.
metric_names = {
    '入度': 'In-degree',
    '加权入度': 'Received patient volume',
    '出度': 'Out-degree',
    '加权出度': 'Number of patients referral',
    '度': 'Average degree',
    '初诊范围': 'Initial consultation area',
    '入度平均距离': 'Referral service range',  # mean in-edge distance stands in for the referral service range
    '损失患者数': 'Lost patients (LP)'
}
selected_metrics = list(metric_names.values())

# Translate the indicator names, keep only the selected ones, in display order.
result_df = pd.DataFrame(results)
result_df['属性'] = result_df['属性'].replace(metric_names)
is_selected = result_df['属性'].isin(selected_metrics)
result_df = result_df[is_selected]
result_df = result_df.set_index('属性').loc[selected_metrics].reset_index()

# 2 x 4 grid of subplots.
cols = 4
rows = 2
fig, axes = plt.subplots(rows, cols, figsize=(cols * 4, rows * 4))
axes = axes.flatten()
plt.subplots_adjust(wspace=0.4, hspace=0.4)

grade_labels = ['1', '2', '3']
grade_columns = ['grade=1', 'grade=2', 'grade=3']
colors = ['#D2CBB0', '#ff7f0e', '#D80032']

# Fixed y-axis settings per indicator: (upper limit, tick positions).
fixed_y_axes = {
    'Number of patients referral': (85, np.arange(0, 81, 10)),
    'Lost patients (LP)': (425, np.arange(0, 401, 50)),
    'Received patient volume': (425, np.arange(0, 401, 50)),
    'Average degree': (85, np.arange(0, 81, 10)),
    'Out-degree': (15, np.arange(0, 15, 2)),
    'Initial consultation area': (9, np.arange(0, 9, 2)),
}

for idx, (metric, ax) in enumerate(zip(selected_metrics, axes)):
    metric_rows = result_df.loc[result_df['属性'] == metric, grade_columns]
    values = metric_rows.values[0]
    ax.bar(grade_labels, values, color=colors, width=0.6, edgecolor='black')
    ax.set_title(metric, fontsize=20)
    ax.tick_params(axis='both', labelsize=18)

    if metric in fixed_y_axes:
        y_upper, y_ticks = fixed_y_axes[metric]
        ax.set_ylim(0, y_upper)
        ax.set_yticks(y_ticks)
    else:
        # Indicators without a fixed axis get 15 % headroom above the tallest bar.
        ax.set_ylim(0, max(values) * 1.15 if max(values) > 0 else 1)
    ax.grid(False)

# Remove any subplot that was not used.
for j in range(idx + 1, len(axes)):
    fig.delaxes(axes[j])

# One shared legend below the grid.
handles = [plt.Rectangle((0, 0), 1, 1, color=grade_color) for grade_color in colors[:3]]
fig.legend(handles, ['Grade 1', 'Grade 2', 'Grade 3'],
           loc='lower center', ncol=3, frameon=False, bbox_to_anchor=(0.5, -0.02), fontsize=20)

plt.tight_layout(rect=[0, 0.03, 1, 1])

# Save as PDF.
output_pdf = r'c:\Users\Administrator\Desktop\network_generate1\plots\network_metrics.pdf'
plt.savefig(output_pdf, dpi=300, bbox_inches='tight', format='pdf')
print(f"图表已保存为 PDF 格式: {output_pdf}")
plt.show()
