# 网络数据处理与分析完整流程

本 notebook 将 algorithms 文件夹中的各个步骤串联在一起，实现从原始专利数据到网络分析的完整流程。

## 流程概述

1. **数据预处理**
   - 清洗专利数据
   - 删除个人申请

2. **网络构建**
   - 单层网络构建
   - 跨层耦合网络构建

3. **网络层权重计算**

4. **结构洞耦合分析和关键性指数计算**
   - 结构洞耦合计算
   - 结构洞耦合数据库构建
   - 关键性指数计算

5. **中心度耦合分析和核心性指数计算**
   - 中心度耦合计算
   - 中心度耦合数据库构建
   - 核心性指数计算

6. **关键性与核心性指标权重计算**
   - 关键性-核心性数据库构建
   - 指标权重计算

7. **节点关键核心性计算**

## 环境设置和导入

In [None]:
# Import the functions for the basic (data preparation) steps.
# On ImportError only a message is printed, so any name whose import did not
# complete stays undefined for the cells that follow.
try:
    from algorithms.step_1_clean_patent_data import clean_patent_data
    from algorithms.step_1_remove_personal_application import remove_personal_applications

    # Import the knowledge, technology and collaborative R&D network builders
    from algorithms.step_2_knowledge_network_construction import construct_knowledge_network
    from algorithms.step_2_technology_network_construction import construct_technology_network
    from algorithms.step_2_collaborative_RD_network_construction import construct_collaborative_RD_network
    
    # Import the cross-layer (coupled) network builders
    from algorithms.step_2_knowledge_technology_network_construction import construct_knowledge_technology_network
    from algorithms.step_2_technology_collaborative_RD_network_construction import construct_technology_collaborative_RD_network
    from algorithms.step_2_knowledge_collaborative_RD_network_construction import construct_knowledge_collaborative_RD_network
    print("✓ 基础模块导入成功")
except ImportError as e:
    print(f"✗ 基础模块导入失败: {e}")

In [None]:
# Import the functions for the analysis steps (steps 3 to 6).
# The first failing import aborts the rest of the try body, so every name
# listed after it is left undefined as well.
try:
    from algorithms.step_3_network_layer_weights import calculate_network_weights
    from algorithms.step_4_structural_hole_coupling_calculation import calculate_structural_hole
    from algorithms.step_4_structural_hole_coupling_database_construction import build_structural_hole_database
    from algorithms.step_4_criticality_index_calculation import calculate_criticality
    from algorithms.step_5_centrality_coupling_calculation import calculate_centrality_coupling
    from algorithms.step_5_centrality_coupling_database_construction import build_centrality_coupling_database
    from algorithms.step_5_centrality_index_calculation import calculate_centrality_index
    from algorithms.step_6_criticality_and_centrality_database_construction import build_criticality_centrality_database
    print("✓ 分析模块导入成功")
except ImportError as e:
    print(f"✗ 分析模块导入失败: {e}")

# NOTE(review): this line sits outside the try/except, so it is printed even
# when the import above failed.
print("\n所有模块导入完成！")

## 第一步：数据预处理

### 1.1 清洗专利数据
- 将所有英文字母转化为小写字母（防止漏掉重复值），只保留“公开（公告）号、引文专利公开号、施引专利公开号、IPC分类、专利权人”五列数据
- 删除无IPC号和乱码数据
- 删除“公开（公告）号”列的重复值

In [None]:
import pandas as pd
from pathlib import Path
import sys

# Project root: step up one level when the notebook is launched from inside
# the `algorithms` folder, otherwise use the working directory as-is.
PROJECT_ROOT = Path.cwd() if Path.cwd().name != 'algorithms' else Path.cwd().parent

# Input / output locations
input_file = PROJECT_ROOT.joinpath('data', 'input', 'original_patent_data.xlsx')  # Excel is the default input
cleaned_output = PROJECT_ROOT.joinpath('data', 'step1_output', 'patent_data_cleaned.csv')

# 1. Pick the input file: prefer the Excel file, fall back to a CSV with the
#    same stem, and stop with FileNotFoundError when neither exists.
if input_file.exists():
    print(f"使用Excel输入文件: {input_file}")
else:
    csv_input = input_file.with_suffix('.csv')
    if not csv_input.exists():
        raise FileNotFoundError(f"输入文件不存在: {input_file} 或 {csv_input}")
    print(f"使用CSV输入文件: {csv_input}")
    input_file = csv_input

# 2. Make sure the output directory exists
cleaned_output.parent.mkdir(parents=True, exist_ok=True)

# 3. Run the cleaning step
print("\n=== 开始数据清洗 ===")
from algorithms.step_1_clean_patent_data import clean_patent_data

try:
    # Both paths are handed over as plain strings via the keyword names
    # `input_path` / `output_path`.
    result = clean_patent_data(input_path=str(input_file), output_path=str(cleaned_output))

    # Report what the cleaning function returned
    print("\n清洗结果:")
    print(result)

    # Verify the output file and show a short preview
    if not cleaned_output.exists():
        print("警告: 输出文件未生成")
    else:
        df = pd.read_csv(cleaned_output)
        print(f"\n输出验证: 共{len(df)}条记录")
        print("清洗后数据样例:")
        print(df.head(3))

except Exception as e:
    # Any failure is reported on stderr and ends execution with status 1
    print(f"\n清洗失败: {str(e)}", file=sys.stderr)
    sys.exit(1)

print("\n✅ 数据清洗完成")

### 1.2 删除个人申请专利

In [None]:
# Step 1.2: drop patents filed by individuals.
# The outcome (or a failure message) is kept in `result_1_2`.
print("=== 步骤 1.2: 去除个人申请 ===")
try:
    # Forward slashes work on every OS and avoid the invalid "\d" / "\s" / "\p"
    # escape sequences that backslash-separated literals would contain.
    result_1_2 = remove_personal_applications(
        input_path="./data/step1_output/patent_data_cleaned.csv",
        output_path="./data/step1_output/patent_data_selected_columns.csv",
    )
    print(f"结果: {result_1_2}")
except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"执行失败: {e}")
    result_1_2 = f"执行失败: {e}"
print("\n" + "="*50 + "\n")

## 第二步：网络构建

### 2.1 单层网络构建

In [None]:
# Step 2.1: build the knowledge network from the filtered patent table.
# The outcome (or a failure message) is kept in `result_2_1`.
print("=== 步骤 2.1: 知识网络构建 ===")
try:
    # Forward slashes work on every OS and avoid the invalid "\d" / "\s" / "\p"
    # escape sequences that backslash-separated literals would contain.
    result_2_1 = construct_knowledge_network(
        input_path="./data/step1_output/patent_data_selected_columns.csv",
        output_dir="./data/step2_output",
    )
    print(f"结果: {result_2_1}")
except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"执行失败: {e}")
    result_2_1 = f"执行失败: {e}"
print("\n" + "="*50 + "\n")

In [None]:
# Step 2.2: build the technology network from the filtered patent table.
# The outcome (or a failure message) is kept in `result_2_2`.
print("=== 步骤 2.2: 技术网络构建 ===")
try:
    # Forward slashes work on every OS and avoid the invalid "\d" / "\s" / "\p"
    # escape sequences that backslash-separated literals would contain.
    result_2_2 = construct_technology_network(
        input_path="./data/step1_output/patent_data_selected_columns.csv",
        output_dir="./data/step2_output",
    )
    print(f"结果: {result_2_2}")
except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"执行失败: {e}")
    result_2_2 = f"执行失败: {e}"
print("\n" + "="*50 + "\n")

In [None]:
# Step 2.3: build the collaborative R&D network from the filtered patent table.
# The outcome (or a failure message) is kept in `result_2_3`.
print("=== 步骤 2.3: 协作研发网络构建 ===")
try:
    # Forward slashes work on every OS and avoid the invalid "\d" / "\s" / "\p"
    # escape sequences that backslash-separated literals would contain.
    result_2_3 = construct_collaborative_RD_network(
        input_path="./data/step1_output/patent_data_selected_columns.csv",
        output_dir="./data/step2_output",
    )
    print(f"结果: {result_2_3}")
except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"执行失败: {e}")
    result_2_3 = f"执行失败: {e}"
print("\n" + "="*50 + "\n")

### 2.2 跨层耦合网络构建

In [None]:
# Step 2.4: build the knowledge-technology coupled network.
# The outcome (or a failure message) is kept in `result_2_4`.
print("=== 步骤 2.4: 知识-技术耦合网络构建 ===")
try:
    # Look the builder up by name: when the import cell failed, the name is
    # absent from the namespace, and a bare reference would raise NameError
    # instead of reaching the "import failed" branch below.
    _build_kt_network = globals().get("construct_knowledge_technology_network")
    if callable(_build_kt_network):
        # Forward slashes work on every OS and avoid the invalid escape
        # sequences ("\d", "\s", "\p") of backslash-separated literals.
        result_2_4 = _build_kt_network(
            input_path="./data/step1_output/patent_data_selected_columns.csv",
            output_dir="./data/step2_output",
        )
        print(f"结果: {result_2_4}")
    else:
        result_2_4 = "函数导入失败"
        print("函数导入失败")
except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"执行失败: {e}")
    result_2_4 = f"执行失败: {e}"
print("\n" + "="*50 + "\n")

In [None]:
# Step 2.5: build the technology-collaborative R&D coupled network.
# The builder is imported here again, so this cell does not depend on the
# shared import cell having succeeded.
from algorithms.step_2_technology_collaborative_RD_network_construction import \
    construct_technology_collaborative_RD_network

print("=== 步骤 2.5: 技术-协作研发耦合网络构建 ===")
try:
    if construct_technology_collaborative_RD_network:
        # Forward slashes work on every OS and avoid the invalid escape
        # sequences ("\d", "\s", "\p") of backslash-separated literals.
        result_2_5 = construct_technology_collaborative_RD_network(
            input_path="./data/step1_output/patent_data_selected_columns.csv",
            output_dir="./data/step2_output",
        )
        print(f"结果: {result_2_5}")
    else:
        result_2_5 = "函数导入失败"
        print("函数导入失败")
except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"执行失败: {e}")
    result_2_5 = f"执行失败: {e}"
print("\n" + "="*50 + "\n")

In [None]:
# Step 2.6: build the knowledge-collaborative R&D coupled network.
# The outcome (or a failure message) is kept in `result_2_6`.
print("=== 步骤 2.6: 知识-协作研发耦合网络构建 ===")
try:
    # Look the builder up by name: when the import cell failed, the name is
    # absent from the namespace, and a bare reference would raise NameError
    # instead of reaching the "import failed" branch below.
    _build_kc_network = globals().get("construct_knowledge_collaborative_RD_network")
    if callable(_build_kc_network):
        # Forward slashes work on every OS and avoid the invalid escape
        # sequences ("\d", "\s", "\p") of backslash-separated literals.
        result_2_6 = _build_kc_network(
            input_path="./data/step1_output/patent_data_selected_columns.csv",
            output_dir="./data/step2_output",
        )
        print(f"结果: {result_2_6}")
    else:
        result_2_6 = "函数导入失败"
        print("函数导入失败")
except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"执行失败: {e}")
    result_2_6 = f"执行失败: {e}"
print("\n" + "="*50 + "\n")

## 第三步：网络层权重计算

In [None]:
# Step 3: compute the network layer weights from the step-2 outputs.
# The outcome (or a failure message) is kept in `result_3`.
print("=== 步骤 3: 网络层权重计算 ===")
try:
    # Forward slashes work on every OS and avoid the invalid "\d" / "\s"
    # escape sequences that backslash-separated literals would contain.
    result_3 = calculate_network_weights(
        input_dir="./data/step2_output",
        output_dir="./data/step3_output",
    )
    print(f"结果: {result_3}")
except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"执行失败: {e}")
    result_3 = f"执行失败: {e}"
print("\n" + "="*50 + "\n")

## 第四步：结构洞耦合分析和关键性指数计算

### 4.1 结构洞耦合计算

In [None]:
from pathlib import Path

# Directories shared by all three structural-hole runs. They are resolved once,
# up front, so steps 4.1.2 and 4.1.3 no longer rely on names that were only
# assigned inside step 4.1.1's try block.
input_dir = Path("./data/step2_output").resolve()
output_dir = Path("./data/step4_output").resolve()


def _run_structural_hole_step(network_type):
    """Run ``calculate_structural_hole`` for one network layer and report it.

    Args:
        network_type: Layer identifier, forwarded as the ``network_type``
            keyword argument.

    Returns:
        The value returned by ``calculate_structural_hole``, or a
        ``"执行失败: ..."`` string when the call raises.
    """
    try:
        outcome = calculate_structural_hole(
            network_type=network_type,
            input_dir=input_dir,
            output_dir=output_dir,
        )
        print(f"结果: {outcome}")
    except Exception as e:
        # Best-effort: report the failure and record it instead of aborting
        print(f"执行失败: {e}")
        outcome = f"执行失败: {e}"
    print("\n" + "="*50 + "\n")
    return outcome


print("=== 步骤 4.1.1: 知识网络结构洞耦合计算 ===")
result_4_1_1 = _run_structural_hole_step("knowledge")

print("=== 步骤 4.1.2: 技术网络结构洞耦合计算 ===")
result_4_1_2 = _run_structural_hole_step("technology")

print("=== 步骤 4.1.3: 合作研发网络结构洞耦合计算 ===")
# The identifier is spelled with an underscore and an ampersand, the same way
# as the "collaborative_R&D_*" file names used by the later cells.
result_4_1_3 = _run_structural_hole_step("collaborative_R&D")

### 4.2 结构洞耦合数据库构建

In [None]:
from pathlib import Path

# pandas is imported in this cell too, so the preview below works when the
# cell is run on its own (the sibling analysis cells import it the same way).
import pandas as pd

print("=== 步骤 4.2: 结构洞耦合数据库构建 ===")
try:
    # Absolute locations of the step-3 and step-4 output folders
    step3_dir = Path("./data/step3_output").resolve()
    step4_dir = Path("./data/step4_output").resolve()

    # Pre-check: every input must already exist on disk
    required_files = [
        step3_dir / "network_layer_weights.txt",
        step4_dir / "knowledge_network_structural_hole_coupling.csv",
        step4_dir / "technology_network_structural_hole_coupling.csv",
        step4_dir / "collaborative_R&D_network_structural_hole_coupling.csv"
    ]

    missing_files = [f for f in required_files if not f.exists()]
    if missing_files:
        raise FileNotFoundError("缺失必要文件：\n" + "\n".join(str(f) for f in missing_files))

    # Build the database
    result_4_2 = build_structural_hole_database(
        step3_dir=step3_dir,
        step4_dir=step4_dir
    )
    print(f"结果: {result_4_2}")

    # Check the expected output file and show a short preview
    output_file = step4_dir / "structural_hole_coupling_database.csv"
    if output_file.exists():
        df = pd.read_csv(output_file)
        print("\n数据库预览：")
        print(df.head(3))
        print(f"\n总计 {len(df)} 条记录")
    else:
        print("警告：输出文件未生成")

except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"执行失败: {str(e)}")
    result_4_2 = f"执行失败: {str(e)}"
finally:
    print("\n" + "="*50 + "\n")

### 4.3 关键性指数计算

In [None]:
from pathlib import Path
import pandas as pd

print("=== 步骤 4.3: 多网络关键性指数计算 ===")
try:
    # ---- Paths: absolute locations of the step-2 / step-4 output folders ----
    step2_dir = Path("./data/step2_output").resolve()
    step4_dir = Path("./data/step4_output").resolve()

    # ---- Input validation: everything that must already be on disk ----
    required_files = [step4_dir / "structural_hole_coupling_database.csv"]
    required_files += [
        step2_dir / "knowledge_network_nodes.csv",
        step2_dir / "technology_network_nodes.csv",
        step2_dir / "collaborative_R&D_network_nodes.csv",
        step2_dir / "knowledge-technology_network_edges.csv",
        step2_dir / "knowledge-collaborative_R&D_network_edges.csv",
        step2_dir / "technology-collaborative_R&D_network_edges.csv",
    ]

    # Stop early, naming every absent file
    missing_files = [path for path in required_files if not path.exists()]
    if missing_files:
        raise FileNotFoundError(
            "缺失必要文件:\n" + "\n".join(f"• {path.name}" for path in missing_files))

    # ---- Calculation ----
    result_4_3 = calculate_criticality(step2_dir=step2_dir, step4_dir=step4_dir)

    # ---- Result check ----
    print("\n" + "-"*40 + " 结果验证 " + "-"*40)

    # One criticality-index file is expected per network layer
    output_files = [
        "knowledge_network_criticality_index.csv",
        "technology_network_criticality_index.csv",
        "collaborative_R&D_network_criticality_index.csv",
    ]

    # Preview each file that was produced, flag each one that was not
    for fname in output_files:
        output_path = step4_dir / fname
        if not output_path.exists():
            print(f"\n❌ {fname} 文件未生成")
            continue
        df = pd.read_csv(output_path)
        print(f"\n✅ {fname}（前3行样例）:")
        print(df.head(3))
        print(f"记录总数: {len(df)}")

    print(f"\n结果: {result_4_3}")

except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"执行失败: {str(e)}")
    result_4_3 = f"执行失败: {str(e)}"
finally:
    print("\n" + "="*50 + "\n")

## 第五步：中心度耦合分析和核心性指数计算

### 5.1 中心度耦合计算

In [None]:
from pathlib import Path
import pandas as pd

print("=== 步骤 5.1: 中心度耦合指标计算 ===")
try:
    # ---- Paths: absolute locations of the input / output folders ----
    input_dir = Path("./data/step2_output").resolve()
    output_dir = Path("./data/step5_output").resolve()

    # ---- Input validation: nodes and edges file for each of the three layers ----
    required_files = [
        input_dir / f"{layer}_network_{part}.csv"
        for layer in ("knowledge", "technology", "collaborative_R&D")
        for part in ("nodes", "edges")
    ]

    # Stop early, naming every absent file
    missing_files = [path for path in required_files if not path.exists()]
    if missing_files:
        raise FileNotFoundError(
            "缺失必要文件:\n" + "\n".join(f"• {path.name}" for path in missing_files)
        )

    # ---- Calculation ----
    result_5_1 = calculate_centrality_coupling(input_dir=input_dir, output_dir=output_dir)

    # ---- Result check ----
    print("\n" + "-"*40 + " 结果验证 " + "-"*40)

    # One centrality-coupling file is expected per network layer
    output_files = [
        f"{layer}_network_centrality_coupling.csv"
        for layer in ("knowledge", "technology", "collaborative_R&D")
    ]

    # Preview each file that was produced, flag each one that was not
    for fname in output_files:
        output_path = output_dir / fname
        if not output_path.exists():
            print(f"\n❌ {fname} 文件未生成")
            continue
        df = pd.read_csv(output_path)
        print(f"\n✅ {fname}（前3行样例）:")
        print(df.head(3))
        print(f"记录总数: {len(df)}")
        # Reads the 'centrality_coupling' column of the output file
        print(f"中心度范围: {df['centrality_coupling'].min()} ~ {df['centrality_coupling'].max()}")

    print(f"\n结果: {result_5_1}")

except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"执行失败: {str(e)}")
    result_5_1 = f"执行失败: {str(e)}"
finally:
    print("\n" + "="*50 + "\n")

### 5.2 中心度耦合数据库构建

In [None]:
from pathlib import Path
import pandas as pd

print("=== 步骤 5.2: 中心度耦合数据库构建 ===")
try:
    # ---- Paths ----
    step3_dir = Path("./data/step3_output").resolve()
    step5_dir = Path("./data/step5_output").resolve()

    # ---- Input validation: layer weights plus the three coupling tables ----
    required_files = [step3_dir / "network_layer_weights.txt"]
    required_files += [
        step5_dir / "knowledge_network_centrality_coupling.csv",
        step5_dir / "technology_network_centrality_coupling.csv",
        step5_dir / "collaborative_R&D_network_centrality_coupling.csv",
    ]

    # Stop early, naming every absent file
    missing_files = [path for path in required_files if not path.exists()]
    if missing_files:
        raise FileNotFoundError(
            "缺失必要文件:\n" + "\n".join(f"• {path.name}" for path in missing_files))

    # ---- Calculation ----
    result_5_2 = build_centrality_coupling_database(step3_dir=step3_dir, step5_dir=step5_dir)

    # ---- Result check ----
    print("\n" + "-"*40 + " 结果验证 " + "-"*40)

    # Main database file: preview, total size and per-layer record counts
    database_path = step5_dir / "centrality_coupling_database.csv"
    if not database_path.exists():
        print("❌ 主数据库文件未生成")
    else:
        db_df = pd.read_csv(database_path)
        print("\n✅ 中心度耦合数据库（前3行样例）:")
        print(db_df.head(3))
        print(f"总记录数: {len(db_df)}")

        # Rows are grouped by the numeric layer id stored in column '网络层'
        print("\n各网络记录分布:")
        for layer_id, name in [(1,'知识网络'), (2,'技术网络'), (3,'合作研发网络')]:
            layer_rows = db_df[db_df['网络层'] == layer_id]
            count = len(layer_rows)
            print(f"• {name}: {count} 条")

    # Per-network coupling files: preview the ones present, flag the rest
    network_files = [
        "knowledge_network_centrality_coupling.csv",
        "technology_network_centrality_coupling.csv",
        "collaborative_R&D_network_centrality_coupling.csv",
    ]
    for fname in network_files:
        output_path = step5_dir / fname
        if not output_path.exists():
            print(f"\n❌ {fname} 文件未生成")
            continue
        df = pd.read_csv(output_path)
        print(f"\n✅ {fname}（前3行样例）:")
        print(df.head(3))

    print(f"\n结果: {result_5_2}")

except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"执行失败: {str(e)}")
    result_5_2 = f"执行失败: {str(e)}"
finally:
    print("\n" + "="*50 + "\n")

### 5.3 核心性指数计算

In [None]:
from pathlib import Path
import pandas as pd

# Step 5.3: compute the centrality (core-ness) index of every network layer.
# The outcome (or a failure message) is kept in `result_5_3`.
print("=== 步骤 5.3: 核心性指数计算 ===")

try:
    # ---- Paths ----
    step2_dir = Path("./data/step2_output").resolve()
    step5_dir = Path("./data/step5_output").resolve()

    # ---- Input validation ----
    required_files = [
        step2_dir / "knowledge_network_nodes.csv",
        step2_dir / "technology_network_nodes.csv",
        step2_dir / "collaborative_R&D_network_nodes.csv",
        step2_dir / "knowledge-technology_network_edges.csv",
        step2_dir / "knowledge-collaborative_R&D_network_edges.csv",
        step2_dir / "technology-collaborative_R&D_network_edges.csv",
        step5_dir / "centrality_coupling_database.csv"
    ]

    # Stop early, naming every absent file
    missing_files = [f for f in required_files if not f.exists()]
    if missing_files:
        raise FileNotFoundError(
            "缺失必要输入文件:\n" + "\n".join(f"• {f.name}" for f in missing_files))

    # ---- Calculation ----
    from algorithms.step_5_centrality_index_calculation import calculate_centrality_index
    result_5_3 = calculate_centrality_index(
        step2_dir=step2_dir,
        step5_dir=step5_dir
    )

    # ---- Result check ----
    print("\n" + "-"*40 + " 结果验证 " + "-"*40)

    # One centrality-index file is expected per network layer
    output_files = [
        "knowledge_network_centrality_index.csv",
        "technology_network_centrality_index.csv",
        "collaborative_R&D_network_centrality_index.csv"
    ]

    # The loop variable must be `fname`, the name the body actually uses;
    # otherwise the body reads a stale or undefined `fname`.
    for fname in output_files:
        output_path = step5_dir / fname
        if output_path.exists():
            df = pd.read_csv(output_path)
            print(f"\n✅ {fname}（统计信息）:")
            print(f"- 记录数: {len(df)}")
            print(f"- 中心度指数范围: {df['centrality_index'].min():.4f} ~ {df['centrality_index'].max():.4f}")
            print(f"- 平均值: {df['centrality_index'].mean():.4f}")
            print("前3行样例:")
            print(df.head(3))
        else:
            print(f"\n❌ {fname} 文件未生成")

    # Report this step's own result, not the one left over from step 5.2
    print(f"\n处理结果: {result_5_3}")

except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"\n❌ 执行失败: {str(e)}")
    result_5_3 = f"执行失败: {str(e)}"

finally:
    print("\n" + "="*50 + "\n")

## 第六步：关键性与核心性指标权重计算

### 6.1 关键性-核心性数据库构建

In [None]:
from pathlib import Path
import pandas as pd

# Step 6.1: merge the criticality and centrality indices into one database.
# The outcome (or a failure message) is kept in `result`.
print("=== 步骤6.1: 关键性-核心性数据库构建 ===")

try:
    # ---- Paths ----
    step4_dir = Path("./data/step4_output").resolve()
    step5_dir = Path("./data/step5_output").resolve()
    step6_dir = Path("./data/step6_output").resolve()

    # ---- Input validation ----
    required_files = [
        step4_dir / "knowledge_network_criticality_index.csv",
        step4_dir / "technology_network_criticality_index.csv",
        step4_dir / "collaborative_R&D_network_criticality_index.csv",
        step5_dir / "knowledge_network_centrality_index.csv",
        step5_dir / "technology_network_centrality_index.csv",
        step5_dir / "collaborative_R&D_network_centrality_index.csv"
    ]

    # Stop early, naming every absent file
    missing_files = [f for f in required_files if not f.exists()]
    if missing_files:
        raise FileNotFoundError(
            "缺失必要输入文件:\n" + "\n".join(f"• {f.name}" for f in missing_files))

    # ---- Calculation ----
    from algorithms.step_6_criticality_and_centrality_database_construction import build_criticality_centrality_database
    result = build_criticality_centrality_database(
        step4_dir=step4_dir,
        step5_dir=step5_dir,
        output_dir=step6_dir
    )

    # ---- Result check ----
    print("\n" + "-"*40 + " 结果验证 " + "-"*40)

    db_path = step6_dir / "criticality_and_centrality_database.csv"
    if db_path.exists():
        # Main database: headline statistics and a short preview
        db_df = pd.read_csv(db_path)
        print("\n✅ 主数据库（关键统计指标）:")
        print(f"- 总记录数: {len(db_df)}")
        print(f"- 知识网络节点: {len(db_df[db_df['网络层'] == 1])}")
        print(f"- 技术网络节点: {len(db_df[db_df['网络层'] == 2])}")
        print(f"- 合作研发网络节点: {len(db_df[db_df['网络层'] == 3])}")
        print(f"- 关键性均值: {db_df['关键性'].mean():.4f}")
        print(f"- 核心性均值: {db_df['核心性'].mean():.4f}")
        print("\n前3行样例:")
        print(db_df.head(3))

        # The per-layer check needs `db_df`, so it lives inside this branch;
        # outside it, a missing database would hit an undefined (or stale)
        # `db_df` and mask the real problem.
        print("\n网络层数据完整性检查:")
        for layer, name in [(1,'知识网络'), (2,'技术网络'), (3,'合作研发网络')]:
            layer_df = db_df[db_df['网络层'] == layer]
            print(f"• {name}: {len(layer_df)}条记录 | "
                  f"关键性[{layer_df['关键性'].min():.2f}-{layer_df['关键性'].max():.2f}] | "
                  f"核心性[{layer_df['核心性'].min():.2f}-{layer_df['核心性'].max():.2f}]")
    else:
        print("\n❌ 主数据库文件未生成")

    print(f"\n处理结果: {result}")

except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"\n❌ 执行失败: {str(e)}")
    result = f"执行失败: {str(e)}"

finally:
    print("\n" + "="*50 + "\n")

### 6.2 指标权重计算

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np

# Step 6.2: compute the weights of the criticality and centrality indices.
# The outcome (or a failure message) is kept in `result`.
print("=== 步骤6.2: 指标权重计算 ===")

try:
    # ---- Paths ----
    step6_dir = Path("./data/step6_output").resolve()

    # ---- Input validation ----
    required_file = step6_dir / "criticality_and_centrality_database.csv"
    if not required_file.exists():
        raise FileNotFoundError(f"❌ 缺失必要输入文件: {required_file.name}")

    # ---- Calculation ----
    from algorithms.step_6_index_weights import calculate_index_weights
    result = calculate_index_weights(
        input_dir=step6_dir,
        output_dir=step6_dir
    )

    # ---- Result check ----
    print("\n" + "-"*40 + " 结果验证 " + "-"*40)

    weights_file = step6_dir / "index_weights.txt"
    if weights_file.exists():
        # Read the file once; blank lines carry no weight and are skipped so
        # they cannot break the "key: value" parsing below.
        with open(weights_file, 'r') as f:
            weight_lines = [line.strip() for line in f if line.strip()]

        print("\n✅ 权重计算结果:")
        for line in weight_lines:
            print(f"  • {line}")

        # Parse "key: value" pairs; splitting on the first ':' tolerates a
        # missing or repeated space after the colon.
        weights = {}
        for line in weight_lines:
            key, sep, value = line.partition(':')
            if not sep:
                raise ValueError(f"无法解析权重行: {line!r}")
            weights[key.strip()] = float(value.strip())

        print("\n权重验证:")
        print(f"  • 关键性权重: {weights['criticality_weight']:.4f}")
        print(f"  • 核心性权重: {weights['centrality_weight']:.4f}")
        print(f"  • 权重总和: {weights['criticality_weight'] + weights['centrality_weight']:.4f} (应≈1.0)")

        # Summary statistics of the source data, for a plausibility check
        df = pd.read_csv(required_file)
        print("\n原始数据统计:")
        print(f"  • 关键性范围: {df['关键性'].min():.2f} - {df['关键性'].max():.2f}")
        print(f"  • 核心性范围: {df['核心性'].min():.2f} - {df['核心性'].max():.2f}")
        print(f"  • 关键性/核心性相关系数: {df['关键性'].corr(df['核心性']):.4f}")
    else:
        print("\n❌ 权重文件未生成")

    print(f"\n处理结果: {result}")

except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"\n❌ 执行失败: {str(e)}")
    result = f"执行失败: {str(e)}"

finally:
    print("\n" + "="*50 + "\n")

## 第七步： 节点关键核心性计算

In [None]:
from pathlib import Path
import pandas as pd

print("=== 步骤7: 节点关键核心性计算 ===")

try:
    # ---- Paths ----
    step6_dir = Path("./data/step6_output").resolve()
    step7_dir = Path("./data/step7_output").resolve()

    # ---- Input validation: the merged database and the index weights ----
    required_files = [
        step6_dir / input_name
        for input_name in ("criticality_and_centrality_database.csv", "index_weights.txt")
    ]

    # Stop early, naming every absent file
    missing_files = [path for path in required_files if not path.exists()]
    if missing_files:
        raise FileNotFoundError(
            "缺失必要输入文件:\n" + "\n".join(f"• {path.name}" for path in missing_files))

    # ---- Calculation ----
    from algorithms.step_7_criticality_centrality_index_calculation import calculate_critical_centrality_index
    result = calculate_critical_centrality_index(input_dir=step6_dir, output_dir=step7_dir)

    # ---- Result check ----
    print("\n" + "-"*40 + " 结果验证 " + "-"*40)

    output_file = step7_dir / "criticality-centrality_index.csv"
    if not output_file.exists():
        print("\n❌ 输出文件未生成")
    else:
        # Headline statistics; the two weights are read from the first row
        df = pd.read_csv(output_file)
        print("\n✅ 关键-核心性指数数据库:")
        print(f"- 总记录数: {len(df)}")
        print(f"- 关键性权重: {df['关键性权重'].iloc[0]:.4f}")
        print(f"- 核心性权重: {df['核心性权重'].iloc[0]:.4f}")
        print(f"- 关键核心性范围: {df['关键核心性'].min():.4f} ~ {df['关键核心性'].max():.4f}")
        print(f"- 平均值: {df['关键核心性'].mean():.4f}")

        # Per-layer breakdown, keyed by the numeric id in column '网络层'
        print("\n分网络统计:")
        for layer, name in [(1,'知识网络'), (2,'技术网络'), (3,'合作研发网络')]:
            layer_df = df[df['网络层'] == layer]
            print(f"  • {name}: {len(layer_df)}节点 | "
                  f"关键核心性[{layer_df['关键核心性'].min():.4f}-{layer_df['关键核心性'].max():.4f}]")

        print("\n前3行样例:")
        print(df.head(3))

    print(f"\n处理结果: {result}")

except Exception as e:
    # Best-effort cell: report the failure and record it instead of aborting
    print(f"\n❌ 执行失败: {str(e)}")
    result = f"执行失败: {str(e)}"

finally:
    print("\n" + "="*50 + "\n")