# csv和xlsx转化: converter文件夹下

In [4]:
import os
import pandas as pd
from pathlib import Path

def csv_to_xlsx():
    """
    将 converter 目录下的所有 CSV 文件转换为 XLSX 文件
    使用 utf-8-sig 编码读取 CSV，原始文件保留
    """
    converter_dir = Path('converter')
    
    if not converter_dir.exists():
        print(f"目录 {converter_dir} 不存在")
        return
    
    for csv_file in converter_dir.glob('*.csv'):
        xlsx_file = csv_file.with_suffix('.xlsx')
        
        try:
            # 明确指定 utf-8-sig 编码读取 CSV
            df = pd.read_csv(csv_file, encoding='utf-8-sig')
            df.to_excel(xlsx_file, index=False)
            print(f"转换成功: {csv_file} -> {xlsx_file}")
        except Exception as e:
            print(f"转换失败 {csv_file}: {str(e)}")

def xlsx_to_csv():
    """
    将 converter 目录下的所有 XLSX 文件转换为 CSV 文件
    使用 utf-8-sig 编码写入 CSV，原始文件保留
    """
    converter_dir = Path('converter')
    
    if not converter_dir.exists():
        print(f"目录 {converter_dir} 不存在")
        return
    
    for xlsx_file in converter_dir.glob('*.xlsx'):
        csv_file = xlsx_file.with_suffix('.csv')
        
        try:
            df = pd.read_excel(xlsx_file)
            # 明确指定 utf-8-sig 编码写入 CSV
            df.to_csv(csv_file, index=False, encoding='utf-8-sig')
            print(f"转换成功: {xlsx_file} -> {csv_file}")
        except Exception as e:
            print(f"转换失败 {xlsx_file}: {str(e)}")


In [5]:
print("开始 CSV 转 XLSX...")
csv_to_xlsx()


开始 CSV 转 XLSX...


In [6]:
print("\n开始 XLSX 转 CSV...")
xlsx_to_csv()



开始 XLSX 转 CSV...


# relation合并到MEU转xlsx

In [3]:
import os
import pandas as pd
from collections import defaultdict

def process_files(
        relation_dir=r'law_to_MEU/st_4_MEU_relations', 
        meu_dir=r'law_to_MEU/st_3_0_MEU/GT', 
        output_dir=r'law_to_MEU/st_4_MEU_relations/MEU_with_relation'
        ):
    # 确保输出目录存在
    os.makedirs(output_dir, exist_ok=True)

    # 遍历relation目录中的所有csv文件
    for filename in os.listdir(meu_dir):
        if not filename.endswith(".csv"):
            continue

        # 构造完整文件路径
        relation_path = os.path.join(relation_dir, filename)
        meu_path = os.path.join(meu_dir, filename)
        output_path = os.path.join(output_dir, filename)

        # 检查MEU文件是否存在
        if not os.path.exists(meu_path):
            print(f"警告：未找到对应的MEU文件 {meu_path}")
            continue

        # 读取relation数据
        relation_df = pd.read_csv(relation_path, encoding="utf-8-sig")
        
        # 读取MEU数据
        meu_df = pd.read_csv(meu_path, encoding="utf-8-sig")

        # 创建关系字典 {source: {relation: [targets]}}
        relation_dict = defaultdict(lambda: defaultdict(list))
        for _, row in relation_df.iterrows():
            source = row["source"]
            relation = row["relation"]
            target = row["target"]
            relation_dict[source][relation].append(target)

        # 合并数据
        merged_rows = []
        for _, meu_row in meu_df.iterrows():
            meu_id = meu_row["MEU_id"]
            base_info = meu_row.to_dict()

            if meu_id in relation_dict:
                # 处理每个关系
                for relation, targets in relation_dict[meu_id].items():
                    new_row = base_info.copy()
                    new_row["relation"] = relation
                    new_row["target"] = ";".join(targets)
                    merged_rows.append(new_row)
            else:
                # 没有关系的条目
                new_row = base_info.copy()
                new_row["relation"] = ""
                new_row["target"] = ""
                merged_rows.append(new_row)

        # 创建最终DataFrame
        result_df = pd.DataFrame(merged_rows)

        result_df['confirmed'] = 0
        result_df['comments_relation'] = ''
        
        # 定义列顺序
        columns = ["MEU_id", "subject", "condition", "constraint", "contextual_info", "relation", "target", 'confirmed', 'comments_relation']
        
        # 基于MEU_id进行排序
        def sort_key(meu_id):
            parts = meu_id.split('_')
            n = int(parts[1])
            k = int(parts[2])
            return (n, k)
        
        result_df = result_df.sort_values(by="MEU_id", key=lambda x: x.map(sort_key))

        # 保存为XLSX文件
        output_path_excel = output_path.replace('.csv', '.xlsx')
        result_df[columns].to_excel(
            output_path_excel, 
            index=False, 
            engine='openpyxl'  # 指定引擎，确保支持xlsx格式
        )
        print(f"已处理文件：{output_path_excel}")


In [4]:
# 执行处理
process_files()

已处理文件：law_to_MEU/st_4_MEU_relations/MEU_with_relation/北京证券交易所上市公司持续监管指引第8号——股份减持和持股管理.xlsx
已处理文件：law_to_MEU/st_4_MEU_relations/MEU_with_relation/北京证券交易所上市公司持续监管指引第5号——要约收购.xlsx
已处理文件：law_to_MEU/st_4_MEU_relations/MEU_with_relation/北京证券交易所上市公司持续监管指引第1号——独立董事.xlsx
已处理文件：law_to_MEU/st_4_MEU_relations/MEU_with_relation/北京证券交易所上市公司持续监管指引第4号——股份回购.xlsx
已处理文件：law_to_MEU/st_4_MEU_relations/MEU_with_relation/北京证券交易所上市公司持续监管指引第10号——权益分派.xlsx


# 地址结构

In [12]:
import os

def print_directory_tree(
    start_path=None, 
    indent="", 
    show_files=True, 
    prefix=""
):
    """
    打印目录结构树，可选择是否包含文件
    
    Args:
        start_path (str): 起始路径，默认为当前工作目录
        indent (str): 缩进字符（内部递归使用）
        show_files (bool): 是否显示文件，默认 True
        prefix (str): 每行前缀（内部递归使用）
    """
    if start_path is None:
        start_path = os.getcwd()
    
    # 打印当前目录名
    dir_name = os.path.basename(start_path)
    print(prefix + indent + dir_name + "/")
    
    new_indent = indent + "    "
    try:
        items = sorted(os.listdir(start_path))
    except PermissionError:
        print(prefix + new_indent + "[Permission Denied]")
        return
    
    for item in items:
        item_path = os.path.join(start_path, item)
        if os.path.isdir(item_path):
            # 递归处理子目录
            print_directory_tree(
                item_path, 
                new_indent, 
                show_files, 
                prefix
            )
        elif show_files:
            # 如果是文件，并且 show_files=True，则打印
            print(prefix + new_indent + item)


print_directory_tree(
    start_path=None, 
    indent="", 
    show_files=False, 
    # show_files=True, 
    prefix=""
)

meu_graph_v2/
    MEU_to_code/
        MEU_code/
            GT/
            raw_response/
        MEU_selected_with_relation_GT/
    __pycache__/
    converter/
    data_simulation/
        data_generated/
        data_labeled/
    law_to_MEU/
        st_0_law_docx/
            with_appendix/
        st_1_law_csv/
        st_2_law_keywords_definitions/
            GT/
            raw_response/
        st_3_0_MEU/
            GT/
            raw_response/
        st_3_1_inner_relations/
            GT/
            raw_response/
        st_4_MEU_relations/
            MEU_with_relation/
                GT/
            raw_response/
        st_5_MEU_Graph_HTML/
            GT/
        st_6_MEU_evaluate/
            GT/
            raw_response/
