In [2]:
import pandas as pd
from openpyxl import load_workbook
import time

# ---------- 基础函数 ----------
def load_excel(file_path, sheet_name=0):
    """Read one worksheet of an Excel file, requesting ``str`` for every column.

    Args:
        file_path: Workbook path handed to ``pd.read_excel``.
        sheet_name: Sheet selector forwarded to ``pd.read_excel``; the
            default ``0`` selects the first sheet.

    Returns:
        Whatever ``pd.read_excel`` returns for the requested sheet.
    """
    read_options = {"sheet_name": sheet_name, "dtype": str}
    frame = pd.read_excel(file_path, **read_options)
    return frame

def write_fields_to_row(ws, row_num, data, field_mapping):
    """Copy the mapped values of one record into a single worksheet row.

    Args:
        ws: Target sheet; only item assignment by cell reference
            (``ws["D5"] = value``) is used.
        row_num: Row number appended to each column letter.
        data: Mapping of field name -> value for the record.
        field_mapping: Mapping of field name -> column letter.

    Fields present in ``field_mapping`` but absent from ``data`` are skipped.
    """
    for field_name in field_mapping:
        if field_name not in data:
            continue
        column = field_mapping[field_name]
        ws[f"{column}{row_num}"] = data[field_name]

def consolidate_source_data(df, fields_to_consolidate):
    """Collect, per ID, the distinct non-empty values of the requested fields.

    Args:
        df: Source table with an ``ID`` column. Rows whose ID is missing or
            blank are ignored; IDs are stripped and upper-cased.
        fields_to_consolidate: Field names to gather. They are matched to the
            source headers case-insensitively; a field with no matching
            column yields an empty string instead of an error.

    Returns:
        dict: ``{ID: {field: "v1; v2; ..."}}`` where the distinct, stripped
        values of each field are joined in first-seen row order, so the
        result is identical from run to run.
    """
    total = len(df)

    # Upper-cased header -> real header, so the "does this column exist" test
    # and the value lookup can never disagree about letter case.
    header_by_upper = {}
    for col in df.columns:
        header_by_upper.setdefault(str(col).upper(), col)
    source_column = {f: header_by_upper.get(f.upper()) for f in fields_to_consolidate}

    consolidated = {}
    print(f"开始整合源数据，共 {total} 行...")
    # Count rows by position: index labels are not guaranteed to be integers.
    for position, (_, row) in enumerate(df.iterrows()):
        raw_id = row['ID']
        name = str(raw_id).strip().upper() if pd.notna(raw_id) else None
        if not name:
            continue

        if name not in consolidated:
            # One dict per field, used as an insertion-ordered set of values.
            consolidated[name] = {f: {} for f in source_column}

        for f, col in source_column.items():
            if col is None:  # the source has no such column
                continue
            val = row[col]
            if pd.notna(val):
                text = str(val).strip()
                if text:
                    consolidated[name][f][text] = None

        # Progress line every 1000 rows
        if position % 1000 == 0:
            print(f"整合进度: {position+1}/{total} ({(position+1)/total*100:.1f}%)")

    # Ordered value collections -> "; "-joined strings ("" when empty)
    result = {
        name: {f: "; ".join(values) for f, values in fields.items()}
        for name, fields in consolidated.items()
    }

    print(f"整合完成，共 {len(result)} 个唯一ID")
    return result

def print_progress(current, total, start_time, operation="处理"):
    """Print one progress line with elapsed time and a linear time estimate.

    Args:
        current: Number of items finished so far.
        total: Number of items expected in total.
        start_time: ``time.time()`` value captured when the work began.
        operation: Label placed in front of the progress text.

    A ``total`` of zero is reported as 0.0% instead of raising
    ``ZeroDivisionError``; the remaining time is 0 while nothing is finished.
    """
    elapsed = time.time() - start_time
    progress = (current / total) * 100 if total else 0.0
    # Extrapolate the average time per finished item over the whole job.
    remaining = elapsed * total / current - elapsed if current else 0
    print(f"{operation}进度: {current}/{total} ({progress:.1f}%) - 已用: {elapsed:.1f}s, 剩余: {remaining:.1f}s")

# ---------- Field list (version prefix + variable suffix, in output column order) ----------
version = 'V99'
field_list = [version + suffix for suffix in ("EDDVSPR", "ELKVSPR", "ERKVSPR")]
# Earlier hard-coded variant, kept for reference:
#field_list=["V99ELKVSRP","V99ERKVSRP","V99EDDVSPR"]
def col_letter(n):
    """Convert a 1-based column number to Excel letters (1 -> "A", 27 -> "AA").

    ``0`` produces an empty string.
    """
    digits = []
    while n:
        n, offset = divmod(n - 1, 26)
        digits.append("ABCDEFGHIJKLMNOPQRSTUVWXYZ"[offset])
    # Letters were produced least-significant first.
    return "".join(reversed(digits))


# Assign each field its target column letter: the i-th field (0-based) goes to
# column number i + 4, so the first field lands in column "D".
field_mapping = {f.upper(): col_letter(i + 4) for i, f in enumerate(field_list)}
fields_to_consolidate = list(field_mapping.keys())


# ---------- Paths ----------
src_file = r"C:\Users\DXW\OAI data\OAIdatabase\General\Outcomes99.xlsx"
tgt_file = r"C:\Users\DXW\Desktop\半月板手术_信息.xlsx"
tgt_sheet = '12m'   # name of the target worksheet

# ---------- Main flow ----------
start_time = time.time()
print("=" * 50)
print("程序开始执行")
print("=" * 50)

# Read the source workbook (no sheet given, so the first sheet is used)
df = load_excel(src_file)
df.columns = df.columns.str.strip().str.upper()  # normalise headers: trimmed, upper-case
consolidated_data = consolidate_source_data(df, fields_to_consolidate)

# Open the target workbook and select the configured sheet
wb = load_workbook(tgt_file)
ws = wb[tgt_sheet]


# Build an upper-cased ID -> [row numbers] index from column A, starting at row 2.
# Two columns are iterated but only the first cell of each row is read.
acc_number_to_rows = {}
for row_num, row in enumerate(ws.iter_rows(min_row=2, min_col=1, max_col=2), start=2):
    acc = str(row[0].value).strip().upper() if row[0].value is not None else None
    # Skip empty cells and cells whose text is literally "none" (any letter case)
    if acc and acc != 'NONE':
        acc_number_to_rows.setdefault(acc, []).append(row_num)

# Count how many IDs, and how many target rows, will receive data
matched_names = [n for n in consolidated_data if n in acc_number_to_rows]
total_matched_rows = sum(len(acc_number_to_rows[n]) for n in matched_names)
print(f"\n匹配到 {len(matched_names)} 个ID，共 {total_matched_rows} 行待写入")

# Write the consolidated values into every target row that carries a matching ID
write_start = time.time()
i = 0  # number of rows written so far
for name_idx, (name, data) in enumerate(consolidated_data.items()):
    if name in acc_number_to_rows:
        for row_num in acc_number_to_rows[name]:
            i += 1
            write_fields_to_row(ws, row_num, data, field_mapping)
            # Report progress every 100 written rows
            if i % 100 == 0:
                print_progress(i, total_matched_rows, write_start, "写入")

# Final progress line (skipped when nothing matched)
if i > 0:
    print_progress(i, total_matched_rows, write_start, "写入")

# Save: the workbook is written back to the path it was loaded from
print("\n正在保存...")
wb.save(tgt_file)
print(f"保存完成！总耗时 {time.time() - start_time:.1f}秒")
print("=" * 50)

程序开始执行
开始整合源数据，共 4796 行...
整合进度: 1/4796 (0.0%)
整合进度: 1001/4796 (20.9%)
整合进度: 2001/4796 (41.7%)
整合进度: 3001/4796 (62.6%)
整合进度: 4001/4796 (83.4%)
整合完成，共 4796 个唯一ID

匹配到 68 个ID，共 68 行待写入
写入进度: 68/68 (100.0%) - 已用: 0.0s, 剩余: 0.0s

正在保存...
保存完成！总耗时 15.2秒


In [3]:
import pandas as pd
from openpyxl import load_workbook
import time

# ---------- 基础函数 ----------
def load_excel(file_path, sheet_name=0):
    """Read one worksheet of an Excel file, requesting ``str`` for every column.

    Args:
        file_path: Workbook path handed to ``pd.read_excel``.
        sheet_name: Sheet selector forwarded to ``pd.read_excel``; the
            default ``0`` selects the first sheet.

    Returns:
        Whatever ``pd.read_excel`` returns for the requested sheet.
    """
    read_options = {"sheet_name": sheet_name, "dtype": str}
    frame = pd.read_excel(file_path, **read_options)
    return frame

def write_fields_to_row(ws, row_num, data, field_mapping):
    """Copy the mapped values of one record into a single worksheet row.

    Args:
        ws: Target sheet; only item assignment by cell reference
            (``ws["D5"] = value``) is used.
        row_num: Row number appended to each column letter.
        data: Mapping of field name -> value for the record.
        field_mapping: Mapping of field name -> column letter.

    Fields present in ``field_mapping`` but absent from ``data`` are skipped.
    """
    for field_name in field_mapping:
        if field_name not in data:
            continue
        column = field_mapping[field_name]
        ws[f"{column}{row_num}"] = data[field_name]

def consolidate_source_data(df, fields_to_consolidate, versions=["V00", "P01"]):
    """Collect, per ID, the distinct non-empty values of the requested fields,
    falling back across version prefixes.

    Each requested field is split into ``<version><base>`` using the first
    entry of ``versions`` that prefixes it. For every row the source columns
    ``<v><base>`` are probed in ``versions`` order and the first non-empty
    value is taken.

    Args:
        df: Source table with an ``ID`` column. Rows whose ID is missing or
            blank are ignored; IDs are stripped and upper-cased.
        fields_to_consolidate: Output field names. A name that starts with
            none of ``versions`` is left out of the result.
        versions: Version prefixes in priority order (never mutated here).

    Returns:
        dict: ``{ID: {field: "v1; v2; ..."}}`` where the distinct, stripped
        values are joined in first-seen row order, so the result is identical
        from run to run. Source headers are matched case-insensitively.
    """
    total = len(df)

    # Upper-cased header -> real header, so the "does this column exist" test
    # and the value lookup can never disagree about letter case.
    header_by_upper = {}
    for col in df.columns:
        header_by_upper.setdefault(str(col).upper(), col)

    # Output field -> existing source columns to probe, in priority order.
    # Resolved once here rather than for every row.
    probe_columns = {}
    for full_field in fields_to_consolidate:
        prefix = next((v for v in versions if full_field.startswith(v)), None)
        if prefix is None:
            continue  # no recognised version prefix
        base_field = full_field[len(prefix):]
        probe_columns[full_field] = [
            header_by_upper[(v + base_field).upper()]
            for v in versions
            if (v + base_field).upper() in header_by_upper
        ]

    consolidated = {}
    print(f"开始整合源数据，共 {total} 行，使用版本: {versions}...")

    # Count rows by position: index labels are not guaranteed to be integers.
    for position, (_, row) in enumerate(df.iterrows()):
        raw_id = row['ID']
        name = str(raw_id).strip().upper() if pd.notna(raw_id) else None
        if not name:
            continue

        if name not in consolidated:
            # One dict per field, used as an insertion-ordered set of values.
            consolidated[name] = {f: {} for f in probe_columns}

        for full_field, columns in probe_columns.items():
            for col in columns:
                val = row[col]
                if pd.notna(val):
                    text = str(val).strip()
                    if text:
                        consolidated[name][full_field][text] = None
                        break  # first version with a value wins for this row

        # Progress line every 1000 rows
        if position % 1000 == 0:
            print(f"整合进度: {position+1}/{total} ({(position+1)/total*100:.1f}%)")

    # Ordered value collections -> "; "-joined strings ("" when empty)
    result = {
        name: {f: "; ".join(values) for f, values in fields.items()}
        for name, fields in consolidated.items()
    }

    print(f"整合完成，共 {len(result)} 个唯一ID")
    return result

def print_progress(current, total, start_time, operation="处理"):
    """Print one progress line with elapsed time and a linear time estimate.

    Args:
        current: Number of items finished so far.
        total: Number of items expected in total.
        start_time: ``time.time()`` value captured when the work began.
        operation: Label placed in front of the progress text.

    A ``total`` of zero is reported as 0.0% instead of raising
    ``ZeroDivisionError``; the remaining time is 0 while nothing is finished.
    """
    elapsed = time.time() - start_time
    progress = (current / total) * 100 if total else 0.0
    # Extrapolate the average time per finished item over the whole job.
    remaining = elapsed * total / current - elapsed if current else 0
    print(f"{operation}进度: {current}/{total} ({progress:.1f}%) - 已用: {elapsed:.1f}s, 剩余: {remaining:.1f}s")

# ---------- Field list (V00 is the primary version; P01 is tried as well) ----------
# `version` is the prefix used to build every output field name below.
version='V00'
# Version prefixes to try, in order.
# NOTE(review): the consolidate_source_data call later in this cell passes a
# literal ["V00", "P01"] rather than this variable.
versions_to_try = ["V00", "P01"]

# The order of this list fixes the output column of each field (the mapping
# built after it assigns columns by list position), so entries must not be
# reordered.
field_list = [
    f"{version}SF1",
    f"{version}SF2",
    f"{version}SF3",
    f"{version}SF4",
    f"{version}SF5",
    f"{version}SF6",
    f"{version}SF7",
    f"{version}SF8",
    f"{version}SF9",
    f"{version}SF10",
    f"{version}SF11",
    f"{version}SF12",
    f"{version}KPNR12",
    f"{version}KPNR12M",
    f"{version}KPNL12",
    f"{version}KPNL12M",
    f"{version}KPACT30",
    f"{version}WPRKN1",
    f"{version}WPRKN2",
    f"{version}WPRKN3",
    f"{version}WPRKN4",
    f"{version}WPRKN5",
    f"{version}KPRKN1",
    f"{version}KPRKN2",
    f"{version}KPRKN3",
    f"{version}P7RKFR",
    f"{version}WSRKN1",
    f"{version}WSRKN2",
    f"{version}KSXRKN1",
    f"{version}KSXRKN2",
    f"{version}KSXRKN3",
    f"{version}KSXRKN4",
    f"{version}KSXRKN5",
    f"{version}DIRKN1",
    f"{version}DIRKN2",
    f"{version}DIRKN3",
    f"{version}DIRKN4",
    f"{version}DIRKN5",
    f"{version}DIRKN6",
    f"{version}DIRKN7",
    f"{version}DIRKN8",
    f"{version}DIRKN9",
    f"{version}DIRKN10",
    f"{version}DIRKN11",
    f"{version}DIRKN12",
    f"{version}DIRKN13",
    f"{version}DIRKN14",
    f"{version}DIRKN15",
    f"{version}DIRKN16",
    f"{version}DIRKN17",
    f"{version}WPLKN1",
    f"{version}WPLKN2",
    f"{version}WPLKN3",
    f"{version}WPLKN4",
    f"{version}WPLKN5",
    f"{version}KPLKN1",
    f"{version}KPLKN2",
    f"{version}KPLKN3",
    f"{version}P7LKFR",
    f"{version}WSLKN1",
    f"{version}WSLKN2",
    f"{version}KSXLKN1",
    f"{version}KSXLKN2",
    f"{version}KSXLKN3",
    f"{version}KSXLKN4",
    f"{version}KSXLKN5",
    f"{version}DILKN1",
    f"{version}DILKN2",
    f"{version}DILKN3",
    f"{version}DILKN4",
    f"{version}DILKN5",
    f"{version}DILKN6",
    f"{version}DILKN7",
    f"{version}DILKN8",
    f"{version}DILKN9",
    f"{version}DILKN10",
    f"{version}DILKN11",
    f"{version}DILKN12",
    f"{version}DILKN13",
    f"{version}DILKN14",
    f"{version}DILKN15",
    f"{version}DILKN16",
    f"{version}DILKN17",
    f"{version}KOOSFX1",
    f"{version}KOOSFX2",
    f"{version}KOOSFX3",
    f"{version}KOOSFX4",
    f"{version}KOOSFX5",
    f"{version}KQOL1",
    f"{version}KQOL2",
    f"{version}KQOL3",
    f"{version}KQOL4",
    f"{version}KGLRS",
    f"{version}HPNR12",
    f"{version}HPNRIL",
    f"{version}HPNROL",
    f"{version}HPNRFL",
    f"{version}HPNRB",
    f"{version}HPNRLB",
    f"{version}HPNRDK",
    f"{version}HPNL12",
    f"{version}HPNLIL",
    f"{version}HPNLOL",
    f"{version}HPNLFL",
    f"{version}HPNLB",
    f"{version}HPNLLB",
    f"{version}HPNLDK",
    f"{version}BP30",
    f"{version}BP30OFT",
    f"{version}BPBAD",
    f"{version}BPUB",
    f"{version}BPMB",
    f"{version}BPLB",
    f"{version}BPB",
    f"{version}BPDK",
    f"{version}OJPNRS",
    f"{version}OJPNLS",
    f"{version}OJPNRE",
    f"{version}OJPNLE",
    f"{version}OJPNRW",
    f"{version}OJPNLW",
    f"{version}OJPNRH",
    f"{version}OJPNLH",
    f"{version}OJPNRA",
    f"{version}OJPNLA",
    f"{version}OJPNRF",
    f"{version}OJPNLF",
    f"{version}OJPNNK",
    f"{version}OJPNNO",
    f"{version}WOMTSL",
    f"{version}LKSX",
    f"{version}KPA30CV",
    f"{version}KPL12CV",
    f"{version}BPBEDCV",
    f"{version}BPTOT",
    f"{version}RKP30CV",
    f"{version}KPACDCV",
    f"{version}WOMSTFL",
    f"{version}PMLKRCV",
    f"{version}KOOSYML",
    f"{version}HPR12CV",
    f"{version}KOOSKPL",
    f"{version}KOOSFSR",
    f"{version}P7LKRCV",
    f"{version}BPDAYCV",
    f"{version}RKSX",
    f"{version}WOMSTFR",
    f"{version}KOOSYMR",
    f"{version}KOOSKPR",
    f"{version}HSMSS",
    f"{version}WOMKPR",
    f"{version}KOOSQOL",
    f"{version}KPR30CV",
    f"{version}WOMADLR",
    f"{version}HPL12CV",
    f"{version}KPR12CV",
    f"{version}WOMTSR",
    f"{version}KPACTCV",
    f"{version}WOMKPL",
    f"{version}LKP30CV",
    f"{version}PMRKRCV",
    f"{version}HSPSS",
    f"{version}WOMADLL",
    f"{version}P7RKRCV",
    f"{version}KPL30CV",
    f"{version}BPACTCV",
    f"{version}KSX",
    f"{version}BONFX",
    f"{version}FALL",
    f"{version}CESD1",
    f"{version}CESD2",
    f"{version}CESD3",
    f"{version}CESD4",
    f"{version}CESD5",
    f"{version}CESD6",
    f"{version}CESD7",
    f"{version}CESD8",
    f"{version}CESD9",
    f"{version}CESD10",
    f"{version}CESD11",
    f"{version}CESD12",
    f"{version}CESD13",
    f"{version}CESD14",
    f"{version}CESD15",
    f"{version}CESD16",
    f"{version}CESD17",
    f"{version}CESD18",
    f"{version}CESD19",
    f"{version}CESD20",
    f"{version}ARTH12",
    f"{version}ARTDOC",
    f"{version}RAIA12",
    f"{version}KPMED",
    f"{version}INJR12",
    f"{version}INJL12",
    f"{version}HRS12",
    f"{version}TYLEN",
    f"{version}NSAIDS",
    f"{version}NSAIDRX",
    f"{version}COXIBS",
    f"{version}NARCOT",
    f"{version}SAME",
    f"{version}MSM",
    f"{version}DOXYCYC",
    f"{version}PNMEDT",
    f"{version}CHON",
    f"{version}GLUC",
    f"{version}KNINJ",
    f"{version}HYALKN",
    f"{version}STERKN",
    f"{version}TEST",
    f"{version}TESTUSE",
    f"{version}ESTR",
    f"{version}ESTRUSE",
    f"{version}GNRH",
    f"{version}GNRHUSE",
    f"{version}PTH",
    f"{version}PTHUSE",
    f"{version}BISPHOS",
    f"{version}BISPYRS",
    f"{version}BISPUSE",
    f"{version}RX30",
    f"{version}RX30NUM",
    f"{version}RXFLUOR",
    f"{version}RXCLCTN",
    f"{version}RXBISPH",
    f"{version}STRINJR",
    f"{version}HYAINJL",
    f"{version}STINJCV",
    f"{version}RXCLCXB",
    f"{version}CESD",
    f"{version}OTA12CV",
    f"{version}OAH12CV",
    f"{version}RXIHYAL",
    f"{version}HYAINJR",
    f"{version}RXNTRAT",
    f"{version}OAO12CV",
    f"{version}STRINJL",
    f"{version}RXASPRN",
    f"{version}RXCOX2",
    f"{version}OAF12CV",
    f"{version}HYINJCV",
    f"{version}GLCFQCV",
    f"{version}RXVLCXB",
    f"{version}CHNFQCV",
    f"{version}FALLCV",
    f"{version}OAD12CV",
    f"{version}RXISTRD",
    f"{version}RXMSM",
    f"{version}RXNARC",
    f"{version}RXRFCXB",
    f"{version}RXCHOND",
    f"{version}RXACTM",
    f"{version}RASTAFU",
    f"{version}RXGLCSM",
    f"{version}KPMEDCV",
    f"{version}OAB12CV",
    f"{version}BISPTYP",
    f"{version}ARTDRCV",
    f"{version}RXANALG",
    f"{version}RXSALIC",
    f"{version}RXNSAID",
    f"{version}RXTPRTD",
    f"{version}RXSAME",
    f"{version}RXRALOX",
    f"{version}GT12CV",
    f"{version}RXOSTRD",
    f"{version}RXVIT_D",
    f"{version}RXOTHAN",
    f"{version}P30VT1",
    f"{version}P30VT2",
    f"{version}P30VT3",
    f"{version}P30VT4",
    f"{version}P30VT5",
    f"{version}P30VT6",
    f"{version}P30VT7",
    f"{version}P30VT8",
    f"{version}P30VT9",
    f"{version}P30VT10",
    f"{version}P30VITC",
    f"{version}VITCAMT",
    f"{version}P30VITD",
    f"{version}VITDAMT",
    f"{version}P30VITE",
    f"{version}VITEAMT",
    f"{version}BPSYS",
    f"{version}BPDIAS",
    f"{version}CSTSGL",
    f"{version}CSTREP1",
    f"{version}CSTIME1",
    f"{version}CSTREP2",
    f"{version}CSTIME2",
    f"{version}KPRK20B",
    f"{version}KPLK20B",
    f"{version}WLK20T1",
    f"{version}STEPST1",
    f"{version}TIMET1",
    f"{version}WLK20T2",
    f"{version}STEPST2",
    f"{version}TIMET2",
    f"{version}WLKAID",
    f"{version}KPRK20D",
    f"{version}KPLK20D",
    f"{version}20MPACE",
    f"{version}WEIGHT",
    f"{version}BMI",
    f"{version}HLTHCAR",
    f"{version}HLTHCOV",
    f"{version}MEDINS",
    f"{version}PA230",
    f"{version}PA330",
    f"{version}PA530",
    f"{version}PA430",
    f"{version}PA130",
    f"{version}PASE1",
    f"{version}PASE1HR",
    f"{version}PASE2",
    f"{version}PASE2HR",
    f"{version}PASE3",
    f"{version}PASE3HR",
    f"{version}PASE4",
    f"{version}PASE4HR",
    f"{version}PASE5",
    f"{version}PASE5HR",
    f"{version}PASE6",
    f"{version}PASE6HR",
    f"{version}HOUACT1",
    f"{version}HOUACT2",
    f"{version}HOUACT3",
    f"{version}HOUACT4",
    f"{version}HOUACT5",
    f"{version}HOUACT6",
    f"{version}WORK7",
    f"{version}WORKAMT",
    f"{version}CUREMP",
    f"{version}WEEKWK",
    f"{version}HOURWK",
    f"{version}MISSWK",
    f"{version}PA530CV",
    f"{version}PA330CV",
    f"{version}PA130CV",
    f"{version}CEMPLOY",
    f"{version}PASE",
    f"{version}AGE",
    f"{version}PA430CV",
    f"{version}PA230CV",
    f"{version}WKHR7CV"
]

def col_letter(n):
    """Convert a 1-based column number to Excel letters (1 -> "A", 27 -> "AA").

    ``0`` produces an empty string.
    """
    digits = []
    while n:
        n, offset = divmod(n - 1, 26)
        digits.append("ABCDEFGHIJKLMNOPQRSTUVWXYZ"[offset])
    # Letters were produced least-significant first.
    return "".join(reversed(digits))

# Build the field -> column-letter mapping (output fields keep their V00 names).
# The i-th field (0-based) goes to column number i + 7, so the first field
# lands in column "G".
field_mapping = {f.upper(): col_letter(i + 7) for i, f in enumerate(field_list)}
fields_to_consolidate = list(field_mapping.keys())

# ---------- Paths ----------
src_file = r"C:\Users\DXW\OAI data\OAIdatabase\Allclinical\Allclinical00.xlsx"
tgt_file = r"C:\Users\DXW\Desktop\半月板手术_信息.xlsx"
tgt_sheet = '12m'   # name of the target worksheet

# ---------- Main flow ----------
start_time = time.time()
print("=" * 50)
print("程序开始执行")
print("=" * 50)

# Read the source workbook (no sheet given, so the first sheet is used)
df = load_excel(src_file)
df.columns = df.columns.str.strip().str.upper()  # normalise headers: trimmed, upper-case

# Consolidate, trying the V00 column first and then the P01 column per field
consolidated_data = consolidate_source_data(df, fields_to_consolidate, versions=["V00", "P01"])

# Open the target workbook and select the configured sheet
wb = load_workbook(tgt_file)
ws = wb[tgt_sheet]

# Build an upper-cased ID -> [row numbers] index from column A, starting at row 2.
# Two columns are iterated but only the first cell of each row is read.
acc_number_to_rows = {}
for row_num, row in enumerate(ws.iter_rows(min_row=2, min_col=1, max_col=2), start=2):
    acc = str(row[0].value).strip().upper() if row[0].value is not None else None
    # Skip empty cells and cells whose text is literally "none" (any letter case)
    if acc and acc != 'NONE':
        acc_number_to_rows.setdefault(acc, []).append(row_num)

# Count how many IDs, and how many target rows, will receive data
matched_names = [n for n in consolidated_data if n in acc_number_to_rows]
total_matched_rows = sum(len(acc_number_to_rows[n]) for n in matched_names)
print(f"\n匹配到 {len(matched_names)} 个ID，共 {total_matched_rows} 行待写入")

# Write the consolidated values into every target row that carries a matching ID
write_start = time.time()
i = 0  # number of rows written so far
for name_idx, (name, data) in enumerate(consolidated_data.items()):
    if name in acc_number_to_rows:
        for row_num in acc_number_to_rows[name]:
            i += 1
            write_fields_to_row(ws, row_num, data, field_mapping)
            # Report progress every 100 written rows
            if i % 100 == 0:
                print_progress(i, total_matched_rows, write_start, "写入")

# Final progress line (skipped when nothing matched)
if i > 0:
    print_progress(i, total_matched_rows, write_start, "写入")

# Save: the workbook is written back to the path it was loaded from
print("\n正在保存...")
wb.save(tgt_file)
print(f"保存完成！总耗时 {time.time() - start_time:.1f}秒")
print("=" * 50)

程序开始执行
开始整合源数据，共 4796 行，使用版本: ['V00', 'P01']...
整合进度: 1/4796 (0.0%)
整合进度: 1001/4796 (20.9%)
整合进度: 2001/4796 (41.7%)
整合进度: 3001/4796 (62.6%)
整合进度: 4001/4796 (83.4%)
整合完成，共 4796 个唯一ID

匹配到 68 个ID，共 68 行待写入
写入进度: 68/68 (100.0%) - 已用: 0.2s, 剩余: 0.0s

正在保存...
保存完成！总耗时 203.7秒


In [6]:
import pandas as pd
from openpyxl import load_workbook
import time

# ---------- 基础函数 ----------
def load_excel(file_path, sheet_name=0):
    """Read one worksheet of an Excel file, requesting ``str`` for every column.

    Args:
        file_path: Workbook path handed to ``pd.read_excel``.
        sheet_name: Sheet selector forwarded to ``pd.read_excel``; the
            default ``0`` selects the first sheet.

    Returns:
        Whatever ``pd.read_excel`` returns for the requested sheet.
    """
    read_options = {"sheet_name": sheet_name, "dtype": str}
    frame = pd.read_excel(file_path, **read_options)
    return frame

def write_fields_to_row(ws, row_num, data, field_mapping):
    """Copy the mapped values of one record into a single worksheet row.

    Args:
        ws: Target sheet; only item assignment by cell reference
            (``ws["D5"] = value``) is used.
        row_num: Row number appended to each column letter.
        data: Mapping of field name -> value for the record.
        field_mapping: Mapping of field name -> column letter.

    Fields present in ``field_mapping`` but absent from ``data`` are skipped.
    """
    for field_name in field_mapping:
        if field_name not in data:
            continue
        column = field_mapping[field_name]
        ws[f"{column}{row_num}"] = data[field_name]

def consolidate_source_data(df, fields_to_consolidate, versions=["V00", "P01"]):
    """Collect, per ID, the distinct non-empty values of the requested fields,
    falling back across version prefixes.

    Each requested field is split into ``<version><base>`` using the first
    entry of ``versions`` that prefixes it. For every row the source columns
    ``<v><base>`` are probed in ``versions`` order and the first non-empty
    value is taken.

    Args:
        df: Source table with an ``ID`` column. Rows whose ID is missing or
            blank are ignored; IDs are stripped and upper-cased.
        fields_to_consolidate: Output field names. A name that starts with
            none of ``versions`` is left out of the result.
        versions: Version prefixes in priority order (never mutated here).

    Returns:
        dict: ``{ID: {field: "v1; v2; ..."}}`` where the distinct, stripped
        values are joined in first-seen row order, so the result is identical
        from run to run. Source headers are matched case-insensitively.
    """
    total = len(df)

    # Upper-cased header -> real header, so the "does this column exist" test
    # and the value lookup can never disagree about letter case.
    header_by_upper = {}
    for col in df.columns:
        header_by_upper.setdefault(str(col).upper(), col)

    # Output field -> existing source columns to probe, in priority order.
    # Resolved once here rather than for every row.
    probe_columns = {}
    for full_field in fields_to_consolidate:
        prefix = next((v for v in versions if full_field.startswith(v)), None)
        if prefix is None:
            continue  # no recognised version prefix
        base_field = full_field[len(prefix):]
        probe_columns[full_field] = [
            header_by_upper[(v + base_field).upper()]
            for v in versions
            if (v + base_field).upper() in header_by_upper
        ]

    consolidated = {}
    print(f"开始整合源数据，共 {total} 行，使用版本: {versions}...")

    # Count rows by position: index labels are not guaranteed to be integers.
    for position, (_, row) in enumerate(df.iterrows()):
        raw_id = row['ID']
        name = str(raw_id).strip().upper() if pd.notna(raw_id) else None
        if not name:
            continue

        if name not in consolidated:
            # One dict per field, used as an insertion-ordered set of values.
            consolidated[name] = {f: {} for f in probe_columns}

        for full_field, columns in probe_columns.items():
            for col in columns:
                val = row[col]
                if pd.notna(val):
                    text = str(val).strip()
                    if text:
                        consolidated[name][full_field][text] = None
                        break  # first version with a value wins for this row

        # Progress line every 1000 rows
        if position % 1000 == 0:
            print(f"整合进度: {position+1}/{total} ({(position+1)/total*100:.1f}%)")

    # Ordered value collections -> "; "-joined strings ("" when empty)
    result = {
        name: {f: "; ".join(values) for f, values in fields.items()}
        for name, fields in consolidated.items()
    }

    print(f"整合完成，共 {len(result)} 个唯一ID")
    return result

def print_progress(current, total, start_time, operation="处理"):
    """Print one progress line with elapsed time and a linear time estimate.

    Args:
        current: Number of items finished so far.
        total: Number of items expected in total.
        start_time: ``time.time()`` value captured when the work began.
        operation: Label placed in front of the progress text.

    A ``total`` of zero is reported as 0.0% instead of raising
    ``ZeroDivisionError``; the remaining time is 0 while nothing is finished.
    """
    elapsed = time.time() - start_time
    progress = (current / total) * 100 if total else 0.0
    # Extrapolate the average time per finished item over the whole job.
    remaining = elapsed * total / current - elapsed if current else 0
    print(f"{operation}进度: {current}/{total} ({progress:.1f}%) - 已用: {elapsed:.1f}s, 剩余: {remaining:.1f}s")

# ---------- Field list (only the V00 version is tried in this cell) ----------

versions_to_try = ["V00"]  # version prefixes to try, in priority order

# Defined here so the cell runs on a fresh kernel instead of relying on a
# `version` value left behind by a previously executed cell.
version = versions_to_try[0]

field_list = [
    f"{version}MRSIDE",
    f"{version}MRCOMP",
]

def col_letter(n):
    """Convert a 1-based column number to Excel letters (1 -> "A", 27 -> "AA").

    ``0`` produces an empty string.
    """
    digits = []
    while n:
        n, offset = divmod(n - 1, 26)
        digits.append("ABCDEFGHIJKLMNOPQRSTUVWXYZ"[offset])
    # Letters were produced least-significant first.
    return "".join(reversed(digits))

# Build the field -> column-letter mapping (output fields keep their V00 names).
# The i-th field (0-based) goes to column number i + 8, so the first field
# lands in column "H".
field_mapping = {f.upper(): col_letter(i + 8) for i, f in enumerate(field_list)}
fields_to_consolidate = list(field_mapping.keys())

# ---------- Paths ----------
src_file = r"C:\Users\DXW\Desktop\新建 Microsoft Excel 工作表.xlsx"
tgt_file = r"C:\Users\DXW\Desktop\半月板手术_信息.xlsx"
tgt_sheet = '12m'   # name of the target worksheet

# ---------- Main flow ----------
start_time = time.time()
print("=" * 50)
print("程序开始执行")
print("=" * 50)

# Read the source workbook (no sheet given, so the first sheet is used)
df = load_excel(src_file)
df.columns = df.columns.str.strip().str.upper()  # normalise headers: trimmed, upper-case

# Consolidate using the version prefixes listed in versions_to_try
consolidated_data = consolidate_source_data(df, fields_to_consolidate, versions_to_try)

# Open the target workbook and select the configured sheet
wb = load_workbook(tgt_file)
ws = wb[tgt_sheet]

# Build an upper-cased ID -> [row numbers] index from column A, starting at row 2.
# Two columns are iterated but only the first cell of each row is read.
acc_number_to_rows = {}
for row_num, row in enumerate(ws.iter_rows(min_row=2, min_col=1, max_col=2), start=2):
    acc = str(row[0].value).strip().upper() if row[0].value is not None else None
    # Skip empty cells and cells whose text is literally "none" (any letter case)
    if acc and acc != 'NONE':
        acc_number_to_rows.setdefault(acc, []).append(row_num)

# Count how many IDs, and how many target rows, will receive data
matched_names = [n for n in consolidated_data if n in acc_number_to_rows]
total_matched_rows = sum(len(acc_number_to_rows[n]) for n in matched_names)
print(f"\n匹配到 {len(matched_names)} 个ID，共 {total_matched_rows} 行待写入")

# Write the consolidated values into every target row that carries a matching ID
write_start = time.time()
i = 0  # number of rows written so far
for name_idx, (name, data) in enumerate(consolidated_data.items()):
    if name in acc_number_to_rows:
        for row_num in acc_number_to_rows[name]:
            i += 1
            write_fields_to_row(ws, row_num, data, field_mapping)
            # Report progress every 100 written rows
            if i % 100 == 0:
                print_progress(i, total_matched_rows, write_start, "写入")

# Final progress line (skipped when nothing matched)
if i > 0:
    print_progress(i, total_matched_rows, write_start, "写入")

# Save: the workbook is written back to the path it was loaded from
print("\n正在保存...")
wb.save(tgt_file)
print(f"保存完成！总耗时 {time.time() - start_time:.1f}秒")
print("=" * 50)

程序开始执行
开始整合源数据，共 64513 行，使用版本: ['V00']...
整合进度: 1/64513 (0.0%)
整合进度: 1001/64513 (1.6%)
整合进度: 2001/64513 (3.1%)
整合进度: 3001/64513 (4.7%)
整合进度: 4001/64513 (6.2%)
整合进度: 5001/64513 (7.8%)
整合进度: 6001/64513 (9.3%)
整合进度: 7001/64513 (10.9%)
整合进度: 8001/64513 (12.4%)
整合进度: 9001/64513 (14.0%)
整合进度: 10001/64513 (15.5%)
整合进度: 11001/64513 (17.1%)
整合进度: 12001/64513 (18.6%)
整合进度: 13001/64513 (20.2%)
整合进度: 14001/64513 (21.7%)
整合进度: 15001/64513 (23.3%)
整合进度: 16001/64513 (24.8%)
整合进度: 17001/64513 (26.4%)
整合进度: 18001/64513 (27.9%)
整合进度: 19001/64513 (29.5%)
整合进度: 20001/64513 (31.0%)
整合进度: 21001/64513 (32.6%)
整合进度: 22001/64513 (34.1%)
整合进度: 23001/64513 (35.7%)
整合进度: 24001/64513 (37.2%)
整合进度: 25001/64513 (38.8%)
整合进度: 26001/64513 (40.3%)
整合进度: 27001/64513 (41.9%)
整合进度: 28001/64513 (43.4%)
整合进度: 29001/64513 (45.0%)
整合进度: 30001/64513 (46.5%)
整合进度: 31001/64513 (48.1%)
整合进度: 32001/64513 (49.6%)
整合进度: 33001/64513 (51.2%)
整合进度: 34001/64513 (52.7%)
整合进度: 35001/64513 (54.3%)
整合进度: 36001/64513 (55.8%)
整合进度: 37001/6451

In [12]:
import pandas as pd
from openpyxl import load_workbook
import time
import re

# ---------- 基础函数 ----------
def load_excel(file_path, sheet_name=0):
    """Read one worksheet of an Excel file, requesting ``str`` for every column.

    Args:
        file_path: Workbook path handed to ``pd.read_excel``.
        sheet_name: Sheet selector forwarded to ``pd.read_excel``; the
            default ``0`` selects the first sheet.

    Returns:
        Whatever ``pd.read_excel`` returns for the requested sheet.
    """
    read_options = {"sheet_name": sheet_name, "dtype": str}
    frame = pd.read_excel(file_path, **read_options)
    return frame

def write_fields_to_row(ws, row_num, data, field_mapping):
    """Copy the mapped values of one record into a single worksheet row.

    Args:
        ws: Target sheet; only item assignment by cell reference
            (``ws["H5"] = value``) is used.
        row_num: Row number appended to each column letter.
        data: Mapping of field name -> value for the record.
        field_mapping: Mapping of field name -> column letter.

    Fields present in ``field_mapping`` but absent from ``data`` are skipped.
    """
    for field_name in field_mapping:
        if field_name not in data:
            continue
        column = field_mapping[field_name]
        ws[f"{column}{row_num}"] = data[field_name]

# Matches the first "1" or "2" occurring anywhere in a SIDE value.
SIDE_PATTERN = re.compile(r'[12]')
def normalize_side(side: str) -> str:
    """Reduce a SIDE value to ``"1"`` or ``"2"``.

    Returns the first ``1``/``2`` character found in ``str(side)``, or an
    empty string when ``side`` is falsy or contains neither digit.
    """
    if not side:
        return ''
    found = SIDE_PATTERN.search(str(side).upper())
    if found is None:
        return ''
    return found.group(0)

def consolidate_source_data(df, fields_to_consolidate, versions):
    """Collect, per (ID, SIDE) pair, the distinct non-empty values of the
    requested fields, falling back across version prefixes.

    Each requested field is split into ``<version><base>`` using the first
    entry of ``versions`` that prefixes it. For every row the source columns
    ``<v><base>`` are probed in ``versions`` order and the first non-empty
    value is taken.

    Args:
        df: Source table with ``ID`` and ``SIDE`` columns. IDs are stripped
            and upper-cased; SIDE is reduced with ``normalize_side``. Rows
            with a blank ID or a SIDE that normalizes to "" are ignored.
        fields_to_consolidate: Output field names. A name that starts with
            none of ``versions`` is left out of the result.
        versions: Version prefixes in priority order.

    Returns:
        dict: ``{(ID, SIDE): {field: "v1; v2; ..."}}`` where the distinct,
        stripped values are joined in first-seen row order, so the result is
        identical from run to run. Source headers are matched
        case-insensitively.
    """
    # Upper-cased header -> real header, so the "does this column exist" test
    # and the value lookup can never disagree about letter case.
    header_by_upper = {}
    for col in df.columns:
        header_by_upper.setdefault(str(col).upper(), col)

    # Output field -> existing source columns to probe, in priority order.
    # Resolved once here rather than for every row.
    probe_columns = {}
    for full_field in fields_to_consolidate:
        prefix = next((v for v in versions if full_field.startswith(v)), None)
        if prefix is None:
            continue  # no recognised version prefix
        bf = full_field[len(prefix):]
        probe_columns[full_field] = [
            header_by_upper[(v + bf).upper()]
            for v in versions
            if (v + bf).upper() in header_by_upper
        ]

    consolidated = {}
    total = len(df)
    print(f"开始整合源数据，共 {total} 行，使用版本: {versions}...")
    # Count rows by position: index labels are not guaranteed to be integers.
    for position, (_, row) in enumerate(df.iterrows()):
        raw_id = row['ID']
        raw_side = row['SIDE']
        uid = str(raw_id).strip().upper() if pd.notna(raw_id) else None
        side_raw = str(raw_side).strip() if pd.notna(raw_side) else ''
        side = normalize_side(side_raw)
        if not uid or not side:
            continue

        key = (uid, side)
        if key not in consolidated:
            # One dict per field, used as an insertion-ordered set of values.
            consolidated[key] = {f: {} for f in probe_columns}

        for full_field, columns in probe_columns.items():
            for col in columns:
                val = row[col]
                if pd.notna(val):
                    text = str(val).strip()
                    if text:
                        consolidated[key][full_field][text] = None
                        break  # first version with a value wins for this row

        # Progress line every 1000 rows
        if position % 1000 == 0:
            print(f"整合进度: {position+1}/{total} ({(position+1)/total*100:.1f}%)")

    # Ordered value collections -> "; "-joined strings ("" when empty)
    result = {
        key: {f: "; ".join(values) for f, values in fields.items()}
        for key, fields in consolidated.items()
    }
    print(f"整合完成，共 {len(result)} 组 (ID,SIDE)")
    return result

def print_progress(current, total, start_time, operation="处理"):
    """Print one progress line with elapsed time and a linear time estimate.

    Args:
        current: Number of items finished so far.
        total: Number of items expected in total.
        start_time: ``time.time()`` value captured when the work began.
        operation: Label placed in front of the progress text.

    A ``total`` of zero is reported as 0.0% instead of raising
    ``ZeroDivisionError``; the remaining time is 0 while nothing is finished.
    """
    elapsed = time.time() - start_time
    progress = (current / total) * 100 if total else 0.0
    # Extrapolate the average time per finished item over the whole job.
    remaining = elapsed * total / current - elapsed if current else 0
    print(f"{operation}进度: {current}/{total} ({progress:.1f}%) - 已用: {elapsed:.1f}s, 剩余: {remaining:.1f}s")

# ---------- Field list (first entry of versions_to_try + variable suffix, in output column order) ----------
versions_to_try = ["V05"]
field_list = [
    f"{versions_to_try[0]}{suffix}"
    for suffix in (
        "CFWDTH", "MCMJSW", "JSW175", "JSW200", "JSW250", "BARCDJD",
        "JSW300", "JSW225", "TPCFDS", "BMANG", "JSW150", "JSW275",
        "LJSW850", "LJSW900", "LJSW700", "LJSW825", "LJSW750", "LJSW875",
        "LJSW725", "LJSW775", "LJSW800", "XMJSW",
    )
]

def col_letter(n):
    """Convert a 1-based column number to Excel letters (1 -> "A", 27 -> "AA").

    ``0`` produces an empty string.
    """
    digits = []
    while n:
        n, offset = divmod(n - 1, 26)
        digits.append("ABCDEFGHIJKLMNOPQRSTUVWXYZ"[offset])
    # Letters were produced least-significant first.
    return "".join(reversed(digits))

# Assign each field its target column letter: the i-th field (0-based) goes to
# column number i + 8, so the first field lands in column "H".
field_mapping = {f.upper(): col_letter(i + 8) for i, f in enumerate(field_list)}
fields_to_consolidate = list(field_mapping.keys())

# ---------- Paths ----------
src_file = r"C:\Users\DXW\OAI data\OAIdatabase\X-ray Quant\kxr_qjsw_rel_duryea05.xlsx"
tgt_file = r"C:\Users\DXW\Desktop\半月板手术_信息.xlsx"
tgt_sheet = '24m'  # name of the target worksheet

# ---------- Main flow ----------
start_time = time.time()
print("=" * 50)
print("程序开始执行")
print("=" * 50)

# Read the first sheet of the source workbook and normalise its headers
df = load_excel(src_file)
df.columns = df.columns.str.strip().str.upper()

# Consolidate per (ID, SIDE) using the version prefixes in versions_to_try
consolidated_data = consolidate_source_data(df, fields_to_consolidate, versions_to_try)

# Open the target workbook and select the configured sheet
wb = load_workbook(tgt_file)
ws = wb[tgt_sheet]

# Build a (ID, SIDE) -> [row numbers] index from columns A and B, starting at
# row 2. Per the original note the target SIDE values are already strictly
# 1/2, so the raw cell text is used; rows with any other SIDE text are skipped.
key_to_rows = {}
for row_num, row in enumerate(ws.iter_rows(min_row=2, min_col=1, max_col=3), start=2):
    # Falsy cell values (empty cell, 0, "") are treated as missing here
    uid = str(row[0].value).strip().upper() if row[0].value else None
    side = str(row[1].value).strip() if row[1].value else ''
    if uid and side in {'1', '2'}:
        key_to_rows.setdefault((uid, side), []).append(row_num)

# Count how many keys, and how many target rows, will receive data
matched_keys = [k for k in consolidated_data if k in key_to_rows]
total_rows = sum(len(key_to_rows[k]) for k in matched_keys)
print(f"\n匹配到 {len(matched_keys)} 组 (ID,SIDE)，共 {total_rows} 行待写入")

# Write the consolidated values into every target row with a matching key
write_start = time.time()
i = 0  # number of rows written so far
for key, data in consolidated_data.items():
    if key in key_to_rows:
        for row_num in key_to_rows[key]:
            i += 1
            write_fields_to_row(ws, row_num, data, field_mapping)
            # Report progress every 100 written rows
            if i % 100 == 0:
                print_progress(i, total_rows, write_start, "写入")
# Final progress line (skipped when nothing matched)
if i > 0:
    print_progress(i, total_rows, write_start, "写入")

# Save: the workbook is written back to the path it was loaded from
print("\n正在保存...")
wb.save(tgt_file)
print(f"保存完成！总耗时 {time.time() - start_time:.1f}秒")
print("=" * 50)

程序开始执行
开始整合源数据，共 144 行，使用版本: ['V05']...
整合进度: 1/144 (0.7%)
整合完成，共 72 组 (ID,SIDE)

匹配到 1 组 (ID,SIDE)，共 1 行待写入
写入进度: 1/1 (100.0%) - 已用: 0.0s, 剩余: 0.0s

正在保存...
保存完成！总耗时 0.4秒
