In [3]:
# -*- coding: utf-8 -*-
"""
膝关节同侧/对侧变量重排（枚举法 + 一次性向量化赋值）
不清洗、不保留原左右列、不 apply
"""
import pandas as pd
import numpy as np

# ========== User settings: this is the only section to edit ==========
# Workbook that is read as input and later receives the new sheet.
input_file = r"C:\Users\DXW\Desktop\半月板部分切除术\OAI test\test 2\半月板手术_predictor_processed.xlsx"
# Column holding the participant identifier, and column holding the side flag
# (the script below treats 0 as right and 1 as left).
id_col, side_col = 'ID', 'side'
# ======================================================================

# ---------- 1. Enumerate variables ----------
def _parse_names(text):
    """Split a comma-separated block of column names into a clean list.

    Whitespace/newlines around names are stripped, empty entries (e.g. from a
    trailing comma) are dropped, and repeated names are removed while keeping
    the position of the first occurrence.
    """
    names = (piece.strip() for piece in text.split(','))
    return list(dict.fromkeys(name for name in names if name))


# Person-level variables that are copied to the output unchanged.
# Changes versus the earlier list:
#   * HYINJCV was listed twice, which made df[[...]] emit the column twice.
#   * STRINJL was removed: it is the left half of the STRINJR/STRINJL pair
#     below, and original left/right columns are not meant to be kept.
#   * KPACDCV was added (at the end, so existing column order is unchanged).
#     NOTE(review): it used to sit in right_vars with no left counterpart and
#     appears to be a person-level variable - confirm against the codebook.
general_vars = _parse_names("""
SF1,SF2,SF3,SF4,SF5,SF6,SF7,SF8,SF9,SF10,SF11,SF12,
KPACT30,
KQOL1,KQOL2,KQOL3,KQOL4,
KGLRS,
BP30,BP30OFT,BPBAD,BPUB,BPMB,BPLB,BPB,
OJPNRS,OJPNLS,OJPNRE,OJPNLE,OJPNRW,OJPNLW,OJPNRH,OJPNLH,OJPNRA,OJPNLA,OJPNRF,OJPNLF,OJPNNK,OJPNNO,
KSX,
FALL,
CESD1,CESD2,CESD3,CESD4,CESD5,CESD6,CESD7,CESD8,CESD9,CESD10,
CESD11,CESD12,CESD13,CESD14,CESD15,CESD16,CESD17,CESD18,CESD19,CESD20,
ARTDOC,
KPMED,
TYLEN,NSAIDS,NSAIDRX,COXIBS,NARCOT,SAME,MSM,DOXYCYC,PNMEDT,CHON,GLUC,KNINJ,
PTH,BISPHOS,
RX30,RX30NUM,RXFLUOR,RXCLCTN,RXBISPH,
STINJCV,
CESD,
RXIHYAL,HYINJCV,RXNTRAT,
RXASPRN,RXCOX2,
GLCFQCV,RXVLCXB,CHNFQCV,FALLCV,
RXISTRD,RXMSM,RXNARC,RXRFCXB,RXCHOND,RXACTM,RXGLCSM,KPMEDCV,
BISPTYP,ARTDRCV,RXANALG,RXSALIC,RXNSAID,RXTPRTD,RXSAME,RXRALOX,
RXOSTRD,RXVIT_D,RXOTHAN,
BPSYS,BPDIAS,
CSTSGL,CSTREP1,CSTIME1,CSTREP2,CSTIME2,
WLK20T1,STEPST1,TIMET1,WLK20T2,STEPST2,TIMET2,WLKAID,20MPACE,
WEIGHT,BMI,
HLTHCAR,HLTHCOV,MEDINS,
PA230,PA330,PA530,PA430,PA130,
PASE1,PASE1HR,PASE2,PASE2HR,PASE3,PASE4,PASE5,PASE6,
HOUACT1,HOUACT2,HOUACT3,HOUACT4,HOUACT5,HOUACT6,
WORK7,WORKAMT,CUREMP,WEEKWK,HOURWK,MISSWK,
PA530CV,PA330CV,PA130CV,CEMPLOY,PASE,AGE,PA430CV,PA230CV,WKHR7CV,
KPACDCV
""")

# Side-specific variables, written once with `{s}` standing for the side
# letter.  right_vars and left_vars are both generated from this single list,
# so entry i of one is always the counterpart of entry i of the other.  The
# previous hand-written lists had different lengths (49 vs 47) and drifted out
# of step, so positional pairing matched unrelated variables (for example
# WOMSTFR with WOMTSL) and dropped the last two right-side names.
# NOTE(review): WOMSTFL, KPL30CV and PMRKRCV were not present in the original
# lists; they are inferred from the naming pattern of their counterparts.
# Confirm they exist in the dataset - the build step reads side columns with
# df.get(..., np.nan), so an absent column becomes all-NaN rather than an error.
_side_templates = _parse_names("""
KPN{s}12,KPN{s}12M,
WP{s}KN1,WP{s}KN2,WP{s}KN3,WP{s}KN4,WP{s}KN5,
KP{s}KN1,KP{s}KN2,KP{s}KN3,
P7{s}KFR,
WS{s}KN1,WS{s}KN2,
KSX{s}KN1,KSX{s}KN2,KSX{s}KN3,KSX{s}KN4,KSX{s}KN5,
DI{s}KN1,DI{s}KN2,DI{s}KN3,DI{s}KN4,DI{s}KN5,DI{s}KN6,DI{s}KN7,DI{s}KN8,DI{s}KN9,DI{s}KN10,DI{s}KN11,DI{s}KN12,DI{s}KN13,DI{s}KN14,DI{s}KN15,DI{s}KN16,DI{s}KN17,
WOMSTF{s},WOMKP{s},KOOSYM{s},KOOSKP{s},WOMADL{s},
KP{s}30CV,P7{s}KRCV,KP{s}12CV,WOMTS{s},
{s}KP30CV,PM{s}KRCV,
HP{s}12CV,
STRINJ{s},HYAINJ{s}
""")

right_vars = [template.format(s='R') for template in _side_templates]
left_vars = [template.format(s='L') for template in _side_templates]

# ---------- 2. Load data ----------
df = pd.read_excel(input_file)

# ---------- 3. Build the new DataFrame ----------
# Right and left names are paired by position.  zip() would silently truncate
# to the shorter list and pair unrelated variables, so refuse to continue when
# the two lists cannot be aligned.
if len(right_vars) != len(left_vars):
    raise ValueError(
        f'right_vars has {len(right_vars)} names but left_vars has '
        f'{len(left_vars)}; the lists must pair up one-to-one by position'
    )

# Base columns: ID + side + general variables.  dict.fromkeys drops repeated
# names (first occurrence wins) so no column is written twice.
base_cols = list(dict.fromkeys([id_col, side_col] + general_vars))
base = df[base_cols].copy()

# Side coding: 0 = right knee, 1 = left knee.  Anything else (including a
# missing value) cannot be assigned to a side, so those rows get NaN in every
# ips_/con_ column instead of being treated as left.
is_right = (df[side_col] == 0).to_numpy()
is_left = (df[side_col] == 1).to_numpy()
is_valid = is_right | is_left
all_valid = bool(is_valid.all())
if not all_valid:
    print(f'Warning: {int((~is_valid).sum())} row(s) have a {side_col!r} value '
          'other than 0/1; their ips_/con_ values are set to NaN')

# Side columns absent from the sheet are still tolerated (treated as all-NaN),
# but reported so that a misspelled name does not go unnoticed.
missing_cols = [name for name in right_vars + left_vars if name not in df.columns]
if missing_cols:
    print(f'Warning: columns not found, treated as all-NaN: {missing_cols}')

# Collect every new column first and attach them with one concat; inserting
# them into `base` one at a time fragments the frame.
new_cols = {}
for rv, lv in zip(right_vars, left_vars):
    r_vals = df.get(rv, np.nan)
    l_vals = df.get(lv, np.nan)
    picked = {
        # Ipsilateral: value from the knee on the recorded side.
        f'ips_{rv}': np.where(is_right, r_vals, l_vals),
        # Contralateral: value from the opposite knee.
        f'con_{rv}': np.where(is_right, l_vals, r_vals),
    }
    for col_name, values in picked.items():
        column = pd.Series(values, index=base.index)
        new_cols[col_name] = column if all_valid else column.where(is_valid)

base = pd.concat([base, pd.DataFrame(new_cols, index=base.index)], axis=1)

# ---------- 4. Write back ----------
# Append to the input workbook, replacing the sheet if it already exists.
with pd.ExcelWriter(input_file, mode='a', engine='openpyxl', if_sheet_exists='replace') as writer:
    base.to_excel(writer, sheet_name='KneeReformatted', index=False)

print('✅ 已写入新 sheet：KneeReformatted（仅 ips/con 变量）')

✅ 已写入新 sheet：KneeReformatted（仅 ips/con 变量）
