In [None]:
import pandas as pd

def pad_secondary_structures(df,
                             seq_col='Sequence_of_origin_tRNA',
                             struct_col='Secondary structure'):
    """
    Right-pad every secondary structure with '.' up to its sequence length.

    For each row the text in ``struct_col`` is compared with the text in
    ``seq_col``. If the sequence is longer, the difference is filled by
    appending '.' characters to the structure. A structure that is already
    as long as (or longer than) its sequence is left unchanged.

    Missing values (None / NaN / pd.NA) in either column are treated as
    empty strings: a missing structure becomes all dots, and a row with a
    missing sequence keeps whatever structure it has.

    Args:
        df: pandas DataFrame to process. It is modified in place:
            ``struct_col`` is created (filled with '') when absent and is
            then overwritten with the padded values.
        seq_col: name of the column holding the sequences.
        struct_col: name of the column holding the structures.

    Returns:
        The same ``df`` object that was passed in.

    Raises:
        KeyError: if ``seq_col`` is not a column of ``df``.
    """
    # Create the structure column when it does not exist yet.
    if struct_col not in df.columns:
        df[struct_col] = ''

    def _text(value):
        # A missing cell is a float NaN, which is truthy and has no len(),
        # so it must be replaced explicitly (``value or ''`` is not enough).
        return '' if pd.isna(value) else value

    def _pad(structure, sequence):
        structure = _text(structure)
        missing = len(_text(sequence)) - len(structure)
        return structure + '.' * missing if missing > 0 else structure

    # Build the column positionally instead of using df.apply(axis=1):
    # on a zero-row frame apply() returns a DataFrame, not a Series, which
    # cannot be assigned to a single column.
    df[struct_col] = [
        _pad(structure, sequence)
        for structure, sequence in zip(df[struct_col], df[seq_col])
    ]

    return df

if __name__ == '__main__':
    # Step 1: load the CSV. dtype=str keeps every column as text, so a
    # sequence that happens to contain digits is not parsed as a number.
    input_path = 'tRNAtherapeutics.csv'
    df = pd.read_csv(input_path, dtype=str)

    # Step 2: pad the 'Secondary structure' column to the sequence length.
    column_names = {
        'seq_col': 'Sequence_of_origin_tRNA',
        'struct_col': 'Secondary structure',
    }
    df_padded = pad_secondary_structures(df, **column_names)

    # Step 3: write the padded table next to the input, without the index.
    output_path = 'tRNAtherapeutics_padded.csv'
    df_padded.to_csv(output_path, encoding='utf-8', index=False)
    print(f"Saved padded table to {output_path}")

Loaded 1028 records from tRNAtherapeutics.csv
[1/1028] Sequence length=74 -> No structure found
[2/1028] Sequence length=74 -> (((((((.(((........)))..(((((.......))))).....(((((.......)))))))))))).
[3/1028] Sequence length=74 -> (((((((.(((........)))..(((((.......))))).....(((((.......)))))))))))).
[4/1028] Sequence length=75 -> No structure found
[5/1028] Sequence length=74 -> No structure found
[6/1028] Sequence length=74 -> (((((((.(((........)))..(((((.......))))).....(((((.......)))))))))))).
[7/1028] Sequence length=74 -> No structure found
[8/1028] Sequence length=74 -> No structure found
[9/1028] Sequence length=74 -> No structure found
[10/1028] Sequence length=74 -> No structure found
[11/1028] Sequence length=73 -> No structure found
[12/1028] Sequence length=75 -> (((((((..((((.......)))).(((((.......))))).....(((((.......)))))))))))).
[13/1028] Sequence length=76 -> No structure found
[14/1028] Sequence length=74 -> No structure found
[15/1028] Sequence length=74 -> No s

KeyboardInterrupt: 