In [1]:
def generate_mutation_file(pdb_file, mutation_file):
    """Write one mutation entry for every alanine residue found in a PDB file.

    The PDB file is scanned for ``ATOM`` and ``HETATM`` records. Consecutive
    records that share the same chain ID, residue number and insertion code
    (columns 22-27) are treated as one residue, so each residue contributes a
    single three-letter code to the sequence regardless of how many atoms it
    has.

    For every residue whose code is ``ALA`` an entry ``A{i}S,B{i}S;`` is
    produced, where ``i`` is the 1-based position of the residue in the order
    it was encountered in the file (not the PDB residue number). Entries are
    written to ``mutation_file`` separated by newlines, with no trailing
    newline; the file is created empty when there is no alanine.

    NOTE(review): the leading ``A``/``B`` in each entry are presumably chain
    IDs expected by the downstream tool -- confirm against its input format.

    Args:
        pdb_file: Path of the PDB file to read.
        mutation_file: Path of the text file to (over)write.
    """
    # Three-letter residue names, one list element per residue. A list (not a
    # concatenated string) is required so that whole codes such as 'ALA' are
    # compared below instead of single characters.
    residues = []
    previous_key = None
    with open(pdb_file, 'r') as f:
        for line in f:
            if line.startswith('ATOM') or line.startswith('HETATM'):
                # chain ID + residue number + insertion code identify a residue
                residue_key = line[21:27]
                if residue_key != previous_key:
                    residues.append(line[17:20].strip())
                    previous_key = residue_key

    mutations = [
        f'A{i}S,B{i}S;'
        for i, amino_acid in enumerate(residues, start=1)
        if amino_acid == 'ALA'
    ]

    with open(mutation_file, 'w') as f:
        f.write('\n'.join(mutations))



In [4]:
def generate_mutation_file(pdb_file, mutation_file):
    """Write one mutation entry for every alanine residue found in a PDB file.

    ``ATOM`` and ``HETATM`` records are read and their three-letter residue
    names are translated to one-letter codes (unknown names become ``X``).
    Consecutive records that share the same chain ID, residue number and
    insertion code (columns 22-27) are treated as one residue, so the
    sequence holds exactly one letter per residue rather than one per atom.

    For every residue whose letter is ``A`` an entry ``A{i}S,B{i}S;`` is
    produced, where ``i`` is the 1-based position of the residue in the order
    it was encountered in the file (not the PDB residue number). Entries are
    written to ``mutation_file`` separated by newlines, with no trailing
    newline.

    NOTE(review): the leading ``A``/``B`` in each entry are presumably chain
    IDs expected by the downstream tool -- confirm against its input format.

    Args:
        pdb_file: Path of the PDB file to read.
        mutation_file: Path of the text file to (over)write.
    """
    # Three-letter -> one-letter codes for the 20 standard amino acids.
    aa_mapping = {
        'ALA': 'A',
        'CYS': 'C',
        'ASP': 'D',
        'GLU': 'E',
        'PHE': 'F',
        'GLY': 'G',
        'HIS': 'H',
        'ILE': 'I',
        'LYS': 'K',
        'LEU': 'L',
        'MET': 'M',
        'ASN': 'N',
        'PRO': 'P',
        'GLN': 'Q',
        'ARG': 'R',
        'SER': 'S',
        'THR': 'T',
        'VAL': 'V',
        'TRP': 'W',
        'TYR': 'Y'
    }

    # Build the sequence with one letter per residue. Appending a letter for
    # every atom line would repeat each residue and turn the index used below
    # into an atom ordinal.
    sequence = []
    previous_key = None
    with open(pdb_file, 'r') as f:
        for line in f:
            if line.startswith('ATOM') or line.startswith('HETATM'):
                # chain ID + residue number + insertion code identify a residue
                residue_key = line[21:27]
                if residue_key != previous_key:
                    # Names missing from the mapping are recorded as 'X'.
                    sequence.append(aa_mapping.get(line[17:20].strip(), 'X'))
                    previous_key = residue_key

    mutations = [
        f'A{i}S,B{i}S;'
        for i, amino_acid in enumerate(sequence, start=1)
        if amino_acid == 'A'
    ]

    with open(mutation_file, 'w') as f:
        f.write('\n'.join(mutations))


In [29]:
def generate_mutation_file(pdb_file, mutation_file, target_base, mutated_base):
    """Write a list of point mutations for every residue of a given type.

    Each ``ATOM`` record of ``pdb_file`` is inspected: the three-letter
    residue name (columns 18-20) is translated to a one-letter code (unknown
    names become ``X``) and the residue number is taken from columns 23-26.
    For every residue whose one-letter code equals ``target_base`` the entry
    ``{target_base}A{residue_number}{mutated_base};`` is produced.

    A residue spans several ATOM records, so the same entry would otherwise
    be generated once per atom; each distinct entry is therefore written only
    once, in order of first appearance. Entries are separated by newlines
    with no trailing newline.

    NOTE(review): the ``A`` after ``target_base`` is a fixed literal
    (presumably the chain ID); the chain column of the PDB record is not
    consulted, so residues from other chains are labelled ``A`` as well --
    confirm this is intended.

    Args:
        pdb_file: Path of the PDB file to read.
        mutation_file: Path of the text file to (over)write.
        target_base: One-letter code of the residue type to mutate.
        mutated_base: One-letter code written as the replacement residue.
    """
    # Three-letter -> one-letter codes for the 20 standard amino acids.
    aa_mapping = {
        'ALA': 'A',
        'CYS': 'C',
        'ASP': 'D',
        'GLU': 'E',
        'PHE': 'F',
        'GLY': 'G',
        'HIS': 'H',
        'ILE': 'I',
        'LYS': 'K',
        'LEU': 'L',
        'MET': 'M',
        'ASN': 'N',
        'PRO': 'P',
        'GLN': 'Q',
        'ARG': 'R',
        'SER': 'S',
        'THR': 'T',
        'VAL': 'V',
        'TRP': 'W',
        'TYR': 'Y'
    }

    mutations = []
    already_emitted = set()  # entries written so far, for O(1) duplicate checks
    with open(pdb_file, 'r') as f:
        for line in f:
            if not line.startswith('ATOM'):
                continue
            # Names missing from the mapping are treated as 'X'.
            amino_acid = aa_mapping.get(line[17:20].strip(), 'X')
            if amino_acid != target_base:
                continue
            residue_number = line[22:26].strip()
            mutation = f'{target_base}A{residue_number}{mutated_base};'
            if mutation not in already_emitted:
                already_emitted.add(mutation)
                mutations.append(mutation)

    with open(mutation_file, 'w') as f:
        f.write('\n'.join(mutations))

        
def remove_duplicate_lines(file_path):
    """Rewrite a text file in place, keeping only the first copy of each line.

    Lines are compared by their content without the line terminator, so a
    final line that lacks a trailing newline is still recognised as a
    duplicate of an earlier, newline-terminated line. The order of the
    surviving lines is preserved, and the rewritten file ends with a newline
    only if the original file did.

    Args:
        file_path: Path of the file to de-duplicate; it is overwritten.
    """
    with open(file_path, 'r') as file:
        lines = file.readlines()

    lines_seen = set()   # line contents already kept
    output_lines = []    # unique contents in first-occurrence order
    for line in lines:
        content = line.rstrip('\n')
        if content not in lines_seen:
            lines_seen.add(content)
            output_lines.append(content)

    # Remember whether the input was newline-terminated so the layout of the
    # file is not changed beyond dropping the duplicates.
    ends_with_newline = bool(lines) and lines[-1].endswith('\n')

    with open(file_path, 'w') as file:
        file.write('\n'.join(output_lines))
        if output_lines and ends_with_newline:
            file.write('\n')


In [30]:
# Example usage

# Input structure and output list; both paths are relative to the working directory.
pdb_file = '3dmv.pdb'
mutation_file = 'mutations.txt'

# Write an entry for every 'A' residue with 'S' as the replacement
# (target_base='A', mutated_base='S'), then drop repeated lines from the output file.
generate_mutation_file(pdb_file, mutation_file, 'A', 'S')
remove_duplicate_lines(mutation_file)

['1', '1', '1', '1', '1', '1', '1', '1', '2', '2', '2', '2', '2', '2', '2', '2', '3', '3', '3', '3', '3', '3', '3', '3', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '5', '5', '5', '5', '5', '5', '5', '5', '5', '6', '6', '6', '6', '6', '6', '6', '6', '7', '7', '7', '7', '7', '7', '7', '7', '8', '8', '8', '8', '8', '8', '8', '8', '8', '8', '8', '9', '9', '9', '9', '9', '9', '9', '9', '10', '10', '10', '10', '10', '10', '10', '10', '11', '11', '11', '11', '11', '11', '11', '11', '11', '12', '12', '12', '12', '13', '13', '13', '13', '13', '13', '13', '13', '14', '14', '14', '14', '14', '14', '14', '14', '14', '14', '14', '15', '15', '15', '15', '15', '15', '15', '15', '16', '16', '16', '16', '16', '16', '16', '16', '16', '16', '16', '16', '16', '16', '16', '16', '16', '16', '17', '17', '17', '17', '17', '17', '17', '17', '18', '18', '18', '18', '18', '18', '18', '18', '18', '18', '18', '18', '19', '19', '19', '19', '19', '19', '19', '19', '19', '20', '20', '20', '20', '20', '20'

In [20]:
# NOTE(review): in the cells visible here, `residue_numbers` is only assigned
# inside generate_mutation_file, so it is not a notebook-level name and
# evaluating it in its own cell raises NameError.
residue_numbers

NameError: name 'residue_numbers' is not defined