In [1]:
import os
import pickle
import numpy as np
import pandas as pd
from Bio import PDB
from Bio.PDB.Polypeptide import three_to_one

### 1. Load data

In [2]:
def _read_tsv(tsv_path):
    """Read one tab-separated file into a pandas DataFrame."""
    return pd.read_csv(tsv_path, sep="\t")


# One DataFrame per dataset, each read from its step-4 preprocessed TSV file.
PDBbind_df = _read_tsv("./preprocessed_data/step4_PDBbind_data.tsv")
CASF2016_df = _read_tsv("./preprocessed_data/step4_CASF2016_data.tsv")
CASF2013_df = _read_tsv("./preprocessed_data/step4_CASF2013_data.tsv")
CSAR2014_df = _read_tsv("./preprocessed_data/step4_CSAR2014_data.tsv")
CSAR2012_df = _read_tsv("./preprocessed_data/step4_CSAR2012_data.tsv")
CSARset1_df = _read_tsv("./preprocessed_data/step4_CSARset1_data.tsv")
CSARset2_df = _read_tsv("./preprocessed_data/step4_CSARset2_data.tsv")
Astex_df = _read_tsv("./preprocessed_data/step4_Astex_data.tsv")
COACH420_df = _read_tsv("./preprocessed_data/step4_COACH420_data.tsv")
HOLO4K_df = _read_tsv("./preprocessed_data/step4_HOLO4K_data.tsv")

### 2. Load PLIP results

In [3]:
# Shared Bio.PDB parser instance; get_mol_from_ideal_ligandpdb uses it to read
# the ideal-ligand PDB files.
# NOTE(review): QUIET=True presumably silences parser warnings - confirm against the Biopython docs.
pdb_parser = PDB.PDBParser(QUIET = True)

In [4]:
def read_file(file):
    """Return all remaining lines of an open file-like object as a list.

    The object is only read from; it is not closed here, so the caller
    stays responsible for closing it.
    """
    all_lines = file.readlines()
    return all_lines

In [5]:
# Three-letter residue code -> one-letter code.
amino_acids_short = {
    "ALA": "A",
    "ARG": "R",
    "ASN": "N",
    "ASP": "D",
    "CYS": "C",
    "GLU": "E",
    "GLN": "Q",
    "GLY": "G",
    "HIS": "H",
    "ILE": "I",
    "LEU": "L",
    "LYS": "K",
    "MET": "M",
    "PHE": "F",
    "PRO": "P",
    "SER": "S",
    "THR": "T",
    "TRP": "W",
    "TYR": "Y",
    "VAL": "V",
    "SEC": "U",
    "PYL": "O",
}

In [6]:
def read_file(file):
    """Read an open file-like object to its end and return the lines as a list.

    NOTE(review): this repeats the ``read_file`` defined in an earlier cell with
    the same behaviour; the later definition is the one in effect once both
    cells have run.
    """
    contents = file.readlines()
    return contents

def get_ligand_atoms_from_pdb(pdbid, ligand_code):
    """Collect serial numbers and names of a ligand's HETATM atoms from a PDB file.

    Reads ``{pdb_path}/{pdbid}.pdb`` (``pdb_path`` is a notebook-level variable).
    Only the first model is scanned: parsing stops at the second ``MODEL`` record.

    Args:
        pdbid: file stem of the PDB file to read.
        ligand_code: residue name (columns 18-20) that selects the ligand atoms.

    Returns:
        ``(atom_idx_list, atom_name_list)``: parallel lists holding the atom
        serial number (as ``int``) and the atom name of every matching HETATM
        record, in file order. Both lists are empty when nothing matches.

    Raises:
        OSError: if the PDB file cannot be opened.
        ValueError: if a matching record has a non-integer serial number field.
    """
    atom_idx_list, atom_name_list = [], []
    seen_model = False

    # The context manager closes the handle even if parsing raises.
    with open(f"{pdb_path}/{pdbid}.pdb", "r") as pdb_file:
        for line in pdb_file:
            if line.startswith("MODEL"):
                if seen_model:
                    break  # second MODEL record: the first model is complete
                seen_model = True
                continue

            # Test the record name at the start of the line; a substring test
            # would also pick up lines that only mention "HETATM" in their text.
            if not line.startswith("HETATM"):
                continue

            serial_number = line[6:11].strip()
            atom_name = line[12:16].strip()
            residue_name = line[17:20].strip()

            if residue_name == ligand_code:
                atom_idx_list.append(int(serial_number))
                atom_name_list.append(atom_name)

    return atom_idx_list, atom_name_list

def get_mol_from_ideal_ligandpdb(ligand):
    """Read atom names, indices and elements of a ligand from its ideal PDB file.

    The file ``{ideal_path}/{ligand}_ideal.pdb`` is parsed with the notebook-level
    ``pdb_parser``; every residue of the first model whose residue name matches
    the ligand contributes its atoms.

    Returns:
        ``(name_order_list, name_to_idx_dict, name_to_element_dict)`` where the
        index is the atom serial number minus one, or ``(None, None, None)``
        when no atom of the ligand was found.
    """
    # Parse first: the file name is built from the ligand code as given.
    ideal_structure = pdb_parser.get_structure(ligand, f"{ideal_path}/{ligand}_ideal.pdb")

    # NOTE(review): for ligand code "AA" the residue name searched for is "AA_";
    # the reason is not visible here - confirm against the ideal file.
    target_resname = "AA_" if ligand == "AA" else ligand

    atom_names, serial_index_by_name, element_by_name = [], {}, {}
    first_model = ideal_structure[0]

    for chain_id in list(first_model.child_dict.keys()):
        for residue in first_model[chain_id].get_residues():
            if residue.resname != target_resname:
                continue
            for atom in residue:
                atom_names.append(atom.get_id())
                element_by_name[atom.get_id()] = atom.element
                serial_index_by_name[atom.get_id()] = atom.get_serial_number() - 1

    if not serial_index_by_name:
        return None, None, None
    return atom_names, serial_index_by_name, element_by_name

In [7]:
def get_bonds(pdbid, ligand_code, ligand_atom_idx_list):
    """Collect the protein-ligand interactions of one ligand from a PLIP text report.

    Reads ``{plip_results_path}/{pdbid}_output.txt`` (``plip_results_path`` is a
    notebook-level variable). A line starting with ``*`` names the interaction
    type of the table that follows; the first ``|`` row after it is the column
    header and is skipped. Every other ``|`` row whose ligand residue name
    contains ``ligand_code`` becomes one bond record.

    Args:
        pdbid: file stem of the PLIP report.
        ligand_code: ligand residue name to keep (substring match on the
            ligand residue-type column).
        ligand_atom_idx_list: atom serial numbers that belong to the ligand;
            used to tell the ligand atom from the protein atom in a row.

    Returns:
        A non-empty list of tuples
        ``(bond_name, aa_chain, aa_name, aa_id, protein_atom_idxs,
        ligand_chain, ligand_name, ligand_idx, ligand_atom_idxs)`` where
        ``bond_name`` is ``"<interaction type>_<position in the list>"``.
        ``None`` is returned (after printing a message) when the report cannot
        be read, a row references atoms outside ``ligand_atom_idx_list``, an
        unsupported interaction type is met, or no bond is found.

    NOTE(review): the column positions used below follow the PLIP report layout
    this notebook was written against - confirm against the PLIP version in use.
    """
    try:
        # The context manager closes the report even if reading fails.
        with open(f"{plip_results_path}/{pdbid}_output.txt", "r") as plip_file:
            lines = plip_file.readlines()
    except (OSError, UnicodeDecodeError):
        print(f"No PLIP results for {pdbid}")
        return None

    ligand_atom_idx_set = set(ligand_atom_idx_list)  # O(1) membership tests
    bond_list = list()
    bond_type = None      # stays None until the first "*" title line is seen
    isheader = False

    for line in lines:
        if line.startswith("*"):
            bond_type = line.strip().replace("*", "")
            isheader = True
            continue

        if not line.startswith("|"):
            continue

        if isheader:
            # First table row after a title line holds the column names.
            isheader = False
            continue

        fields = line.replace(' ', '').split('|')
        if ligand_code not in fields[5]:
            continue

        aa_id, aa_name, aa_chain = int(fields[1]), fields[2], fields[3]
        ligand_idx, ligand_name, ligand_chain = int(fields[4]), fields[5], fields[6]
        # Every interaction type gets the same "<type>_<running index>" name.
        bond_name = f"{bond_type}_{len(bond_list)}"

        if bond_type in ['Hydrogen Bonds', 'Water Bridges']:
            atom_idx1, atom_idx2 = int(fields[12]), int(fields[14])

            if atom_idx1 in ligand_atom_idx_set and atom_idx2 in ligand_atom_idx_set:   # discard ligand-ligand interaction
                continue

            if atom_idx1 in ligand_atom_idx_set:
                atom_idx_ligand, atom_idx_protein = atom_idx1, atom_idx2
            elif atom_idx2 in ligand_atom_idx_set:
                atom_idx_ligand, atom_idx_protein = atom_idx2, atom_idx1
            else:
                print(f"[Hydrogen Bonds, Water Bridges] Error: atom index in plip result not in ligand atom idx ({pdbid}, {ligand_code})")
                print(atom_idx1, atom_idx2)
                return None
            bond_list.append((bond_name, aa_chain, aa_name, aa_id, [atom_idx_protein], ligand_chain, ligand_name, ligand_idx, [atom_idx_ligand]))

        elif bond_type == 'Hydrophobic Interactions':
            atom_idx_ligand, atom_idx_protein = int(fields[8]), int(fields[9])

            if atom_idx_ligand not in ligand_atom_idx_set:
                print(f"[Hydrophobic Interactions] Error: atom index in plip result not in ligand atom idx ({pdbid}, {ligand_code})")
                print('Hydrophobic Interactions', atom_idx_ligand, atom_idx_protein)
                return None
            bond_list.append((bond_name, aa_chain, aa_name, aa_id, [atom_idx_protein], ligand_chain, ligand_name, ligand_idx, [atom_idx_ligand]))

        elif bond_type in ['pi-Stacking', 'pi-Cation Interactions']:
            try:
                # Modified: this column was originally read from index 11.
                atom_idx_ligand_list = list(map(int, fields[12].split(',')))
            except (ValueError, IndexError):
                print(f"[pi-Stacking, pi-Cation Interactions] Error: atom index in plip results")
                return None
            if len(set(atom_idx_ligand_list).intersection(ligand_atom_idx_set)) != len(atom_idx_ligand_list):
                print(f"[pi-Stacking, pi-Cation Interactions] Error: atom index in plip result not in ligand atom idx ({pdbid}, {ligand_code})")
                print(atom_idx_ligand_list)
                return None
            bond_list.append((bond_name, aa_chain, aa_name, aa_id, list(), ligand_chain, ligand_name, ligand_idx, atom_idx_ligand_list))

        elif bond_type == 'Salt Bridges':
            atom_idx_ligand_list = list(set(map(int, fields[10].split(','))))
            if len(set(atom_idx_ligand_list).intersection(ligand_atom_idx_set)) != len(atom_idx_ligand_list):
                print(f"[Salt Bridges] Error: atom index in plip results not in ligand atom idx ({pdbid}, {ligand_code})")
                # Show which of the reported atoms really belong to the ligand.
                print('Salt Bridges', atom_idx_ligand_list, set(atom_idx_ligand_list).intersection(ligand_atom_idx_set))
                return None
            bond_list.append((bond_name, aa_chain, aa_name, aa_id, list(), ligand_chain, ligand_name, ligand_idx, atom_idx_ligand_list))

        elif bond_type == 'Halogen Bonds':
            atom_idx1, atom_idx2 = int(fields[11]), int(fields[13])
            if atom_idx1 in ligand_atom_idx_set and atom_idx2 in ligand_atom_idx_set: # discard ligand-ligand interaction
                continue
            if atom_idx1 in ligand_atom_idx_set:
                atom_idx_ligand, atom_idx_protein = atom_idx1, atom_idx2
            elif atom_idx2 in ligand_atom_idx_set:
                atom_idx_ligand, atom_idx_protein = atom_idx2, atom_idx1
            else:
                print(f"[Halogen Bonds] Error: atom index in plip results not in ligand atom idx ({pdbid}, {ligand_code})")
                print("Halogen bonds", atom_idx1, atom_idx2)
                return None
            bond_list.append((bond_name, aa_chain, aa_name, aa_id, [atom_idx_protein], ligand_chain, ligand_name, ligand_idx, [atom_idx_ligand]))

        else:
            # Any other interaction type is not handled: give up on this complex.
            print('bond_type',bond_type)
            return None

    if len(bond_list) != 0:
        return bond_list
    else:
        print(f"No bonds are founded in {pdbid}")
        return None

In [8]:
def get_interact_atom_name(atom_idx_list, atom_name_list, bond_list):
    """Translate the ligand atom indices of each bond into atom names.

    Args:
        atom_idx_list: ligand atom indices; parallel to ``atom_name_list``.
        atom_name_list: ligand atom names; parallel to ``atom_idx_list``.
        bond_list: bond tuples whose first item is the bond name and whose
            last item is the list of ligand atom indices of that bond.

    Returns:
        ``(interact_atom_name_list, interact_bond_type_list)``: the atom name of
        every ligand atom of every bond, in order (repeats are kept), and the
        matching ``(atom_name, bond_name)`` pairs.

    Raises:
        AssertionError: if the two atom lists differ in length.
        ValueError: if a bond references an index missing from ``atom_idx_list``.
    """
    assert len(atom_idx_list) == len(atom_name_list)

    # Build the index -> name table once; setdefault keeps the first occurrence,
    # which is what list.index() would have returned.
    name_by_idx = {}
    for idx, name in zip(atom_idx_list, atom_name_list):
        name_by_idx.setdefault(idx, name)

    interact_atom_name_list, interact_bond_type_list = list(), list()

    for bond in bond_list:
        for atom_idx in bond[-1]:
            try:
                atom_name = name_by_idx[atom_idx]
            except KeyError:
                # Same exception type list.index() raised for an unknown index.
                raise ValueError(f"{atom_idx} is not in list") from None
            interact_atom_name_list.append(atom_name)
            interact_bond_type_list.append((atom_name, bond[0]))

    return interact_atom_name_list, interact_bond_type_list

def get_interact_atom_list(name_order_list, atom_name_to_idx_dict, atom_name_to_element_dict, interact_atom_name_list):
    """Build per-atom parallel lists for a ligand, following ``name_order_list``.

    Returns:
        ``(atom_idx_list, atom_name_list, atom_element_list, atom_interact_list)``
        where the last list holds 1 for atoms whose name occurs in
        ``interact_atom_name_list`` and 0 otherwise.
    """
    ordered_names = list(name_order_list)

    indices = [atom_name_to_idx_dict[atom_name] for atom_name in ordered_names]
    elements = [atom_name_to_element_dict[atom_name] for atom_name in ordered_names]
    interact_flags = [1 if atom_name in interact_atom_name_list else 0 for atom_name in ordered_names]

    return indices, ordered_names, elements, interact_flags

def get_dict(chain, chain_seqs, chain_indexes):
    """Split per-chain sequence and residue-index strings into lookup tables.

    Args:
        chain: comma-separated chain identifiers.
        chain_seqs: comma-separated sequences, one per chain.
        chain_indexes: semicolon-separated groups, one per chain; each group is
            a comma-separated list of residue indices.

    Returns:
        ``(seqs_dict, indexes_dict)``: ``seqs_dict`` maps a chain id to its
        ``(sequence, index_group)`` pair; ``indexes_dict`` maps
        ``chain id + residue index`` to the sequence character at that position.
    """
    chain_ids = chain.split(",")
    sequences = chain_seqs.split(",")
    index_groups = chain_indexes.split(";")

    seqs_dict = {}
    indexes_dict = {}

    for chain_id, sequence, index_group in zip(chain_ids, sequences, index_groups):
        seqs_dict[chain_id] = (sequence, index_group)
        indexes_dict.update(
            (chain_id + residue_index, residue)
            for residue_index, residue in zip(index_group.split(","), sequence)
        )

    return seqs_dict, indexes_dict

def get_interact_residue(idx_to_aa_dict, bond_list):
    """List the residues taking part in the given bonds.

    A bond is kept only when ``chain + residue number`` (``bond[1]`` and
    ``bond[3]``) is a key of ``idx_to_aa_dict``; bonds with other keys are
    skipped silently. For kept bonds the one-letter code of the bond's residue
    name (``bond[2]``) must equal the value stored in the dict.

    Returns:
        A list of ``(residue_key, one_letter_code, bond_name)`` tuples, or
        ``None`` when no bond was kept.

    Raises:
        AssertionError: if the bond's residue type disagrees with the dict.
    """
    matched = []

    for bond in bond_list:
        residue_key = bond[1] + str(bond[3])
        if residue_key in idx_to_aa_dict:
            one_letter = idx_to_aa_dict[residue_key]
            assert three_to_one(bond[2]) == one_letter
            matched.append((residue_key, one_letter, bond[0]))

    return matched if matched else None

In [9]:
def get_interactions_info(df):
    """Collect ligand-atom and residue interaction annotations for every row of ``df``.

    For each ``(PDB, Lig_code)`` row the function
      1) reads the ligand atoms with ``get_ligand_atoms_from_pdb``,
      2) reads the bonds with ``get_bonds``,
      3) maps bond atom indexes to atom names with ``get_interact_atom_name``,
      4) reads the ideal ligand with ``get_mol_from_ideal_ligandpdb``,
      5) flags interacting atoms with ``get_interact_atom_list``,
      6) parses the protein chains with ``get_dict``,
      7) maps bonds to residues with ``get_interact_residue``,
      8) stores the result under the key ``"<pdbid>_<ligand_code>"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose the columns ``PDB``, ``Lig_code``, ``Chain``, ``PDB_seqs``
        and ``Residue_index`` (all are read below).

    Returns
    -------
    dict
        ``{"<pdbid>_<ligand_code>": {...}}`` with the per-complex keys
        ``'bond'``, ``'atom_idx'``, ``'atom_name'``, ``'atom_element'``,
        ``'atom_interact'``, ``'atom_bond_type'``, ``'sequence'`` and
        ``'residue_interact'``. Rows for which a step yields ``None`` or raises
        are skipped; a message is printed for each, and the four skip counters
        (ligand file, bond, ideal ligand, sequence) are printed at the end.
    """

    # Hand-written atom-name overrides, keyed by "<pdbid>_<ligand_code>". When a key
    # matches, the list replaces the atom names read from the PDB file.
    # NOTE(review): several entries contain repeated names that may be typos and were
    # left untouched: "1o9v_SNG" ('O3' twice), "3abt_2PF" ('O2' twice),
    # "5n3v_8L5" ('N4' twice), "6hmr_GE5" ('H4' twice in the first copy),
    # "4r3b_3GE" ('O2' twice). Verify against the structure files.
    convert_dict = {"1d4p_BPP": ['C1','C2','C3','C4','C5','C6','C7','C8','N1','C9','N2','N3','C10','O1','N4','C11','C12','C13','C14','C15','C16','C17','C18','C19','C20','C21','C22'],
                    "1c9u_PQQ": ['N1', 'C1', 'C2', 'O1', 'O2', 'C3', 'C4', 'C5', 'C6', 'O3', 'C7', 'O4', 'C8', 'N2', 'C9', 'C10', 'O5', 'O6', 'C11', 'C12', 'C13', 'O7', 'O8', 'C14', 'N1', 'C1', 'C2', 'O1', 'O2', 'C3', 'C4', 'C5', 'C6', 'O3', 'C7', 'O4', 'C8', 'N2', 'C9', 'C10', 'O5', 'O6', 'C11', 'C12', 'C13', 'O7', 'O8', 'C14'],
                    "1dd6_MCI": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'S1', 'C11', 'O1', 'N1', 'C12', 'C13', 'O2', 'O3', 'C14', 'C15', 'C16', 'C17', 'S2', 'C18', 'N2', 'C19', 'N3', 'N4', 'N5', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'S1', 'C11', 'O1', 'N1', 'C12', 'C13', 'O2', 'O3', 'C14', 'C15', 'C16', 'C17', 'S2', 'C18', 'N2', 'N2', 'C19', 'C19', 'N3', 'N3', 'N4', 'N4', 'N5', 'N5'],
                    "1e9h_INR": ['C1','C2','C6','N1','C3','C7','C4','C5','O1','C8','C9','C10','C11','O2','N2','C12','C16','C15','C14','S1','C13','O5','O4','O3','C1','C2','C6','N1','C3','C7','C4','C5','O1','C8','C9','C10','C11','O2','N2','C12','C16','C15','C14','S1','C13','O5','O4','O3'],
                    "1fel_FEN": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', "C21", "C22", "C23", "C24", "C25", "C26", 'O1', 'N1', "O2"],
                    "1g72_PQQ": ['N1', 'C1', 'C2', 'O1', 'O2', 'C3', 'C4', 'C5', 'C6', 'O3', 'C7', 'O4', 'C8', 'N2', 'C9', 'C10', 'O5', 'O6', 'C11', 'C12', 'C13', 'O7', 'O8', 'C14','N1', 'C1', 'C2', 'O1', 'O2', 'C3', 'C4', 'C5', 'C6', 'O3', 'C7', 'O4', 'C8', 'N2', 'C9', 'C10', 'O5', 'O6', 'C11', 'C12', 'C13', 'O7', 'O8', 'C14'],
                    "1gse_EAA": ['C1', 'C1', 'O1', 'O1', 'C2', 'C2', 'Cl1', 'Cl1', 'C3', 'C3', 'Cl2', 'Cl2', 'C4', 'C4', 'C5', 'C5', 'C6', 'C6', 'C7', 'C7', 'O2', 'O2', 'C8', 'C8', 'C9', 'C9', 'C10', 'C10', 'C11', 'C11', 'C12', 'C12', 'C13', 'C13', 'O3', 'O3', 'O4', 'O4', 'C1', 'C1', 'O1', 'O1', 'C2', 'C2', 'Cl1', 'Cl1', 'C3', 'C3', 'Cl2', 'Cl2', 'C4', 'C4', 'C5', 'C5', 'C6', 'C6', 'C7', 'C7', 'O2', 'O2', 'C8', 'C8', 'C9', 'C9', 'C10', 'C10', 'C11', 'C11', 'C12', 'C12', 'C13', 'C13', 'O3', 'O3', 'O4', 'O4'],
                    "1h4i_PQQ": ['N1', 'C1', 'C2', 'O1', 'O2', 'C3', 'C4', 'C5', 'C6', 'O3', 'C7', 'O4', 'C8', 'N2', 'C9', 'C10', 'O5', 'O6', 'C11', 'C12', 'C13', 'O7', 'O8', 'C14', 'N1', 'C1', 'C2', 'O1', 'O2', 'C3', 'C4', 'C5', 'C6', 'O3', 'C7', 'O4', 'C8', 'N2', 'C9', 'C10', 'O5', 'O6', 'C11', 'C12', 'C13', 'O7', 'O8', 'C14'],
                    "1ipf_TNE": ['C1','C2','C3','O1','C4','C5','C6','C7','N1','C8', 'C1','C2','C3','O1','C4','C5','C6','C7','N1','C8'],
                    "1lrw_PQQ": ['N1', 'C1', 'C2', 'O1', 'O2', 'C3', 'C4', 'C5', 'C6', 'O3', 'C7', 'O4', 'C8', 'N2', 'C9', 'C10', 'O5', 'O6', 'C11', 'C12', 'C13', 'O7', 'O8', 'C14', 'N1', 'C1', 'C2', 'O1', 'O2', 'C3', 'C4', 'C5', 'C6', 'O3', 'C7', 'O4', 'C8', 'N2', 'C9', 'C10', 'O5', 'O6', 'C11', 'C12', 'C13', 'O7', 'O8', 'C14'],
                    "1nx8_N7P": ['C1','C2','O2','O3','N1','C6','O1','C7','C5','C4','C3', 'C1','C2','O2','O3','N1','C6','O1','C7','C5','C4','C3'],
                    "1o9v_SNG": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'N1', 'O3', 'O2', 'O3', 'O4', 'O5', 'Se1'],
                    "1pax_DHQ": ['C1', 'O1', 'N1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10'],
                    "2gss_EAA": ['C1', 'O1', 'C2', 'Cl1', 'C3', 'Cl2', 'C4', 'C5', 'C6', 'C7', 'O2', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'O3', 'O4', 'C1', 'O1', 'C2', 'Cl1', 'C3', 'Cl2', 'C4', 'C5', 'C6', 'C7', 'O2', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'O3', 'O4'],
                    "2trt_TAC": ['C1', 'O1', 'C2', 'C3', 'O2', 'N1', 'C4', 'O3', 'C5', 'N2', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'O4', 'C13', 'C14', 'C15', 'C16', 'C17', 'O5', 'C18', 'C19', 'O6', 'C20', 'C21', 'O7', 'C22', 'O8'],
                    "3gss_EAA": ['C1', 'O1', 'C2', 'Cl1', 'C3', 'Cl2', 'C4', 'C5', 'C6', 'C7', 'O2', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'O3', 'O4', 'C1', 'O1', 'C2', 'Cl1', 'C3', 'Cl2', 'C4', 'C5', 'C6', 'C7', 'O2', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'O3', 'O4'],
                    "4aah_PQQ": ['N1', 'C1', 'C2', 'O1', 'O2', 'C3', 'C4', 'C5', 'C6', 'O3', 'C7', 'O4', 'C8', 'N2', 'C9', 'C10', 'O5', 'O6', 'C11', 'C12', 'C13', 'O7', 'O8', 'C14', 'N1', 'C1', 'C2', 'O1', 'O2', 'C3', 'C4', 'C5', 'C6', 'O3', 'C7', 'O4', 'C8', 'N2', 'C9', 'C10', 'O5', 'O6', 'C11', 'C12', 'C13', 'O7', 'O8', 'C14'],
                    "6o4x_AA": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'N1', 'C10', 'C11', 'C12', 'C13', 'N2', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'N1', 'C10', 'C11', 'C12', 'C13', 'N2'],
                    "6hcu_FYB": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'F1', 'C13', 'C14', 'C15', 'O1', 'O2', 'O3', 'N1', 'C16', 'C17', 'O4', 'F2', 'F3', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'F1', 'C13', 'C14', 'C15', 'O1', 'O2', 'O3', 'N1', 'C16', 'C17', 'O4', 'F2', 'F3'],
                    "3pd9_HA5": ['N1', 'O1', 'C1', 'C2', 'N2', 'C3', 'C4', 'O2', 'O3', 'C5', 'C6', 'C7', 'O4','N1', 'O1', 'C1', 'C2', 'N2', 'C3', 'C4', 'O2', 'O3', 'C5', 'C6', 'C7', 'O4'],
                    "5j2x_6DL": ['C1', 'C1', 'C2', 'C2', 'C3', 'C3', 'C4', 'C4', 'C5', 'C5', 'C6', 'C6', 'C7', 'C7', 'C8', 'C8', 'C9', 'C9', 'C10', 'C10', 'C11', 'C11', 'C12', 'C12', 'O1', 'O1', 'O2', 'O2', 'N1', 'N1', 'N2', 'N2', 'N3', 'N3', 'C13', 'C13', 'C14', 'C14', 'O3', 'O3', 'F1', 'F1', 'Br1', 'Br1'],
                    "2srt_8MI": ['C1', 'C2', 'C3', 'O1', 'O2', 'N1', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'O3', 'N2', 'C14', 'C15', 'C16', 'C17', 'N3', 'C18', 'N4', 'N5', 'C19', 'O4', 'N6', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25','H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'H7', 'H8', 'H9', 'H10', 'H11', 'H12', 'H13', 'H14', 'H15', 'H16', 'H17', 'H18', 'H19', 'H20', 'H21', 'H22', 'H23', 'H24', 'H25', 'H26', 'H27', 'H28', 'H29', 'H30', 'H31', 'H32', 'H33', 'H34', 'H35'],
                    "3dgq_EAA": ['C1', 'O1', 'C2', 'Cl1', 'C3', 'Cl2', 'C4', 'C5', 'C6', 'C7', 'O2', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'O3', 'O4'],
                    "3f7g_389": ['N1', 'C1', 'C2', 'C3', 'O1', 'N2', 'C4', 'C5', 'C6', 'C7', 'C8', 'O2', 'N3', 'C9', 'C10', 'C11', 'C12', 'C13', 'O3', 'C14', 'N4', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27'],
                    "3f7i_G13": ['O1', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'N1', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'N2', 'C19', 'O2', 'C20', 'C21', 'C22', 'C23', 'C24', 'N3', 'C25', 'O3', 'C26', 'C27', 'N4', 'C28', 'O1', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'N1', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'N2', 'C19', 'O2', 'C20', 'C21', 'C22', 'C23', 'C24', 'N3', 'C25', 'O3', 'C26', 'C27', 'N4', 'C28'],
                    "5i9i_5HV": ['C1', 'C2', 'N1', 'C3', 'C4', 'C5', 'C6', 'N2', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'F1', 'F2', 'F3', 'C21', 'O1', 'C22', 'N3', 'C23', 'C24', 'C25', 'C26', 'C27', 'C28', 'O2', 'N4', 'C29', 'S1', 'C30', 'C31', 'C32', 'C33', 'C34', 'F4', 'C35', 'C36', 'C1', 'C2', 'N1', 'C3', 'C4', 'C5', 'C6', 'N2', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'F1', 'F2', 'F3', 'C21', 'O1', 'C22', 'N3', 'C23', 'C24', 'C25', 'C26', 'C27', 'C28', 'O2', 'N4', 'C29', 'S1', 'C30', 'C31', 'C32', 'C33', 'C34', 'F4', 'C35', 'C36'],
                    "2euf_LQQ": ['O1', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'N1', 'C8', 'N2', 'C9', 'N3', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'O2', 'N4', 'C16', 'N5', 'C17', 'C18', 'C19', 'C20', 'N6', 'C21', 'C22', 'N7', 'C23', 'C24'],
                    "6afg_72V": ['O1', 'C1', 'C2', 'O2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'N1', 'C9', 'O1', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'N1', 'C9'],
                    "6hwu_GE5": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27', 'C28', 'C29', 'C30', 'C31', 'F1', 'N1', 'N2', 'N3', 'N4', 'N5', 'O1', 'O2', 'O3', 'S1'],
                    "3abt_2PF": ["C1", "C2", "O2", "C3", "O2", "C4", "O3", "C5", "O4", 'C6', 'F1', 'P1', 'C7', 'F2', 'O5', 'C8', 'F3', 'O6', 'C9', 'F4', 'C10', 'F5', 'C11', 'N1', 'C12', 'N2', 'C13', 'N3', 'C14', 'N4', 'C15', 'N5', 'C16', 'O7', 'P2', 'O8', 'O9', 'O10', 'C17', 'C18', 'O11', 'C19', 'O12', 'C20', 'O13', 'C21', 'N6', 'C22', 'N7', 'C23', 'O14', 'N8', 'C24', 'O15', 'C25', 'N9', 'C26', 'C27', 'C28', 'C29', 'C30', 'C31', 'C32', 'C33', 'C34', 'O16', 'C35', 'C36'],
                    "2i72_VA1": ['O1', 'C1', 'N1', 'C2', 'O2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'S1', 'C11', 'B1', 'O3', 'O4', 'O1', 'C1', 'N1', 'C2', 'O2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'S1', 'C11', 'B1', 'O3', 'O4'],
                    "3f7u_AG4": ['O1', 'C1', 'N1', 'C2', 'C3', 'C4', 'O2', 'C5', 'C6', 'C7', 'C8', 'N2', 'C9', 'C10', 'S1', 'O3', 'O4', 'N3', 'S2', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'O1', 'C1', 'N1', 'C2', 'C3', 'C4', 'O2', 'C5', 'C6', 'C7', 'C8', 'N2', 'C9', 'C10', 'S1', 'O3', 'O4', 'N3', 'S2', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'O1', 'C1', 'N1', 'C2', 'C3', 'C4', 'O2', 'C5', 'C6', 'C7', 'C8', 'N2', 'C9', 'C10', 'S1', 'O3', 'O4', 'N3', 'S2', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'O1', 'C1', 'N1', 'C2', 'C3', 'C4', 'O2', 'C5', 'C6', 'C7', 'C8', 'N2', 'C9', 'C10', 'S1', 'O3', 'O4', 'N3', 'S2', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16'],
                    "6cw8_FGY": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27', 'N1', 'N2', 'N3', 'N4', 'N5', 'O1', 'O2', 'O3', 'O4', 'O5', 'O6', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27', 'N1', 'N2', 'N3', 'N4', 'N5', 'O1', 'O2', 'O3', 'O4', 'O5', 'O6'],
                    "5f37_N5J": ['O1', 'C1', 'N1', 'C2', 'N2', 'C3', 'N3', 'C4', 'C5', 'C6', 'C7', 'O1', 'C1', 'N1', 'C2', 'N2', 'C3', 'N3', 'C4', 'C5', 'C6', 'C7', 'O1', 'C1', 'N1', 'C2', 'N2', 'C3', 'N3', 'C4', 'C5', 'C6', 'C7','O1', 'C1', 'N1', 'C2', 'N2', 'C3', 'N3', 'C4', 'C5', 'C6', 'C7'],
                    "5g10_6DK": ['N1', 'C1', 'O1', 'C2', 'O2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'F1', 'C15', 'F2', 'F3', 'O3', 'N1', 'C1', 'O1', 'C2', 'O2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'F1', 'C15', 'F2', 'F3', 'O3'],
                    "5jyo_63J": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'N1', 'N2', 'C13', 'N3', 'C14', 'C15', 'C16', 'C17', 'N4', 'O1', 'S1', 'C18', 'C19', 'N5', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'O2', 'C26', 'F1', 'F2', 'F3', 'O3', 'N6', 'N7', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'N1', 'N2', 'C13', 'N3', 'C14', 'C15', 'C16', 'C17', 'N4', 'O1', 'S1', 'C18', 'C19', 'N5', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'O2', 'C26', 'F1', 'F2', 'F3', 'O3', 'N6', 'N7', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'N1', 'N2', 'C13', 'N3', 'C14', 'C15', 'C16', 'C17', 'N4', 'O1', 'S1', 'C18', 'C19', 'N5', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'O2', 'C26', 'F1', 'F2', 'F3', 'O3', 'N6', 'N7', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'N1', 'N2', 'C13', 'N3', 'C14', 'C15', 'C16', 'C17', 'N4', 'O1', 'S1', 'C18', 'C19', 'N5', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'O2', 'C26', 'F1', 'F2', 'F3', 'O3', 'N6', 'N7'],
                    "1v0o_INR": ['C1', 'C2', 'C3', 'N1', 'C4', 'C5', 'C6', 'C7', 'O1', 'C8', 'C9', 'C10', 'C11', 'O2', 'N2', 'C12', 'C13', 'C14', 'C15', 'S1', 'C16', 'O3', 'O4', 'O5', 'C1', 'C2', 'C3', 'N1', 'C4', 'C5', 'C6', 'C7', 'O1', 'C8', 'C9', 'C10', 'C11', 'O2', 'N2', 'C12', 'C13', 'C14', 'C15', 'S1', 'C16', 'O3', 'O4', 'O5'],
                    "1ork_ATC": ['C1', 'O1', 'C2', 'C3', 'O2', 'N1', 'C4', 'O3', 'C5', 'N2', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'O4', 'C17', 'C18', 'O5', 'C19', 'C20', 'O6', 'C21', 'O7', 'N3', 'C22', 'O8', 'C23', 'N4', 'C24', 'C25'],
                    "6guf_23D": ['C1', 'C2', 'Cl1', 'C3', 'C4', 'C5', 'C6', 'N1', 'C7', 'N2', 'C8', 'N3', 'C9', 'N4', 'C10', 'C11', 'C12', 'N5', 'C13', 'N6', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'N7', 'C1', 'C2', 'Cl1', 'C3', 'C4', 'C5', 'C6', 'N1', 'C7', 'N2', 'C8', 'N3', 'C9', 'N4', 'C10', 'C11', 'C12', 'N5', 'C13', 'N6', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'N7'],
                    "6hwv_GEW": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27', 'C28', 'C29', 'C30', 'C31', 'C32', 'F1', 'F2', 'N1', 'N2', 'N3', 'N4', 'N5', 'N6', 'O1', 'O2', 'O3'],
                    "3hjo_EAA": ['C1', 'O1', 'C2', 'Cl1', 'C3', 'Cl2', 'C4', 'C5', 'C6', 'C7', 'O2', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'O3', 'O4', 'C1', 'O1', 'C2', 'Cl1', 'C3', 'Cl2', 'C4', 'C5', 'C6', 'C7', 'O2', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'O3', 'O4'],
                    "5tz3_7OM": ['C1', 'C2', 'C3', 'N1', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'N2', 'N3', 'N4', 'N5', 'N6', 'O1', 'C1', 'C2', 'C3', 'N1', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'N2', 'N3', 'N4', 'N5', 'N6', 'O1', 'C1', 'C2', 'C3', 'N1', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'N2', 'N3', 'N4', 'N5', 'N6', 'O1', 'C1', 'C2', 'C3', 'N1', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'N2', 'N3', 'N4', 'N5', 'N6', 'O1'],
                    "5l2i_LQQ": ['O1', 'C2', 'C1', 'C3', 'C4', 'C5', 'C6', 'C7', 'N1', 'C8', 'N2', 'C9', 'N3', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'O2', 'N4', 'C16', 'N5', 'C17', 'C18', 'C19', 'C20', 'N6', 'C21', 'C22', 'N7', 'C23', 'C24', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'H7', 'H8', 'H9', 'H10', 'H11', 'H12', 'H13', 'H14', 'H15', 'H16', 'H17', 'H18', 'H19', 'H20', 'H21', 'H22', 'H23', 'H24', 'H25', 'H26', 'H27', 'H28', 'H29'],
                    "5n3v_8L5": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'O1', 'P1', 'N1', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'O2', 'O3', 'C14', 'N2', 'C15', 'O4', 'O5', 'N3', 'C16', 'N4', 'N4','H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'H7', 'H8', 'H9', 'H10', 'H11', 'H12', 'H13', 'H14', 'H15', 'H16', 'H17', 'H18', 'H19', 'H20', 'H21', 'H22', 'H23', 'H24', 'H25', 'H26', 'H27', 'H28'],
                    "5g11_7H1": ['O1', 'C1', 'N1', 'O2', 'C2', 'F1', 'F2', 'C3', 'F3', 'F4', 'C4', 'F5', 'F6', 'C5', 'F7', 'F8', 'C6', 'F9', 'F10', 'C7', 'F11', 'F12', 'C8', 'O3', 'N2', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'O1', 'C1', 'N1', 'O2', 'C2', 'F1', 'F2', 'C3', 'F3', 'F4', 'C4', 'F5', 'F6', 'C5', 'F7', 'F8', 'C6', 'F9', 'F10', 'C7', 'F11', 'F12', 'C8', 'O3', 'N2', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14'],
                    "5j8m_6DL": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'O1', 'O2', 'N1', 'N2', 'N3', 'C13', 'C14', 'O3', 'F1', 'BR1', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'O1', 'O2', 'N1', 'N2', 'N3', 'C13', 'C14', 'O3', 'F1', 'BR1'],
                    "5j8u_6DL": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'O1', 'O2', 'N1', 'N2', 'N3', 'C13', 'C14', 'O3', 'F1', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'O1', 'O2', 'N1', 'N2', 'N3', 'C13', 'C14', 'O3', 'F1'],
                    "2rcx_SM4": ['O1', 'C1', 'O2', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'B1', 'O3', 'O4', 'N1', 'C11', 'O5', 'C12', 'C13', 'C14', 'C15', 'C16', 'S1', 'O1', 'C1', 'O2', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'B1', 'O3', 'O4', 'N1', 'C11', 'O5', 'C12', 'C13', 'C14', 'C15', 'C16', 'S1'],
                    "5c4o_4F1": ['N1', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'O1', 'C12', 'C13', 'F1', 'F2', 'F3', 'C14', 'C15', 'Cl1', 'N2', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'O2', 'O3'],
                    "3pjc_PJC": ['C1', 'C2', 'C3', 'C4', 'N1', 'C5', 'C6', 'C7', 'F1', 'C8', 'F2', 'F3', 'N2', 'N3', 'C9', 'C10', 'C11', 'O1', 'C12', 'C13', 'N4', 'C14', 'O2', 'O3', 'C15', 'C16', 'N5', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22'],
                    "6hmr_GE5": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27', 'C28', 'C29', 'C30', 'C31', 'F1', 'N1', 'N2', 'N3', 'N4', 'N5', 'O1', 'O2', 'O3', 'S1', 'H1', 'H2', 'H3', 'H4', 'H4', 'H6', 'H7', 'H8', 'H9', 'H10', 'H11', 'H12', 'H13', 'H14', 'H15', 'H16', 'H17', 'H18', 'H19', 'H20', 'H21', 'H22', 'H23', 'H24', 'H25', 'H26', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27', 'C28', 'C29', 'C30', 'C31', 'F1', 'N1', 'N2', 'N3', 'N4', 'N5', 'O1', 'O2', 'O3', 'S1', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'H7', 'H8', 'H9', 'H10', 'H11', 'H12', 'H13', 'H14', 'H15', 'H16', 'H17', 'H18', 'H19', 'H20', 'H21', 'H22', 'H23', 'H24', 'H25', 'H26'],
                    "2vgp_AD6": ['N1', 'C1', 'C2', 'S1', 'C3', 'N2', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'O1', 'N3', 'C11', 'Br1', 'N1', 'C1', 'C2', 'S1', 'C3', 'N2', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'O1', 'N3', 'C11', 'Br1'],
                    "5g1a_7H1": ['O1', 'C1', 'N1', 'O2', 'C2', 'F1', 'F2', 'C3', 'F3', 'F4', 'C4', 'F5', 'F6', 'C5', 'F7', 'F8', 'C6', 'F9', 'F10', 'C7', 'F11', 'F12', 'C8', 'O3', 'N2', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'O1', 'C1', 'N1', 'O2', 'C2', 'F1', 'F2', 'C3', 'F3', 'F4', 'C4', 'F5', 'F6', 'C5', 'F7', 'F8', 'C6', 'F9', 'F10', 'C7', 'F11', 'F12', 'C8', 'O3', 'N2', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14'],
                    "6hmp_GEW": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27', 'C28', 'C29', 'C30', 'C31', 'C32', 'F1', 'F2', 'N1', 'N2', 'N3', 'N4', 'N5', 'N6', 'O1', 'O2', 'O3', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'H7', 'H8', 'H9', 'H10', 'H11', 'H12', 'H13', 'H14', 'H15', 'H16', 'H17', 'H18', 'H19', 'H20', 'H21', 'H22', 'H23', 'H24', 'H25', 'H26', 'H27', 'H28', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27', 'C28', 'C29', 'C30', 'C31', 'C32', 'F1', 'F2', 'N1', 'N2', 'N3', 'N4', 'N5', 'N6', 'O1', 'O2', 'O3', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'H7', 'H8', 'H9', 'H10', 'H11', 'H12', 'H13', 'H14', 'H15', 'H16', 'H17', 'H18', 'H19', 'H20', 'H21', 'H22', 'H23', 'H24', 'H25', 'H26', 'H27', 'H28'],
                    "3hdz_PD6": ['N1', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'N2', 'N3', 'C14', 'O1', 'C15', 'C16', 'C17', 'C18'],
                    # Twelve copies of the same 27-atom naming, one per line. The last copy
                    # originally read 'C!' where every other copy has 'C1'; fixed here.
                    "1i41_HEN": ['N1', 'C1', 'C2', 'C3', 'O1', 'C4', 'C5', 'C6', 'C7', 'C8', 'O2', 'P1', 'O3', 'O4', 'O5', 'N2', 'C9', 'C10', 'O6', 'O7', 'C11', 'C12', 'C13', 'P2', 'O8', 'O9', 'O10',
                                 'N1', 'C1', 'C2', 'C3', 'O1', 'C4', 'C5', 'C6', 'C7', 'C8', 'O2', 'P1', 'O3', 'O4', 'O5', 'N2', 'C9', 'C10', 'O6', 'O7', 'C11', 'C12', 'C13', 'P2', 'O8', 'O9', 'O10',
                                 'N1', 'C1', 'C2', 'C3', 'O1', 'C4', 'C5', 'C6', 'C7', 'C8', 'O2', 'P1', 'O3', 'O4', 'O5', 'N2', 'C9', 'C10', 'O6', 'O7', 'C11', 'C12', 'C13', 'P2', 'O8', 'O9', 'O10',
                                 'N1', 'C1', 'C2', 'C3', 'O1', 'C4', 'C5', 'C6', 'C7', 'C8', 'O2', 'P1', 'O3', 'O4', 'O5', 'N2', 'C9', 'C10', 'O6', 'O7', 'C11', 'C12', 'C13', 'P2', 'O8', 'O9', 'O10',
                                 'N1', 'C1', 'C2', 'C3', 'O1', 'C4', 'C5', 'C6', 'C7', 'C8', 'O2', 'P1', 'O3', 'O4', 'O5', 'N2', 'C9', 'C10', 'O6', 'O7', 'C11', 'C12', 'C13', 'P2', 'O8', 'O9', 'O10',
                                 'N1', 'C1', 'C2', 'C3', 'O1', 'C4', 'C5', 'C6', 'C7', 'C8', 'O2', 'P1', 'O3', 'O4', 'O5', 'N2', 'C9', 'C10', 'O6', 'O7', 'C11', 'C12', 'C13', 'P2', 'O8', 'O9', 'O10',
                                 'N1', 'C1', 'C2', 'C3', 'O1', 'C4', 'C5', 'C6', 'C7', 'C8', 'O2', 'P1', 'O3', 'O4', 'O5', 'N2', 'C9', 'C10', 'O6', 'O7', 'C11', 'C12', 'C13', 'P2', 'O8', 'O9', 'O10',
                                 'N1', 'C1', 'C2', 'C3', 'O1', 'C4', 'C5', 'C6', 'C7', 'C8', 'O2', 'P1', 'O3', 'O4', 'O5', 'N2', 'C9', 'C10', 'O6', 'O7', 'C11', 'C12', 'C13', 'P2', 'O8', 'O9', 'O10',
                                 'N1', 'C1', 'C2', 'C3', 'O1', 'C4', 'C5', 'C6', 'C7', 'C8', 'O2', 'P1', 'O3', 'O4', 'O5', 'N2', 'C9', 'C10', 'O6', 'O7', 'C11', 'C12', 'C13', 'P2', 'O8', 'O9', 'O10',
                                 'N1', 'C1', 'C2', 'C3', 'O1', 'C4', 'C5', 'C6', 'C7', 'C8', 'O2', 'P1', 'O3', 'O4', 'O5', 'N2', 'C9', 'C10', 'O6', 'O7', 'C11', 'C12', 'C13', 'P2', 'O8', 'O9', 'O10',
                                 'N1', 'C1', 'C2', 'C3', 'O1', 'C4', 'C5', 'C6', 'C7', 'C8', 'O2', 'P1', 'O3', 'O4', 'O5', 'N2', 'C9', 'C10', 'O6', 'O7', 'C11', 'C12', 'C13', 'P2', 'O8', 'O9', 'O10',
                                 'N1', 'C1', 'C2', 'C3', 'O1', 'C4', 'C5', 'C6', 'C7', 'C8', 'O2', 'P1', 'O3', 'O4', 'O5', 'N2', 'C9', 'C10', 'O6', 'O7', 'C11', 'C12', 'C13', 'P2', 'O8', 'O9', 'O10'],
                    "6hcw_FYB": ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'F1', 'C13', 'C14', 'C15', 'O1', 'O2', 'O3', 'N1', 'C16', 'C17', 'O4', 'F2', 'F3', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'F1', 'C13', 'C14', 'C15', 'O1', 'O2', 'O3', 'N1', 'C16', 'C17', 'O4', 'F2', 'F3'],
                    "2wq5_MIY": ['O1', 'C1', 'N1', 'C2', 'C3', 'O2', 'C4', 'O3', 'C5', 'N2', 'C6', 'C7', 'C8', 'C9', 'O4', 'C10', 'O5', 'C11', 'C12', 'C13', 'C14', 'O6', 'C15', 'C16', 'C17', 'C18', 'O7', 'C19', 'C20', 'C21', 'N3', 'C22', 'C23'],
                    "4r3b_3GE": ['O1', 'C1', 'C2', 'C3', 'O2', 'O2', 'C4', 'N1', 'C5', 'C6', 'O3', 'O4', 'C7', 'C8', 'C9', 'S1', 'O5', 'O6']}

    interaction_dict = dict()
    ligand_file_error, bond_error, ideal_ligand_file_error, sequence_error = 0, 0, 0, 0

    pdbid_list, ligand_code_list = df.PDB.values, df.Lig_code.values

    for idx, (pdbid, ligand_code) in enumerate(zip(pdbid_list, ligand_code_list)):
        # Same key format is used for the override table and for the result dict.
        complex_key = f"{pdbid}_{ligand_code}"

        try:
            # 1) Read ligand info
            ligand_atom_idx_list, ligand_atom_name_list = get_ligand_atoms_from_pdb(pdbid, ligand_code)

            if ligand_atom_idx_list is None:
                ligand_file_error += 1
                print(f"No such ligand {ligand_code} in PDB: {pdbid}")
                continue

            # Replace the atom names read from the file when an override exists.
            if complex_key in convert_dict:
                ligand_atom_name_list = convert_dict[complex_key]

            # 2) Read bond info
            bond_list = get_bonds(pdbid, ligand_code, ligand_atom_idx_list)
            if bond_list is None:
                bond_error += 1
                print(f"Empty bond list: PDB - {pdbid}, Ligand - {ligand_code}")
                continue

            # 3) Map ligand atom info onto the interacting atoms
            interact_atom_name_list, interact_bond_type_list = get_interact_atom_name(ligand_atom_idx_list, ligand_atom_name_list, bond_list)

            # 4) Read ideal ligand info
            name_order_list, atom_name_to_idx_dict, atom_name_to_element_dict = get_mol_from_ideal_ligandpdb(ligand_code)

            if atom_name_to_idx_dict is None:
                ideal_ligand_file_error += 1
                print(f"Ideal ligand is None: {pdbid}_{ligand_code}")
                continue

            # 5) Add interaction info to ideal ligand info
            (ligand_atom_idx_list, ligand_atom_name_list,
             ligand_atom_element_list, ligand_atom_interact_list) = get_interact_atom_list(
                name_order_list, atom_name_to_idx_dict, atom_name_to_element_dict, interact_atom_name_list)

            # 6) Read protein info
            seq_dict, idx_to_aa_dict = get_dict(df.Chain.values[idx], df.PDB_seqs.values[idx], df.Residue_index.values[idx])

            # 7) Map interaction info onto protein info
            interact_residue_list = get_interact_residue(idx_to_aa_dict, bond_list)
            if interact_residue_list is None:
                sequence_error += 1
                print(f"Sequence Error: PDB - {pdbid}")
                continue

            # 8) Save info
            interaction_dict[complex_key] = {
                'bond': bond_list,
                'atom_idx': ligand_atom_idx_list,
                'atom_name': ligand_atom_name_list,
                'atom_element': ligand_atom_element_list,
                'atom_interact': ligand_atom_interact_list,
                'atom_bond_type': interact_bond_type_list,
                'sequence': seq_dict,
                'residue_interact': interact_residue_list,
            }

        except Exception as exc:
            # Best-effort: a failing complex is reported and skipped. Only Exception is
            # caught so that KeyboardInterrupt/SystemExit can still stop a long run.
            print(f"Error for {pdbid}: {type(exc).__name__}: {exc}")

    print(f"{ligand_file_error}")
    print(f"{bond_error}")
    print(f"{ideal_ligand_file_error}")
    print(f"{sequence_error}")

    return interaction_dict

In [10]:
# Input directories, given relative to the notebook's working directory.
# These are module-level globals: `pdb_path` is read by get_ligand_atoms_from_pdb,
# so this cell has to run before any of the dataset cells below.
# NOTE(review): `ideal_path` and `plip_results_path` are presumably read by the
# ideal-ligand and PLIP readers defined earlier — confirm against those functions.
ideal_path = "./data/PDB/ligand"
pdb_path = "./data/PDB/protein"
plip_results_path = "./data/PLIP_results"

#### 2.1 PDBbind dataset

In [11]:
# Extract interaction annotations for every PDBbind row and print how many
# complexes ended up in the result dictionary.
PDBbind_interact_results = get_interactions_info(PDBbind_df)
print(f"[PDBbind]: {len(PDBbind_interact_results)}")

No bonds are founded in 1fwe
Empty bond list: PDB - 1fwe, Ligand - HAE
No bonds are founded in 5zwh
Empty bond list: PDB - 5zwh, Ligand - 9N9
No bonds are founded in 6n95
Empty bond list: PDB - 6n95, Ligand - YXS
bond_type Metal Complexes
Empty bond list: PDB - 3q4c, Ligand - RSW
[pi-Stacking, pi-Cation Interactions] Error: atom index in plip result not in ligand atom idx (5oah, 95B)
[6791, 6792, 6793, 6794, 6764]
Empty bond list: PDB - 5oah, Ligand - 95B
Error for 1o41
bond_type Metal Complexes
Empty bond list: PDB - 2foy, Ligand - B30
No bonds are founded in 4hkp
Empty bond list: PDB - 4hkp, Ligand - 16B
No bonds are founded in 2z8e
Empty bond list: PDB - 2z8e, Ligand - MES
No bonds are founded in 5jim
Empty bond list: PDB - 5jim, Ligand - P8S
No bonds are founded in 3ig1
Empty bond list: PDB - 3ig1, Ligand - JTH
bond_type Metal Complexes
Empty bond list: PDB - 2fou, Ligand - B22
bond_type Metal Complexes
Empty bond list: PDB - 4fea, Ligand - 0TE
No bonds are founded in 4kw6
Empty bo

#### 2.2 CASF2016 dataset

In [12]:
# Extract interaction annotations for every CASF2016 row and print how many
# complexes ended up in the result dictionary.
CASF2016_interact_results = get_interactions_info(CASF2016_df)
print(f"[CASF2016]: {len(CASF2016_interact_results)}")

0
0
0
0
[CASF2016]: 269


#### 2.3 CASF2013 dataset

In [13]:
# Extract interaction annotations for every CASF2013 row and print how many
# complexes ended up in the result dictionary.
CASF2013_interact_results = get_interactions_info(CASF2013_df)
print(f"[CASF2013]: {len(CASF2013_interact_results)}")

0
0
0
0
[CASF2013]: 172


#### 2.4 CSAR2014 dataset

In [14]:
# Extract interaction annotations for every CSAR2014 row and print how many
# complexes ended up in the result dictionary.
CSAR2014_interact_results = get_interactions_info(CSAR2014_df)
print(f"[CSAR2014]: {len(CSAR2014_interact_results)}")

0
0
0
0
[CSAR2014]: 46


#### 2.5 CSAR2012 dataset

In [15]:
# Extract interaction annotations for every CSAR2012 row and print how many
# complexes ended up in the result dictionary.
CSAR2012_interact_results = get_interactions_info(CSAR2012_df)
print(f"[CSAR2012]: {len(CSAR2012_interact_results)}")

0
0
0
0
[CSAR2012]: 55


#### 2.6 CSARset1 dataset

In [16]:
# Extract interaction annotations for every CSARset1 row and print how many
# complexes ended up in the result dictionary.
CSARset1_interact_results = get_interactions_info(CSARset1_df)
print(f"[CSARset1]: {len(CSARset1_interact_results)}")

0
0
0
0
[CSARset1]: 152


#### 2.7 CSARset2 dataset

In [17]:
# Extract interaction annotations for every CSARset2 row and print how many
# complexes ended up in the result dictionary.
CSARset2_interact_results = get_interactions_info(CSARset2_df)
print(f"[CSARset2]: {len(CSARset2_interact_results)}")

0
0
0
0
[CSARset2]: 135


#### 2.8 Astex dataset

In [18]:
# Extract interaction annotations for every Astex row and print how many
# complexes ended up in the result dictionary.
Astex_interact_results = get_interactions_info(Astex_df)
print(f"[Astex]: {len(Astex_interact_results)}")

0
0
0
0
[Astex]: 74


#### 2.9 COACH420 dataset

In [19]:
# Extract interaction annotations for every COACH420 row and print how many
# complexes ended up in the result dictionary.
COACH420_interact_results = get_interactions_info(COACH420_df)
print(f"[COACH420]: {len(COACH420_interact_results)}")

No bonds are founded in 1cqf
Empty bond list: PDB - 1cqf, Ligand - BGC
No bonds are founded in 1vps
Empty bond list: PDB - 1vps, Ligand - NAG
0
2
0
0
[COACH420]: 323


#### 2.10 HOLO4K dataset

In [20]:
# Extract interaction annotations for every HOLO4K row and print how many
# complexes ended up in the result dictionary.
HOLO4K_interact_results = get_interactions_info(HOLO4K_df)
print(f"[HOLO4K]: {len(HOLO4K_interact_results)}")

No bonds are founded in 1abf
Empty bond list: PDB - 1abf, Ligand - FCB
No bonds are founded in 1apb
Empty bond list: PDB - 1apb, Ligand - FCB
No bonds are founded in 1ct1
Empty bond list: PDB - 1ct1, Ligand - BGC
bond_type Metal Complexes
Empty bond list: PDB - 1e2e, Ligand - AF3
No bonds are founded in 1e2g
Empty bond list: PDB - 1e2g, Ligand - TMP
No bonds are founded in 1el7
Empty bond list: PDB - 1el7, Ligand - MTD
No bonds are founded in 1el8
Empty bond list: PDB - 1el8, Ligand - MSF
No bonds are founded in 1el9
Empty bond list: PDB - 1el9, Ligand - MTG
No bonds are founded in 1eyw
Empty bond list: PDB - 1eyw, Ligand - TEN
No bonds are founded in 1fbh
Empty bond list: PDB - 1fbh, Ligand - FBP
No bonds are founded in 1fwu
Empty bond list: PDB - 1fwu, Ligand - FUC
No bonds are founded in 1fwv
Empty bond list: PDB - 1fwv, Ligand - FUC
bond_type Metal Complexes
Empty bond list: PDB - 1g3c, Ligand - FE
bond_type Metal Complexes
Empty bond list: PDB - 1gfi, Ligand - ALF
No bonds are fou

### 3. Save data

In [21]:
# Serialize the PDBbind interaction dictionary to disk as a pickle.
with open("./preprocessed_data/step5_PDBbind_interaction.pkl", "wb") as f:
    pickle.dump(PDBbind_interact_results, f)

In [22]:
# Serialize the CASF2016 interaction dictionary to disk as a pickle.
with open("./preprocessed_data/step5_CASF2016_interaction.pkl", "wb") as f:
    pickle.dump(CASF2016_interact_results, f)

In [23]:
# Serialize the CASF2013 interaction dictionary to disk as a pickle.
with open("./preprocessed_data/step5_CASF2013_interaction.pkl", "wb") as f:
    pickle.dump(CASF2013_interact_results, f)

In [24]:
# Serialize the CSAR2014 interaction dictionary to disk as a pickle.
with open("./preprocessed_data/step5_CSAR2014_interaction.pkl", "wb") as f:
    pickle.dump(CSAR2014_interact_results, f)

In [25]:
# Serialize the CSAR2012 interaction dictionary to disk as a pickle.
with open("./preprocessed_data/step5_CSAR2012_interaction.pkl", "wb") as f:
    pickle.dump(CSAR2012_interact_results, f)

In [26]:
# Serialize the CSARset1 interaction dictionary to disk as a pickle.
with open("./preprocessed_data/step5_CSARset1_interaction.pkl", "wb") as f:
    pickle.dump(CSARset1_interact_results, f)

In [27]:
# Serialize the CSARset2 interaction dictionary to disk as a pickle.
with open("./preprocessed_data/step5_CSARset2_interaction.pkl", "wb") as f:
    pickle.dump(CSARset2_interact_results, f)

In [28]:
# Serialize the Astex interaction dictionary to disk as a pickle.
with open("./preprocessed_data/step5_Astex_interaction.pkl", "wb") as f:
    pickle.dump(Astex_interact_results, f)

In [29]:
# Serialize the COACH420 interaction dictionary to disk as a pickle.
with open("./preprocessed_data/step5_COACH420_interaction.pkl", "wb") as f:
    pickle.dump(COACH420_interact_results, f)

In [30]:
# Serialize the HOLO4K interaction dictionary to disk as a pickle.
with open("./preprocessed_data/step5_HOLO4K_interaction.pkl", "wb") as f:
    pickle.dump(HOLO4K_interact_results, f)

In [31]:
def check_index(df, interact_dict):
    """Build a per-row boolean mask telling which rows have an interaction entry.

    For every row of ``df`` the first two positional values are joined as
    ``"<value0>_<value1>"`` (used here as PDB id and ligand code) and looked
    up in ``interact_dict`` with the ``in`` operator.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose first two columns form the lookup key.
    interact_dict : container supporting ``in``
        Collection of ``"<pdbid>_<ligand_code>"`` keys.

    Returns
    -------
    list of bool
        One entry per row of ``df``, in row order; ``True`` when the key is present.
    """
    # Rows are read positionally via df.values, so column order matters.
    return [f"{row[0]}_{row[1]}" in interact_dict for row in df.values]

In [32]:
# Keep only the rows whose "<pdbid>_<ligand_code>" key is present in the
# matching interaction-result dict; check_index supplies the boolean row mask.
PDBbind_df = PDBbind_df.loc[check_index(df=PDBbind_df, interact_dict=PDBbind_interact_results)]
CASF2016_df = CASF2016_df.loc[check_index(df=CASF2016_df, interact_dict=CASF2016_interact_results)]
CASF2013_df = CASF2013_df.loc[check_index(df=CASF2013_df, interact_dict=CASF2013_interact_results)]
CSAR2014_df = CSAR2014_df.loc[check_index(df=CSAR2014_df, interact_dict=CSAR2014_interact_results)]
CSAR2012_df = CSAR2012_df.loc[check_index(df=CSAR2012_df, interact_dict=CSAR2012_interact_results)]
CSARset1_df = CSARset1_df.loc[check_index(df=CSARset1_df, interact_dict=CSARset1_interact_results)]
CSARset2_df = CSARset2_df.loc[check_index(df=CSARset2_df, interact_dict=CSARset2_interact_results)]
Astex_df = Astex_df.loc[check_index(df=Astex_df, interact_dict=Astex_interact_results)]
COACH420_df = COACH420_df.loc[check_index(df=COACH420_df, interact_dict=COACH420_interact_results)]
HOLO4K_df = HOLO4K_df.loc[check_index(df=HOLO4K_df, interact_dict=HOLO4K_interact_results)]

In [33]:
# Write each filtered table to its step-5 TSV file (tab-separated, no index column).
for _frame, _tsv_path in (
    (PDBbind_df, "./preprocessed_data/step5_PDBbind_data.tsv"),
    (CASF2016_df, "./preprocessed_data/step5_CASF2016_data.tsv"),
    (CASF2013_df, "./preprocessed_data/step5_CASF2013_data.tsv"),
    (CSAR2014_df, "./preprocessed_data/step5_CSAR2014_data.tsv"),
    (CSAR2012_df, "./preprocessed_data/step5_CSAR2012_data.tsv"),
    (CSARset1_df, "./preprocessed_data/step5_CSARset1_data.tsv"),
    (CSARset2_df, "./preprocessed_data/step5_CSARset2_data.tsv"),
    (Astex_df, "./preprocessed_data/step5_Astex_data.tsv"),
    (COACH420_df, "./preprocessed_data/step5_COACH420_data.tsv"),
    (HOLO4K_df, "./preprocessed_data/step5_HOLO4K_data.tsv"),
):
    _frame.to_csv(_tsv_path, sep="\t", index=False)

### 4. Make fasta files

In [34]:
def make_fasta(df, query_file_name, target_file_name):
    """Write paired query (PDB) and target (UniProt) FASTA files under ``./fasta/``.

    Each row of ``df`` holds comma-separated, per-chain values in the columns
    ``Chain``, ``Uniprot_IDs``, ``PDB_seqs`` and ``Uniprot_seqs``. For every
    chain whose UniProt sequence is not the literal string ``'None'``:

    * the query file gets ``>{pdbid}_{chain}`` followed by the PDB sequence;
    * the target file gets ``>{pdbid}_{chain}_{uniprot_id}`` followed by the
      UniProt sequence.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``PDB``, ``Chain``, ``Uniprot_IDs``,
        ``PDB_seqs`` and ``Uniprot_seqs`` (all string-valued).
    query_file_name : str
        File name (inside ``./fasta/``) for the PDB-sequence FASTA.
    target_file_name : str
        File name (inside ``./fasta/``) for the UniProt-sequence FASTA.
    """
    # Create the output directory up front so a fresh checkout does not fail on open().
    os.makedirs("./fasta", exist_ok=True)

    columns = (
        df.PDB.values,
        df.Chain.values,
        df.Uniprot_IDs.values,
        df.PDB_seqs.values,
        df.Uniprot_seqs.values,
    )

    # Context managers guarantee both handles are closed even if a row raises
    # (e.g. a non-string cell without .split).
    with open(f"./fasta/{query_file_name}", "w") as query_fasta, \
            open(f"./fasta/{target_file_name}", "w") as target_fasta:
        for pdbid, chain, uniprotid, pdb_seqs, uniprot_seqs in zip(*columns):
            per_chain = zip(
                chain.split(","),
                uniprotid.split(","),
                pdb_seqs.split(","),
                uniprot_seqs.split(","),
            )
            for ch, un, ps, us in per_chain:
                # Chains without a mapped UniProt sequence are skipped in both files.
                if us == 'None':
                    continue

                query_fasta.write('>' + pdbid + '_' + ch + '\n')
                query_fasta.write(ps + '\n')

                target_fasta.write('>' + pdbid + '_' + ch + '_' + un + '\n')
                target_fasta.write(us + '\n')

In [35]:
# Generate the query (PDB) / target (UniProt) FASTA pair for every dataset.
for _frame, _query_name, _target_name in (
    (PDBbind_df, "PDBbind_query_pdb.fasta", "PDBbind_target_uniprot_pdb.fasta"),
    (CASF2016_df, "CASF2016_query_pdb.fasta", "CASF2016_target_uniprot_pdb.fasta"),
    (CASF2013_df, "CASF2013_query_pdb.fasta", "CASF2013_target_uniprot_pdb.fasta"),
    (CSAR2014_df, "CSAR2014_query_pdb.fasta", "CSAR2014_target_uniprot_pdb.fasta"),
    (CSAR2012_df, "CSAR2012_query_pdb.fasta", "CSAR2012_target_uniprot_pdb.fasta"),
    (CSARset1_df, "CSARset1_query_pdb.fasta", "CSARset1_target_uniprot_pdb.fasta"),
    (CSARset2_df, "CSARset2_query_pdb.fasta", "CSARset2_target_uniprot_pdb.fasta"),
    (Astex_df, "Astex_query_pdb.fasta", "Astex_target_uniprot_pdb.fasta"),
    (COACH420_df, "COACH420_query_pdb.fasta", "COACH420_target_uniprot_pdb.fasta"),
    (HOLO4K_df, "HOLO4K_query_pdb.fasta", "HOLO4K_target_uniprot_pdb.fasta"),
):
    make_fasta(_frame, _query_name, _target_name)