In [1]:
from pathlib import Path
from plip.structure.preparation import PDBComplex
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [2]:
HERE = Path(".").resolve()
DATA_DIR = HERE / "data"
PDB_DIR = HERE / "data_pdb"
PDB_DIR.mkdir(exist_ok=True)

In [3]:
#функция для конвертирования .cif. в pdb.:
import subprocess
import os

def convert_cif_to_pdb(cif_path, pdb_path):
    try:
        subprocess.run(
            ["obabel", str(cif_path), "-O", str(pdb_path)],
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL
        )
        return True
    except Exception as e:
        print(f"Не удалось конвертировать {cif_path.name}: {e}")
        return False

In [4]:
#найдем все .cif файлы
cif_files = list(DATA_DIR.glob("*.cif"))
print(f"Найдено {len(cif_files)} .cif файлов")

Найдено 83 .cif файлов


In [5]:
# Конвертация
pdb_files = []
for cif in cif_files:
    pdb_name = cif.stem + ".pdb"
    pdb_path = PDB_DIR / pdb_name
    if convert_cif_to_pdb(cif, pdb_path):
        pdb_files.append(pdb_path)

print(f"Успешно конвертировано: {len(pdb_files)} файлов")

Успешно конвертировано: 83 файлов


In [6]:
from plip.structure.preparation import PDBComplex

pdb_file = pdb_files[0]

protlig = PDBComplex()
protlig.load_pdb(str(pdb_file))
protlig.analyze()

site = list(protlig.interaction_sets.values())[0]
print("Доступные атрибуты InteractionSet:")
print([attr for attr in dir(site) if not attr.startswith('_')])

Доступные атрибуты InteractionSet:
['Mapper', 'all_hbonds_ldon', 'all_hbonds_pdon', 'all_hydrophobic_contacts', 'all_itypes', 'all_pication_laro', 'all_pication_paro', 'altconf', 'bindingsite', 'find_unpaired_ligand', 'halogen_bonds', 'hbonds_ldon', 'hbonds_pdon', 'hydrophobic_contacts', 'interacting_chains', 'interacting_res', 'lig_members', 'ligand', 'metal_complexes', 'no_interactions', 'num_unpaired_hal', 'num_unpaired_hba', 'num_unpaired_hbd', 'output_path', 'pdbid', 'pication_laro', 'pication_paro', 'pistacking', 'refine_hbonds_ldon', 'refine_hbonds_pdon', 'refine_hydrophobic', 'refine_pication', 'refine_water_bridges', 'saltbridge_lneg', 'saltbridge_pneg', 'unpaired_hal', 'unpaired_hal_orig_idx', 'unpaired_hba', 'unpaired_hba_orig_idx', 'unpaired_hbd', 'unpaired_hbd_orig_idx', 'water_bridges']


In [7]:
#функция для анализа типов связей через PLIP
def get_interactions_from_pdb(pdb_path):
    protlig = PDBComplex()
    protlig.load_pdb(str(pdb_path))
    protlig.analyze()

    # Берём первый сайт и его ID
    first_bs = list(protlig.interaction_sets.keys())[0]
    site = protlig.interaction_sets[first_bs]

    ligand_name = first_bs.split(":")[0]

    residue_interactions = {}
    interaction_map = {
        "hydrophobic": site.hydrophobic_contacts,
        "hbond": site.hbonds_ldon + site.hbonds_pdon,
        "waterbridge": site.water_bridges,
        "saltbridge": site.saltbridge_lneg + site.saltbridge_pneg,
        "pistacking": site.pistacking,
        "pication": site.pication_laro + site.pication_paro,
        "halogen": site.halogen_bonds,
        "metal": site.metal_complexes,
    }

    for itype, interactions in interaction_map.items():
        for i in interactions:
            res_key = f"{i.restype}_{i.reschain}:{i.resnr}"
            if res_key not in residue_interactions:
                residue_interactions[res_key] = set()
            residue_interactions[res_key].add(itype)

    return residue_interactions, ligand_name

In [9]:
all_residues = set()
data = {}
ligands = {}

for pdb_file in pdb_files:
    pdb_id = pdb_file.stem
    try:
        interactions, ligand_name = get_interactions_from_pdb(pdb_file)
        data[pdb_id] = interactions
        ligands[pdb_id] = ligand_name
        all_residues.update(interactions.keys())
    except Exception as e:
        print(f"    Пропущен: {e}")
        data[pdb_id] = {}
        ligands[pdb_id] = "N/A"

In [10]:
total_files = len(pdb_files)
processed_files = sum(1 for v in data.values() if v)
skipped_files = total_files - processed_files

print(f"Всего файлов: {total_files}")
print(f"Успешно обработано: {processed_files}")
print(f"Пропущено: {skipped_files}")

Всего файлов: 83
Успешно обработано: 81
Пропущено: 2


In [11]:
#формируем датасет
rows = []
for pdb_id in data:
    row = {
        "Ligand": ligands[pdb_id],
        "Protein": pdb_id
    }
    for res in sorted(all_residues):
        types = data[pdb_id].get(res, set())
        row[res] = ", ".join(sorted(types)) if types else 0
    rows.append(row)

df = pd.DataFrame(rows)
df = df.set_index(["Ligand", "Protein"])

In [12]:
output_file = HERE / "protein_ligand_interaction.csv"
df.to_csv(output_file)

df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ALA_A:167,ALA_A:207,ALA_A:53,ALA_A:59,ALA_A:63,ARG_A:133,ARG_A:171,ARG_A:172,ARG_A:175,ARG_A:176,...,THR_A:232,TYR_A:104,TYR_A:106,TYR_A:110,TYR_A:114,TYR_A:116,VAL_A:187,VAL_A:190,VAL_A:52,VAL_A:88
Ligand,Protein,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
TPO,4RMZ,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
STU,4U9A,hbond,0,0,0,0,0,0,0,0,0,...,0,0,0,0,hydrophobic,0,0,0,0,0
TPO,4XS2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
TPO,4Y73,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
TPO,4YO6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
df_reset = df.reset_index()

residue_cols = df_reset.columns[2:]
df_reset["total_interactions"] = df_reset[residue_cols].apply(lambda row: (row != 0).sum(), axis=1)
df_sorted = df_reset.sort_values("total_interactions", ascending=False)
print(df_sorted[["Ligand", "Protein", "total_interactions"]])

   Ligand Protein  total_interactions
26    J8A    6EGA                  14
25    0LI    6EG9                  10
28    DL1    6EGE                   8
27    J87    6EGD                   8
42    NBK    6THW                   6
..    ...     ...                 ...
0     TPO    4RMZ                   2
56    CSO    8ATL                   2
55    CSO    8ATB                   2
64    TPO    8SCW                   0
62    TPO    8SCE                   0

[83 rows x 3 columns]
