In [4]:
###This is the script for pocket detection via fpocket, pocket selection via different parameters, and comparison with the results from P2Rank
#since P2Rank runs in bash, the following command has to be run first (shown here for the example structure 8I92_ACE2.pdb):
#/home/sylviane/meet-eu/Heidelberg_Team_2-1/P2Rank/p2rank_2.5.1/prank predict -f /home/sylviane/meet-eu/Heidelberg_Team_2-1/fpocket_data/structures/8I92_ACE2.pdb

#these libraries need to be imported:
import nglview as nv
import re
import ipywidgets
import shutil
import zipfile
import pandas as pd
import numpy as np
from biobb_structure_utils.utils.extract_molecule import extract_molecule
from biobb_vs.fpocket.fpocket_run import fpocket_run
from biobb_vs.fpocket.fpocket_filter import fpocket_filter
from pathlib import Path, PurePath
from Bio.PDB import PDBParser
import glob

########################## user set-up ##########################
# Everything in this section is meant to be edited by the user before a run.

#input pdb file path and file name (this file is passed to extract_molecule
#and fpocket_run further down):
pdb_file = "/home/sylviane/meet-eu/Heidelberg_Team_2-1/fpocket_data/structures/8WM3_Tiagabine_ACE2.pdb"

#is there a ligand in the pdb_file that needs to be removed?
#only the exact string 'yes' triggers the extract_molecule pre-processing step
ligand_pdb = 'yes'

#path to the already computed P2Rank prediction ('.pdb_predictions.csv');
#despite the name this is a file path, it is read with pd.read_csv
P2_dir = '/home/sylviane/meet-eu/Heidelberg_Team_2-1/P2Rank/p2rank_2.5.1/test_output/predict_8WM3_Tiagabine_ACE2/8WM3_Tiagabine_ACE2.pdb_predictions.csv'

#This is the directory under which all output files will be stored.
#It is created here, together with any missing parent directories:
base_dir = Path("/home/sylviane/meet-eu/Heidelberg_Team_2-1/fpocket_data/8WM3_ACE2_test")
base_dir.mkdir(parents=True, exist_ok=True)

#properties passed to fpocket_run for the pocket computation
properties_fpocket = {"min_radius":3, "max_radius": 6, "num_spheres": 35}

#properties passed to fpocket_filter for the pocket selection
#NOTE(review): "number_of_alpha_spheres" is a single-element list while the
#other entries are two-element lists (presumably [min, max]) - TODO confirm
#how fpocket_filter interprets this key
fpocket_params = {"volume": [500, 1200], 
                 "druggability_score": [0.5, 1],
                 "number_of_alpha_spheres": [35,],
                 "score": [0, 1]}

#thresholds for P2Rank selection: a pocket is kept when
#score >= P2_score, probability >= P2_probability and rank <= P2_rank
#NOTE(review): the confidence bands quoted next to P2_score (60-100 / 40-60 / <40)
#do not fit the chosen threshold of 5 - confirm which score scale applies
P2_score = 5 #score: #60-100: high confidence, 40-60: moderate confidence, <40: bad
P2_probability = 0.5
P2_rank = 10

#max distance between an fpocket center and a P2Rank center for the two pockets
#to be reported as a match (compared with <=; presumably in ångström, the unit
#of PDB coordinates)
threshold = 6.0


########################## start of pipe ##########################
# Locations derived from base_dir. The two directories are created right
# here; the files themselves are written by the later pipeline steps.

# directory that receives the extracted, filtered fpocket pockets
filtered_fpockets_dir = base_dir.joinpath('pockets_dir')
filtered_fpockets_dir.mkdir(parents=True, exist_ok=True)

# scratch directory shared by all intermediate files
_temp_dir = base_dir.joinpath('temp')
_temp_dir.mkdir(parents=True, exist_ok=True)

# zip archive with all pockets found by fpocket
output_fpockets_zip = _temp_dir.joinpath('all_fpockets.zip')
# summary of all pockets found by fpocket
output_fpockets_summary = _temp_dir.joinpath('fpocket_summary.json')
# zip archive with the pockets that passed the filter
output_filtered_fpockets_zip = _temp_dir.joinpath('fpocket_filtered.zip')
# table with the fpocket pocket centers
output_fpocket_centers = _temp_dir.joinpath('fpocket_centers.csv')
# table with the filtered P2Rank pockets
output_P2pocket_filtered = _temp_dir.joinpath('P2rank_filtered.csv')
# final table with the pockets matched between fpocket and P2Rank
output_pocket_matches = base_dir.joinpath('pockets_final.csv')

########################## fpocket prediction ##########################
## .pdb file pre-processing
# If the protein in the .pdb file is bound to a ligand, this step is meant to
# strip it before pocket detection.
# NOTE(review): input and output path are identical, so the original structure
# file is overwritten in place (original author's TODO: test whether
# overwriting works). Confirm this is intended, or write the cleaned structure
# to a separate file and pass that file to fpocket_run instead.
if ligand_pdb == 'yes':
    extract_molecule(input_structure_path=pdb_file,
                     output_molecule_path=pdb_file)

## run fpocket to detect all pockets according to the set properties
fpocket_run(input_pdb_path=pdb_file,
            output_pockets_zip=str(output_fpockets_zip),
            output_summary=str(output_fpockets_summary),
            properties=properties_fpocket)


## filter the computed pockets according to the set parameters
fpocket_filter(input_pockets_zip=str(output_fpockets_zip),
               input_summary=str(output_fpockets_summary),
               output_filter_pockets_zip=str(output_filtered_fpockets_zip),
               properties=fpocket_params)


## extract the filtered pockets and store them in a directory
with zipfile.ZipFile(str(output_filtered_fpockets_zip), 'r') as zip_ref:
    zip_ref.extractall(str(filtered_fpockets_dir))
    # Remember what THIS run extracted: the target directory is never cleaned,
    # so it may still hold pocket files from earlier runs with other filters.
    extracted_names = zip_ref.namelist()

## now we prepare the fpocket files for the comparison later
# The pocket atom files ("pocket*_atm.pdb", top level of the archive only) are
# condensed into one common csv table. Only files extracted in this run are
# used, and they are sorted so the row order is reproducible.
fpocket_files = sorted(
    str(filtered_fpockets_dir / name)
    for name in extracted_names
    if '/' not in name
    and name.startswith('pocket')
    and name.endswith('_atm.pdb')
)

# Explicit column list so that the table (and the csv header) is well-formed
# even when no pocket passed the filter.
fpocket_columns = ['pocket_id', 'center_x', 'center_y', 'center_z', 'volume']
fpocket_list = []

# One parser instance is enough for all files.
parser = PDBParser(QUIET=True)

for fpocket_file in fpocket_files:
    structure = parser.get_structure('pocket', fpocket_file)
    coords = np.array([atom.get_coord() for atom in structure.get_atoms()])
    if coords.size == 0:
        # A file without atoms has no center; skip it instead of crashing.
        continue
    center = coords.mean(axis=0)  # unweighted centroid of the pocket atoms
    volume = len(coords)  # number of pocket atoms, used as a rough size proxy
    pocket_id = Path(fpocket_file).stem  # e.g. "pocket3_atm"
    fpocket_list.append({
        'pocket_id': pocket_id,
        'center_x': center[0],
        'center_y': center[1],
        'center_z': center[2],
        'volume': volume,
    })

fpocket_df = pd.DataFrame(fpocket_list, columns=fpocket_columns)
fpocket_df.to_csv(str(output_fpocket_centers), index=False)


########################## P2Rank filtering ##########################
# Load the P2Rank prediction table ('.pdb_predictions.csv').
P2pockets = pd.read_csv(str(P2_dir))

# The column headers carry padding whitespace; remove it so the columns can be
# addressed by their plain names.
P2pockets.columns = P2pockets.columns.str.strip()

# One boolean mask per user threshold; a pocket is kept only if all three hold.
_score_ok = P2pockets['score'] >= P2_score
_probability_ok = P2pockets['probability'] >= P2_probability
_rank_ok = P2pockets['rank'] <= P2_rank
P2pockets_filtered = P2pockets.loc[_score_ok & _probability_ok & _rank_ok]

# Store the surviving pockets (the row index is written as the first column).
P2pockets_filtered.to_csv(str(output_P2pocket_filtered))


########################## fpocket and P2Rank comparison ##########################
#we are comparing pockets via the distance between their centers. Two pockets are considered the same pocket if their centers are no further apart than the set threshold (in ångström)
#Here's a function that computes the Euclidean distance between two points p = (x, y, z): d = √((x_2-x_1)² + (y_2-y_1)² + (z_2-z_1)²)
def dist(p1, p2):
    """Return the Euclidean distance between two 3D points.

    Only the first three entries (x, y, z) of ``p1`` and ``p2`` are used.
    """
    squared_deltas = [(p1[axis] - p2[axis]) ** 2 for axis in range(3)]
    return np.sqrt(squared_deltas[0] + squared_deltas[1] + squared_deltas[2])

#now, let's look for matches:
#fpocket_df and P2pockets_filtered are the variables needed
# Every filtered P2Rank pocket is compared with every fpocket pocket; a pair is
# recorded as a match when the distance between the two centers is at most
# `threshold`.

# Explicit column list so that the result table (and the csv header) is
# well-formed even when no pair matches.
match_columns = ['p2rank_rank', 'fpocket_nr', 'distance',
                 'fpocket_center', 'p2Rank_center']

matches = []
for _, p2 in P2pockets_filtered.iterrows():
    p2_center = [p2['center_x'], p2['center_y'], p2['center_z']]
    for _, fp in fpocket_df.iterrows():
        # All three coordinates must come from the fpocket row; taking z from
        # the P2Rank row would zero the z-difference and underestimate the
        # distance.
        fp_center = [fp['center_x'], fp['center_y'], fp['center_z']]
        distance = dist(p2_center, fp_center)
        if distance <= threshold:
            matches.append({
                'p2rank_rank': p2['rank'],
                'fpocket_nr': fp['pocket_id'],
                'distance': distance,
                'fpocket_center': fp_center,
                'p2Rank_center': p2_center,
            })
matches_df = pd.DataFrame(matches, columns=match_columns)
matches_df.to_csv(str(output_pocket_matches))



2025-12-13 07:01:49,625 [MainThread  ] [INFO ]  Module: biobb_structure_utils.utils.extract_molecule Version: 5.1.0
2025-12-13 07:01:49,627 [MainThread  ] [INFO ]  /home/sylviane/meet-eu/Heidelberg_Team_2-1/fpocket_scripts/sandbox_e42edfeb-e3b6-41f0-aac7-6266dba1f438 directory successfully created
2025-12-13 07:01:49,629 [MainThread  ] [INFO ]  Copy: /home/sylviane/meet-eu/Heidelberg_Team_2-1/fpocket_data/structures/8WM3_Tiagabine_ACE2.pdb to /home/sylviane/meet-eu/Heidelberg_Team_2-1/fpocket_scripts/sandbox_e42edfeb-e3b6-41f0-aac7-6266dba1f438
2025-12-13 07:01:49,630 [MainThread  ] [INFO ]  Creating 9bd0aced-2bf6-4059-9146-6c9dae3bf0b7 temporary folder
2025-12-13 07:01:49,631 [MainThread  ] [INFO ]  check_structure -i /home/sylviane/meet-eu/Heidelberg_Team_2-1/fpocket_data/structures/8WM3_Tiagabine_ACE2.pdb -o /home/sylviane/meet-eu/Heidelberg_Team_2-1/fpocket_data/structures/8WM3_Tiagabine_ACE2.pdb --force_save --non_interactive command_list --list 9bd0aced-2bf6-4059-9146-6c9dae3bf0b

