In [10]:
from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB import PDBIO, Select
from rdkit import Chem
from rdkit.Chem import AllChem
import tempfile
import os

# Protein selector: keeps residues whose hetero-flag is blank
class ProteinSelect(Select):
    """``Select`` filter that passes only residues with a blank hetero-flag.

    In Bio.PDB the first field of ``residue.id`` is the hetero-flag; it is a
    single space for standard polymer residues and non-blank for hetero
    groups, so this filter keeps the polymer (protein) part of a structure.
    """

    def accept_residue(self, residue):
        """Return True when the first field of ``residue.id`` is ``" "``."""
        hetero_flag = residue.id[0]
        return hetero_flag == " "

# Ligand selector: keeps residues whose hetero-flag is non-blank
class LigandSelect(Select):
    """``Select`` filter that passes only residues with a non-blank hetero-flag.

    Everything that is not a standard polymer residue is accepted: ligands,
    metal ions and any other hetero group present in the structure (under
    Bio.PDB conventions this also includes waters, which are flagged ``"W"``).
    """

    def accept_residue(self, residue):
        """Return True when the first field of ``residue.id`` is not ``" "``."""
        hetero_flag = residue.id[0]
        return hetero_flag != " "

def extract_pdb_and_mol2_no_pybel(cif_path, pdb_out, ligand_pdb_out):
    """Split an mmCIF structure into a protein PDB file and a ligand PDB file.

    Despite the "mol2" in the name, both outputs are written in PDB format
    through ``PDBIO``; no MOL2 conversion is performed here.

    Args:
        cif_path: Path of the mmCIF file to read.
        pdb_out: Output path for the residues accepted by ``ProteinSelect``.
        ligand_pdb_out: Output path for the residues accepted by
            ``LigandSelect``.
    """
    structure = MMCIFParser(QUIET=True).get_structure("struct", cif_path)

    writer = PDBIO()
    writer.set_structure(structure)

    # Same structure, written twice with a different residue filter each time:
    # protein first, then the hetero groups.
    outputs = (
        (pdb_out, ProteinSelect()),
        (ligand_pdb_out, LigandSelect()),
    )
    for destination, selector in outputs:
        writer.save(destination, selector)

In [11]:
# Receptor label; it is interpolated into every path below
recp_name = "s2"

# Input folder holding one sub-folder of predictions per complex
pred_dir = f"/lustre/fs6/lyu_lab/scratch/ichen/data/boltz_runs/{recp_name}/boltz_results_{recp_name}_fasta/predictions"
# Output folders for the extracted files
mol2_dir = f"/lustre/fs6/lyu_lab/scratch/ichen/data/boltz_runs/{recp_name}_mol2"
pdb_dir = f"/lustre/fs6/lyu_lab/scratch/ichen/data/boltz_runs/{recp_name}_pdb"

# Create both output folders up front; existing folders are left as they are
for _out_dir in (mol2_dir, pdb_dir):
    os.makedirs(_out_dir, exist_ok=True)

In [12]:
# Split every prediction's `<name>_model_0.cif` into a protein PDB (in pdb_dir)
# and a ligand PDB (in mol2_dir). Entries whose two outputs already exist are
# skipped, so the cell can be re-run to resume an interrupted batch.
for target_name in sorted(os.listdir(pred_dir)):  # sorted: reproducible order
    cif_file = os.path.join(pred_dir, target_name, f"{target_name}_model_0.cif")
    protein_pdb_file = os.path.join(pdb_dir, f"{target_name}.pdb")
    # The ligand is stored in PDB format even though its folder is named *_mol2.
    ligand_pdb_file = os.path.join(mol2_dir, f"{target_name}.pdb")

    # Report and skip entries without a model file (failed runs, stray files).
    if not os.path.exists(cif_file):
        print(f"File {cif_file} does not exist.")
        continue

    # Nothing to do when both outputs are already on disk.
    if os.path.exists(protein_pdb_file) and os.path.exists(ligand_pdb_file):
        continue

    extract_pdb_and_mol2_no_pybel(
        cif_file,
        pdb_out=protein_pdb_file,
        ligand_pdb_out=ligand_pdb_file,
    )