In [11]:
!pip install pymol-open-source
!pip install tqdm

Collecting tqdm
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)
Installing collected packages: tqdm
Successfully installed tqdm-4.67.1


In [20]:
import os
import sys
import shutil
from pymol import cmd

# The progress bar is optional: use the real tqdm when it is importable,
# otherwise define a pass-through replacement with the same call shape.
try:
    from tqdm import tqdm
except ImportError:
    def tqdm(iterable, **kwargs):
        """Pass-through replacement for ``tqdm`` when the package is missing.

        Prints a notice and returns ``iterable`` unchanged. Keyword options
        (for example ``desc`` or ``unit``) are accepted and ignored.
        """
        print("tqdm not installed, running without progress bar...")
        return iterable

def screen_pdbs_sequential(input_dir, output_dir, template_pdb_path, clash_cutoff=2.5,
                           mobile_chain="A", template_chain="B"):
    """
    Sequentially check the ``.pdb`` files in ``input_dir`` for clashes against a template.

    Each input structure is loaded into PyMOL, aligned onto the template with
    ``cmd.align``, and tested with a ``within`` selection: the structure is
    clash-free when no non-hydrogen atom of ``mobile_chain`` in the input lies
    within ``clash_cutoff`` of a non-hydrogen atom of ``template_chain`` in the
    template. Clash-free files are copied to ``output_dir`` (created if missing).

    Files are processed in sorted name order so the result is reproducible.

    Side effects: calls ``cmd.reinitialize()``, which resets the current PyMOL
    session, and temporarily disables PyMOL "actions"/"results" feedback. The
    feedback is re-enabled and the template object deleted even if the run is
    interrupted.

    Args:
        input_dir: Directory containing the candidate ``.pdb`` files.
        output_dir: Directory that receives copies of the clash-free files.
        template_pdb_path: Path of the template structure to align against.
        clash_cutoff: Distance passed to PyMOL's ``within`` operator
            (PyMOL measures this in Angstrom).
        mobile_chain: Chain ID of the input structure that is tested.
        template_chain: Chain ID of the template that is tested against.

    Returns:
        List of file names (not full paths) that had zero clashing atoms.
        A name is recorded before its file is copied, so if the copy fails the
        error is reported on stderr but the name stays in the list.

    Raises:
        FileNotFoundError: If ``template_pdb_path`` or ``input_dir`` does not exist.
    """

    # 1. Validation & Setup
    if not os.path.exists(template_pdb_path):
        raise FileNotFoundError(f"Template not found: {template_pdb_path}")
    if not os.path.exists(input_dir):
        raise FileNotFoundError(f"Input dir not found: {input_dir}")

    # Create output directory if needed
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print(f"Created output directory: {output_dir}")

    # os.listdir() order is arbitrary; sort so repeated runs give the same list.
    input_files = sorted(f for f in os.listdir(input_dir) if f.endswith('.pdb'))
    total_files = len(input_files)

    if total_files == 0:
        print("No PDB files found.")
        return []

    clash_free_pdbs = []

    # Object names and the selection text are the same for every file,
    # so build them once outside the loop.
    template_obj = "template_obj"
    mobile_obj = "mobile_obj"
    template_heavy = f"({template_obj} and chain {template_chain} and not elem H)"
    selection_query = (
        f"({mobile_obj} and chain {mobile_chain} and not elem H) "
        f"within {clash_cutoff} of "
        f"{template_heavy}"
    )

    # 2. Prepare PyMOL
    # Reset PyMOL to ensure a clean slate
    cmd.reinitialize()

    # Load the template ONCE (much faster than loading it in the loop)
    cmd.load(template_pdb_path, template_obj)

    try:
        # Silence PyMOL output so the progress bar looks nice
        cmd.feedback("disable", "all", "actions")
        cmd.feedback("disable", "all", "results")

        # With nothing to clash against, every structure would pass silently.
        if cmd.count_atoms(template_heavy) == 0:
            sys.stderr.write(
                f"Warning: template has no non-hydrogen atoms in chain "
                f"{template_chain}; every structure will pass the clash check.\n"
            )

        print(f"Screening {total_files} files (Sequential Mode)...")

        # 3. The Loop
        for file_name in tqdm(input_files, desc="Processing", unit="pdb"):
            full_path = os.path.join(input_dir, file_name)

            try:
                # Load input and superimpose it on the template (quietly)
                cmd.load(full_path, mobile_obj)
                cmd.align(mobile_obj, template_obj, quiet=1)

                # cmd.select returns the number of atoms in the new selection
                clash_count = cmd.select("clash_selection", selection_query)

                if clash_count == 0:
                    clash_free_pdbs.append(file_name)

                    # Copy the file immediately
                    shutil.copy2(full_path, os.path.join(output_dir, file_name))

            except Exception as e:
                # Print error to stderr so it doesn't break the progress bar visual
                sys.stderr.write(f"\nError processing {file_name}: {e}\n")

            finally:
                # Cleanup: drop this iteration's objects; the template stays loaded
                cmd.delete(mobile_obj)
                cmd.delete("clash_selection")

    finally:
        # 4. Final Cleanup -- runs even on KeyboardInterrupt so PyMOL is not
        # left silenced with the template still loaded.
        cmd.delete(template_obj)

        # Re-enable PyMOL feedback
        cmd.feedback("enable", "all", "actions")
        cmd.feedback("enable", "all", "results")

    return clash_free_pdbs

# ==========================================
# EXECUTION
# ==========================================
if __name__ == "__main__":
    # Define Paths
    # NOTE: machine-specific absolute path; adjust for your environment.
    base_path = "/mnt/d/桌面/PTN/PKU/Chen_Lab/HACE/rigid_HACE"

    my_input_dir = os.path.join(base_path, "design/1/backbones/rigid_HACE/rigid_HACE/")
    my_template_dir = os.path.join(base_path, "tool_functions/clash_detecter/templates/")
    my_template_path = os.path.join(my_template_dir, "HACE.pdb")

    # Output Directory
    my_output_dir = os.path.join(base_path, "design/1/backbones/rigid_HACE/clean_pdbs/")

    # Log File (relative path: written to the current working directory)
    output_txt_file = "clash_free_list.txt"

    try:
        # Run the screen; returns the names of the files that passed
        clean_list = screen_pdbs_sequential(
            input_dir=my_input_dir,
            output_dir=my_output_dir,
            template_pdb_path=my_template_path,
            clash_cutoff=2.0
        )

        # Save Text List
        if clean_list:
            print(f"\nSuccess! {len(clean_list)} structures passed.")
            print(f"Files copied to: {my_output_dir}")

            # Explicit UTF-8 so non-ASCII file names are written the same way
            # regardless of the platform's default encoding.
            with open(output_txt_file, "w", encoding="utf-8") as f:
                f.writelines(f"{pdb_name}\n" for pdb_name in clean_list)
            print(f"List saved to: {os.path.abspath(output_txt_file)}")
        else:
            print("\nDone. No structures passed the filter.")

    except Exception as e:
        # Best-effort driver: report the failure instead of raising a traceback
        print(f"An error occurred: {e}")

Screening 500 files (Sequential Mode)...


Processing: 100%|██████████| 500/500 [00:47<00:00, 10.53pdb/s]


Success! 42 structures passed.
Files copied to: /mnt/d/桌面/PTN/PKU/Chen_Lab/HACE/rigid_HACE/design/1/backbones/rigid_HACE/clean_pdbs/
List saved to: /mnt/d/桌面/PTN/PKU/Chen_Lab/HACE/rigid_HACE/tool_functions/clash_detecter/clash_free_list.txt



