In [2]:
import os
import papermill as pm
import glob
import logging

# Define the base directory
base_dir = (
    "/Users/lkv206/work/to_do_projects/chembl_ligands/DUDEZ_DOCKING_GRIDS_AND_POSES"
)

# Create papermill and output directories
papermill_dir = os.path.join(base_dir, "papermill")
output_dir = os.path.join(papermill_dir, "extrema_0pt5LD_torsion_notebooks")
os.makedirs(output_dir, exist_ok=True)

# Set up logging
log_file = os.path.join(papermill_dir, "extrema_0pt5LD_torsion_runner.log")
logging.basicConfig(
    filename=log_file,
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)


def find_files(protein_dir):
    protein_name = os.path.basename(protein_dir)
    active_sdf = glob.glob(
        os.path.join(
            protein_dir, f"{protein_name}_std_dudez_0pt5LD_ligand_poses_lib_sorted.sdf"
        )
    )[0]
    decoy_sdf = glob.glob(
        os.path.join(
            protein_dir,
            f"{protein_name}_std_extrema_0pt5LD_decoy_poses_lib_sorted.sdf",
        )
    )[0]
    active_strain = glob.glob(
        os.path.join(
            protein_dir,
            f"{protein_name}_std_dudez_0pt5LD_ligand_poses_lib_sorted_tstrain.csv",
        )
    )[0]
    decoy_strain = glob.glob(
        os.path.join(
            protein_dir,
            f"{protein_name}_std_extrema_0pt5LD_decoy_poses_lib_sorted_tstrain.csv",
        )
    )[0]

    return active_sdf, decoy_sdf, active_strain, decoy_strain


def run_notebook(protein_dir):
    protein_name = os.path.basename(protein_dir)
    try:
        active_sdf, decoy_sdf, active_strain, decoy_strain = find_files(protein_dir)

        output_notebook = os.path.join(
            output_dir, f"{protein_name}_extrema_0pt5_output.ipynb"
        )

        pm.execute_notebook(
            "input_for_dudez_analysis_papermill.ipynb",
            output_notebook,
            parameters={
                "title_suffix": f"{protein_name}_extrema_0pt5",
                "file_path_sdf_active": active_sdf,
                "file_path_sdf_decoy": decoy_sdf,
                "file_path_strain_active": active_strain,
                "file_path_strain_decoy": decoy_strain,
            },
        )

        logging.info(f"Completed analysis for {protein_name}")
        print(f"Completed analysis for {protein_name}")
    except Exception as e:
        logging.error(f"Error processing {protein_name}: {str(e)}")
        print(f"Error processing {protein_name}: {str(e)}")


# Find all protein directories
protein_dirs = [
    d for d in glob.glob(os.path.join(base_dir, "[A-Z0-9]*")) if os.path.isdir(d)
]

# Run the notebook for each protein
total_proteins = len(protein_dirs)
for i, protein_dir in enumerate(protein_dirs, 1):
    print(
        f"Processing protein {i} of {total_proteins}: {os.path.basename(protein_dir)}"
    )
    run_notebook(protein_dir)

print(f"All proteins processed. Output notebooks are in {output_dir}")
print(f"Check the log file at {log_file} for details.")

Processing protein 1 of 43: MK01


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for MK01
Processing protein 2 of 43: ADA


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for ADA
Processing protein 3 of 43: HMDH


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for HMDH
Processing protein 4 of 43: THRB


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for THRB
Processing protein 5 of 43: HDAC8


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for HDAC8
Processing protein 6 of 43: DEF


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for DEF
Processing protein 7 of 43: UROK


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for UROK
Processing protein 8 of 43: KITH


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for KITH
Processing protein 9 of 43: MAPK2


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for MAPK2
Processing protein 10 of 43: HS90A


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for HS90A
Processing protein 11 of 43: AA2AR


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for AA2AR
Processing protein 12 of 43: MT1


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for MT1
Processing protein 13 of 43: GLCM


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for GLCM
Processing protein 14 of 43: FGFR1


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for FGFR1
Processing protein 15 of 43: ACES


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for ACES
Processing protein 16 of 43: EGFR


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for EGFR
Processing protein 17 of 43: PLK1


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for PLK1
Processing protein 18 of 43: RENI


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for RENI
Processing protein 19 of 43: KIT


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for KIT
Processing protein 20 of 43: PPARA


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for PPARA
Processing protein 21 of 43: ADRB2


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for ADRB2
Processing protein 22 of 43: TRYB1


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for TRYB1
Processing protein 23 of 43: DRD4
Error processing DRD4: list index out of range
Processing protein 24 of 43: AMPC
Error processing AMPC: list index out of range
Processing protein 25 of 43: ABL1


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for ABL1
Processing protein 26 of 43: XIAP


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for XIAP
Processing protein 27 of 43: FA10


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for FA10
Processing protein 28 of 43: NRAM


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for NRAM
Processing protein 29 of 43: HIVPR


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for HIVPR
Processing protein 30 of 43: LCK


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for LCK
Processing protein 31 of 43: ITAL


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for ITAL
Processing protein 32 of 43: PARP1


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for PARP1
Processing protein 33 of 43: ROCK1


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for ROCK1
Processing protein 34 of 43: TRY1


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for TRY1
Processing protein 35 of 43: FA7


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for FA7
Processing protein 36 of 43: FKB1A


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for FKB1A
Processing protein 37 of 43: CXCR4


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for CXCR4
Processing protein 38 of 43: ANDR


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for ANDR
Processing protein 39 of 43: CSF1R


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for CSF1R
Processing protein 40 of 43: FABP4


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for FABP4
Processing protein 41 of 43: PTN1


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for PTN1
Processing protein 42 of 43: PUR2


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for PUR2
Processing protein 43 of 43: SRC


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for SRC
All proteins processed. Output notebooks are in /Users/lkv206/work/to_do_projects/chembl_ligands/DUDEZ_DOCKING_GRIDS_AND_POSES/papermill/extrema_0pt5LD_torsion_notebooks
Check the log file at /Users/lkv206/work/to_do_projects/chembl_ligands/DUDEZ_DOCKING_GRIDS_AND_POSES/papermill/extrema_0pt5LD_torsion_runner.log for details.
