In [6]:
import os
import papermill as pm
import glob
import logging

# Define the base directory
base_dir = os.getcwd()

# Create papermill and output directories
papermill_dir = os.path.join(base_dir, "papermill")
output_dir = os.path.join(papermill_dir, "goldilocks_1pt0LD_torsion_notebooks")
os.makedirs(output_dir, exist_ok=True)

# Set up logging
log_file = os.path.join(papermill_dir, "goldilocks_1pt0LD_torsion_runner.log")
logging.basicConfig(
    filename=log_file,
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# List of proteins to skip
SKIP_PROTEINS = ["AMPC", "DRD4"]


def find_files(protein_dir):
    protein_name = os.path.basename(protein_dir)
    # Use more flexible glob patterns
    active_sdf = glob.glob(
        os.path.join(
            protein_dir, f"{protein_name}_std_dudez_1pt0LD*ligand*poses_lib_sorted.sdf"
        )
    )
    decoy_sdf = glob.glob(
        os.path.join(
            protein_dir,
            f"{protein_name}_std_goldilocks_1pt0LD*decoy*poses_lib_sorted.sdf",
        )
    )
    active_strain = glob.glob(
        os.path.join(
            protein_dir,
            f"{protein_name}_std_dudez_1pt0LD*ligand*poses_lib_sorted_tstrain.csv",
        )
    )
    decoy_strain = glob.glob(
        os.path.join(
            protein_dir,
            f"{protein_name}_std_goldilocks_1pt0LD*decoy*poses_lib_sorted_tstrain.csv",
        )
    )
    # Check if we found exactly one file for each type
    if (
        len(active_sdf) != 1
        or len(decoy_sdf) != 1
        or len(active_strain) != 1
        or len(decoy_strain) != 1
    ):
        raise ValueError(f"Unexpected number of files found for {protein_name}")
    return active_sdf[0], decoy_sdf[0], active_strain[0], decoy_strain[0]


def run_notebook(protein_dir):
    protein_name = os.path.basename(protein_dir)
    if protein_name in SKIP_PROTEINS:
        logging.info(f"Skipping {protein_name} as it requires special handling")
        print(f"Skipping {protein_name} as it requires special handling")
        return
    try:
        active_sdf, decoy_sdf, active_strain, decoy_strain = find_files(protein_dir)
        output_notebook = os.path.join(
            output_dir, f"{protein_name}_goldilocks_1pt0_output.ipynb"
        )
        pm.execute_notebook(
            "input_for_dudez_analysis_papermill.ipynb",
            output_notebook,
            parameters={
                "title_suffix": f"{protein_name}_goldilocks_1pt0",
                "file_path_sdf_active": active_sdf,
                "file_path_sdf_decoy": decoy_sdf,
                "file_path_strain_active": active_strain,
                "file_path_strain_decoy": decoy_strain,
            },
        )
        logging.info(f"Completed analysis for {protein_name}")
        print(f"Completed analysis for {protein_name}")
    except ValueError as ve:
        logging.error(f"Error finding files for {protein_name}: {str(ve)}")
        print(f"Error finding files for {protein_name}: {str(ve)}")
    except Exception as e:
        logging.error(f"Error processing {protein_name}: {str(e)}")
        print(f"Error processing {protein_name}: {str(e)}")


# Find all protein directories
protein_dirs = [
    d for d in glob.glob(os.path.join(base_dir, "[A-Z0-9]*")) if os.path.isdir(d)
]

# Run the notebook for each protein
total_proteins = len(protein_dirs)
for i, protein_dir in enumerate(protein_dirs, 1):
    protein_name = os.path.basename(protein_dir)
    print(f"Processing protein {i} of {total_proteins}: {protein_name}")
    run_notebook(protein_dir)

print(f"All proteins processed. Output notebooks are in {output_dir}")
print(f"Check the log file at {log_file} for details.")
print(f"Note: AMPC and DRD4 were skipped and require special handling.")

Processing protein 1 of 43: MK01


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for MK01
Processing protein 2 of 43: ADA


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for ADA
Processing protein 3 of 43: HMDH


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for HMDH
Processing protein 4 of 43: THRB


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for THRB
Processing protein 5 of 43: HDAC8


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for HDAC8
Processing protein 6 of 43: DEF


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for DEF
Processing protein 7 of 43: UROK


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for UROK
Processing protein 8 of 43: KITH


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for KITH
Processing protein 9 of 43: MAPK2


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for MAPK2
Processing protein 10 of 43: HS90A


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for HS90A
Processing protein 11 of 43: AA2AR


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for AA2AR
Processing protein 12 of 43: MT1


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for MT1
Processing protein 13 of 43: GLCM


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for GLCM
Processing protein 14 of 43: FGFR1


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for FGFR1
Processing protein 15 of 43: ACES


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for ACES
Processing protein 16 of 43: EGFR


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for EGFR
Processing protein 17 of 43: PLK1


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for PLK1
Processing protein 18 of 43: RENI


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for RENI
Processing protein 19 of 43: KIT


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for KIT
Processing protein 20 of 43: PPARA


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for PPARA
Processing protein 21 of 43: ADRB2


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for ADRB2
Processing protein 22 of 43: TRYB1


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for TRYB1
Processing protein 23 of 43: DRD4
Skipping DRD4 as it requires special handling
Processing protein 24 of 43: AMPC
Skipping AMPC as it requires special handling
Processing protein 25 of 43: ABL1


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for ABL1
Processing protein 26 of 43: XIAP


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Completed analysis for XIAP
Processing protein 27 of 43: FA10


Executing:   0%|          | 0/132 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.
