In [None]:
import os
import papermill as pm
import glob
import logging

# Root folder that is scanned for per-protein sub-directories.
base_dir = "/Users/lkv206/work/to_do_projects/chembl_ligands/DUDEZ_DOCKING_GRIDS_AND_POSES"

# Executed notebooks are written below <base_dir>/papermill/...; creating the
# leaf directory also creates the intermediate "papermill" directory, which is
# where the log file lives.
papermill_dir = os.path.join(base_dir, "papermill")
output_dir = os.path.join(papermill_dir, "dudez_1pt0LD_torsion_notebooks")
os.makedirs(output_dir, exist_ok=True)

# Route INFO-and-above records from the root logger into a file in papermill_dir.
log_file = os.path.join(papermill_dir, "dudez_1pt0LD_torsion_runner.log")
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    filename=log_file,
)


def find_files(protein_dir):
    """Locate the four input files expected inside one protein directory.

    The file names are built from the directory's base name ``<name>``::

        <name>_std_dudez_1pt0LD_ligand_poses_lib_sorted.sdf
        <name>_std_dudez_1pt0LD_decoy_poses_lib_sorted.sdf
        <name>_std_dudez_1pt0LD_ligand_poses_lib_sorted_tstrain.csv
        <name>_std_dudez_1pt0LD_decoy_poses_lib_sorted_tstrain.csv

    Args:
        protein_dir: Path of the protein directory. A trailing path separator
            is tolerated.

    Returns:
        A tuple ``(active_sdf, decoy_sdf, active_strain, decoy_strain)`` of
        file paths, in that order.

    Raises:
        FileNotFoundError: If any of the expected files does not exist. The
            message contains the full path that was looked up.
    """
    # normpath drops a trailing separator so basename never returns "".
    protein_name = os.path.basename(os.path.normpath(protein_dir))
    prefix = f"{protein_name}_std_dudez_1pt0LD"
    # Order matters: it defines the order of the returned tuple.
    file_names = (
        f"{prefix}_ligand_poses_lib_sorted.sdf",
        f"{prefix}_decoy_poses_lib_sorted.sdf",
        f"{prefix}_ligand_poses_lib_sorted_tstrain.csv",
        f"{prefix}_decoy_poses_lib_sorted_tstrain.csv",
    )

    found = []
    for file_name in file_names:
        pattern = os.path.join(protein_dir, file_name)
        matches = glob.glob(pattern)
        if not matches:
            # Name the missing file instead of failing with a bare IndexError.
            raise FileNotFoundError(f"Expected input file not found: {pattern}")
        found.append(matches[0])

    return tuple(found)


def run_notebook(protein_dir):
    """Run the analysis notebook for one protein directory via papermill.

    Looks up the protein's input files with ``find_files`` and passes them as
    parameters to ``pm.execute_notebook``. The executed notebook is written to
    ``<output_dir>/<protein_name>_dudez_1pt0_output.ipynb``.

    Failures are deliberately not propagated, so that one failing protein does
    not abort a batch run: any exception is logged (with traceback) and
    printed, and the function returns normally.

    Args:
        protein_dir: Path of the protein directory; its base name is used as
            the protein name.

    Returns:
        None.
    """
    # normpath drops a trailing separator so basename never returns "".
    protein_name = os.path.basename(os.path.normpath(protein_dir))
    try:
        active_sdf, decoy_sdf, active_strain, decoy_strain = find_files(protein_dir)

        output_notebook = os.path.join(
            output_dir, f"{protein_name}_dudez_1pt0_output.ipynb"
        )

        pm.execute_notebook(
            # Relative path: resolved against the current working directory.
            "input_for_dudez_analysis_papermill.ipynb",
            output_notebook,
            parameters={
                "title_suffix": f"{protein_name}_dudez_1pt0",
                "file_path_sdf_active": active_sdf,
                "file_path_sdf_decoy": decoy_sdf,
                "file_path_strain_active": active_strain,
                "file_path_strain_decoy": decoy_strain,
            },
        )
    except Exception as e:
        # Best-effort batch step: keep going, but record the full traceback in
        # the log so the failure can be diagnosed afterwards.
        logging.exception("Error processing %s: %s", protein_name, e)
        print(f"Error processing {protein_name}: {e}")
    else:
        logging.info("Completed analysis for %s", protein_name)
        print(f"Completed analysis for {protein_name}")


# Find all protein directories: sub-directories of base_dir whose name starts
# with an uppercase letter or a digit. glob returns matches in arbitrary order,
# so sort them to make the processing order (and the log) reproducible.
protein_dirs = sorted(
    d for d in glob.glob(os.path.join(base_dir, "[A-Z0-9]*")) if os.path.isdir(d)
)

# Run the notebook for each protein
total_proteins = len(protein_dirs)
if not protein_dirs:
    # Make an empty scan visible instead of silently reporting success.
    logging.warning("No protein directories found in %s", base_dir)
    print(f"No protein directories found in {base_dir}")

for i, protein_dir in enumerate(protein_dirs, 1):
    print(
        f"Processing protein {i} of {total_proteins}: {os.path.basename(protein_dir)}"
    )
    # run_notebook handles its own errors, so one failure does not stop the loop.
    run_notebook(protein_dir)

print(f"All proteins processed. Output notebooks are in {output_dir}")
print(f"Check the log file at {log_file} for details.")