**Import required libraries and scripts**

In [None]:
import os
from scripts.library_preparation import *
from scripts.utilities import *
from scripts.docking_functions import *
from scripts.clustering_functions import *
from scripts.rescoring_functions import *
from scripts.performance_calculation import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *


In [None]:
# Batch run over the DUD-E benchmark: for every target directory, rescore the
# already-clustered poses and, if no consensus folder exists yet, compute the
# enrichment factors and the consensus-method combinations.
dude_root = '/home/alacournola/DUD-E'
for target in tqdm(os.listdir(dude_root)):
    print(target)
    software = '/home/alacournola/DockM8/software'
    protein_file = f'{dude_root}/{target}/receptor_protoss_prepared.pdb'
    ref_file = f'{dude_root}/{target}/crystal_ligand_protoss.sdf'
    pocket = 'reference'
    protonation = 'pkasolver'
    docking_library = f'{dude_root}/{target}/merged_actives_decoys.sdf'
    docking_programs = ['GNINA', 'SMINA', 'PLANTS']
    clustering_metrics = ['RMSD', 'spyRMSD', 'espsim', '3DScore', 'bestpose', 'bestpose_GNINA', 'bestpose_SMINA', 'bestpose_PLANTS']
    clustering_method = 'KMedoids'
    rescoring = ['gnina', 'AD4', 'chemplp', 'rfscorevs', 'LinF9', 'RTMScore', 'SCORCH', 'vinardo', 'KORPL', 'ConvexPLR']
    id_column = 'ID'
    n_poses = 10
    exhaustiveness = 8
    parallel = 1
    # Leave two cores free, but never ask for fewer than one worker:
    # os.cpu_count() may return None or a value <= 2.
    ncpus = max(1, (os.cpu_count() or 1) - 2)
    # Create a temporary folder for all further calculations
    w_dir = Path(protein_file).parent
    if not w_dir.is_dir():
        # os.listdir() also returns plain files; skipping them keeps a stray
        # file in the benchmark folder from aborting the whole batch at mkdir.
        continue
    print('The working directory has been set to:', w_dir)
    (w_dir / 'temp').mkdir(exist_ok=True)

    try:
        # NOTE(review): the literal 10 is presumably the pocket radius around
        # the reference ligand - confirm against get_pocket's signature.
        pocket_definition = get_pocket(ref_file, protein_file, 10)
        for metric in clustering_metrics:
            rescore_all(w_dir, protein_file, pocket_definition, str(w_dir / 'temp' / f'clustering/{metric}_clustered.sdf'), rescoring, ncpus)
        # Only compute performance when no consensus folder exists yet.
        # (The former `.id_dir == False` raised AttributeError on every target,
        # which the broad except below swallowed, so this step never ran.)
        if not (w_dir / 'temp' / 'consensus').is_dir():
            calculate_EF_single_functions(w_dir, docking_library, clustering_metrics)
            apply_consensus_methods_combinations(w_dir, docking_library, clustering_metrics)
    except Exception as e:
        # Best-effort batch: report the failing target and move on to the next.
        printlog(f'Failed for {target}')
        print(e)

In [None]:
# Single-target run (xiap): load all docked poses, cluster them with every
# metric that has no clustered SDF yet, rescore each clustered SDF, then compute
# enrichment factors and consensus-method combinations.
target = 'xiap'
software = '/home/alacournola/DockM8/software'
protein_file = f'/home/alacournola/DUD-E/{target}/receptor_protoss_prepared.pdb'
ref_file = f'/home/alacournola/DUD-E/{target}/crystal_ligand_protoss.sdf'
pocket = 'reference'
protonation = 'pkasolver'
docking_library = f'/home/alacournola/DUD-E/{target}/merged_actives_decoys.sdf'
docking_programs = ['GNINA', 'SMINA', 'PLANTS']
clustering_metrics = ['RMSD', 'spyRMSD', 'espsim', '3DScore', 'bestpose', 'bestpose_GNINA', 'bestpose_SMINA', 'bestpose_PLANTS']
clustering_method = 'KMedoids'
rescoring = ['gnina', 'AD4', 'chemplp', 'rfscorevs', 'LinF9', 'RTMScore', 'SCORCH', 'vinardo', 'KORPL', 'ConvexPLR']
id_column = 'ID'
n_poses = 10
exhaustiveness = 8
parallel = 1
# Leave two cores free, but never ask for fewer than one worker:
# os.cpu_count() may return None or a value <= 2.
ncpus = max(1, (os.cpu_count() or 1) - 2)
# Create a temporary folder for all further calculations
w_dir = Path(protein_file).parent
print('The working directory has been set to:', w_dir)
(w_dir / 'temp').mkdir(exist_ok=True)

try:
    # NOTE(review): the literal 10 is presumably the pocket radius around the
    # reference ligand - confirm against get_pocket's signature.
    pocket_definition = get_pocket(ref_file, protein_file, 10)
    print('Loading all poses SDF file...')
    tic = time.perf_counter()
    all_poses = PandasTools.LoadSDF(str(w_dir / 'temp' / 'allposes.sdf'), idName='Pose ID', molColName='Molecule', includeFingerprints=False, strictParsing=True)
    toc = time.perf_counter()
    print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')
    # Cluster only the metrics whose clustered SDF is not on disk yet.
    for metric in clustering_metrics:
        if not (w_dir / 'temp' / 'clustering' / f'{metric}_clustered.sdf').is_file():
            # Use the configured method instead of repeating the literal.
            cluster_pebble(metric, clustering_method, w_dir, protein_file, all_poses, ncpus)
    # Rescoring starts only after every metric has been clustered.
    for metric in clustering_metrics:
        rescore_all(w_dir, protein_file, pocket_definition, str(w_dir / 'temp' / f'clustering/{metric}_clustered.sdf'), rescoring, ncpus)
    calculate_EF_single_functions(w_dir, docking_library, clustering_metrics)
    apply_consensus_methods_combinations(w_dir, docking_library, clustering_metrics)
except Exception as e:
    # Best-effort run: report the failure instead of stopping the notebook.
    printlog(f'Failed for {target}')
    print(e)

In [None]:
import os
import pandas as pd

def rename_and_merge_csv_files(root_dir, target_file, output_file):
    """Merge the ``EF1%`` column of every *target_file* found under *root_dir*.

    The directory tree is walked recursively. Each file named *target_file* is
    read with its first column as index; its ``EF1%`` column is coerced to
    numeric (unparseable values become NaN) and renamed after the directory
    holding the file, expressed relative to *root_dir* with any
    ``/temp/consensus`` part removed. ``EF10%`` is dropped when present. The
    per-directory tables are joined on "Scoring Function" and
    "Clustering Metric" (``pd.merge`` default, i.e. inner join), a row-wise
    ``Average`` over all non-key columns is appended, and the result is
    written to *output_file*.

    Args:
        root_dir: Directory to search.
        target_file: Exact file name to collect (e.g. "EF_single_functions.csv").
        output_file: Path of the merged CSV to write.

    Returns:
        None. Nothing is written when no matching file is found.
    """
    key_columns = ["Scoring Function", "Clustering Metric"]
    merged_df = None

    for dirpath, _dirnames, filenames in tqdm(os.walk(root_dir)):
        # File names are unique within a directory, so a membership test is
        # equivalent to scanning for the single possible match.
        if target_file not in filenames:
            continue
        display(dirpath)
        df = pd.read_csv(os.path.join(dirpath, target_file), index_col=0)
        df["EF1%"] = pd.to_numeric(df["EF1%"], errors='coerce')
        # Label the column relative to root_dir rather than to a hard-coded
        # absolute prefix, so the function works for any root directory.
        label = os.path.relpath(dirpath, root_dir).replace(os.sep, '/').replace('/temp/consensus', '')
        df = df.rename(columns={"EF1%": label})
        # errors='ignore': a file without an EF10% column is still usable.
        df = df.drop(columns='EF10%', errors='ignore')
        merged_df = df if merged_df is None else pd.merge(merged_df, df, on=key_columns)

    if merged_df is None:
        print(f"No file named {target_file} found under {root_dir}; nothing written.")
        return

    # Adding the average column
    numeric_columns = [col for col in merged_df.columns if col not in key_columns]
    merged_df['Average'] = merged_df[numeric_columns].mean(axis=1)
    merged_df.to_csv(output_file)
    print(f"Merged CSV file saved to: {output_file}")


# Collect every per-target EF_single_functions.csv below the DUD-E folder into
# one summary table.
root_directory, target_csv_file, output_csv_file = (
    "/home/alacournola/DUD-E",
    "EF_single_functions.csv",
    "merged_output_DUD-E.csv",
)

rename_and_merge_csv_files(
    root_dir=root_directory,
    target_file=target_csv_file,
    output_file=output_csv_file,
)


In [None]:
import os
import pandas as pd

def rename_and_merge_csv_files(root_dir, target_file, output_file):
    """Merge the ``EF1%`` column of every consensus summary under *root_dir*.

    The directory tree is walked recursively. Each file named *target_file* is
    read with its first column as index and its ``EF1%`` column is renamed
    after the directory holding the file, expressed relative to *root_dir*
    with any ``/temp/consensus`` part removed. The per-directory tables are
    joined on "clustering_method", "selected_columns" and "method_name"
    (``pd.merge`` default, i.e. inner join), a row-wise ``Average`` over the
    remaining columns is appended, and the result is written to *output_file*.

    Args:
        root_dir: Directory to search.
        target_file: Exact file name to collect (e.g. "consensus_summary.csv").
        output_file: Path of the merged CSV to write.

    Returns:
        None. Nothing is written when no matching file is found.
    """
    key_columns = ["clustering_method", "selected_columns", "method_name"]
    # The join keys are identifiers, not scores, so they must stay out of the
    # average. 'clustering_metric' is kept in the exclusion set because the
    # previous version excluded that spelling.
    non_score_columns = set(key_columns) | {"clustering_metric"}
    merged_df = None

    for dirpath, _dirnames, filenames in tqdm(os.walk(root_dir)):
        # File names are unique within a directory, so a membership test is
        # equivalent to scanning for the single possible match.
        if target_file not in filenames:
            continue
        df = pd.read_csv(os.path.join(dirpath, target_file), index_col=0)
        # Label the column relative to root_dir. The former hard-coded prefix
        # was misspelled ('DUD/E' instead of 'DUD-E') and never matched.
        label = os.path.relpath(dirpath, root_dir).replace(os.sep, '/').replace('/temp/consensus', '')
        df = df.rename(columns={"EF1%": label})
        if merged_df is None:
            merged_df = df
        else:
            merged_df = pd.merge(merged_df, df, on=key_columns)
            print(merged_df.head())

    if merged_df is None:
        print(f"No file named {target_file} found under {root_dir}; nothing written.")
        return

    # Adding the average column
    numeric_columns = [col for col in merged_df.columns if col not in non_score_columns]
    merged_df['Average'] = merged_df[numeric_columns].mean(axis=1)
    print(merged_df.head())
    merged_df.to_csv(output_file)
    print(f"Merged CSV file saved to: {output_file}")

# Collect every per-target consensus_summary.csv below the DUD-E folder into
# one summary table.
root_directory, target_csv_file, output_csv_file = (
    "/home/alacournola/DUD-E",
    "consensus_summary.csv",
    "merged_output_consensus_DUD-E.csv",
)

rename_and_merge_csv_files(
    root_dir=root_directory,
    target_file=target_csv_file,
    output_file=output_csv_file,
)
