**Import required libraries and scripts**

In [None]:
import os
from scripts.library_preparation import *
from scripts.utilities import *
from scripts.docking_functions import *
from scripts.clustering_functions import *
from scripts.rescoring_functions import *
from scripts.performance_calculation import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *


In [None]:
for dir in tqdm(os.listdir('/home/alacournola/DEKOIS')):
    print(dir)
    software = '/home/alacournola/DockM8/software'
    protein_file = f'/home/alacournola/DEKOIS/{dir}/receptor_protoss_prepared.pdb'
    ref_file = f'/home/alacournola/DEKOIS/{dir}/crystal_ligand_protoss.sdf'
    pocket = 'reference'
    protonation = 'pkasolver'
    docking_library = f'/home/alacournola/DEKOIS/{dir}/merged_actives_decoys.sdf'
    docking_programs = ['GNINA', 'SMINA', 'PLANTS']
    clustering_metrics = ['RMSD', 'spyRMSD', 'espsim', '3DScore', 'bestpose', 'bestpose_GNINA', 'bestpose_SMINA', 'bestpose_PLANTS']
    clustering_method = 'KMedoids'
    rescoring = ['gnina', 'AD4', 'chemplp', 'rfscorevs', 'LinF9', 'RTMScore', 'SCORCH', 'vinardo', 'plecscore', 'nnscore', 'KORPL', 'ConvexPLR']
    id_column = 'ID'
    n_poses = 10
    exhaustiveness = 8
    parallel = 1
    ncpus = int(os.cpu_count()-2)
    #Create a temporary folder for all further calculations
    w_dir = os.path.dirname(protein_file)
    print('The working directory has been set to:', w_dir)
    create_temp_folder(w_dir+'/temp')
    
    try:
        pocket_definition = get_pocket(ref_file, protein_file, 10)
        for metric in clustering_metrics:
            rescore_all(w_dir, protein_file, pocket_definition, software, w_dir+f'/temp/clustering/{metric}_clustered.sdf', rescoring, ncpus)
        calculate_EF_single_functions(w_dir, docking_library, clustering_metrics)
        apply_consensus_methods_combinations(w_dir, docking_library, clustering_metrics)
    except Exception as e:
        printlog(f'Failed for {dir}')
        print(e)

In [6]:
import os
import pandas as pd

def rename_and_merge_csv_files(root_dir, target_file, output_file):
    merged_df = None

    for dirpath, dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename == target_file:
                filepath = os.path.join(dirpath, filename)
                df = pd.read_csv(filepath, index_col=0)
                display(dirpath, df)
                dirname = os.path.basename(dirpath)
                df["EF1%"] = pd.to_numeric(df["EF1%"], errors='coerce')
                df.rename(columns={"EF1%": dirpath.replace('/home/alacournola/DEKOIS/', '').replace('/temp/consensus', '')}, inplace=True)
                df.drop(columns='EF10%', inplace=True)
                if merged_df is None:
                    merged_df = df
                else:
                    merged_df = pd.merge(merged_df, df, on=["Scoring Function", "Clustering Metric"])

    if merged_df is not None:
        # Adding the average column
        columns_to_exclude = ["Scoring Function", "Clustering Metric"]
        numeric_columns = [col for col in merged_df.columns if col not in columns_to_exclude]
        merged_df['Average'] = merged_df[numeric_columns].mean(axis=1)
        merged_df.to_csv(output_file)
        print(f"Merged CSV file saved to: {output_file}")


root_directory = "/home/alacournola/DEKOIS"
target_csv_file = "EF_single_functions.csv"
output_csv_file = "merged_output_DEKOIS2.csv"

rename_and_merge_csv_files(root_directory, target_csv_file, output_csv_file)


'/home/alacournola/DEKOIS/adrb2/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,1.75,0.00
1,PLECnn,bestpose,2.50,5.12
2,KORPL,bestpose,2.00,0.00
3,CHEMPLP,bestpose,2.25,5.12
4,ConvexPLR,bestpose,2.00,0.00
...,...,...,...,...
107,NNscore,spyRMSD,3.24,2.56
108,Vinardo,spyRMSD,3.99,5.12
109,RTMScore,spyRMSD,0.75,2.56
110,SCORCH,spyRMSD,3.49,7.68


'/home/alacournola/DEKOIS/pde5a/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,1.25,0.00
1,PLECnn,bestpose,1.00,0.00
2,KORPL,bestpose,1.25,2.55
3,CHEMPLP,bestpose,0.25,0.00
4,ConvexPLR,bestpose,0.25,0.00
...,...,...,...,...
107,NNscore,spyRMSD,0.50,0.00
108,Vinardo,spyRMSD,0.50,0.00
109,RTMScore,spyRMSD,0.00,0.00
110,SCORCH,spyRMSD,0.00,0.00


'/home/alacournola/DEKOIS/hivrt/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,3.24,4.81
1,PLECnn,bestpose,1.00,0.00
2,KORPL,bestpose,3.24,7.22
3,CHEMPLP,bestpose,1.00,0.00
4,ConvexPLR,bestpose,2.74,0.00
...,...,...,...,...
107,NNscore,spyRMSD,0.75,0.00
108,Vinardo,spyRMSD,1.99,0.00
109,RTMScore,spyRMSD,1.99,2.41
110,SCORCH,spyRMSD,1.49,0.00


'/home/alacournola/DEKOIS/dyr/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,6.00,17.66
1,PLECnn,bestpose,0.50,0.00
2,KORPL,bestpose,6.76,17.66
3,CHEMPLP,bestpose,0.25,0.00
4,ConvexPLR,bestpose,3.00,2.52
...,...,...,...,...
107,NNscore,spyRMSD,0.25,0.00
108,Vinardo,spyRMSD,0.50,5.05
109,RTMScore,spyRMSD,2.50,5.05
110,SCORCH,spyRMSD,3.00,2.52


'/home/alacournola/DEKOIS/pnph/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,AD4,3DScore,1.50,0.00
1,CHEMPLP,3DScore,1.50,2.57
2,GNINA,3DScore,3.01,0.00
3,CNN-Score,3DScore,2.01,10.28
4,CNN-Affinity,3DScore,2.26,5.14
...,...,...,...,...
75,LinF9,spyRMSD,1.00,2.57
76,RFScoreVS,spyRMSD,8.03,30.85
77,RTMScore,spyRMSD,1.50,5.14
78,SCORCH,spyRMSD,3.51,7.71


'/home/alacournola/DEKOIS/aldr/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,4.77,20.44
1,PLECnn,bestpose,0.75,0.00
2,KORPL,bestpose,0.50,0.00
3,CHEMPLP,bestpose,3.01,17.88
4,ConvexPLR,bestpose,3.26,2.55
...,...,...,...,...
107,NNscore,spyRMSD,0.00,0.00
108,Vinardo,spyRMSD,2.51,10.22
109,RTMScore,spyRMSD,3.01,2.55
110,SCORCH,spyRMSD,4.52,5.11


'/home/alacournola/DEKOIS/hdac2/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,3.49,0.00
1,PLECnn,bestpose,2.49,5.15
2,KORPL,bestpose,5.49,18.04
3,CHEMPLP,bestpose,3.74,12.89
4,ConvexPLR,bestpose,2.74,10.31
...,...,...,...,...
107,NNscore,spyRMSD,3.49,5.15
108,Vinardo,spyRMSD,3.74,15.45
109,RTMScore,spyRMSD,1.25,5.15
110,SCORCH,spyRMSD,1.50,5.15


'/home/alacournola/DEKOIS/hs90a/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,1.74,2.58
1,PLECnn,bestpose,0.00,0.00
2,KORPL,bestpose,2.99,18.02
3,CHEMPLP,bestpose,0.00,0.00
4,ConvexPLR,bestpose,1.50,2.58
...,...,...,...,...
107,NNscore,spyRMSD,0.25,0.00
108,Vinardo,spyRMSD,0.75,0.00
109,RTMScore,spyRMSD,1.99,5.15
110,SCORCH,spyRMSD,2.74,5.15


'/home/alacournola/DEKOIS/dhi1/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,1.35,2.64
1,PLECnn,bestpose,0.54,2.64
2,KORPL,bestpose,0.27,0.00
3,CHEMPLP,bestpose,1.62,0.00
4,ConvexPLR,bestpose,0.54,0.00
...,...,...,...,...
107,NNscore,spyRMSD,0.54,0.00
108,Vinardo,spyRMSD,2.17,5.28
109,RTMScore,spyRMSD,2.17,5.28
110,SCORCH,spyRMSD,2.17,2.64


'/home/alacournola/DEKOIS/cox1/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,5.97,23.29
1,PLECnn,bestpose,2.99,7.76
2,KORPL,bestpose,1.99,2.59
3,CHEMPLP,bestpose,3.48,5.18
4,ConvexPLR,bestpose,3.48,5.18
...,...,...,...,...
107,NNscore,spyRMSD,0.75,0.00
108,Vinardo,spyRMSD,2.74,2.59
109,RTMScore,spyRMSD,1.49,2.59
110,SCORCH,spyRMSD,3.73,5.18


'/home/alacournola/DEKOIS/cdk2/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,3.00,7.62
1,PLECnn,bestpose,1.00,2.54
2,KORPL,bestpose,4.25,12.71
3,CHEMPLP,bestpose,1.00,0.00
4,ConvexPLR,bestpose,3.50,0.00
...,...,...,...,...
107,NNscore,spyRMSD,0.25,0.00
108,Vinardo,spyRMSD,1.25,0.00
109,RTMScore,spyRMSD,4.00,7.62
110,SCORCH,spyRMSD,2.00,5.08


'/home/alacournola/DEKOIS/ace/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,5.00,5.04
1,PLECnn,bestpose,0.75,0.00
2,KORPL,bestpose,4.25,15.11
3,CHEMPLP,bestpose,4.50,12.59
4,ConvexPLR,bestpose,2.50,7.56
...,...,...,...,...
107,NNscore,spyRMSD,0.75,0.00
108,Vinardo,spyRMSD,0.75,2.52
109,RTMScore,spyRMSD,0.50,0.00
110,SCORCH,spyRMSD,7.24,15.11


'/home/alacournola/DEKOIS/kpcb/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,5.75,7.69
1,PLECnn,bestpose,0.25,0.00
2,KORPL,bestpose,4.00,12.82
3,CHEMPLP,bestpose,1.00,0.00
4,ConvexPLR,bestpose,2.50,0.00
...,...,...,...,...
107,NNscore,spyRMSD,3.25,0.00
108,Vinardo,spyRMSD,2.50,2.56
109,RTMScore,spyRMSD,5.50,12.82
110,SCORCH,spyRMSD,1.75,0.00


'/home/alacournola/DEKOIS/igf1r/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,7.48,15.45
1,PLECnn,bestpose,1.25,0.00
2,KORPL,bestpose,5.23,15.45
3,CHEMPLP,bestpose,2.24,5.15
4,ConvexPLR,bestpose,3.49,7.72
...,...,...,...,...
107,NNscore,spyRMSD,2.74,5.15
108,Vinardo,spyRMSD,3.73,7.72
109,RTMScore,spyRMSD,3.73,15.44
110,SCORCH,spyRMSD,1.24,0.00


'/home/alacournola/DEKOIS/prgr/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,4.99,9.65
1,PLECnn,bestpose,1.00,0.00
2,KORPL,bestpose,3.49,16.89
3,CHEMPLP,bestpose,0.50,0.00
4,ConvexPLR,bestpose,1.25,2.41
...,...,...,...,...
107,NNscore,spyRMSD,2.50,9.65
108,Vinardo,spyRMSD,1.00,0.00
109,RTMScore,spyRMSD,1.75,9.65
110,SCORCH,spyRMSD,1.50,0.00


'/home/alacournola/DEKOIS/nram/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,5.25,20.50
1,PLECnn,bestpose,1.25,2.56
2,KORPL,bestpose,6.00,12.81
3,CHEMPLP,bestpose,2.00,2.56
4,ConvexPLR,bestpose,3.75,2.56
...,...,...,...,...
107,NNscore,spyRMSD,0.25,0.00
108,Vinardo,spyRMSD,0.75,0.00
109,RTMScore,spyRMSD,0.25,0.00
110,SCORCH,spyRMSD,4.00,2.56


'/home/alacournola/DEKOIS/kith/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,5.75,17.81
1,PLECnn,bestpose,0.00,0.00
2,KORPL,bestpose,6.26,15.26
3,CHEMPLP,bestpose,7.01,17.81
4,ConvexPLR,bestpose,5.25,10.17
...,...,...,...,...
107,NNscore,spyRMSD,2.50,5.09
108,Vinardo,spyRMSD,4.00,10.17
109,RTMScore,spyRMSD,0.00,0.00
110,SCORCH,spyRMSD,0.75,0.00


'/home/alacournola/DEKOIS/andr/temp/consensus'

Unnamed: 0,Scoring Function,Clustering Metric,EF10%,EF1%
0,RFScoreVS,bestpose,8.03,19.90
1,PLECnn,bestpose,0.50,0.00
2,KORPL,bestpose,4.26,19.90
3,CHEMPLP,bestpose,1.51,4.97
4,ConvexPLR,bestpose,2.51,0.00
...,...,...,...,...
107,NNscore,spyRMSD,0.25,0.00
108,Vinardo,spyRMSD,1.76,0.00
109,RTMScore,spyRMSD,2.76,2.49
110,SCORCH,spyRMSD,3.51,4.97


Merged CSV file saved to: merged_output_DEKOIS2.csv


In [None]:
import os
import pandas as pd

def rename_and_merge_csv_files(root_dir, target_file, output_file):
    merged_df = None

    for dirpath, dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename == target_file:
                filepath = os.path.join(dirpath, filename)
                df = pd.read_csv(filepath, index_col=0)
                dirname = os.path.basename(dirpath)
                df.rename(columns={"EF1%": dirpath.replace('/home/alacournola/DEKOIS-noprot2/', '').replace('/temp/consensus', '')}, inplace=True)
                if merged_df is None:
                    merged_df = df
                else:
                    merged_df = pd.merge(merged_df, df, on=["clustering_method", "selected_columns", 'method_name'])
                    print(merged_df.head())

    if merged_df is not None:
        # Adding the average column
        columns_to_exclude = ['method_name', 'selected_columns', 'clustering_metric']
        numeric_columns = [col for col in merged_df.columns if col not in columns_to_exclude]
        merged_df['Average'] = merged_df[numeric_columns].mean(axis=1)
        print(merged_df.head())
        merged_df.to_csv(output_file)
        print(f"Merged CSV file saved to: {output_file}")

root_directory = "/home/alacournola/DEKOIS-noprot2"
target_csv_file = "consensus_summary.csv"
output_csv_file = "merged_output_consensus_DUD-E-noprot2.csv"

rename_and_merge_csv_files(root_directory, target_csv_file, output_csv_file)
