**Import required libraries and scripts**

In [None]:
#Import required libraries and scripts
from scripts.library_preparation import *
from scripts.utilities import *
from scripts.docking_functions2 import *
from scripts.clustering_functions import *
from scripts.rescoring_functions import *
from scripts.consensus_methods import *
from scripts.performance_calculation import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *

# --- Input locations -------------------------------------------------------
software = '/home/mario/DockM8/software'
protein_file = '/media/mario/T7/FINISHED/FINISHED/xiap/receptor_protoss_prepared.pdb'
ref_file = '/media/mario/T7/FINISHED/FINISHED/xiap/crystal_ligand_protoss.sdf'
docking_library = '/media/mario/T7/FINISHED/FINISHED/xiap/merged_actives_decoys.sdf'

# --- Pipeline settings (consumed by the cells below) -----------------------
docking_programs = ['GNINA', 'SMINA', 'PLANTS']
clustering_metrics = ['RMSD', 'spyRMSD', 'espsim', '3DScore', 'bestpose', 'bestpose_GNINA', 'bestpose_SMINA', 'bestpose_PLANTS']
rescoring_functions = ['gnina', 'AD4', 'chemplp', 'rfscorevs', 'LinF9', 'SCORCH', 'RTMScore', 'vinardo']
id_column = 'ID'
n_poses = 10
exhaustiveness = 8
protonation = 'pkasolver'
parallel = 1
# Use half of the available cores, but never fewer than one:
# os.cpu_count() may return None, and halving a single core would give 0.
ncpus = max(1, (os.cpu_count() or 1) // 2)
# Selects how the binding pocket is defined: 'reference' or 'dogsitescorer'.
pocket = 'reference'

#Create a temporary folder for all further calculations
w_dir = os.path.dirname(protein_file)
print('The working directory has been set to:', w_dir)
create_temp_folder(w_dir+'/temp')

In [None]:
# Define the binding pocket only when no '<receptor>_pocket.pdb' file exists yet.
if not os.path.isfile(protein_file.replace('.pdb', '_pocket.pdb')):
    if pocket == 'dogsitescorer':
        pocket_definition = binding_site_coordinates_dogsitescorer(protein_file, w_dir, method='volume')
    elif pocket == 'reference':
        # NOTE(review): the literal 8 is presumably a distance cutoff around
        # the reference ligand - confirm against GetPocket.
        pocket_definition = GetPocket(ref_file, protein_file, 8)

In [None]:
# Skip library preparation when temp/final_library.sdf is already present.
if not os.path.isfile(w_dir+'/temp/final_library.sdf'):
    prepare_library(
        docking_library,
        id_column,
        software,
        protonation,
        ncpus,
    )

In [None]:
# Dock the library with every program listed in docking_programs.
docking(
    w_dir,
    protein_file,
    ref_file,
    software,
    docking_programs,
    exhaustiveness,
    n_poses,
    ncpus,
)

In [None]:
# Load the poses stored in temp/allposes.sdf into a DataFrame and time the load.
print('Loading all poses SDF file...')
tic = time.perf_counter()
all_poses = PandasTools.LoadSDF(
    w_dir+'/temp/allposes.sdf',
    idName='Pose ID',
    molColName='Molecule',
    includeFingerprints=False,
    strictParsing=True,
)
toc = time.perf_counter()
print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')


In [None]:
# Cluster the poses once per metric; metrics whose clustered SDF already
# exists are skipped so the cell can be re-run cheaply.
for metric in clustering_metrics:
    if os.path.isfile(w_dir+f'/temp/clustering/{metric}_clustered.sdf'):
        continue
    cluster_pebble(metric, 'KMedoids', w_dir, protein_file, all_poses, ncpus)

In [None]:
# Rescore the clustered poses of every metric with all rescoring functions.
for metric in clustering_metrics:
    rescore_all(
        w_dir,
        protein_file,
        ref_file,
        software,
        w_dir+f'/temp/clustering/{metric}_clustered.sdf',
        rescoring_functions,
        ncpus,
    )


In [None]:
# Performance evaluation over all clustering metrics: first the single
# scoring functions, then the consensus-method combinations.
calculate_EF_single_functions(
    w_dir,
    docking_library,
    clustering_metrics,
)
apply_consensus_methods_combinations(
    w_dir,
    docking_library,
    clustering_metrics,
)

In [None]:
def apply_consensus_methods(w_dir, clustering_metric, method, rescoring_functions):
    """Apply one consensus method to the rescored poses of one clustering metric.

    Reads ``<w_dir>/temp/rescoring_<clustering_metric>_clustered/allposes_rescored.csv``,
    writes every table returned by ``process_dataframes`` to
    ``<w_dir>/temp/ranking/<name>.csv`` and writes the consensus result to
    ``<w_dir>/temp/consensus/<clustering_metric>_<method>_results.csv``.

    Args:
        w_dir: Working directory that contains the ``temp`` folder.
        clustering_metric: Clustering metric whose rescoring results are used,
            e.g. ``'bestpose'``.
        method: ``'method1'``..``'method4'`` operate on the ranked table,
            ``'method5'``..``'method7'`` on the standardised table.
        rescoring_functions: Currently unused - the previewed columns are
            fixed. Kept so existing callers keep working.

    Raises:
        ValueError: If ``method`` is not one of the seven known methods. The
            check happens before any file is read or written.
    """
    # Which table each consensus method consumes. Validated first so that a
    # mistyped method name fails before any files are read or written.
    input_kind_by_method = {
        'method1': 'ranked',
        'method2': 'ranked',
        'method3': 'ranked',
        'method4': 'ranked',
        'method5': 'standardised',
        'method6': 'standardised',
        'method7': 'standardised',
    }
    if method not in input_kind_by_method:
        raise ValueError(f"Invalid method: {method}")

    create_temp_folder(w_dir+'/temp/ranking')
    rescoring_folder = f'rescoring_{clustering_metric}_clustered'
    rescored_dataframe = pd.read_csv(w_dir + f'/temp/{rescoring_folder}/allposes_rescored.csv')
    standardised_dataframe = standardize_scores(rescored_dataframe)

    # Diagnostic preview only: the filtered frame is printed and not used
    # afterwards; the consensus input comes from process_dataframes below.
    col_list = ['Pose ID', 'GNINA', 'CNN-Score', 'RTMScore']
    filtered_dataframe = standardised_dataframe[col_list]
    print(filtered_dataframe)

    standardised_dataframes, ranked_dataframes = process_dataframes(w_dir, {clustering_metric: rescoring_folder})
    for df_dict in (standardised_dataframes, ranked_dataframes):
        for df_name, df in df_dict.items():
            # The compound ID is the part of the pose ID before the first '_'.
            df['ID'] = df['Pose ID'].str.split('_').str[0]
            df.to_csv(w_dir + f'/temp/ranking/{df_name}.csv', index=False)

    create_temp_folder(w_dir+'/temp/consensus')
    consensus_functions = {
        'method1': method1_ECR_best,
        'method2': method2_ECR_average,
        'method3': method3_avg_ECR,
        'method4': method4_RbR,
        'method5': method5_RbV,
        'method6': method6_Zscore_best,
        'method7': method7_Zscore_avg,
    }

    input_kind = input_kind_by_method[method]
    source_tables = ranked_dataframes if input_kind == 'ranked' else standardised_dataframes
    input_dataframe = source_tables[f'{clustering_metric}_{input_kind}']
    # Every column except the identifiers is treated as a score column.
    score_columns = [col for col in input_dataframe if col not in ['Pose ID', 'ID']]
    analysed_dataframe = consensus_functions[method](input_dataframe, clustering_metric, score_columns)

    print(analysed_dataframe)
    analysed_dataframe = analysed_dataframe.drop(columns="Pose ID", errors='ignore')
    analysed_dataframe.to_csv(w_dir+f'/temp/consensus/{clustering_metric}_{method}_results.csv', index=False)

# Run Z-score consensus ('method6') on the 'bestpose' clustering results.
apply_consensus_methods(
    w_dir,
    clustering_metric='bestpose',
    method='method6',
    rescoring_functions=['gnina', 'RTMScore'],
)

In [None]:
# Join the consensus results with the prepared library (SMILES + ID) and save
# the merged table. Paths are derived from w_dir so this cell reads the files
# of the target configured above, not a hard-coded directory of another
# target.
results = pd.read_csv(w_dir+'/temp/consensus/bestpose_method6_results.csv')
database = PandasTools.LoadSDF(w_dir+'/temp/final_library.sdf', molColName=None, smilesName='SMILES', idName='ID')
# Inner join: keep only compounds present in both the library and the results.
final = pd.merge(database, results, on='ID', how='inner')
final.to_csv(w_dir+'/temp/consensus/results_for_ML.csv', index=False)

In [None]:
# Shell command, not Python: the leading '!' makes IPython pass it to the
# system shell (without it the cell is a SyntaxError). Scores the
# RMSD-clustered poses of the 'ace' target with gnina's vinardo scoring
# (--score_only, CNN scoring disabled, no GPU). './gnina' is relative, so the
# notebook's working directory must contain the gnina binary.
!./gnina -r /media/mario/T7/FINISHED/FINISHED/ace/receptor_prepared_protoss.pdb -l /media/mario/T7/FINISHED/FINISHED/ace/temp/clustering/RMSD_clustered.sdf --autobox_ligand /media/mario/T7/FINISHED/FINISHED/ace/crystal_ligand_protoss.sdf -o /media/mario/T7/FINISHED/FINISHED/ace/temp/rescoring_RMSD_clustered/vinardo_rescoring/vinardo_scores.csv --score_only --scoring vinardo --cnn_scoring none --no_gpu