In the previous step, we generated the candidate poses. Now, we filter the poses based on their RMSD to the reference ligands and on the protein-ligand interaction score.

# Now we have three cases in total:

1. the ligand has some references and has at least one pose with a customized score of 10 or less
2. the ligand has some references but does not have any pose with a customized score of 10 or less
3. the ligand has no references

for case 1:
    we will select the top 10 (or fewer when fewer than 10 pass the threshold) poses with the lowest RMSD to the reference ligands, and send them for protein-ligand interaction scoring to select the best pose

for case 2:
    we will do upsampling first to see if we can generate any acceptable pose, and fall back to the GBSA-optimized pose (selected by GBSA score) if that fails

for case 3:
    we will select the best GBSA-optimized pose based on the calculated GBSA score

In [1]:
import glob, os, shutil, pickle
from rdkit import Chem
from tqdm import tqdm
import numpy as np
import pandas as pd
from rdkit.Chem.rdFMCS import FindMCS

def _read_first_mol(sdf_path):
    """Return the first molecule of ``sdf_path`` (hydrogens removed), or None.

    None is returned when the file contains no records or when RDKit cannot
    parse the first record, so callers can skip the file instead of crashing.
    """
    supplier = Chem.SDMolSupplier(sdf_path, removeHs=True)
    if len(supplier) == 0:
        return None
    return supplier[0]


def select_ligand_from_multiple_references_simplified(folder_path, file_name = "candidate_ligands", weights = None):
    """
    Score every candidate pose of one ligand against all reference ligands.

    Simplified scoring for the case where all candidates are the same
    molecule in different poses: the maximum common substructure (MCS) with
    each reference is computed once, against the first readable candidate,
    and then reused for every pose.

    Parameters:
    -----------
    folder_path : str
        Ligand folder. Reference ligands are the ``*.sdf`` files directly
        inside it; candidate poses are the ``*.sdf`` files inside
        ``folder_path/file_name``.
    file_name : str, optional
        Name of the sub-folder holding the candidate poses
        (defaults to "candidate_ligands").
    weights : list, optional
        One weight per reference file, aligned with the alphabetically
        sorted list of reference files (defaults to equal weights of 1.0).

    Returns:
    --------
    dict or None
        Mapping ``pose path -> score`` (lower is better). None when there are
        no candidate files, no reference files, or no readable candidate.
        The dict can be empty when no reference shares an MCS of at least
        5 atoms with the ligand, or when no pose matches any MCS.
    """
    # Sorted so that the template choice and the weight alignment are
    # reproducible; glob itself returns files in arbitrary order.
    ligand_files = sorted(glob.glob(os.path.join(folder_path, file_name, "*.sdf")))
    reference_files = sorted(glob.glob(os.path.join(folder_path, "*.sdf")))
    if not ligand_files or not reference_files:
        return None

    candidate_mols = []
    for ligand_file in ligand_files:
        mol = _read_first_mol(ligand_file)
        if mol is not None:
            candidate_mols.append((ligand_file, mol))

    if not candidate_mols:
        return None

    # Use the first candidate to calculate MCS with references (since they're all the same molecule)
    template_mol = candidate_mols[0][1]

    # Everything that depends only on the reference is computed once here
    # rather than once per candidate pose.
    mcs_cache = []
    for i, ref_file in enumerate(reference_files):
        ref_mol = _read_first_mol(ref_file)
        if ref_mol is None:
            continue
        weight = 1.0 if weights is None else weights[i]

        mcs = FindMCS([ref_mol, template_mol])
        if mcs.numAtoms < 5:  # Skip if MCS is too small
            continue
        mcs_mol = Chem.MolFromSmarts(mcs.smartsString)
        ref_match = ref_mol.GetSubstructMatch(mcs_mol)
        if not ref_match:
            continue

        ref_coords = ref_mol.GetConformer().GetPositions()[list(ref_match)]
        fragment_fraction = len(ref_match) / ref_mol.GetNumAtoms()
        mcs_cache.append((mcs_mol, ref_coords, weight, fragment_fraction))

    # Score each candidate pose
    score_dict = {}
    for ligand_file, candidate_mol in candidate_mols:
        total_score = 0
        total_weight = 0
        candidate_positions = None  # fetched lazily, only if some MCS matches

        for mcs_mol, ref_coords, weight, fragment_fraction in mcs_cache:
            candidate_match = candidate_mol.GetSubstructMatch(mcs_mol)
            if not candidate_match:
                continue
            if candidate_positions is None:
                candidate_positions = candidate_mol.GetConformer().GetPositions()
            candidate_coords = candidate_positions[list(candidate_match)]

            # NOTE: this is the root of the *summed* squared deviation, i.e. it
            # is not divided by the number of matched atoms, so it grows with
            # fragment size. It is kept as is on purpose: the "score <= 10"
            # cut-off used by the selection cells is on this scale.
            rmsd = np.sqrt(np.sum((ref_coords - candidate_coords) ** 2))

            # Weight by reference importance and by how much of the reference
            # the common fragment covers.
            total_score += rmsd * weight * fragment_fraction
            total_weight += weight * fragment_fraction

        # Poses that matched no reference fragment get no entry at all.
        if total_weight > 0:
            score_dict[ligand_file] = total_score / total_weight

    return score_dict

## Sars-frag-filter

In [None]:
sars_test_prediction_folder = "/pscratch/sd/k/kysun/polaris/frag_docking_combined/frag-results-sars-test"

# Score the candidate poses of every SARS ligand against its reference ligands.
# Each ligand lives in <prediction folder>/<lig_name>/candidate_ligands.
frag_score_dict = {}
candidate_dirs = glob.glob(os.path.join(sars_test_prediction_folder, "*", "candidate_ligands"))
for folder in tqdm(candidate_dirs):
    ligand_dir = os.path.dirname(folder)
    lig_name = os.path.basename(ligand_dir)
    # Trailing separator kept so the folder string matches the previous form.
    ref_folder = os.path.join(ligand_dir, "")
    frag_score_dict[lig_name] = select_ligand_from_multiple_references_simplified(ref_folder, weights = None)

  0%|          | 0/98 [00:00<?, ?it/s]

100%|██████████| 98/98 [41:48<00:00, 25.60s/it]  


In [None]:
sars_test_prediction_folder = "/pscratch/sd/k/kysun/polaris/frag_docking_combined/frag-results-sars-test"
with open("frag_score_dict_sars_test.pkl", "rb") as handle:
    frag_score_dict = pickle.load(handle)

# get statistics
no_ref = sum(1 for scores in frag_score_dict.values() if scores is None)
print("Number of ligands with no reference: ", no_ref)

# Ligands that have references but no pose scoring 10 or less; these are the
# ones sent to enhanced sampling in the next step.
ligs_with_ref_bad_score = []

count = 0
for lig_name, pose_scores in frag_score_dict.items():
    top_10_dir = os.path.join(sars_test_prediction_folder, lig_name, "top_10_poses")
    # Start from an empty folder so poses from an earlier run cannot linger.
    shutil.rmtree(top_10_dir, ignore_errors=True)
    os.makedirs(top_10_dir, exist_ok=True)

    if pose_scores is None:
        # No reference: keep every candidate pose.
        for pose in glob.glob(os.path.join(sars_test_prediction_folder, lig_name, "candidate_ligands", "*.sdf")):
            shutil.copy(pose, top_10_dir)
        continue

    # for those with reference, select top 10 of all poses that has a score of 10 or less
    sorted_poses = sorted(pose_scores.items(), key=lambda x: x[1])
    top_poses = sorted_poses[:10]
    all_scores = [score for _, score in top_poses if score <= 10]

    if all_scores:
        for pose, score in top_poses:
            if score <= 10:
                shutil.copy(pose, top_10_dir)
        print(f"Ligand {lig_name} has {len(all_scores)} poses with a score of 10 or less, average score: {np.mean(all_scores)}")
    else:
        # Also reached when the score dict is empty (references exist but none
        # could be matched); nothing is copied in that case.
        count += 1
        ligs_with_ref_bad_score.append(lig_name)
        print(f"Ligand {lig_name} has no poses with a score of 10 or less, copying its 10 lowest-scoring poses instead")
        for pose, _ in top_poses:
            shutil.copy(pose, top_10_dir)

print(f"Number of ligands with no poses with a score of 10 or less: {count}")
            

In [8]:
# Re-score the enhanced-sampling poses of the ligands that have references
# but no pose with a score of 10 or less.
ligs_to_search = ligs_with_ref_bad_score
sars_test_prediction_folder = "/pscratch/sd/k/kysun/polaris/frag_docking_combined/frag-results-sars-test"

frag_score_dict_enhanced = {}
for lig in tqdm(ligs_to_search):
    lig_name = lig
    folder = os.path.join(sars_test_prediction_folder, lig_name, "candidate_ligands_enhanced")
    # Ligand folder with a trailing separator; it holds the reference SDFs.
    ref_folder = os.path.join(sars_test_prediction_folder, lig_name, "")
    enhanced_scores = select_ligand_from_multiple_references_simplified(ref_folder, file_name = "candidate_ligands_enhanced")
    frag_score_dict_enhanced[lig_name] = enhanced_scores

100%|██████████| 11/11 [03:05<00:00, 16.90s/it]


In [17]:
with open("frag_score_dict_enhanced_sars_test.pkl", "rb") as handle:
    frag_score_dict_enhanced = pickle.load(handle)

# Copy the enhanced-sampling poses that pass the score threshold into
# <ligand>/top_10_poses_enhanced; the folder only exists afterwards when at
# least one pose passed.
for lig_name, pose_scores in frag_score_dict_enhanced.items():
    enhanced_dir = os.path.join(sars_test_prediction_folder, lig_name, "top_10_poses_enhanced")
    shutil.rmtree(enhanced_dir, ignore_errors=True)

    if pose_scores is None:
        print(f"Ligand {lig_name} has no poses through enhanced sampling")
        continue

    sorted_poses = sorted(pose_scores.items(), key=lambda x: x[1])
    good_poses = [pose for pose, score in sorted_poses[:10] if score <= 10]
    if not good_poses:
        # Covers both "every score is above 10" and an empty score dict.
        print(f"Ligand {lig_name} has no better poses through enhanced sampling")
        continue

    os.makedirs(enhanced_dir, exist_ok=True)
    for pose in good_poses:
        shutil.copy(pose, enhanced_dir)

Ligand lig37 has no better poses through enhanced sampling
Ligand lig74 has no better poses through enhanced sampling
Ligand lig55 has no better poses through enhanced sampling
Ligand lig75 has no poses through enhanced sampling
Ligand lig48 has no better poses through enhanced sampling
Ligand lig69 has no poses through enhanced sampling
Ligand lig46 has no better poses through enhanced sampling
Ligand lig21 has no better poses through enhanced sampling
Ligand lig63 has no better poses through enhanced sampling


## Mers-frag-filter

In [5]:
mers_test_prediction_folder = "/pscratch/sd/k/kysun/polaris/frag_docking_combined/frag-results-mers-test"

# Score the candidate poses of every MERS ligand against its reference ligands.
# Each ligand lives in <prediction folder>/<lig_name>/candidate_ligands.
frag_score_dict_mers = {}
for folder in tqdm(glob.glob(os.path.join(mers_test_prediction_folder, "*", "candidate_ligands"))):
    ligand_dir = os.path.dirname(folder)
    lig_name = os.path.basename(ligand_dir)
    # Trailing separator kept so the folder string matches the previous form.
    ref_folder = os.path.join(ligand_dir, "")
    frag_score_dict_mers[lig_name] = select_ligand_from_multiple_references_simplified(ref_folder, weights = None)

  0%|          | 0/97 [00:00<?, ?it/s]

In [None]:
mers_test_prediction_folder = "/pscratch/sd/k/kysun/polaris/frag_docking_combined/frag-results-mers-test"
with open("frag_score_dict_mers_test.pkl", "rb") as handle:
    frag_score_dict_mers = pickle.load(handle)

# get statistics
no_ref = sum(1 for scores in frag_score_dict_mers.values() if scores is None)
print("Number of ligands with no reference: ", no_ref)

# Ligands that have references but no pose scoring 10 or less; these are the
# ones sent to enhanced sampling in the next step.
ligs_with_ref_bad_score_mers = []

count = 0
for lig_name, pose_scores in frag_score_dict_mers.items():
    top_10_dir = os.path.join(mers_test_prediction_folder, lig_name, "top_10_poses")
    # Start from an empty folder (as the SARS cell does) so poses from an
    # earlier run cannot linger.
    shutil.rmtree(top_10_dir, ignore_errors=True)
    os.makedirs(top_10_dir, exist_ok=True)

    if pose_scores is None:
        # No reference: keep every candidate pose.
        for pose in glob.glob(os.path.join(mers_test_prediction_folder, lig_name, "candidate_ligands", "*.sdf")):
            shutil.copy(pose, top_10_dir)
        continue

    # for those with reference, select top 10 of all poses that has a score of 10 or less
    sorted_poses = sorted(pose_scores.items(), key=lambda x: x[1])
    top_poses = sorted_poses[:10]
    all_scores = [score for _, score in top_poses if score <= 10]

    if all_scores:
        for pose, score in top_poses:
            if score <= 10:
                shutil.copy(pose, top_10_dir)
        print(f"Ligand {lig_name} has {len(all_scores)} poses with a score of 10 or less, average score: {np.mean(all_scores)}")
    else:
        # Also reached when the score dict is empty (references exist but none
        # could be matched); nothing is copied in that case.
        count += 1
        ligs_with_ref_bad_score_mers.append(lig_name)
        print(f"Ligand {lig_name} has no poses with a score of 10 or less, copying its 10 lowest-scoring poses instead")
        for pose, _ in top_poses:
            shutil.copy(pose, top_10_dir)

print(f"Number of ligands with no poses with a score of 10 or less: {count}")

In [12]:
# Re-score the enhanced-sampling poses of the MERS ligands that have
# references but no pose with a score of 10 or less.
ligs_to_search = ligs_with_ref_bad_score_mers
mers_test_prediction_folder = "/pscratch/sd/k/kysun/polaris/frag_docking_combined/frag-results-mers-test"

frag_score_dict_enhanced_mers = {}
for lig in tqdm(ligs_to_search):
    lig_name = lig
    folder = os.path.join(mers_test_prediction_folder, lig_name, "candidate_ligands_enhanced")
    # Ligand folder with a trailing separator; it holds the reference SDFs.
    ref_folder = os.path.join(mers_test_prediction_folder, lig_name, "")
    enhanced_scores = select_ligand_from_multiple_references_simplified(ref_folder, file_name = "candidate_ligands_enhanced")
    frag_score_dict_enhanced_mers[lig_name] = enhanced_scores

100%|██████████| 14/14 [03:40<00:00, 15.75s/it]


In [18]:
with open("frag_score_dict_enhanced_mers_test.pkl", "rb") as handle:
    frag_score_dict_enhanced_mers = pickle.load(handle)

# Copy the enhanced-sampling poses that pass the score threshold into
# <ligand>/top_10_poses_enhanced; the folder only exists afterwards when at
# least one pose passed.
for lig_name, pose_scores in frag_score_dict_enhanced_mers.items():
    enhanced_dir = os.path.join(mers_test_prediction_folder, lig_name, "top_10_poses_enhanced")
    shutil.rmtree(enhanced_dir, ignore_errors=True)

    if pose_scores is None:
        print(f"Ligand {lig_name} has no poses through enhanced sampling")
        continue

    sorted_poses = sorted(pose_scores.items(), key=lambda x: x[1])
    good_poses = [pose for pose, score in sorted_poses[:10] if score <= 10]
    if not good_poses:
        # Covers both "every score is above 10" and an empty score dict.
        print(f"Ligand {lig_name} has no better poses through enhanced sampling")
        continue

    os.makedirs(enhanced_dir, exist_ok=True)
    for pose in good_poses:
        shutil.copy(pose, enhanced_dir)

Ligand lig17 has no poses through enhanced sampling
Ligand lig34 has no better poses through enhanced sampling
Ligand lig66 has no poses through enhanced sampling
Ligand lig90 has no poses through enhanced sampling
Ligand lig93 has no better poses through enhanced sampling
Ligand lig12 has no better poses through enhanced sampling
Ligand lig27 has no poses through enhanced sampling
Ligand lig31 has no better poses through enhanced sampling
Ligand lig8 has no poses through enhanced sampling


In [14]:
# Summary: how many ligands per dataset have references but no pose with a
# score of 10 or less.
n_bad_sars = len(ligs_with_ref_bad_score)
n_bad_mers = len(ligs_with_ref_bad_score_mers)
print(
    "SARS ligand that has reference but bad score: ", n_bad_sars,
    "MERS ligand that has reference but bad score: ", n_bad_mers,
)

SARS ligand that has reference but bad score:  11 MERS ligand that has reference but bad score:  14


In [12]:
# clean up the top_10_poses folder for Sars-test
#
# For every ligand: (1) when the folder holds more than 10 poses, drop the
# conf3..conf9 files; (2) rename the remaining poses to
# <protein>_<mcsce|exp>_docked_<conf>.sdf; (3) copy the matching protein
# structure next to the top_10_poses folder.
polaris_protein_folder = "/pscratch/sd/k/kysun/polaris/polaris-proteins"
conf_to_delete = {f"conf{i}.sdf" for i in range(3, 10)}

for folder in glob.glob(os.path.join(sars_test_prediction_folder, "*", "top_10_poses")):
    sdf_files = glob.glob(os.path.join(folder, "*.sdf"))
    if len(sdf_files) > 10:
        for file in sdf_files:
            if file.split("_")[-1] in conf_to_delete:
                os.remove(file)

    folder_for_protein = folder.replace("/top_10_poses", "")

    for file in glob.glob(os.path.join(folder, "*.sdf")):
        base_name = os.path.basename(file)
        # The already-renamed form is tested first: "<p>_mcsce_docked_confN.sdf"
        # also contains "docked_conf", so on a second run it would otherwise be
        # relabelled as an "exp" pose.
        if "_mcsce_docked_" in base_name or "docked_mcsce" in base_name:
            source = "mcsce"
        elif "docked_conf" in base_name:
            source = "exp"
        else:
            continue

        protein_name = base_name.split("_")[0]
        conf = base_name.split("_")[-1].replace(".sdf", "")
        new_name = os.path.join(folder, f"{protein_name}_{source}_docked_{conf}.sdf")
        if file != new_name:
            os.rename(file, new_name)

        protein_file = os.path.join(polaris_protein_folder, source, f"{protein_name}_fixed.pdb")
        protein_copy = os.path.join(folder_for_protein, f"{protein_name}_{source}_protein.pdb")
        if not os.path.exists(protein_copy):
            shutil.copy(protein_file, protein_copy)

    

In [19]:
# select top 1 pose if there are homologs and similar fragments
sars_test_prediction_folder = "/pscratch/sd/k/kysun/polaris/frag_docking_combined/frag-results-sars-test"
sars_gbsa_folder = "/pscratch/sd/k/kysun/polaris/final_results/gbsa/frag-results-sars-test"
with open("frag_score_dict_sars_test.pkl", "rb") as handle:
    frag_score_dict = pickle.load(handle)
with open("frag_score_dict_enhanced_sars_test.pkl", "rb") as handle:
    frag_score_dict_enhanced = pickle.load(handle)
no_ref = sum(1 for scores in frag_score_dict.values() if scores is None)
print("Number of ligands with no reference: ", no_ref)


def _best_scored_pose(pose_scores, max_score=10):
    """Return the lowest-scoring pose path if its score is <= max_score, else None.

    ``pose_scores`` is a ``pose path -> score`` dict; None or an empty dict
    yields None.
    """
    if not pose_scores:
        return None
    pose, score = min(pose_scores.items(), key=lambda item: item[1])
    return pose if score <= max_score else None


# Selection order per ligand:
#   1. best regular pose with a score of 10 or less,
#   2. otherwise the best enhanced-sampling pose with a score of 10 or less,
#   3. otherwise the top GBSA-optimized pose.
# Step 2 used to be skipped for exactly the ligands that were re-sampled,
# because a ligand with references but no good pose jumped straight to step 3.
for lig_name, pose_scores in frag_score_dict.items():
    top_1_dir = os.path.join(sars_test_prediction_folder, lig_name, "top_1_poses")
    shutil.rmtree(top_1_dir, ignore_errors=True)

    pose = _best_scored_pose(pose_scores)
    if pose is None:
        # Only ligands that went through enhanced sampling have an entry here.
        pose = _best_scored_pose(frag_score_dict_enhanced.get(lig_name))

    if pose is not None:
        try:
            os.makedirs(top_1_dir, exist_ok=True)
            shutil.copy(pose, top_1_dir)
            continue
        except OSError as err:
            # Keep the previous best-effort behaviour, but say why we fall back.
            print(f"Ligand {lig_name}: could not copy {pose} ({err}), falling back to the GBSA pose")

    gbsa_csv_path = os.path.join(sars_gbsa_folder, lig_name, "top_10_gbsa.csv")
    top1_ligand = pd.read_csv(gbsa_csv_path)['name'].values[0]
    os.makedirs(top_1_dir, exist_ok=True)
    shutil.copy(os.path.join(sars_gbsa_folder, lig_name, "top_10_poses_opt", f"{top1_ligand}_opt.sdf"), top_1_dir)

Number of ligands with no reference:  17


In [20]:
# Sanity check: list the SARS ligands that ended up without a top_1_poses folder.
with open("frag_score_dict_sars_test.pkl", "rb") as handle:
    frag_score_dict = pickle.load(handle)
lig_name_no_frag_sars_test = [
    lig_name
    for lig_name in frag_score_dict
    if not os.path.exists(os.path.join(sars_test_prediction_folder, lig_name, "top_1_poses"))
]

# Both values go through print; the old `print(x), len(x)` built the tuple
# (None, n) and only showed the count as the cell's result.
print(lig_name_no_frag_sars_test, len(lig_name_no_frag_sars_test))

[]


(None, 0)

In [13]:
# clean up the top_10_poses folder for Mers-test
#
# For every ligand: (1) when the folder holds more than 10 poses, drop the
# conf6..conf9 files; (2) rename the remaining poses to
# <protein>_<mcsce|exp>_docked_<conf>.sdf; (3) copy the matching protein
# structure next to the top_10_poses folder.
polaris_protein_folder = "/pscratch/sd/k/kysun/polaris/polaris-proteins"
conf_to_delete = {f"conf{i}.sdf" for i in range(6, 10)}

for folder in glob.glob(os.path.join(mers_test_prediction_folder, "*", "top_10_poses")):
    sdf_files = glob.glob(os.path.join(folder, "*.sdf"))
    if len(sdf_files) > 10:
        for file in sdf_files:
            if file.split("_")[-1] in conf_to_delete:
                os.remove(file)

    folder_for_protein = folder.replace("/top_10_poses", "")

    for file in glob.glob(os.path.join(folder, "*.sdf")):
        base_name = os.path.basename(file)
        # The already-renamed form is tested first: "<p>_mcsce_docked_confN.sdf"
        # also contains "docked_conf", so on a second run it would otherwise be
        # relabelled as an "exp" pose.
        if "_mcsce_docked_" in base_name or "docked_mcsce" in base_name:
            source = "mcsce"
        elif "docked_conf" in base_name:
            source = "exp"
        else:
            continue

        protein_name = base_name.split("_")[0]
        conf = base_name.split("_")[-1].replace(".sdf", "")
        new_name = os.path.join(folder, f"{protein_name}_{source}_docked_{conf}.sdf")
        if file != new_name:
            os.rename(file, new_name)

        protein_file = os.path.join(polaris_protein_folder, source, f"{protein_name}_fixed.pdb")
        protein_copy = os.path.join(folder_for_protein, f"{protein_name}_{source}_protein.pdb")
        if not os.path.exists(protein_copy):
            shutil.copy(protein_file, protein_copy)

    

In [21]:
# select top 1 pose for Mers-test
mers_test_prediction_folder = "/pscratch/sd/k/kysun/polaris/frag_docking_combined/frag-results-mers-test"
mers_gbsa_folder = "/pscratch/sd/k/kysun/polaris/final_results/gbsa/frag-results-mers-test"
with open("frag_score_dict_mers_test.pkl", "rb") as handle:
    frag_score_dict_mers = pickle.load(handle)
with open("frag_score_dict_enhanced_mers_test.pkl", "rb") as handle:
    frag_score_dict_enhanced_mers = pickle.load(handle)
no_ref = sum(1 for scores in frag_score_dict_mers.values() if scores is None)
print("Number of ligands with no reference: ", no_ref)


def _best_scored_pose(pose_scores, max_score=10):
    """Return the lowest-scoring pose path if its score is <= max_score, else None.

    ``pose_scores`` is a ``pose path -> score`` dict; None or an empty dict
    yields None.
    """
    if not pose_scores:
        return None
    pose, score = min(pose_scores.items(), key=lambda item: item[1])
    return pose if score <= max_score else None


# Selection order per ligand:
#   1. best regular pose with a score of 10 or less,
#   2. otherwise the best enhanced-sampling pose with a score of 10 or less,
#   3. otherwise the top GBSA-optimized pose.
# Step 2 used to be skipped for exactly the ligands that were re-sampled,
# because a ligand with references but no good pose jumped straight to step 3.
for lig_name, pose_scores in frag_score_dict_mers.items():
    top_1_dir = os.path.join(mers_test_prediction_folder, lig_name, "top_1_poses")
    shutil.rmtree(top_1_dir, ignore_errors=True)

    pose = _best_scored_pose(pose_scores)
    if pose is None:
        # Only ligands that went through enhanced sampling have an entry here.
        pose = _best_scored_pose(frag_score_dict_enhanced_mers.get(lig_name))

    if pose is not None:
        try:
            os.makedirs(top_1_dir, exist_ok=True)
            shutil.copy(pose, top_1_dir)
            continue
        except OSError as err:
            # Keep the previous best-effort behaviour, but say why we fall back.
            print(f"Ligand {lig_name}: could not copy {pose} ({err}), falling back to the GBSA pose")

    gbsa_csv_path = os.path.join(mers_gbsa_folder, lig_name, "top_10_gbsa.csv")
    top1_ligand = pd.read_csv(gbsa_csv_path)['name'].values[0]
    os.makedirs(top_1_dir, exist_ok=True)
    shutil.copy(os.path.join(mers_gbsa_folder, lig_name, "top_10_poses_opt", f"{top1_ligand}_opt.sdf"), top_1_dir)

Number of ligands with no reference:  10


In [22]:
# Sanity check: list the MERS ligands that ended up without a top_1_poses folder.
with open("frag_score_dict_mers_test.pkl", "rb") as handle:
    frag_score_dict_mers = pickle.load(handle)
lig_name_no_frag_mers_test = [
    lig_name
    for lig_name in frag_score_dict_mers
    if not os.path.exists(os.path.join(mers_test_prediction_folder, lig_name, "top_1_poses"))
]

# Both values go through print; the old `print(x), len(x)` built the tuple
# (None, n) and only showed the count as the cell's result.
print(lig_name_no_frag_mers_test, len(lig_name_no_frag_mers_test))

[]


(None, 0)

## Final Submission of the results

In [23]:
# Per-dataset folders holding one sub-folder per ligand.
sars_test_prediction_folder = "/pscratch/sd/k/kysun/polaris/frag_docking_combined/frag-results-sars-test"
mers_test_prediction_folder = "/pscratch/sd/k/kysun/polaris/frag_docking_combined/frag-results-mers-test"

# Output roots used by the submission cells below.
submission_folder = "/pscratch/sd/k/kysun/polaris/final_results/submission"
raw_results_folder = "/pscratch/sd/k/kysun/polaris/final_results/raw_results"

### If there are references and the score is good, we select the top 1 pose based on the score

In [24]:
with open("frag_score_dict_mers_test.pkl", "rb") as handle:
    frag_score_dict_mers = pickle.load(handle)

# Rebuild both MERS output folders from scratch.
mers_raw_dir = os.path.join(raw_results_folder, "mers_test")
mers_final_dir = os.path.join(submission_folder, "mers_test_final")
for out_dir in (mers_raw_dir, mers_final_dir):
    shutil.rmtree(out_dir, ignore_errors=True)
    os.makedirs(out_dir, exist_ok=True)

for lig_name in frag_score_dict_mers:
    lig_dir = os.path.join(mers_test_prediction_folder, lig_name)
    top_1_dir = os.path.join(lig_dir, "top_1_poses")

    if not os.path.exists(top_1_dir):
        # No single pose was selected: export the top-10 poses plus the
        # protein structures as raw results instead.
        lig_raw_dir = os.path.join(mers_raw_dir, lig_name)
        shutil.copytree(os.path.join(lig_dir, "top_10_poses"), lig_raw_dir)
        for file in glob.glob(os.path.join(lig_dir, "*.pdb")):
            shutil.copy(file, lig_raw_dir)
        continue

    # Sorted so the choice is reproducible should the folder hold several files.
    top_1_files = sorted(glob.glob(os.path.join(top_1_dir, "*.sdf")))
    if not top_1_files:
        raise FileNotFoundError(f"No .sdf pose found in {top_1_dir} for ligand {lig_name}")
    shutil.copy(top_1_files[0], os.path.join(mers_final_dir, f"{lig_name}.sdf"))

In [25]:
with open("frag_score_dict_sars_test.pkl", "rb") as handle:
    frag_score_dict_sars = pickle.load(handle)

# Rebuild both SARS output folders from scratch.
sars_raw_dir = os.path.join(raw_results_folder, "sars_test")
sars_final_dir = os.path.join(submission_folder, "sars_test_final")
for out_dir in (sars_raw_dir, sars_final_dir):
    shutil.rmtree(out_dir, ignore_errors=True)
    os.makedirs(out_dir, exist_ok=True)

for lig_name in frag_score_dict_sars:
    lig_dir = os.path.join(sars_test_prediction_folder, lig_name)
    top_1_dir = os.path.join(lig_dir, "top_1_poses")

    if not os.path.exists(top_1_dir):
        # No single pose was selected: export the top-10 poses plus the
        # protein structures as raw results instead.
        lig_raw_dir = os.path.join(sars_raw_dir, lig_name)
        shutil.copytree(os.path.join(lig_dir, "top_10_poses"), lig_raw_dir)
        for file in glob.glob(os.path.join(lig_dir, "*.pdb")):
            shutil.copy(file, lig_raw_dir)
        continue

    # Sorted so the choice is reproducible should the folder hold several files.
    top_1_files = sorted(glob.glob(os.path.join(top_1_dir, "*.sdf")))
    if not top_1_files:
        raise FileNotFoundError(f"No .sdf pose found in {top_1_dir} for ligand {lig_name}")
    shutil.copy(top_1_files[0], os.path.join(sars_final_dir, f"{lig_name}.sdf"))