Comparing structural RMSD:

- Comparing the co-folding model with the wild-type EST-bound AcrR
- Comparing the mutant apo AcrR with the wild-type apo AcrR

In [None]:
from helperfunction import boltzutils
import os 
import json
import pandas as pd
from helperfunction import eval_mut_comb
def prase_boltz_confidence_results(pdb_boltz_folder: str, prot_name: str, wt_pdb: str) -> dict:
    """
    Parse the Boltz confidence output of every prediction found under a folder.

    For each direct subfolder ``<name>`` of ``pdb_boltz_folder`` the function
    looks into ``<name>/predictions/<run>/`` (the alphabetically first entry of
    ``predictions`` is used) and expects three files there:

    * a JSON file whose name starts with ``confidence_``
    * a ``plddt_*.npz`` file, e.g. ``plddt_1flm_HCY1_1_model_0.npz``
    * a PDB file whose name ends with ``cleanH.pdb``

    Subfolders where the run directory or any of these files is missing are
    skipped and recorded in ``<prot_name>_confidence_errors.csv`` (columns
    ``prot`` and ``error``), which is written into ``pdb_boltz_folder`` only
    when at least one subfolder failed.

    Args:
        pdb_boltz_folder (str): The path to the folder containing Boltz predictions.
        prot_name (str): The name of the protein, e.g. '1flm'. Only used as the
            prefix of the error CSV file name.
        wt_pdb (str): Path to the wild-type PDB file; passed unchanged to
            ``eval_mut_comb.parse_prot_mut_plddt`` together with the predicted
            clean PDB and the pLDDT file.

    Returns:
        dict: ``{subfolder_name: {"mean_plddt": float, "confidence": parsed_json}}``
        for every subfolder that was parsed successfully. ``parsed_json`` is
        the content of the ``confidence_*`` JSON file as loaded by ``json.load``.
    """
    # Sorted so that processing order (and the error CSV) is reproducible.
    subfolders = sorted(f.path for f in os.scandir(pdb_boltz_folder) if f.is_dir())

    error_rows = []  # one {"prot", "error"} dict per skipped subfolder
    score_all = {}  # to collect all scores

    for sub_path in subfolders:
        # name of the subfolder, e.g. 1flm_hcy12_87
        name_prefix = os.path.basename(sub_path)

        predict_path = os.path.join(sub_path, "predictions")
        print(predict_path)

        # Only the file discovery lives in the try block: a missing directory
        # or file means "incomplete prediction", which is logged and skipped.
        try:
            run_dir = os.path.join(predict_path, sorted(os.listdir(predict_path))[0])
            print(f'processing {run_dir}')
            run_files = sorted(os.listdir(run_dir))

            confidence_json = [name for name in run_files if name.startswith("confidence_")]
            print(confidence_json)
            confidence_file_path = os.path.join(run_dir, confidence_json[0])

            # per-residue pLDDT, e.g. plddt_1flm_HCY1_1_model_0.npz
            plddt_file = [
                name for name in run_files
                if name.startswith("plddt_") and name.endswith(".npz")
            ][0]
            plddt_file_path = os.path.join(run_dir, plddt_file)

            clean_pdb = [name for name in run_files if name.endswith("cleanH.pdb")][0]
            print(f"clean pdb found in {run_dir}")
            clean_pdb_path = os.path.join(run_dir, clean_pdb)
        except IndexError:
            print(f"No confidence files found in {predict_path}. Skipping...")
            error_rows.append({"prot": name_prefix, "error": "No confidence files found"})
            continue
        except (FileNotFoundError, NotADirectoryError) as exc:
            print(f"Cannot read predictions in {predict_path}: {exc}. Skipping...")
            error_rows.append({"prot": name_prefix, "error": "Predictions folder not readable"})
            continue

        # This call had been commented out, which left ``mean_plddt`` undefined
        # and made the float() conversion below raise UnboundLocalError.
        # NOTE(review): the helper is project code; the call form is taken from
        # the previously disabled line - confirm it is still valid.
        mean_plddt, _plddt_info = eval_mut_comb.parse_prot_mut_plddt(
            wt_pdb, clean_pdb_path, plddt_file_path
        )
        # convert to a plain float which json can handle
        mean_plddt = float(mean_plddt)
        print(f'the mutation residues plddt:{mean_plddt}')

        with open(confidence_file_path, 'r') as f:
            data = json.load(f)

        score_all[name_prefix] = {"mean_plddt": mean_plddt, "confidence": data}

    # Write the error log once, and only if something was skipped.
    if error_rows:
        error_df = pd.DataFrame(error_rows, columns=["prot", "error"])
        error_df.to_csv(
            os.path.join(pdb_boltz_folder, f"{prot_name}_confidence_errors.csv"),
            index=False,
        )

    return score_all

# Driver: parse the Boltz predictions stored under the "apo" folder for AcrR.
# wt_pdb points at the AcrR apo PDB that is handed to the parser as reference.
prot_name = "AcrR"
pdb_boltz_folder = "/home/hdwang/AcrR_EST_stru/apo"
wt_pdb = "/home/hdwang/TransF_biosensor/TF_AcrR_redesign/pocket_redesign/cotimed_predict/pdb/AcrR_apo.pdb"
prase_boltz_confidence_results(
    pdb_boltz_folder=pdb_boltz_folder,
    prot_name=prot_name,
    wt_pdb=wt_pdb,
)



