Comparing the structural RMSD:

- Comparing the co-folding model with the wild-type EST-bound AcrR
- Comparing the mutant apo model with the wild-type apo AcrR

In [None]:
from helperfunction import boltzutils
import os 
import json
import pandas as pd
from helperfunction import eval_mut_comb
def prase_boltz_confidence_results(pdb_boltz_folder: str, prot_name: str, wt_pdb: str):
    """
    Parse the Boltz confidence results of every prediction sub-folder in a run folder.

    Expected layout, one sub-folder per prediction, e.g.
        <pdb_boltz_folder>/boltz_results_AcrR_1/predictions/AcrR_1/confidence_AcrR_1_model_0.json
    In the same model folder a ``plddt_*.npz`` file and a ``*cleanH.pdb`` file
    are looked up as well. A sub-folder that lacks any of these (for example a
    ``predictions/AcrR_35`` folder without a confidence JSON) is skipped and
    reported in the error CSV.

    Args:
        pdb_boltz_folder (str): The path to the folder containing Boltz predictions.
        prot_name (str): The name of the protein, e.g. '1flm'. Used as the prefix
            of the error report file name.
        wt_pdb (str): Path to the wild-type PDB file; forwarded unchanged to
            ``eval_mut_comb.parse_prot_mut_plddt``.

    Returns:
        dict: ``{sub-folder name: scores}`` for every sub-folder that could be
        parsed, where ``scores`` is the content of the confidence JSON plus a
        ``"mut_mean_plddt"`` entry (float).

    Side effects:
        When at least one sub-folder is skipped, writes
        ``<pdb_boltz_folder>/<prot_name>_confidence_errors.csv`` with the
        columns ``prot`` and ``error`` (one row per skipped sub-folder).
    """
    # One sub-folder per Boltz run; sorted so processing order and logs are reproducible.
    subfolders = sorted(f.path for f in os.scandir(pdb_boltz_folder) if f.is_dir())

    error_records = []  # one {"prot", "error"} row per skipped sub-folder
    score_all = {}      # {sub-folder name: scores}

    for sub_path in subfolders:
        # name of the sub-folder, e.g. 1flm_hcy12_87
        name_prefix = os.path.basename(os.path.normpath(sub_path))

        predict_path = os.path.join(sub_path, "predictions")
        print(predict_path)

        try:
            # The model folder is the first directory under predictions/.
            # A missing predictions/ folder raises FileNotFoundError here.
            model_dirs = sorted(
                entry.path for entry in os.scandir(predict_path) if entry.is_dir()
            )
            confidence_files_real = model_dirs[0]
            print(f'processing {confidence_files_real}')

            model_files = sorted(os.listdir(confidence_files_real))

            # confidence JSON, e.g. confidence_AcrR_1_model_0.json
            confidence_json = [name for name in model_files if name.startswith("confidence_")]
            print(confidence_json)
            confidence_file_path = os.path.join(confidence_files_real, confidence_json[0])

            # per-residue pLDDT archive, e.g. plddt_1flm_HCY1_1_model_0.npz
            plddt_file = [
                name for name in model_files
                if name.startswith("plddt_") and name.endswith(".npz")
            ][0]
            plddt_file_path = os.path.join(confidence_files_real, plddt_file)

            # cleaned structure file (name ends with "cleanH.pdb")
            clean_pdb = [name for name in model_files if name.endswith("cleanH.pdb")][0]
            print(f"clean pdb found in {confidence_files_real}")
            clean_pdb_path = os.path.join(confidence_files_real, clean_pdb)

        except (IndexError, FileNotFoundError, NotADirectoryError):
            # Any missing folder/file means this prediction cannot be evaluated.
            print(f"No confidence files found in {predict_path}. Skipping...")
            error_records.append({"prot": name_prefix, "error": "No confidence files found"})
            continue

        # NOTE(review): this call was commented out in the original although its
        # result was still used, which raised NameError. Assumes the helper returns
        # (mean_plddt, per-residue info) as that call site unpacked it - confirm.
        mean_plddt, _plddt_info = eval_mut_comb.parse_prot_mut_plddt(
            wt_pdb, clean_pdb_path, plddt_file_path
        )
        # convert to a plain float, which json can serialise
        mean_plddt = float(mean_plddt)
        print(f'the mutation residues plddt:{mean_plddt}')

        with open(confidence_file_path, 'r') as f:
            data = json.load(f)

        scores = dict(data)
        scores["mut_mean_plddt"] = mean_plddt
        score_all[name_prefix] = scores

    # Write the error report once, instead of re-writing it on every failure.
    if error_records:
        error_df = pd.DataFrame(error_records, columns=["prot", "error"])
        error_df.to_csv(
            os.path.join(pdb_boltz_folder, f"{prot_name}_confidence_errors.csv"),
            index=False,
        )

    return score_all
            

# extract confidence score and probability binary

# Inputs for the apo AcrR run: protein label, wild-type reference structure,
# and the folder holding the Boltz prediction sub-folders.
prot_name = "AcrR"
wt_pdb = "/home/hdwang/sensor_hd/TransF_biosensor/TF_AcrR_redesign/pocket_redesign/cotimed_predict/pdb/AcrR_apo.pdb"
pdb_boltz_folder = "/home/hdwang/sensor_hd/TransF_biosensor/AcrR_EST_stru/apo"
prase_boltz_confidence_results(
    pdb_boltz_folder=pdb_boltz_folder,
    prot_name=prot_name,
    wt_pdb=wt_pdb,
)





Generating the alignment PyMOL sessions for the apo/holo models from `aligned_pdb_boltz`

In [None]:
from pymol import cmd


# Structures to compare: the wild-type AcrR/EST holo model (reference) and
# the aligned Boltz model 1.
holo_path = '/home/hdwang/sensor_hd/TransF_biosensor/TF_AcrR_redesign/pocket_redesign/cotimed_predict/pdb/WT_AcrR_EST_model_0.pdb'
model_1_path = '/home/hdwang/sensor_hd/TransF_biosensor/TF_AcrR_redesign/pocket_redesign/cotimed_predict/helperfunction/aligned_pdb_boltz/AcrR_1_model_0/AcrR_1_model_0_rmsd_0.70.pdb'

# Load both files into the PyMOL session under fixed object names.
cmd.load(holo_path, 'holo_AcrR')
cmd.load(model_1_path, 'model_1')



In [1]:
# render_alignment.py
import os
from pymol import cmd

def safe_obj_name(path: str) -> str:
    """Build an object name from a file path.

    Takes the file name without its directory and without its last extension,
    then replaces every character that is neither alphanumeric nor one of
    ``_`` / ``-`` with an underscore.

    Args:
        path: File path (or bare file name) to derive the name from.

    Returns:
        The sanitised name; an empty string when the file stem is empty.
    """
    stem, _extension = os.path.splitext(os.path.basename(path))
    kept_punctuation = "_-"

    sanitized = []
    for char in stem:
        is_allowed = char.isalnum() or char in kept_punctuation
        sanitized.append(char if is_allowed else "_")
    return "".join(sanitized)

def style_whole_alignment(holo_obj: str, model_obj: str):
    """Apply the figure style to the two loaded structures.

    Sets a white background, hides everything, then shows both objects as
    cartoons (holo: gray70 and 35 % transparent, model: tv_blue). Atoms that
    are neither protein polymer nor solvent are shown as coloured sticks per
    object, inorganic atoms as small spheres, and a handful of rendering
    settings are applied.

    Args:
        holo_obj: Name of the reference (holo) object.
        model_obj: Name of the aligned model object.
    """
    both_objects = f"{holo_obj} or {model_obj}"

    # Clean canvas
    cmd.bg_color("white")
    cmd.hide("everything", "all")

    # Protein backbone as cartoons; the holo structure is made semi-transparent
    cmd.show("cartoon", both_objects)
    for cartoon_flag in ("cartoon_fancy_helices", "cartoon_fancy_sheets"):
        cmd.set(cartoon_flag, 1)
    cmd.set("cartoon_transparency", 0.35, holo_obj)

    # Base colours
    cmd.color("gray70", holo_obj)
    cmd.color("tv_blue", model_obj)

    # Ligands (non-protein, non-solvent atoms) as sticks, one colour per object
    for obj_name, stick_color in zip((holo_obj, model_obj), ("salmon", "forest")):
        ligand_sel = f"({obj_name}) and not polymer.protein and not solvent"
        if cmd.count_atoms(ligand_sel) <= 0:
            continue  # nothing to style for this object
        cmd.show("sticks", ligand_sel)
        cmd.color(stick_color, ligand_sel)

    # Inorganic atoms (metals/ions) as small spheres
    cmd.show("spheres", f"({both_objects}) and inorganic")
    cmd.set("sphere_scale", 0.25)

    # Rendering settings; depth_cue 0 disables fog
    render_settings = (
        ("antialias", 2),
        ("ray_shadows", 0),
        ("specular", 0.2),
        ("ambient", 0.6),
        ("depth_cue", 0),
    )
    for setting_name, setting_value in render_settings:
        cmd.set(setting_name, setting_value)

def align_and_render(holo_path: str, model_path: str, out_png: str, method: str = "align",
                     width: int = 2400, height: int = 1800, dpi: int = 300):
    """Superimpose a model onto a holo structure and save a ray-traced PNG.

    The PyMOL session is reinitialised, both files are loaded, solvent is
    removed, the model's protein CA atoms are fitted onto the holo's, the
    RMSD is printed, the figure style is applied and the image is written.

    Args:
        holo_path: Structure file used as the fixed reference.
        model_path: Structure file that is moved onto the reference.
        out_png: Output image path.
        method: ``"super"`` uses ``cmd.super``; any other value uses ``cmd.align``.
        width: Image/viewport width in pixels.
        height: Image/viewport height in pixels.
        dpi: Resolution passed to ``cmd.png``.
    """
    # Start from an empty session so object names cannot clash with earlier loads.
    cmd.reinitialize()

    holo_obj = safe_obj_name(holo_path) + "_holo"
    model_obj = safe_obj_name(model_path)

    for structure_path, object_name in ((holo_path, holo_obj), (model_path, model_obj)):
        cmd.load(structure_path, object_name)

    # Drop waters/solvent from both objects
    cmd.remove(f"({holo_obj} or {model_obj}) and solvent")

    # Fit on protein CA atoms only: target = holo, mobile = model
    target_sel = f"{holo_obj} and polymer.protein and name CA"
    mobile_sel = f"{model_obj} and polymer.protein and name CA"

    fit = cmd.super if method == "super" else cmd.align
    rms = fit(mobile_sel, target_sel)[0]  # first element of the result is the RMSD

    print(f"Alignment RMSD (CA): {rms:.3f} Å")

    style_whole_alignment(holo_obj, model_obj)

    # Frame both proteins, then size the viewport and ray-trace to file
    cmd.zoom(f"{holo_obj} or {model_obj}")
    cmd.viewport(width, height)
    cmd.png(out_png, width=width, height=height, dpi=dpi, ray=1)
    print(f"Saved figure: {out_png}")

if __name__ == "__main__":
    # Example inputs (replace with your own): output image, model to be moved,
    # and the holo structure used as the alignment reference.
    out_png = "AcrR_whole_alignment.png"
    model_path = "/home/hdwang/sensor_hd/TransF_biosensor/TF_AcrR_redesign/pocket_redesign/cotimed_predict/helperfunction/aligned_pdb_boltz/AcrR_1_model_0/AcrR_1_model_0_rmsd_0.70.pdb"
    holo_path = "/home/hdwang/sensor_hd/TransF_biosensor/TF_AcrR_redesign/pocket_redesign/cotimed_predict/pdb/WT_AcrR_EST_model_0.pdb"

    align_and_render(
        holo_path=holo_path,
        model_path=model_path,
        out_png=out_png,
        method="align",
    )


Alignment RMSD (CA): 0.865 Å
Saved figure: AcrR_whole_alignment.png
