In [1]:
import pandas as pd
import glob, os
import json
import datetime as dt

# Define a function to extract ipTM and pTM scores

In [12]:
def extract_af3_results(af3_out_dir, proteinNM1="Pathogen", proteinNM2="Plant", save_csv_path="./"):
    """
    Collect the ipTM and pTM scores from every '*_summary_confidences.json' file.

    Files are searched exactly two directory levels below ``af3_out_dir``
    (glob pattern ``<af3_out_dir>/*/*/*_summary_confidences.json``).  The two
    protein names of a pair are parsed from the file name, which must look like
    ``<a>_<b>_<c>_<d>_summary_confidences.json`` and yields the names
    ``<a>_<b>`` and ``<c>_<d>``.  Change the parsing to fit your own naming.

    input:
       1) af3_out_dir: root directory of the AlphaFold 3 results, e.g. './dimer_af3_out'
       2) proteinNM1: column name used for the first protein of each pair
       3) proteinNM2: column name used for the second protein of each pair
       4) save_csv_path: path of the .csv file to write.  If it is an existing
          directory (as the default "./" is), the table is written to
          'af3_summary.csv' inside that directory.

    output:
       a pandas DataFrame with the columns
       [proteinNM1, proteinNM2, "ipTM", "pTM", "0.8ipTM+0.2pTM"], one row per
       summary file, sorted by the ranking score in descending order.  The same
       table is saved as csv (without the index).

    raises:
       IndexError if a file name has fewer than four '_'-separated fields
       in front of '_summ'.
    """

    ranking_col = "0.8ipTM+0.2pTM"

    # Sort the matches so the row order (and the index labels) do not depend on
    # the arbitrary order in which the file system lists the files.
    # The pattern has no '**', so glob's recursive flag would have no effect.
    json_path = os.path.join(af3_out_dir, '*/*/*_summary_confidences.json')
    files = sorted(glob.glob(json_path))

    # one [name1, name2, iptm, ptm, ranking score] list per summary file
    results = []

    for file in files:

        # get the protein-protein pair name #change to fit your needs!
        # os.path.basename handles both '/' and '\\' separated paths.
        split_res = os.path.basename(file).split("_summ")[0].split("_")
        if len(split_res) < 4:
            raise IndexError(
                "cannot parse two protein names from {!r}: expected a file name "
                "like '<a>_<b>_<c>_<d>_summary_confidences.json'".format(file)
            )
        prt_nm1 = split_res[0] + "_" + split_res[1]
        prt_nm2 = split_res[2] + "_" + split_res[3]

        # open the file and load it as a json object (JSON text is UTF-8)
        with open(file, encoding="utf-8") as f:
            data = json.load(f)

        iptm = data['iptm']
        ptm = data['ptm']

        # ranking score (0.8iptm+0.2ptm), rounded to three decimals
        ranking = round(float(iptm) * 0.8 + float(ptm) * 0.2, 3)

        results.append([prt_nm1, prt_nm2, iptm, ptm, ranking])

    # convert results to pandas DataFrame
    df = pd.DataFrame(results, columns=[proteinNM1, proteinNM2, "ipTM", "pTM", ranking_col])

    # sort by the ranking score; a stable sort keeps ties in file-name order
    df = df.sort_values(by=[ranking_col], ascending=False, kind="mergesort")

    # A directory (such as the default "./") cannot be opened as a file, so
    # place a default-named csv inside it instead of failing at the very end.
    if os.path.isdir(save_csv_path):
        save_csv_path = os.path.join(save_csv_path, "af3_summary.csv")

    # save results as csv
    df.to_csv(save_csv_path, index=False)

    return df

In [13]:
# Summarize the AlphaFold 3 runs found under 'AF3_out' and write the table to
# ./summary.csv; the returned DataFrame is rendered as this cell's output.
extract_af3_results(
    'AF3_out',
    proteinNM1="BDM",
    proteinNM2="ZCY",
    save_csv_path="./summary.csv",
)

Unnamed: 0,BDM,ZCY,ipTM,pTM,0.8ipTM+0.2pTM
26,3bdm_9,2zcy_8,0.91,0.93,0.914
88,3bdm_8,2zcy_9,0.90,0.93,0.906
149,3bdm_14,2zcy_13,0.89,0.86,0.884
27,3bdm_5,2zcy_1,0.85,0.88,0.856
92,3bdm_6,2zcy_5,0.86,0.84,0.856
...,...,...,...,...,...
30,3bdm_4,2zcy_13,0.11,0.48,0.184
171,3bdm_13,2zcy_13,0.12,0.44,0.184
28,3bdm_3,2zcy_13,0.10,0.48,0.176
105,3bdm_13,2zcy_3,0.10,0.48,0.176
