In [None]:
import pickle
import pandas as pd
from Bio.PDB import PDBParser
from Bio.PDB.DSSP import DSSP
from Bio.PDB.SASA import ShrakeRupley
from Bio.PDB.HSExposure import HSExposureCB
from os import listdir
import os
import subprocess

In [None]:
def dssp_analysis(model,pdbFile,prot_loc):
    """Run DSSP on one model through Biopython and tabulate the per-residue output.

    Parameters
    ----------
    model : Biopython model object handed to ``DSSP``.
    pdbFile : str
        File name of the structure; it is appended to the module-level
        ``path`` to form the file location given to ``DSSP``.
    prot_loc :
        Accepted for the caller's convenience; not used inside this function.

    Returns
    -------
    pandas.DataFrame
        One row per key of the DSSP result, with the 14 columns named below.
    """
    # Note kept from the original author: this route provides no TCO, Kappa
    # or Alpha values.
    dssp_path = 'D:/All_for_paper/1. PhD Work Program/3. Research project/1. Dizco/Structural Information Calculation/DSSP/dssp-3.0.0-win32.exe'
    column_names = ['DSSP index','Residue','Secondary structure','Relative ASA',
                    'Phi','Psi','nho1_relidx','nho1_energy','onh1_relidx','onh1_energy',
                    'nho2_relidx','nho2_energy','onh2_relidx','onh2_energy']

    dssp_run = DSSP(model, path+pdbFile, dssp=dssp_path)

    # Flatten every DSSP entry into a plain tuple, in key order.
    records = []
    for key in dssp_run.keys():
        records.append(tuple(dssp_run[key]))

    return pd.DataFrame(records, columns=column_names)

In [None]:
def extract_dssp_infor(output_path):
    """Parse the per-residue section of a DSSP output file into a DataFrame.

    The residue records are located by searching for the column-header row
    (the line whose first two tokens are ``#`` and ``RESIDUE``) instead of
    assuming a fixed header length. If no such row exists, the first 28
    lines are skipped, which is the fixed offset previously assumed.

    Parameters
    ----------
    output_path : str
        Path of the DSSP output file to read.

    Returns
    -------
    pandas.DataFrame
        One row per residue record, with the 18 columns listed in
        ``columns``. Chain-break records (``!`` in the amino-acid column)
        and blank lines are skipped.

    Raises
    ------
    ValueError
        If a numeric field of a residue record cannot be converted.
    IndexError
        If a non-blank record line is too short to hold the amino-acid column.

    Notes
    -----
    NOTE(review): the 'Relative ASA' column is filled with the raw ACC field
    of the file. DSSP documents ACC as an absolute accessibility, so this is
    presumably not normalised - confirm before comparing it with the
    Biopython-derived relative values.
    """
    with open(output_path, 'r') as file:
        lines = file.readlines()

    columns = ['DSSP index', 'Residue', 'AA', 'Secondary structure', 'Relative ASA', 
               'Phi', 'Psi', 'TCO', 'Kappa', 'Alpha', 
               'nho1_relidx', 'nho1_energy', 'onh1_relidx', 'onh1_energy',
               'nho2_relidx', 'nho2_energy', 'onh2_relidx', 'onh2_energy']

    # Fixed-width layout of one record, in the same order as `columns`.
    # A cast of None means "keep the raw character".
    field_specs = [
        (slice(0, 5), int),        # DSSP index
        (slice(5, 10), int),       # Residue number
        (slice(13, 14), None),     # Amino acid one-letter code
        (slice(16, 17), None),     # Secondary structure
        (slice(34, 38), float),    # ACC field (full 4-character width)
        (slice(103, 109), float),  # Phi angle
        (slice(109, 115), float),  # Psi angle
        (slice(85, 91), float),    # TCO value
        (slice(91, 97), float),    # Kappa value
        (slice(97, 103), float),   # Alpha value
        (slice(39, 45), int),      # N-H-->O (1) offset
        (slice(46, 50), float),    # N-H-->O (1) energy
        (slice(50, 56), int),      # O-->H-N (1) offset
        (slice(57, 61), float),    # O-->H-N (1) energy
        (slice(61, 67), int),      # N-H-->O (2) offset
        (slice(68, 72), float),    # N-H-->O (2) energy
        (slice(72, 78), int),      # O-->H-N (2) offset
        (slice(79, 83), float),    # O-->H-N (2) energy
    ]

    # Find where the residue records begin; fall back to the historical
    # fixed offset when the header row is not present.
    first_record = 28
    for position, line in enumerate(lines):
        if line.split()[:2] == ['#', 'RESIDUE']:
            first_record = position + 1
            break

    dssp_result = []
    for line in lines[first_record:]:
        # Blank lines carry no record and would otherwise fail on line[13].
        if not line.strip():
            continue
        # '!' in the amino-acid column marks a chain break, not a residue.
        if line[13] == '!':
            continue

        row = []
        for span, cast in field_specs:
            text = line[span]
            row.append(text if cast is None else cast(text.strip()))
        dssp_result.append(row)

    dssp_result = pd.DataFrame(dssp_result, columns=columns)
    return dssp_result
    
def retrieve_dssp_from_file(pdbFile):
    """Run the standalone DSSP executable on one structure file and parse its output.

    The executable writes a temporary ``output_<id>.dssp`` file next to
    itself, where ``<id>`` is the second '-'-separated field of ``pdbFile``.
    That file is parsed with ``extract_dssp_infor`` and then deleted, even
    when parsing raises.

    Parameters
    ----------
    pdbFile : str
        File name of the structure; it is appended to the module-level
        ``path`` to form the input location.

    Returns
    -------
    pandas.DataFrame
        The table produced by ``extract_dssp_infor``.

    Raises
    ------
    FileNotFoundError
        If the executable did not produce the output file (its exit status
        itself is not checked, as before).
    """
    dssp_path = 'D:/DSSP/DSSP/dssp-3.0.0-win32.exe'
    exe_path = 'D:/DSSP/DSSP/'
    input_path = path+pdbFile
    output_path = exe_path+'output_{:}.dssp'.format(pdbFile.split('-')[1])

    # Pass the arguments as a list without a shell: paths containing spaces
    # or shell metacharacters are then handed to the program unchanged,
    # with no manual quoting needed.
    subprocess.call([dssp_path, '-i', input_path, '-o', output_path], cwd=exe_path)

    try:
        return extract_dssp_infor(output_path)
    finally:
        # Always clean up the temporary file; it may be missing if DSSP failed.
        if os.path.exists(output_path):
            os.remove(output_path)

In [None]:
def sasa_cal(structure):
    """Compute residue-level SASA with Shrake-Rupley and return it as a table.

    Parameters
    ----------
    structure : Biopython structure object; ``compute`` is run on it at
        level "R" and its residues are then iterated.

    Returns
    -------
    pandas.DataFrame
        Columns ``Res_id`` (second element of the residue id), ``Residue``
        (residue name) and ``sasa`` (the ``sasa`` attribute set on each residue).
    """
    calculator = ShrakeRupley(n_points=600)
    calculator.compute(structure, level="R")

    records = [
        (residue.get_id()[1], residue.get_resname(), residue.sasa)
        for residue in structure.get_residues()
    ]
    return pd.DataFrame(records, columns=['Res_id','Residue','sasa'])

In [None]:
# Directory whose files are iterated by the processing loop; the helper
# functions also read this module-level name to build input file paths.
path = 'D:/All_for_paper/1. PhD Work Program/3. Research project/1. Dizco/AlphaFold_pdbFiles/'

# Lookup keyed by protein identifier (judging by the file name, residue
# coordinates per protein). Opened in a context manager so the file handle
# is closed as soon as the pickle has been read.
# NOTE(review): pickle.load can execute arbitrary code - only load files
# from a trusted source.
with open('D:/All_for_paper/1. PhD Work Program/3. Research project/1. Dizco/pkl_files/Coordinates of residues of proteins.pkl','rb') as coord_file:
    prot_loc_dic = pickle.load(coord_file)

In [None]:
# Process every entry of the structure directory. The protein identifier is
# taken as the second '-'-separated field of the file name, so names without
# a '-' will raise IndexError here.
for pdbFile in listdir(path):
    prot = pdbFile.split('-')[1]
    pdb = PDBParser(PERMISSIVE=1)
    structure = pdb.get_structure(prot, path+pdbFile)
    # Only the first model of the parsed structure is handed to DSSP.
    model = structure[0]
    # Per-protein entry of the pickled lookup; a missing key raises KeyError.
    prot_loc = prot_loc_dic[prot]
    
    # DSSP is obtained twice: through the Biopython wrapper and by running
    # the executable and parsing its output file.
    dssp_result_bio = dssp_analysis(model,pdbFile,prot_loc)
    # NOTE(review): dssp_result_exe is not used in the lines visible here.
    dssp_result_exe = retrieve_dssp_from_file(pdbFile)
    sasa_result = sasa_cal(structure)
    # pandas aligns this assignment on the row index, not on residue number;
    # NOTE(review): assumes both tables list the same residues in the same
    # order - confirm, otherwise RSA values are shifted or become NaN.
    sasa_result['RSA'] = dssp_result_bio.loc[:,'Relative ASA']