In [None]:
import numpy as np
import pandas as pd
import formulas
import os

In [None]:
# Path to the directory where the pdb files are stored.
path = r'str'

# Build the list of files to be analyzed: the list file holds one pdb file
# name per line, and each name is joined onto `path`.
files = []
with open('txt file with list of pdb files', 'r') as f:
    for line in f:
        # Remove only the line terminator. Slicing off the last character
        # unconditionally would truncate the final file name whenever the
        # list file does not end with a newline.
        name = line.rstrip('\r\n')
        if name:
            # Blank lines are skipped; joining an empty name would yield just
            # the directory path instead of a pdb file.
            files.append(os.path.join(path, name))

In [None]:
def coordinates_sg(file):
    """
    Load a pdb file and pick out its SG atoms.

    Args:
        file (str): pdb_file_name, passed unchanged to formulas.read_pdb.

    Returns:
        sg (DataFrame): Rows of the loaded table whose 'ATOM ID' is "SG"
                        (original index labels are kept).
        df (DataFrame): The full table returned by formulas.read_pdb.
    """
    df = formulas.read_pdb(file)
    # Boolean mask selecting the SG rows.
    is_sg = df['ATOM ID'] == "SG"
    return df[is_sg], df

In [None]:
def disulphide(sg, offset=5, max_distance=2.1):
    """
    Searching for i+offset disulphide bonds (i+5 with the default offset).

    Two SG atoms are reported as a bond when they have the same 'RES SEQ'
    value (written to the *_Chain output columns), the second atom's
    'RES INSERT' value (written to the *_RES_No output columns) is exactly
    `offset` larger than the first atom's, and the distance returned by
    formulas.eu_distance for their coordinates is strictly between 0 and
    `max_distance`.

    Args:
        sg (DataFrame): DataFrame containing data of all SG atoms. Must provide
                        the columns 'RES SEQ', 'RES INSERT', 'X AXIS', 'Y AXIS'
                        and 'Z AXIS'.
        offset (int): Required difference between the 'RES INSERT' values of
                      the two atoms. Defaults to 5.
        max_distance (float): Exclusive upper bound on the S-S distance, in the
                              units of the coordinates (presumably angstroms;
                              confirm against formulas.read_pdb). Defaults to 2.1.

    Returns:
        disulphide_df (DataFrame): One row per detected bond with the columns
                                   'SG1_Chain', 'SG1_RES_No', 'SG2_Chain',
                                   'SG2_RES_No' and 'SS_distance'. Empty (but
                                   with these columns) when nothing matches.
    """
    columns = ['SG1_Chain', 'SG1_RES_No', 'SG2_Chain', 'SG2_RES_No', 'SS_distance']

    # Materialise the rows once. Calling sg.iterrows() inside the outer loop
    # would rebuild every row Series again for each outer row.
    rows = list(sg.iterrows())

    records = []
    for i1, r1 in rows:
        for i2, r2 in rows:
            if (i1 != i2 and r1['RES SEQ'] == r2['RES SEQ']
                    and r1['RES INSERT'] == r2['RES INSERT'] - offset):
                distance = formulas.eu_distance(
                    np.array([r1['X AXIS'], r1['Y AXIS'], r1['Z AXIS']]),
                    np.array([r2['X AXIS'], r2['Y AXIS'], r2['Z AXIS']]))
                # A zero distance (coincident atoms) is deliberately excluded.
                if 0 < distance < max_distance:
                    records.append({'SG1_Chain': r1['RES SEQ'],
                                    'SG1_RES_No': r1['RES INSERT'],
                                    'SG2_Chain': r2['RES SEQ'],
                                    'SG2_RES_No': r2['RES INSERT'],
                                    'SS_distance': distance})

    # Passing the column list keeps the schema stable even with no records.
    disulphide_df = pd.DataFrame(records, columns=columns)
    return disulphide_df

In [None]:
def coordinates_atoms(df, row):
    """
    Collect the atom records needed for the distance and angle calculations.

    Every lookup is restricted to rows whose 'RES SEQ' equals
    row['SG1_Chain'] (this includes the lookups for the second residue).

    Args:
        df (DataFrame): DataFrame containing data of atoms from pdb file.
        row (dict): Details of the disulphide bond; the keys 'SG1_Chain',
                    'SG1_RES_No' and 'SG2_RES_No' are read.

    Returns:
        atoms (list): Eight DataFrames, each re-indexed from 0, in the order
                      [S1, C1, S2, C2, O2, N5, O1, N4]:
                      SG and CB of residue SG1_RES_No, SG and CB of residue
                      SG2_RES_No, O of SG1_RES_No + 2, N of SG1_RES_No + 5,
                      O of SG1_RES_No + 1 and N of SG1_RES_No + 4.
                      A DataFrame is empty when the atom is not present in df.
    """
    first_res = row['SG1_RES_No']
    second_res = row['SG2_RES_No']

    # (residue number, atom name) for each requested atom, in output order.
    wanted = [
        (first_res, 'SG'),       # S1
        (first_res, 'CB'),       # C1
        (second_res, 'SG'),      # S2
        (second_res, 'CB'),      # C2
        (first_res + 2, 'O'),    # O2
        (first_res + 5, 'N'),    # N5
        (first_res + 1, 'O'),    # O1
        (first_res + 4, 'N'),    # N4
    ]

    # The chain condition is the same for all eight selections.
    in_chain = df['RES SEQ'] == row['SG1_Chain']

    atoms = []
    for res_no, atom_name in wanted:
        selected = df[in_chain & (df['RES INSERT'] == res_no) & (df['ATOM ID'] == atom_name)]
        atoms.append(selected.reset_index(drop=True))
    return atoms

In [None]:
def distances_within_turn(atoms):
    """
    Compute four inter-atomic distances from the atom records of one bond.

    Args:
        atoms (list): Eight DataFrames in the order
                      [S1, C1, S2, C2, O2, N5, O1, N4]; the first row of each
                      used DataFrame supplies the 'X AXIS', 'Y AXIS' and
                      'Z AXIS' coordinates. C1 and C2 are not used here.

    Returns:
        distances (list): [S1-O2, S2-O2, O2-N5, O1-N4] distances as returned by
                          formulas.eu_distance.

    Raises:
        IndexError: If one of the used DataFrames has no rows.
    """
    S1, C1, S2, C2, O2, N5, O1, N4 = atoms

    def xyz(frame):
        # Coordinates of the first record as a 3-element array.
        return np.array([frame[axis].iloc[0] for axis in ('X AXIS', 'Y AXIS', 'Z AXIS')])

    # Atom pairs in the order of the returned distances.
    pairs = ((S1, O2), (S2, O2), (O2, N5), (O1, N4))
    distances = [formulas.eu_distance(xyz(start), xyz(end)) for start, end in pairs]
    return distances

In [None]:
def angles(atoms):
    """
    Compute the delta/theta angle pairs for both sulphur atoms of one bond.

    formulas.theta_phi is called twice: with (S1, C1, S2, O2) and with
    (S2, C2, S1, O2), each atom given as its first-row coordinates.

    Args:
        atoms (list): Eight DataFrames in the order
                      [S1, C1, S2, C2, O2, N5, O1, N4]; N5, O1 and N4 are not
                      used here.

    Return:
        angles_list (list): [delta1, theta1, delta2, theta2] as returned by the
                            two formulas.theta_phi calls.

    Raises:
        IndexError: If one of the used DataFrames has no rows.
    """
    S1, C1, S2, C2, O2, N5, O1, N4 = atoms

    def xyz(frame):
        # Coordinates of the first record as a 3-element array.
        return np.array([frame[axis].iloc[0] for axis in ('X AXIS', 'Y AXIS', 'Z AXIS')])

    angles_list = []
    # Same calculation from each sulphur's point of view; O2 is shared.
    for sulphur, carbon, partner in ((S1, C1, S2), (S2, C2, S1)):
        delta, theta = formulas.theta_phi(xyz(sulphur), xyz(carbon), xyz(partner), xyz(O2))
        angles_list += [delta, theta]
    return angles_list

In [None]:
def backbone_atoms(row, df):
    """
    Select the backbone atoms needed for the Ramachandran-angle calculations.

    Keeps the "C", "N" and "CA" atoms of the chain row['SG1_Chain'] for the
    residues numbered from SG1_RES_No - 1 up to and including SG1_RES_No + 6.

    Args:
        row (dict): Details of the disulphide bond; the keys 'SG1_Chain' and
                    'SG1_RES_No' are read.
        df (DataFrame): DataFrame containing data of atoms from pdb file.

    Returns:
        atom_rows (DataFrame): The matching rows of df (original index labels
                               are kept).
    """
    first_res = row['SG1_RES_No']
    # One residue before the first cysteine through six residues after it.
    window = list(range(first_res - 1, first_res + 7))

    is_backbone = df['ATOM ID'].isin(["C", "N", "CA"])
    same_chain = df['RES SEQ'] == row['SG1_Chain']
    in_window = df['RES INSERT'].isin(window)

    atom_rows = df[is_backbone & same_chain & in_window]
    return atom_rows

In [None]:
def psi_phi(atom_rows, row):
    """
    Calculating Ramachandran angles for the six residues SG1_RES_No .. SG1_RES_No + 5.

    For each residue, phi is formulas.dihedral_angle over
    (C of the previous residue, N, CA, C) and psi is formulas.dihedral_angle
    over (N, CA, C, N of the next residue).

    Args:
        atom_rows (DataFrame): Backbone atom records with the columns
                               'RES INSERT', 'ATOM ID', 'X AXIS', 'Y AXIS' and
                               'Z AXIS'. Atoms are matched on residue number and
                               atom name only, so the records are expected to
                               belong to a single chain.
        row (dict): Details of the disulphide bond; only 'SG1_RES_No' is read.

    Returns:
        phi_psi_list (list): Twelve values ordered
                             [phi_i, psi_i, phi_i+1, psi_i+1, ..., phi_i+5, psi_i+5].

    Raises:
        IndexError: If a required backbone atom is not present in atom_rows.
    """
    def point(res_no, atom_name):
        # Coordinates of the first record matching this residue and atom name.
        hit = atom_rows[(atom_rows['RES INSERT'] == res_no) &
                        (atom_rows['ATOM ID'] == atom_name)]
        return np.array([hit[axis].iloc[0] for axis in ('X AXIS', 'Y AXIS', 'Z AXIS')])

    first_res = row['SG1_RES_No']
    phi_psi_list = []
    for res_no in range(first_res, first_res + 6):
        c_prev = point(res_no - 1, 'C')
        n_here = point(res_no, 'N')
        ca_here = point(res_no, 'CA')
        c_here = point(res_no, 'C')
        n_next = point(res_no + 1, 'N')
        phi = formulas.dihedral_angle(c_prev, n_here, ca_here, c_here)
        psi = formulas.dihedral_angle(n_here, ca_here, c_here, n_next)
        phi_psi_list.extend([phi, psi])
    return phi_psi_list

In [None]:
def seq_run(file):
    """
    Running functions defined in cells before sequentially for one pdb file.

    Loads the file, detects the disulphide bonds and, for every bond, gathers
    the within-turn distances, the delta/theta angles and the phi/psi angles
    into one output row.

    Args:
        file (str): pdb_file_name. Its last 8 characters are stored in the
                    'file' column.

    Returns:
        result2 (DataFrame): A dataframe with details of distance and directional criterion
                             of the residues from beta-turn; one row per detected
                             bond and 25 columns (see `cols`). Empty, with the
                             same columns, when no bond is found.
    """
    sg, df = coordinates_sg(file)
    sg = sg.reset_index(drop=True)
    disulphide_df = disulphide(sg)
    cols = ['file', 'SG1_Chain', 'SG1_Res_no', 'SG2_Chain', 'SG2_Res_no', 'distance_SG1O2',
            'distance_SG2O2', 'distance_O2N5', 'distance_O1N4', 'delta1', 'theta1', 'delta2',
            'theta2', 'phi_i', 'psi_i', 'phi_i+1', 'psi_i+1', 'phi_i+2', 'psi_i+2', 'phi_i+3',
            'psi_i+3', 'phi_i+4', 'psi_i+4', 'phi_i+5', 'psi_i+5']

    # Collect plain rows and build the DataFrame once at the end. Growing a
    # frame with `.loc[len(frame)] = row` reallocates on every insertion and
    # leaves all columns with object dtype.
    records = []
    for _, row in disulphide_df.iterrows():
        atoms = coordinates_atoms(df, row)
        distances = distances_within_turn(atoms)
        angles_list = angles(atoms)
        atom_rows = backbone_atoms(row, df)
        phi_psi_list = psi_phi(atom_rows, row)
        # Unpacking documents (and enforces) the expected four values each.
        distance_SG1O2, distance_SG2O2, distance_O2N5, distance_O1N4 = distances
        delta1, theta1, delta2, theta2 = angles_list
        # NOTE(review): file[-8:] presumably assumes names like '1abc.pdb'
        # (8 characters); confirm before using other naming schemes.
        record = [file[-8:], row['SG1_Chain'], row['SG1_RES_No'], row['SG2_Chain'], row['SG2_RES_No'],
                  distance_SG1O2, distance_SG2O2, distance_O2N5, distance_O1N4, delta1, theta1, delta2,
                  theta2]
        records.append(record + list(phi_psi_list))

    # Passing `columns` keeps the schema even when there are no records.
    result2 = pd.DataFrame(records, columns=cols)
    return result2


# Analyze all pdb files sequentially, keeping one result table per file.
frames = []
for file in files:
    result2 = seq_run(file)
    frames.append(result2)

# DataFrame.append was removed in pandas 2.0, so the per-file tables are
# concatenated in a single call instead. pd.concat raises on an empty list,
# hence the fallback to an empty frame when there were no files.
result = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

result.to_csv('beta_turn.txt', sep='\t', index=False)