In [None]:
import os
import pandas as pd
import numpy as np
import formulas

In [None]:
# Load the tab-separated contact table that the rest of the notebook iterates
# over row by row.
# NOTE(review): the file name looks like a placeholder -- point it at the real
# input file before running.
contacts = pd.read_csv('txt file of interacting S-N', sep='\t')
# Directory that is joined with each row's 'file' value whenever a structure
# file is opened further down.
# NOTE(review): 'str' also looks like a placeholder -- set it to the real folder.
path = r'str'
# Bare expression so the notebook renders the loaded table as the cell output.
contacts

In [None]:
def coordinates(row):
    """
    Collect the five atom records needed by the geometry checks.

    The structure named by ``row['file']`` (looked up under the module-level
    ``path``) is loaded with ``formulas.read_pdb`` and filtered once per atom.

    Args:
        row (DataFrame row): One contact record. The keys read here are
            'file', 'IntResChain', 'IntResNo', 'Cys1_Chain' and 'Cys1_ResNo'.

    Returns:
        atoms (list): Five DataFrames, in this order:
            O_dist -- "O" of residue IntResNo - 4 on IntResChain,
            O_ang  -- "O" of residue IntResNo - 1 on IntResChain,
            C_ang  -- "C" of residue IntResNo - 1 on IntResChain,
            N      -- "N" of residue IntResNo on IntResChain,
            S      -- "SG" of residue Cys1_ResNo on Cys1_Chain.
            Every frame has a fresh 0..n-1 index (the previous index is kept
            as an 'index' column) and is empty when no record matches.
    """
    structure = formulas.read_pdb(os.path.join(path, row['file']))

    def pick(atom_name, chain, res_no):
        # NOTE(review): the chain is compared with 'RES SEQ' and the residue
        # number with 'RES INSERT'; confirm this matches the column naming
        # that formulas.read_pdb actually produces.
        wanted = ((structure['ATOM ID'] == atom_name)
                  & (structure['RES SEQ'] == chain)
                  & (structure['RES INSERT'] == res_no))
        return structure[wanted].reset_index()

    int_chain = row['IntResChain']
    int_res = row['IntResNo']

    return [
        pick("O", int_chain, int_res - 4),
        pick("O", int_chain, int_res - 1),
        pick("C", int_chain, int_res - 1),
        pick("N", int_chain, int_res),
        pick("SG", row['Cys1_Chain'], row['Cys1_ResNo']),
    ]

In [None]:
def distance_ON(atom):
    """
    Distance between the O of residue (i-4) and the N of residue (i).

    Args:
        atom (list): The five DataFrames produced by ``coordinates``, in the
            order [O_dist, O_ang, C_ang, N, S]. Only O_dist and N are used,
            and only the first record of each.

    Returns:
        dist_ON: The value ``formulas.eu_distance`` returns for the two
            points (presumably a float Euclidean distance -- confirm in
            ``formulas``).
    """
    axes = ('X AXIS', 'Y AXIS', 'Z AXIS')

    def first_point(frame):
        # Coordinates of the first record, as a length-3 array.
        return np.array([frame[axis].iloc[0] for axis in axes])

    oxygen, _, _, nitrogen, _ = atom
    return formulas.eu_distance(first_point(oxygen), first_point(nitrogen))

In [None]:
def hb_angle(atom):
    """
    Dihedral angle defined by O(i-1), C(i-1), N(i) and the cysteine SG.

    Args:
        atom (list): The five DataFrames produced by ``coordinates``, in the
            order [O_dist, O_ang, C_ang, N, S]. O_dist is not used; the first
            record of each of the other four supplies one point.

    Returns:
        hb_ang: ``formulas.dihedral_angle`` of the four points, passed through
            ``formulas.phi_conversion360`` (presumably mapping the angle onto
            a 0-360 scale -- confirm in ``formulas``).
    """
    axes = ('X AXIS', 'Y AXIS', 'Z AXIS')

    def first_point(frame):
        # Coordinates of the first record, as a length-3 array.
        return np.array([frame[axis].iloc[0] for axis in axes])

    _, carbonyl_o, carbonyl_c, nitrogen, sulfur = atom
    points = [first_point(frame)
              for frame in (carbonyl_o, carbonyl_c, nitrogen, sulfur)]
    raw_angle = formulas.dihedral_angle(*points)
    return formulas.phi_conversion360(raw_angle)

In [None]:
def Helix(row):
    """
    List the helices declared by the HELIX records of a structure file.

    Every HELIX record is reported; no filtering on helix class is done.
    Records are read by fixed column position (the PDB format is fixed-width),
    so insertion codes next to a residue number or a blank chain ID do not
    shift or corrupt the parsed fields. Blank lines and all other record
    types are skipped.

    Args:
        row (DataFrame row): Contact record; only ``row['file']`` is read.
            The file is looked up under the module-level ``path``.

    Returns:
        helix (list): One ``[chain_id, first_res_no, last_res_no]`` entry per
            HELIX record, in file order. ``chain_id`` is the one-character
            initial chain ID, the residue numbers are ints.

    Raises:
        ValueError: If a HELIX record is too short or its residue numbers
            are not integers.
    """
    file = os.path.join(path, row['file'])
    helix = []
    with open(file, 'r') as pdb:
        for line in pdb:
            # Columns 1-6 hold the record name; slicing is safe on empty
            # lines, unlike indexing the first whitespace token.
            if line[:6].rstrip() != 'HELIX':
                continue
            try:
                chain_id = line[19]            # column 20: initial chain ID
                first_res = int(line[21:25])   # columns 22-25: initial residue number
                last_res = int(line[33:37])    # columns 34-37: terminal residue number
            except (IndexError, ValueError) as err:
                raise ValueError(
                    'Malformed HELIX record in {}: {!r}'.format(file, line)
                ) from err
            helix.append([chain_id, first_res, last_res])
    return helix

In [None]:
def cap_find(row, helix):
    """
    Check whether the interacting residue sits right after the start of a helix.

    Args:
        row (DataFrame row): Contact record; 'IntResChain' and 'IntResNo'
            are read.

        helix (list): ``[chain_id, first_res_no, last_res_no]`` entries, one
            per helix of the structure.

    Returns:
        cap (list or None): ``[row, cap_type, hlx]`` where ``cap_type`` is
            'N1-Cap', 'N2-Cap' or 'N3-Cap' when the residue is 1, 2 or 3
            positions after the first residue of helix ``hlx`` on the same
            chain. If several helices qualify, the last one in ``helix``
            wins. None when no helix qualifies (including an empty list).
    """
    cap_types = {1: 'N1-Cap', 2: 'N2-Cap', 3: 'N3-Cap'}

    # Start from "nothing found" so the final return is always defined, even
    # when the loop never reaches a matching branch.
    cap = None
    for hlx in helix:
        if row['IntResChain'] != hlx[0]:
            continue
        # Converted only once a chain matches, so an unparsable residue number
        # on a row with no helix on its chain does not raise.
        cap_type = cap_types.get(int(row['IntResNo']) - hlx[1])
        if cap_type is not None:
            cap = [row, cap_type, hlx]
    return cap

In [None]:
def seq_run(row):
    """
    Run the geometry checks and the cap lookup for one contact.

    Steps, each of which can reject the contact:
      1. ``distance_ON`` of the atoms from ``coordinates`` must be >= 3.5.
      2. ``hb_angle`` of the same atoms must lie within 120..240 (inclusive).
      3. ``cap_find`` must locate the residue in one of the helices listed
         by ``Helix``.

    Args:
        row (DataFrame row): Contact record. Besides the keys needed by the
            helper functions, 'file', 'Cys1_Chain', 'Cys1_ResNo',
            'Cys2_Chain', 'Cys2_ResNo', 'IntResChain', 'IntResNo',
            'IntAtomID', 'distance', 'delta' and 'theta' are copied to the
            output.

    Returns:
        result (DataFrame): A frame with the 17 columns listed in ``cols``.
            It holds one row when the contact passes every step and no rows
            otherwise.
    """
    cols = ['file', 'Cys1_Chain', 'Cys1_ResNo', 'Cys2_Chain', 'Cys2_ResNo', 'IntResChain',
            'IntResNo', 'IntAtomID', 'distance', 'delta', 'theta', 'distance_ON',
            'angle_N_i-4O', 'CapType', 'HelixChainID', 'HelixRes1', 'HelixResLast']
    result = pd.DataFrame(columns=cols)

    atom = coordinates(row)

    # Written as "not (a >= b)" rather than "a < b" so that a NaN distance is
    # rejected, exactly as a failed ">=" test would reject it.
    # NOTE(review): 3.5 is presumably in the structure file's length unit -- confirm.
    dist_ON = distance_ON(atom)
    if not dist_ON >= 3.5:
        return result

    hb_ang = hb_angle(atom)
    if not 120 <= hb_ang <= 240:
        return result

    helix = Helix(row)
    # A residue that is not at an N1-N3 position of any helix is simply not a
    # cap. Treat both a None result and an UnboundLocalError from cap_find as
    # "no cap found" instead of letting one such row abort the whole run.
    try:
        cap = cap_find(row, helix)
    except UnboundLocalError:
        cap = None
    if cap is None:
        return result

    cap_type, hlx = cap[1], cap[-1]
    res = [row['file'], row['Cys1_Chain'], row['Cys1_ResNo'], row['Cys2_Chain'], row['Cys2_ResNo'],
           row['IntResChain'], row['IntResNo'], row['IntAtomID'], row['distance'], row['delta'],
           row['theta'], dist_ON, hb_ang, cap_type] + list(hlx)
    return pd.DataFrame([res], columns=cols)

# Screen every contact on its delta/theta windows, run the remaining checks
# on the survivors and keep whatever frames come back.
frames = []
for i, row in contacts.iterrows():
    delta_ok = -90 <= row['delta'] <= -50 or 50 <= row['delta'] <= 90
    theta_ok = 95 <= row['theta'] <= 145
    if delta_ok and theta_ok:
        frames.append(seq_run(row))

# DataFrame.append was removed in pandas 2.0 (and re-copied the whole table on
# every iteration), so the frames are combined with a single concat instead.
# Empty frames are left out of the concat; if every frame is empty, the first
# one is kept so the output file still carries the column header.
hits = [frame for frame in frames if not frame.empty]
if hits:
    results = pd.concat(hits, ignore_index=True)
elif frames:
    results = frames[0]
else:
    results = pd.DataFrame()

results.to_csv('CSS_N-Caps.txt', sep='\t', index=False)