In [None]:
import os
import pandas as pd
import numpy as np
import formulas

In [None]:
# Directory that the 'file' entries of `contacts` are joined onto when a structure is opened.
# NOTE(review): 'str' looks like a placeholder -- point it at the real structure folder.
path = r'str'

# Tab-separated table of interacting S-O contacts, one contact per row.
# NOTE(review): the file name also looks like a placeholder -- confirm before running.
contacts = pd.read_csv('txt file of interacting S-O', sep='\t')
contacts

In [None]:
def coordinates(row):
    """
    Pull the O and N atom records for one contact out of its structure file.

    Args:
        row (DataFrame row): Contact record; its 'file', 'IntResChain' and
            'IntResNo' entries are read.

    Returns:
        O, N (DataFrame): Rows of the table returned by formulas.read_pdb for the
            "O" atom at residue IntResNo and the "N" atom at residue IntResNo + 4.
            Both frames have their index reset; the previous index is kept as an
            'index' column.
    """
    atoms = formulas.read_pdb(os.path.join(path, row['file']))

    # NOTE(review): the chain ID is matched against 'RES SEQ' and the residue number
    # against 'RES INSERT'; presumably that is how formulas.read_pdb labels its
    # columns -- confirm against that module.
    in_chain = atoms['RES SEQ'] == row['IntResChain']
    residue_no = atoms['RES INSERT']

    oxygen_mask = (atoms['ATOM ID'] == "O") & in_chain & (residue_no == row['IntResNo'])
    nitrogen_mask = (atoms['ATOM ID'] == "N") & in_chain & (residue_no == (row['IntResNo'] + 4))

    O = atoms[oxygen_mask].reset_index()
    N = atoms[nitrogen_mask].reset_index()
    return O, N

In [None]:
def distance_ON(O, N):
    """
    Distance between the first atom record of O and the first atom record of N.

    Args:
        O, N (DataFrame): Atom tables with 'X AXIS', 'Y AXIS' and 'Z AXIS' columns,
            as returned by coordinates(); only the first row of each is used.

    Returns:
        dist_ON: Whatever formulas.eu_distance returns for the two coordinate
            vectors (documented by the original author as a float distance).
    """
    axes = ('X AXIS', 'Y AXIS', 'Z AXIS')
    oxygen_xyz = np.array([O[axis].iloc[0] for axis in axes])
    nitrogen_xyz = np.array([N[axis].iloc[0] for axis in axes])
    return formulas.eu_distance(oxygen_xyz, nitrogen_xyz)

In [None]:
def Helix(row):
    """
    Collect the HELIX records of the structure file belonging to one contact.

    Every line whose first whitespace-separated token is 'HELIX' is collected;
    no filtering on helix class is done here.

    Args:
        row (DataFrame row): Row of DataFrame with data of interacting Sulfur and
            Oxygen; only its 'file' entry is read.

    Returns:
        helix (list): One [chain ID, first residue number, last residue number]
            entry per HELIX record, in file order. Empty if the file has none.
    """
    file = os.path.join(path, row['file'])
    helix = []
    with open(file, 'r') as pdb:
        for line in pdb:
            fields = line.split()
            # Blank or whitespace-only lines split into an empty list; indexing
            # them would raise IndexError, so skip them along with other records.
            if not fields or fields[0] != 'HELIX':
                continue
            # Tokens of a HELIX record: [4] start chain ID, [5] start residue
            # number, [8] end residue number.
            # NOTE(review): whitespace tokenising assumes no fields run together
            # and no insertion codes are present -- fixed-column parsing would be
            # stricter if such files occur.
            helix.append([fields[4], int(fields[5]), int(fields[8])])
    return helix

In [None]:
def cap_find(row, helix):
    """
    Check whether the interacting residue sits at the C-terminal end of a helix.

    The residue is classified by how many positions it lies before the last
    residue of a helix on the same chain: 1 -> 'C1-Cap', 2 -> 'C2-Cap',
    3 -> 'C3-Cap'.

    Args:
        row (DataFrame row): Row of DataFrame with data of interacting Sulfur and
            Oxygen; its 'IntResChain' and 'IntResNo' entries are read.

        helix (list): Helix entries of the form
            [chain ID, first residue number, last residue number].

    Returns:
        cap (list or None): [row, cap label, matching helix entry], or None when
            the residue is not within three positions of any helix end on its
            chain. If several helices match, the last one in `helix` is returned.
    """
    # Start from "no cap" so the function returns a value even when nothing matches
    # (the name used to stay unbound in that case and `return cap` raised).
    cap = None
    for hlx in helix:
        if row['IntResChain'] != hlx[0]:
            continue
        offset = hlx[2] - int(row['IntResNo'])
        if offset in (1, 2, 3):
            cap = [row, 'C%d-Cap' % offset, hlx]
    return cap

In [None]:
def seq_run(row, min_dist_ON=3.5):
    """
    Run the per-contact pipeline: atom lookup, O-N distance, helix list, cap check.

    Args:
        row (DataFrame row): Row of DataFrame with data of interacting Sulfur and Oxygen.
        min_dist_ON (float): Smallest O(i)-N(i+4) distance for which the contact is
            examined further. Defaults to 3.5, the cut-off previously hard-coded here.

    Returns:
        result (DataFrame): Zero or one row with the contact's distance and
                            directional values, the O-N distance and the C-Cap
                            details. Empty when the atoms cannot be found, the
                            distance is below `min_dist_ON`, or no cap is found.
    """
    cols = ['file', 'Met_Chain', 'Met_ResNo', 'IntResChain', 'IntResNo', 'IntAtomID', 'distance',
            'delta', 'theta', 'distance_ON', 'CapType', 'HelixChainID', 'HelixRes1', 'HelixResLast']
    result = pd.DataFrame(columns=cols)

    O, N = coordinates(row)
    # Either selection can be empty (e.g. there is no residue i+4 in the chain);
    # distance_ON reads the first row of each and would fail with IndexError.
    if O.empty or N.empty:
        return result

    dist_ON = distance_ON(O, N)
    if dist_ON < min_dist_ON:
        return result

    helix = Helix(row)
    cap = cap_find(row, helix)
    # Defensive: only index into the cap when one was actually returned.
    if not cap:
        return result

    # cap is [row, cap label, helix entry]; the helix entry supplies the last three columns.
    res = [row['file'], row['Met_Chain'], row['Met_ResNo'], row['IntResChain'], row['IntResNo'],
           row['IntAtomID'], row['distance'], row['delta'], row['theta'], dist_ON, cap[1]] + cap[-1]
    result.loc[len(result)] = res
    return result

# Windows on the 'delta' and 'theta' columns that a contact must fall in (inclusive)
# before it is passed to seq_run.
DELTA_RANGE = (-50, 50)
THETA_RANGE = (115, 155)

# Collect the per-contact frames and concatenate once at the end: DataFrame.append
# was removed in pandas 2.0, and growing a frame inside a loop is quadratic.
frames = []
for _, row in contacts.iterrows():
    delta_ok = DELTA_RANGE[0] <= row['delta'] <= DELTA_RANGE[1]
    theta_ok = THETA_RANGE[0] <= row['theta'] <= THETA_RANGE[1]
    if delta_ok and theta_ok:
        frames.append(seq_run(row))

# pd.concat refuses an empty list, so fall back to an empty frame when no contact qualified.
results = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

results.to_csv('CSC_C-Caps.txt', sep='\t', index=False)