In [None]:
import numpy as np
import pandas as pd
import formulas
import os

In [None]:
# Path to the directory where the pdb files are stored.
path = r'str'

# Build the list of pdb files to analyze: the list file holds one file name
# per line, and each name is joined onto `path`.
files = []
with open('txt file with list of pdb files', 'r') as f:
    for line in f:
        # strip() instead of dropping the last character: the final entry
        # stays intact even when the list file has no trailing newline.
        name = line.strip()
        # Blank lines would otherwise turn into a bare directory path.
        if name:
            files.append(os.path.join(path, name))

In [None]:
def coordinates1(file):
    """
    Reading a pdb file and picking out the atom groups used in the analysis.

    Args:
        file (str): pdb_file_name.

    Returns:
        dfs (list): Four DataFrames in the order [SG, CB, O, N]. SG and CB
                    hold the rows whose 'ATOM ID' is "SG" / "CB"; O and N
                    hold the rows whose 'Atm' is "O" / "N".
    """
    atoms = formulas.read_pdb(file)
    # (column, value) selectors, listed in the order callers unpack them.
    selectors = (
        ('ATOM ID', "SG"),
        ('ATOM ID', "CB"),
        ('Atm', "O"),
        ('Atm', "N"),
    )
    return [atoms[atoms[column] == value] for column, value in selectors]

In [None]:
def coordinates2(file):
    """
    Extracting water molecule and metal ion data from pdb file as DataFrames.

    Args:
        file (str): pdb_file_name.

    Returns:
        OH (DataFrame): DataFrame containing water molecule data.
        MT (DataFrame): DataFrame containing metal ion data.
    """
    # NOTE(review): an unused local list of metal symbols was removed here
    # (FE, ZN, AU, CU, HG, AS, CD, CO, NI, PT, SE, MO, MN, K, CA, PB, MG, F,
    # SM, GA, SN). It was never passed to formulas.read_pdb_metals, so that
    # helper presumably applies its own selection -- confirm it matches.
    OH = formulas.read_pdb_water(file)
    MT = formulas.read_pdb_metals(file)
    return OH, MT

In [None]:
def s_m(dfs):
    """
    Pairing every SG atom with the metal ions found at coordination distance.

    Args:
        dfs (list): A list of DataFrames containing coordinates of desired
                    atoms, in the order [SG, CB, O, N, OH, MT]. Only SG and
                    MT are used here.

    Returns:
        cys_m (list): One [SG row, metal row, distance] entry for each
                      SG/metal pair whose distance (as computed by
                      formulas.eu_distance) is strictly between 1.9 and 2.8.
    """
    axes = ['X AXIS', 'Y AXIS', 'Z AXIS']
    SG, _, _, _, _, MT = dfs
    cys_m = []
    for _, sg_row in SG.iterrows():
        # The SG position does not depend on the metal, so build it once
        # per SG atom instead of once per SG/metal pair.
        sg_xyz = np.array([sg_row[axis] for axis in axes])
        for _, metal_row in MT.iterrows():
            metal_xyz = np.array([metal_row[axis] for axis in axes])
            distance = formulas.eu_distance(sg_xyz, metal_xyz)
            if 1.9 < distance < 2.8:
                cys_m.append([sg_row, metal_row, distance])
    return cys_m

In [None]:
def _contacts_within(cm, p1, atoms, cutoff, skip=()):
    """Return contact records for the rows of `atoms` within `cutoff` of `p1`, ignoring residues listed in `skip`."""
    found = []
    for _, r in atoms.iterrows():
        # Only look at the residue key when there is something to exclude.
        if skip and (r['RES SEQ'], r['RES INSERT']) in skip:
            continue
        p2 = np.array([r['X AXIS'], r['Y AXIS'], r['Z AXIS']])
        distance = formulas.eu_distance(p1, p2)
        if distance <= cutoff:
            # Record layout: [SG row, metal row, atom row, SG-metal distance, SG-atom distance].
            found.append(cm[:-1] + [r, cm[-1], distance])
    return found


def find_contacts(dfs, cys_m):
    """
    Searching for contacts between metal-bound SG and (O, N, OH).

    Args:
        dfs (list): A list of DataFrames containing coordinates of desired
                    atoms, in the order [SG, CB, O, N, OH, MT]. Only O, N
                    and OH are used here.
        cys_m (list): [SG row, metal row, distance] entries as produced by s_m.

    Returns:
        contacts (list): One [SG row, metal row, atom row, SG-metal distance,
                         SG-atom distance] entry per contact. For each SG the
                         O contacts (cutoff 3.32) come first, then N (3.35),
                         then OH (3.32).
    """
    contacts = []
    _, _, O, N, OH, _ = dfs
    for cm in cys_m:
        S = cm[0]
        p1 = np.array([S['X AXIS'], S['Y AXIS'], S['Z AXIS']])
        # O and N atoms whose ('RES SEQ', 'RES INSERT') key equals the SG's
        # own key, or differs by +/-1 in 'RES INSERT', are not contacts.
        # NOTE(review): seq_run reports 'RES SEQ' as the chain and
        # 'RES INSERT' as the residue number, so this presumably excludes
        # same-chain residues i-1, i, i+1 -- confirm against formulas.read_pdb.
        intra = (
            (S['RES SEQ'], S['RES INSERT']),
            (S['RES SEQ'], S['RES INSERT'] + 1),
            (S['RES SEQ'], S['RES INSERT'] - 1),
        )
        contacts.extend(_contacts_within(cm, p1, O, 3.32, intra))
        contacts.extend(_contacts_within(cm, p1, N, 3.35, intra))
        # Water rows are never excluded by residue.
        contacts.extend(_contacts_within(cm, p1, OH, 3.32))
    return contacts

In [None]:
def angles(dfs, contact):
    """
    Computing the directionality angles for a single SG contact.

    Args:
        dfs (list): A list of DataFrames containing coordinates of desired
                    atoms, in the order [SG, CB, O, N, OH, MT]. Only CB is
                    used here.
        contact (list): Contact record whose first three items are the SG
                        row, the metal row and the interacting atom row.

    Returns:
        angles_list (list): [delta, theta], the two values returned by
                            formulas.theta_phi for the SG, CB, metal and
                            interacting atom positions (in that order).
    """
    _, CB, _, _, _, _ = dfs
    sulfur = contact[0]
    metal = contact[1]
    partner = contact[2]
    axes = ['X AXIS', 'Y AXIS', 'Z AXIS']

    # CB rows carrying the same ('RES SEQ', 'RES INSERT') key as the SG.
    same_residue = (CB["RES SEQ"] == sulfur["RES SEQ"]) & (CB["RES INSERT"] == sulfur["RES INSERT"])
    carbon = CB.loc[same_residue].reset_index(drop=True)

    p1 = np.array([sulfur[axis] for axis in axes])
    p4 = np.array([partner[axis] for axis in axes])
    # First matching CB row; raises IndexError when no CB row matches.
    p2 = np.array([carbon[axis].iloc[0] for axis in axes])
    p3 = np.array([metal[axis] for axis in axes])

    delta, theta = formulas.theta_phi(p1, p2, p3, p4)
    return [delta, theta]

In [None]:
def seq_run(file):
    """
    Running functions defined in cells before sequentially.

    Args:
        file (str): pdb_file_name.

    Returns:
        result (DataFrame): One row per contact found around a metal-bound
                            SG, with the SG-metal distance, the SG-atom
                            distance and the two angles from `angles`.
                            Empty (columns only) when no contact is found.
    """
    cols = ['file', 'Cys_Chain', 'Cys_ResNo', 'Metal', 'Metal_Atom_No', 'IntResChain', 'IntResNo',
            'IntAtom', 'IntAtomID', 'IntRes', 'SG-Metal_distance', 'distance', 'delta', 'theta']
    # dfs ends up as [SG, CB, O, N, OH, MT], the layout the helpers unpack.
    dfs = coordinates1(file)
    dfs.extend(coordinates2(file))
    contacts = find_contacts(dfs, s_m(dfs))

    # Collect plain rows and build the DataFrame once; growing a DataFrame
    # row by row re-allocates it on every insertion.
    rows = []
    for contact in contacts:
        cys, metal, partner, sg_metal_distance, distance = contact
        rows.append([
            # Last 8 characters of the path -- presumably '<4-char id>.pdb'.
            file[-8:],
            cys['RES SEQ'], cys['RES INSERT'],
            metal['Atm'], metal['ATOM NO'],
            partner['RES SEQ'], partner['RES INSERT'],
            partner['Atm'], partner['ATOM ID'], partner['RESIDUE'],
            sg_metal_distance, distance,
        ] + angles(dfs, contact))
    return pd.DataFrame(rows, columns=cols)

# DataFrame.append was removed in pandas 2.0, so gather the per-file results
# and concatenate them in one go.
frames = [seq_run(file) for file in files]
# pd.concat raises on an empty list; fall back to an empty DataFrame so an
# empty file list still produces an (empty) output file.
results = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

results.to_csv('CSC_data.txt', sep='\t', index=False)