In [210]:
import numpy as np
from autoeisd.utils.parsers import get_backbone_pdb # borrowed from idpconfgen
from idpconfgen.libs.libcalc import calc_torsion_angles


In [211]:
import os
import time

In [214]:
from Bio.PDB import PDBParser as parser
import pandas as pd

In [215]:
# Notebook-wide configuration.
# 1 cm expressed in inches (1 in = 2.54 cm); not referenced by the cells
# visible here -- presumably kept for sizing figures, TODO confirm.
cm = 1 / 2.54
# Seed NumPy's legacy global random generator so that any random draws made
# through `np.random` are repeatable between runs.
np.random.seed(12)

In [23]:
# One-letter amino-acid sequences used in this notebook.
# asyn_seq is 140 residues long; it is written as two adjacent string
# literals that Python joins into a single string.
asyn_seq = (
    'MDVFMKGLSKAKEGVVAAAEKTKQGVAEAAGKTKEGVLYVGSKTKEGVVHGVATVAEKTKEQVTNVGGAV'
    'VTGVTAVAQKTVEGAGSIAAATGFVKKDQLGKNEEGAPQEGILEDMPVDPDNEAYEMPSEEGYQDYEPEA'
)
# drk_seq is 59 residues long.
drk_seq = 'MEAIAKHDFSATADDELSFRKTQILKILNMEDDSNWYRAELDGKEGLIPSNYIEMKNHD'

In [217]:
# Load the experimental J-coupling (JC) table.
# NOTE: `exp` is a shared name that the other "read ... exp file" cells also
# assign, so whichever of those cells ran last decides what `exp` holds.
exp = pd.read_csv(
    '/home/oufan/Desktop/X-EISD/data/drk/experimental_data/drksh3_JC_exp_clean.txt'
)
# Shift the residue numbers by 2 so they can index the phi-torsion array:
# the first residue has no phi torsion, so the array starts one residue late
# (presumably resnum is 1-based -- TODO confirm against the data file).
resn = exp['resnum'].values - 2

In [236]:
# read NOE exp file
# Columns read below: res1, atom1, res2, atom2 and the two
# *_multiple_assignments flags.
# NOTE: this cell and the PRE/EFRET "read exp file" cells assign the same
# names (exp, res1, res2, ...); the back-calculation cells use whichever
# experiment was loaded last.
exp = pd.read_csv('/home/oufan/Desktop/X-EISD/data/drk/experimental_data/8AAC_noes.txt')
# Builtin `int` replaces the `np.int` alias, which was deprecated in
# NumPy 1.20 and removed in NumPy 1.24 (it was only ever an alias of `int`).
res1 = exp.res1.values.astype(int)
atom1_name = exp.atom1.values
res2 = exp.res2.values.astype(int)
atom2_name = exp.atom2.values

# Per-restraint flags; the NOE back-calculation uses their truthiness to
# decide whether more than one atom may be matched for a given label.
multi1 = exp.atom1_multiple_assignments.values
multi2 = exp.atom2_multiple_assignments.values

In [6]:
# read PREs exp file
# Columns read below: res1, atom1, res2, atom2.
# NOTE: this cell and the NOE/EFRET "read exp file" cells assign the same
# names (exp, res1, res2, ...); the back-calculation cells use whichever
# experiment was loaded last.
exp = pd.read_csv('/home/oufan/Desktop/X-EISD/data/asyn/experimental_data/asyn_pres.txt')
# Builtin `int` replaces the `np.int` alias, which was deprecated in
# NumPy 1.20 and removed in NumPy 1.24 (it was only ever an alias of `int`).
res1 = exp.res1.values.astype(int)
atom1_name = exp.atom1.values
res2 = exp.res2.values.astype(int)
atom2_name = exp.atom2.values

In [228]:
# read EFRET exp file (format modified)
# the EFRET exp file format is based on the aSyn repository:
# column names: index, res1, res2, scaler, value
# where scaler is the r0 Förster radius of the dye pair
# NOTE(review): the comment above names the column `scaler`, but the code
# reads `exp.scale` -- confirm which name the file really uses.
exp = pd.read_csv('/home/oufan/Desktop/X-EISD/data/asyn/experimental_data/asyn_efret.txt')
# Builtin `int` replaces the `np.int` alias, which was deprecated in
# NumPy 1.20 and removed in NumPy 1.24 (it was only ever an alias of `int`).
res1 = exp.res1.values.astype(int)
res2 = exp.res2.values.astype(int)
scaler = exp.scale.values

# The labelled residue pairs and their distance corrections are the same for
# every structure, so they are computed once instead of inside the loop.
label_pairs = [(int(a), int(b)) for a, b in zip(res1, res2)]
# scale_factor to adjust for dye size and CA to label distances
# (e.g. abs(res1 - res2) = 58 for drk)
scale_factors = [
    ((np.abs(a - b) + 7) / np.abs(a - b)) ** 0.5
    for a, b in label_pairs
]

# generates smFRET back calculation data
fret_bc = []
p = parser()
for n in range(200):
    # read pdb files to back calculate (files are numbered from 1)
    pdb = 'local/recurrent_attention_residue/training_5/reinforce/prejc_pdbs/gen_%i.pdb'%(n+1)
    struct = p.get_structure('d', pdb)
    # first model, chain 'A' is assumed to hold the protein
    chain = struct[0]['A']
    # assumes CA as atom labeled
    for (r1, r2), scale_factor, r0 in zip(label_pairs, scale_factors, scaler):
        # subtracting two Bio.PDB atoms gives their distance
        d = chain[r1]['CA'] - chain[r2]['CA']
        d = d*scale_factor
        # FRET efficiency from the scaled distance and the Förster radius
        eff = 1.0/(1.0+(d/r0)**6.0)
        fret_bc.append(eff)
    # pauses 5 s at n = 0 and n = 100; the reason is not evident from this
    # cell (presumably throttling) -- kept as in the original
    if n%100 ==0: time.sleep(5)
# one row per structure, one column per labelled pair
fret_bc = np.reshape(fret_bc, (-1, exp.shape[0]))


In [219]:
#PREs
# Back-calculates, for each of 200 generated structures, the JC "alpha"
# terms (jc_bc) and the PRE atom-atom distances (pre_bc).
# NOTE(review): this cell relies on notebook state set elsewhere: `resn`
# comes from the JC exp cell, while `exp`, `res1`, `res2`, `atom1_name` and
# `atom2_name` come from whichever exp-file cell ran last.


def _pick_atom(residue, name):
    """Return the atom of ``residue`` selected by the experimental label ``name``.

    The label 'H' is looked up directly as ``residue['H']``. Any other label
    selects the first atom, in residue iteration order, whose name contains
    ``name`` as a substring.
    NOTE(review): substring matching means e.g. 'C' could select 'CA' if it
    is iterated first -- confirm the labels used in the experimental file.

    Raises:
        KeyError: ``name`` is 'H' and the residue has no atom called 'H'.
        LookupError: no atom name contains ``name``. Previously the atom
            matched for the preceding restraint was silently reused.
    """
    if name == 'H':
        return residue['H']
    for atom in residue:
        if name in atom.get_name():
            return atom
    raise LookupError('no atom matching %r in %r' % (name, residue))


p = parser()
pre_bc = []
jc_bc = []
for n in range(200):
    # generated structures are numbered from 1
    pdb = 'local/recurrent_attention_residue/training_25/reinforce/noejc2_pdbs/gen_%i.pdb'%(n+1)
    struct = p.get_structure('d', pdb)
    # calculates JC back calculation alphas: cos(torsion - 60 degrees), taking
    # every third torsion starting at index 2 and selecting entries `resn`
    coords = next(get_backbone_pdb(pdb, 1))[0]
    alpha = np.cos(calc_torsion_angles(coords)[2::3][resn] - np.radians(60))
    jc_bc.append(alpha)
    # fetch PRE distances
    dist = []
    for j in range(exp.shape[0]):
        # builtin int replaces np.int (removed in NumPy 1.24)
        r1 = int(res1[j])
        r2 = int(res2[j])
        # first model, chain 'A' is assumed to hold the protein
        atom1 = _pick_atom(struct[0]['A'][r1], atom1_name[j])
        atom2 = _pick_atom(struct[0]['A'][r2], atom2_name[j])
        # subtracting two Bio.PDB atoms gives their distance
        dist.append(atom1-atom2)

    pre_bc.append(dist)
    # pauses 5 s every 50 structures; the reason is not evident from this
    # cell (presumably throttling) -- kept as in the original
    if n%50 == 0 : time.sleep(5)

In [257]:
#NOES
# Back-calculates the NOE distances for each of 200 generated structures.
# NOTE(review): this cell relies on `exp`, `res1`, `res2`, `atom1_name`,
# `atom2_name`, `multi1` and `multi2` as left by the NOE exp-file cell; it
# must be the most recently run exp-file cell.


def _pick_noe_atoms(residue, name, multiple):
    """Return the atoms of ``residue`` selected by the NOE label ``name``.

    The label 'H' always yields the single atom ``residue['H']``. Any other
    label collects atoms, in residue iteration order, whose name contains
    ``name`` as a substring: at most two when ``multiple`` is truthy,
    otherwise only the first.
    NOTE(review): the cap of two atoms and the substring matching are kept
    from the original code -- confirm they suit the labels in the NOE file.

    Raises:
        KeyError: ``name`` is 'H' and the residue has no atom called 'H'.
        LookupError: no atom name contains ``name``. Previously this
            surfaced later as a ZeroDivisionError when averaging.
    """
    if name == 'H':
        return [residue['H']]
    wanted = 2 if multiple else 1
    picked = []
    for atom in residue:
        if name in atom.get_name():
            picked.append(atom)
            if len(picked) == wanted:
                break
    if not picked:
        raise LookupError('no atom matching %r in %r' % (name, residue))
    return picked


p = parser()
noe_bc = []
for n in range(200):
    # generated structures are numbered from 1
    pdb = 'local/recurrent_attention_residue/training_25/reinforce/noejc2_pdbs/gen_%i.pdb'%(n+1)
    struct = p.get_structure('d', pdb)
    dist = []
    for j in range(exp.shape[0]):
        # builtin int replaces np.int (removed in NumPy 1.24)
        r1 = int(res1[j])
        r2 = int(res2[j])
        # first model, chain 'A' is assumed to hold the protein
        atom1_list = _pick_noe_atoms(struct[0]['A'][r1], atom1_name[j], multi1[j])
        atom2_list = _pick_noe_atoms(struct[0]['A'][r2], atom2_name[j], multi2[j])
        combos = 0.0
        num_combos = 0
        # handles multiple atom assignments by <r^-6>: average r^-6 over all
        # atom pairs, then convert back to a distance with the -1/6 power
        for first_atom in atom1_list:
            for second_atom in atom2_list:
                combos += (first_atom - second_atom)**(-6.)
                num_combos += 1

        dist.append((combos/float(num_combos))**(-1/6))

    noe_bc.append(dist)
    # pauses 5 s every 50 structures; the reason is not evident from this
    # cell (presumably throttling) -- kept as in the original
    if n%50==0: time.sleep(5)

In [260]:
# Write the NOE back-calculation (one row per structure) to a CSV file
# without a header row; the default integer row index is written as the
# first column.
# To append to an existing file instead of overwriting it, the earlier
# approach was:
#old_alphas = pd.read_csv('/home/oufan/Desktop/X-EISD/data/drk/back_calc_data/ML/rl_noe.txt', header=None, index_col=0)
#new = np.vstack((old_alphas.values, np.array(noe_bc)))
# NOTE: despite its name, `new_alphas` holds the NOE distances from `noe_bc`.
new_alphas = pd.DataFrame(data=np.array(noe_bc), index=None)
new_alphas.to_csv(
    '/home/oufan/Desktop/X-EISD/data/drk/back_calc_data/ML/rl_noe.txt',
    header=None,
)
