In [2]:
import mdtraj as md
import matplotlib.pyplot as plt
from numpy import dot
import nglview as nv
import numpy as np

# https://biopython.org/docs/1.74/api/Bio.SVDSuperimposer.html
from Bio.SVDSuperimposer import SVDSuperimposer

# Load H-NS dimers
loc_dimers = './data/dimer_pdbs/'
dimers = md.join([md.load(loc_dimers+f'run{i}.pdb') for i in range(0,16)])

# Load H-NS DBD attached to DNA
loc_complexes = './data/complex_traj/'
complexes = md.load(loc_complexes+f'DBD_complex.xtc', top=loc_complexes+f'DBD_complex.pdb').remove_solvent()



In [3]:
# Extract xyz coordinates of chosen frame for complex and dimer
def extract_coords(traj):
    return traj.xyz[0].T

# Get indices of atoms to fit, in this case only the last 48 residues, which coincide with the DBD
def get_index(molecule, chain_idx, atoms):
    return [at.index for res in molecule.top._chains[chain_idx]._residues[-atoms:] for at in res.atoms]

def slice_transpose(molecule,molecule_idx):
    return molecule.atom_slice(molecule_idx).xyz[0].T

def superimpose(coords_a, coords_b):
    sup = SVDSuperimposer()

    # Set the coords y will be rotated and translated on x
    x = coords_a.T
    y = coords_b.T
    sup.set(x, y)

    # Do the lsq fit
    sup.run()

    # Get the rms
    rms = sup.get_rms()

    # Get rotation (right multiplying!) and the translation
    rot, tran = sup.get_rotran()

    # Rotate y on x
    y_on_x1 = dot(y, rot) + tran

    # Same thing
    y_on_x2 = sup.get_transformed()
    
    return(rms, rot, tran)

def fit(complexes, dimers, monomer, frame_dm, frame_dbd, fit_selection):
    # van fit selection een lijst van indeces
    dbd = complexes[frame_dbd]
    dimer = dimers[frame_dm]
    
    xyz_dbd = extract_coords(dbd)
    xyz_dimer = extract_coords(dimer)
    
    # getal 2 om dbd te kiezen dat gebonden is aan dna en niet dna zelf 
    dbd_idx  = get_index(complexes, 2, fit_selection)
    # print("dbd_idx")
    # print(len(dbd_idx))
    # print(dbd_idx)
    monomer_idx  = get_index(dimers, monomer, fit_selection)
    # print("monomer_idx")
    # print(len(monomer_idx))
    # print(monomer_idx)
    
    atom_set_dbd = slice_transpose(dbd, dbd_idx)
    atom_set_dimer = slice_transpose(dimer, monomer_idx)
    
    si_result = superimpose(atom_set_dbd, atom_set_dimer)

    rms = si_result[0]
    rot = si_result[1]
    tran = si_result[2]
    
    a = xyz_dbd.T
    b = xyz_dimer.T

    # rotate b on a
    b_on_a1 = dot(b, rot) + tran

    # replace coordinates 
    dimers.xyz[frame_dm] = b_on_a1
    
    fitted = complexes[0].stack(dimers[frame_dm])
    
    selection = [at.index for idx,chain in enumerate(fitted.top.chains) if idx != 2 for at in chain.atoms]
    # probably easiest to use atomslice to remove/delete monomer
    final = fitted.atom_slice(selection)
    
    # return final
    return final, rms

# nomenclature for variables is not general but at the moment specifically for dbd and dimer because that is
# what's being loaded in to this program. This can easily be changed to a more general nomenclature when needed
#
# usage: main(complexes, dimers, monomer, frame_dm, frame_dbd, fit_selection)
# where 
# complexes: trajectory object of target for fit
# dimers: trajectory object of subject to fit on target
# monomer: which monomer of the dimer will be used for fitting (0 or 1)
# frame_dm: which frame of the dimer trajectory will be used
# frame_dbd: which frame of the target will be used
# fit_selection: which atoms of the monomer will be used for the fitting

# final = fit(complexes, dimers, 0, 11, 0, 48)
# final_traj = final[0]
# final_traj

all_rms = []
counter = 0
max_rms = 0
traj_max_rms = 0
trajs = []

for target_frame in range(1):
    for dimer_frame in range(16):
        for monomer in range(2):
            traj, rms = fit(complexes, dimers, monomer, dimer_frame, target_frame, 48)
            trajs.append(traj)
            all_rms.append(rms)
            # if rms > max_rms:
            #     traj_max_rms = rms
            if counter % 30 == 0:
                print(counter)
            
            counter += 1
            
# print(max(all_rms))
# plt.hist(all_rms, bins = 20)
# print(traj_max_rms)
# final = traj_max_rms
# view = nv.show_mdtraj(traj_max_rms)
# view
# print(trajs)


            

0
30


In [4]:
# make one trajectory of multiple frames with worst fit as first frame and best as last
all_trajs = md.join(trajs, check_topology = True, discard_overlapping_frames = False)
all_trajs = all_trajs[np.argsort(all_rms)]



In [4]:
print(all_rms)

[0.6911098171704878, 1.1583296517598691, 0.447661086614389, 1.1048988096807257, 0.6339176596379676, 1.1038043343228565, 0.41999531783371147, 1.1954009042343756, 0.4957010497179811, 1.1252495560229114, 0.5200595028810081, 1.1539799246800364, 0.5358679903838062, 1.1483413029800205, 0.5535625302282154, 1.1840230655684243, 0.44061448416187743, 1.1113855100403254, 0.5083175524300924, 1.321919464314594, 0.2901319844898188, 1.156401330545798, 0.39875185214701325, 1.082289764737237, 0.4242540030068672, 1.1882871619079194, 0.40682508482227414, 1.153662055230801, 0.47518115714494524, 1.1979574518179543, 0.48027569754770555, 1.1012447012809723]


In [5]:
bad_indices = np.random.randint(low=0, high=5000, size=100)

In [6]:
# for atom in final_traj.xyz
view = nv.show_mdtraj(all_trajs)
# view.clear()#
view.add_representation('ball+stick',selection=bad_indices,color='red',size=100)
view

NGLWidget(max_frame=31)

In [7]:
dna_idx = all_trajs.topology.select("not protein")
dimer_idx = all_trajs.topology.select("protein")

pairs = [[i, j] for i in dna_idx for j in dimer_idx]
distances = md.compute_distances(all_trajs, pairs)


In [8]:
print(distances.shape)

(32, 5548960)


In [9]:
# find original minimum distance
# # te grote dataset
# print(complexes)
# dna_idx_og = complexes.topology.select("not protein")
# dimer_idx_og = complexes.topology.select("protein")
# og_pairs = [[i, j] for i in dna_idx_og for j in dimer_idx_og]
# og_distances = md.compute_distances(complexes, og_pairs)
frame = complexes[0]
dna_idx_og = frame.topology.select("not protein")
dimer_idx_og = frame.topology.select("protein")
og_pairs = [[i, j] for i in dna_idx_og for j in dimer_idx_og]
og_distances = md.compute_distances(frame, og_pairs)

In [17]:
# 915 atoms in dimer make pairs with 1264 atoms of dna
# pairs is a list of lists, every index is a list containing to integers
# the integers are the indexes of the atoms that make the pair
print(og_distances.shape)
print(len(og_pairs))

print()

print(og_pairs[0])
print(og_pairs[100])
print(og_pairs[1000])

print()

print(len(dna_idx_og))
print(len(dimer_idx_og))

print()

print(type(og_pairs))
print(type(og_pairs[0]))
print(type(og_pairs[0][0]))

print(og_distances[0])
print(len(og_distances[0]))
print(len(og_distances))

(1, 1156560)
1156560

[0, 1264]
[0, 1364]
[1, 1349]

1264
915

<class 'list'>
<class 'list'>
<class 'numpy.int64'>
[3.0983002 3.1295998 3.1285558 ... 4.855035  4.7650557 4.8606434]
1156560
1


In [32]:
min_dist_og = np.min(og_distances)
print(min_dist_og)
print("rest")

for x in range(len(og_distances)): 
    smallest_distance = np.min(og_distances[x])
    smallest_distance_idx = np.argmin(og_distances[x])
    print(og_pairs[smallest_distance_idx])
    print(smallest_distance_idx)
    print(smallest_distance)

0.17759518
rest
[189, 1357]
173028
0.17759518


In [44]:
# distances is 1D list of distances
# pairs is list of tuples/lists containing a pair of atom indices
# top is the topology which is analyzed
def overlap(distances, pairs, top):
    # give n smallest distances
    # smallest_distances = 
    sml_dist = np.min(distances)
    sml_dist_idx = np.argmin(distances)
    pair = pairs[sml_dist_idx]
    atom_a = top.atom(pair[0])
    atom_b = top.atom(pair[1])
    
    return sml_dist, pair, atom_a, atom_b



In [55]:
test = overlap(og_distances[0], og_pairs, frame.topology)
print(test)
top.select('residue 8')
# top = frame.topology
# sml_dist = test[0]
# atom_a = test[1][0]
# # atom_a = frame.topology.select(f"atom {test[1][0]}")
# atom_a = top.atom(189)
# print(frame.topology)
# atom_b = test[1][1]
# print(test)
# print(sml_dist)
# print(atom_a)
# print(atom_b)

(0.17759518, [189, 1357], DC7-OP2, THR8-HG1)


array([ 217,  218,  219,  220,  221,  222,  223,  224,  225,  226,  227,
        228,  229,  230,  231,  232,  233,  234,  235,  236,  237,  238,
        239,  240,  241,  242,  243,  244,  245,  246,  247,  248,  849,
        850,  851,  852,  853,  854,  855,  856,  857,  858,  859,  860,
        861,  862,  863,  864,  865,  866,  867,  868,  869,  870,  871,
        872,  873,  874,  875,  876,  877,  878,  879,  880, 1346, 1347,
       1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358,
       1359])

In [12]:
# oude onzin evt nog nodig:

#     md.compute
#     atom = final.xyz[frame_idx, atom_idx,:]
# print(atom[0])
# print(atom[2])
# print(type(atom))

# Get all coordinates of atoms
# Find out how coordinates relate to distance in Angstrom (min distance between atoms = 2 angstrom = 0.2 nm)
# Write script that can check for every atom of any atom is within a 2 angstrom distance apart from itself
### - apart from itself
### - save atom checked and atoms that are to close to it (maybe in a dict, key = checked atom, values = atoms that are too close
        
# # calculating the average distance between two atoms
# import mdtraj as md
# import numpy as np
# t = md.load('trajectory.h5')
# np.mean(np.sqrt(np.sum((t.xyz[:, 0, :] - t.xyz[:, 21, :])**2, axis=1)))

print(np.array((0, 1)))
for atom_dimer_idx in range(0, final.n_atoms - 5717):
    for atom_b_idx in range(final.n_atoms - 5700):
        if atom_a_idx != atom_b_idx:
            print(atom_a_idx)
            print(atom_b_idx)
            print(topology.atom(atom_a_idx))
            print(topology.atom(atom_b_idx))
            # atom_pair = topology.select_pairs(np.array(atom_a_idx), np.array(atom_b_idx))
            distance = md.compute_distances(final, np.array([[atom_a_idx, atom_b_idx]]))
            # distance = md.compute_distances(final, np.array(atom_pair))
            # distance = md.compute_distances(final, np.array(atom_a_idx, atom_b_idx))
            
            print(distance)
            
            # print(topology.atom(atom_a_idx))
            # print(topology.atom(atom_b_idx))
            # atom_pair = topology.select_pairs(np.array(atom_a_idx), np.array(atom_b_idx))
            # distance = md.compute_distances(topology, np.array(atom_pair))
            # distance = md.compute_distances(topology, np.array(atom_a_idx, atom_b_idx))

[0 1]


NameError: name 'final' is not defined