In [22]:
import numpy as np
import mdtraj as md
import matplotlib.pyplot as plt
import nglview as nv

import openmm as mm
import openmm.app as app
import openmm.unit as unit
from mdtraj.reporters import HDF5Reporter
import copy
import nglview as nv
import random

# # Path: pymdna/__init__.py, prototype of the package and atomic is not properly referenced in the package at genertors.py now I just explicitly define the path loction
# import pymdna as mdna
import sys
sys.path.append('/Users/thor/surfdrive/Projects/pymdna/')
import pymdna as mdna 
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
from Bio.SVDSuperimposer import SVDSuperimposer

def get_base_vectors(res):
       """Compute base vectors from reference base."""
       ref_base = mdna.ReferenceBase(res)
       return np.array([ref_base.b_R, ref_base.b_L, ref_base.b_D, ref_base.b_N]).swapaxes(0,1)

def get_rot_mat_trans(x,y):
    # load super imposer
    sup = SVDSuperimposer()

    # Set the coords, y will be rotated and translated on x
    sup.set(x, y)

    # Do the leastsquared fit
    sup.run()

    # Get the rms
    rms = sup.get_rms()

    # Get rotation (right multiplying!) and the translation
    rot, tran = sup.get_rotran()
    return rot, tran

# Function to calculate positions from origin and vectors
def calculate_positions(triad):
    
    origin = triad[0]
    vectors = triad[1:]
    # Each row in vectors is added to the origin to get the end position
    end_positions = origin + vectors
    # Combine the origin with these end positions
    positions = np.vstack([origin, end_positions])
    return positions


def align_to_ref(traj):

    ref = np.array([[0,0,0.0],[1,0,0],[0,1,0],[0,0,1]])
    vectors = get_base_vectors(traj)
    positions = calculate_positions(vectors[0])
    ref_position =  calculate_positions(ref)
    rot, tran = get_rot_mat_trans(ref_position,positions)
    new_xyz = np.dot(traj.xyz[0], rot) + tran
    traj.xyz[0] = new_xyz
    return traj





## Hachimoji

In [6]:
hachimojis = [md.load(mdna.utils.get_data_file_path(f"atomic/hachimoji/6mi{letter}.pdb")).remove_solvent() for letter in ['g','h','k']]
targets = ['1WA6', 'IGU7','JSP10','1W511','1WA5','IGU7','JSP10','1W512','1WA6','1WA7','JSP8','1W510','1W511']
# targets = ['1WA5', 'IGU7','JSP8','1W510']
residues = []
for hachimoji in hachimojis:
    top = hachimoji.topology
    for c in top.chains:
        for r in c.residues:
            if str(r) in targets:
                indices = [a.index for a in r.atoms]
                res = hachimoji.atom_slice(indices)
                residues.append(res)



# Compute base vectors for each base (origin, right, left, normal)
bases = {'1WA':{'res':[],'triad':[]}, '1W5':{'res':[],'triad':[]}, 'IGU':{'res':[],'triad':[]}, 'JSP':{'res':[],'triad':[]}}
unique_bases = list(bases.keys())
for _,res in enumerate(residues):
    base_vectors = get_base_vectors(res)
    res_name = str(res.topology.chain(0).residue(0).name)
    if res_name in bases.keys():
        if len (bases[res_name]['res']) == 0:
            bases[res_name]['res'].append(res)
            bases[res_name]['triad'].append(base_vectors)

print('Number of bases:', {k:len(v['res']) for k,v in bases.items()})


# Align all bases to a reference frame
ref = np.array([[0,0,0.0],[1,0,0],[0,1,0],[0,0,1]])

for name, residues in bases.items():
  
    positions = np.array([np.squeeze(calculate_positions(triad[0]))for triad in residues['triad']])
    ref_position = np.array(calculate_positions(ref))

    count = 0 
    for res, pos in zip(residues['res'], positions):
        rot, tran = get_rot_mat_trans(ref_position,pos)
        new_xyz = np.dot(res.xyz[0], rot) + tran # + np.array([0,1,0])*count
        res.xyz[0] = new_xyz

        # bases[name]['res'][count] = res
        count+=1


def stack_bases(key, bases):
    new = md.Trajectory(xyz=bases[key]['res'][0].xyz, topology=bases[key]['res'][0].topology)
    for traj in bases[key]['res'][1:]:
        new = new.stack(traj)
    return new

trajs = [stack_bases(key, bases) for key in unique_bases]

HM_bases = md.Trajectory(trajs[0].xyz, trajs[0].topology)
for _,traj in enumerate(trajs[1:],1):
    #traj.xyz[0] += np.array([1,0,0])*_
    HM_bases = HM_bases.stack(traj)

view = nv.show_mdtraj(HM_bases)
view.clear()
a = HM_bases.top.select('resname "1WA"')
b = HM_bases.top.select('resname "1W5"')
c = HM_bases.top.select('resname IGU')
d = HM_bases.top.select('resname JSP')

view.add_representation('licorice', selection=a, color='blue')
view.add_representation('licorice', selection=b, color='red')
view.add_representation('licorice', selection=c, color='green')
view.add_representation('licorice', selection=d, color='orange')
view

base_type B
"C1'"
[array([12])]
N9
[array([11])]
C4
[array([5])]
base_type A
"C1'"
[array([10])]
N9
[array([11])]
C4
[array([21])]
base_type Z
"C1'"
[array([1])]
C1
[array([0])]
C2
[array([2])]
base_type S
"C1'"
[array([10])]
C1
[array([2])]
C2
[array([3])]
base_type B
"C1'"
[array([12])]
N9
[array([11])]
C4
[array([5])]
base_type A
"C1'"
[array([10])]
N9
[array([11])]
C4
[array([21])]
base_type Z
"C1'"
[array([1])]
C1
[array([0])]
C2
[array([2])]
base_type S
"C1'"
[array([10])]
C1
[array([2])]
C2
[array([3])]
base_type B
"C1'"
[array([12])]
N9
[array([11])]
C4
[array([5])]
base_type B
"C1'"
[array([12])]
N9
[array([11])]
C4
[array([5])]
base_type Z
"C1'"
[array([1])]
C1
[array([0])]
C2
[array([2])]
base_type S
"C1'"
[array([10])]
C1
[array([2])]
C2
[array([3])]
base_type S
"C1'"
[array([10])]
C1
[array([2])]
C2
[array([3])]
Number of bases: {'1WA': 1, '1W5': 1, 'IGU': 1, 'JSP': 1}


NGLWidget()

In [7]:
mapper = {'1WA':'B','1W5':'S','IGU':'P','JSP':'Z'}
for res in HM_bases.top._residues:
    indices = [a.index for a in res.atoms]
    subtraj = HM_bases.atom_slice(indices)
    print('original',subtraj.top)
    subtraj.save(f'BDNA_{mapper[res.name]}.h5')
    subtraj.save(f'BDNA_{mapper[res.name]}.pdb')
    test = md.load_hdf5(f'BDNA_{mapper[res.name]}.h5')
    print('loaded  ',test.top)

original <mdtraj.Topology with 1 chains, 1 residues, 22 atoms, 24 bonds>
loaded   <mdtraj.Topology with 1 chains, 1 residues, 22 atoms, 24 bonds>
original <mdtraj.Topology with 1 chains, 1 residues, 22 atoms, 23 bonds>
loaded   <mdtraj.Topology with 1 chains, 1 residues, 22 atoms, 23 bonds>
original <mdtraj.Topology with 1 chains, 1 residues, 22 atoms, 24 bonds>
loaded   <mdtraj.Topology with 1 chains, 1 residues, 22 atoms, 24 bonds>
original <mdtraj.Topology with 1 chains, 1 residues, 20 atoms, 21 bonds>
loaded   <mdtraj.Topology with 1 chains, 1 residues, 20 atoms, 21 bonds>


## Artificial hydrophobic interaction base

In [8]:

pdb = md.load(mdna.utils.get_data_file_path(f"atomic/artificial_basepair/4c8m.pdb")).remove_solvent()

dsSICS_indices = pdb.top.select('resname LHO')
dNaM_indices = pdb.top.select('resname BMN')

SICS = pdb.atom_slice(dsSICS_indices)
NaM = pdb.atom_slice(dNaM_indices)
print(SICS.top, NaM.top)
traj = SICS.stack(NaM)
res_a = mdna.ReferenceBase(SICS)
res_b = mdna.ReferenceBase(NaM)


# Align all bases to a reference frame
ref = np.array([[0,0,0],[1,0,0],[0,1,0],[0,0,1]])

base_vectors_a = get_base_vectors(SICS)
base_vectors_b = get_base_vectors(NaM)

positions_a = calculate_positions(base_vectors_a[0])
positions_b = calculate_positions(base_vectors_b[0])

ref_position = calculate_positions(ref)

UBPs = [SICS, NaM]
count = 0
for res,pos in zip([res_a, res_b],[positions_a, positions_b]):
    rot, tran = get_rot_mat_trans(ref_position,pos)
    new_xyz = np.dot(UBPs[count].xyz[0], rot) + tran
    UBPs[count].xyz[0] = new_xyz
    count+=1

traj = SICS.stack(NaM)

view = nv.show_mdtraj(traj)
view.clear()
view.add_representation('licorice')
view


<mdtraj.Topology with 1 chains, 1 residues, 38 atoms, 40 bonds> <mdtraj.Topology with 1 chains, 1 residues, 40 atoms, 42 bonds>
base_type L
"C1'"
[array([12])]
N1
[array([2])]
C5
[array([7])]
base_type M
"C1'"
[array([7])]
C1
[array([0])]
C6
[array([6])]
base_type L
"C1'"
[array([12])]
N1
[array([2])]
C5
[array([7])]
base_type M
"C1'"
[array([7])]
C1
[array([0])]
C6
[array([6])]


NGLWidget()

In [9]:
mapper = {'LHO':'L','BMN':'M'}
for res in traj.top._residues:
    indices = [a.index for a in res.atoms]
    subtraj = traj.atom_slice(indices)
    print(subtraj.top)
    subtraj.save(f'BDNA_{mapper[res.name]}.h5')
    subtraj.save(f'BDNA_{mapper[res.name]}.pdb')
    test = md.load_hdf5(f'BDNA_{mapper[res.name]}.h5')
    print(test.top)


  

<mdtraj.Topology with 1 chains, 1 residues, 38 atoms, 40 bonds>
<mdtraj.Topology with 1 chains, 1 residues, 38 atoms, 40 bonds>
<mdtraj.Topology with 1 chains, 1 residues, 40 atoms, 42 bonds>
<mdtraj.Topology with 1 chains, 1 residues, 40 atoms, 42 bonds>


## Fluorescent Tetra cytosine

In [10]:
pdb = md.load(mdna.utils.get_data_file_path(f"atomic/fluorescent/1tuq.pdb")).remove_solvent()[0]
ref = np.array([[0,0,0.0],[1,0,0],[0,1,0],[0,0,1]])
for res in pdb.top.residues:
    if str(res.name) == 'TC1':
        indices = [at.index for at in res.atoms]
        base = pdb.atom_slice(indices)
        base = align_to_ref(base)
        
print(base.top)
base.save('BDNA_D.h5')
base.save('BDNA_D.pdb')
view = nv.show_mdtraj(base)
view



base_type D
"C1'"
[array([8])]
N1
[array([10])]
C2
[array([11])]
base_type D
"C1'"
[array([8])]
N1
[array([10])]
C2
[array([11])]
<mdtraj.Topology with 1 chains, 1 residues, 39 atoms, 42 bonds>




NGLWidget()

### 2 Amino purine

In [11]:
pdb = md.load(mdna.utils.get_data_file_path(f"atomic/fluorescent/2kv0.pdb")).remove_solvent()[0]
for res in pdb.top.residues:
    if str(res.name) == '2PR':
        indices = [at.index for at in res.atoms]
        base = pdb.atom_slice(indices)
        base = align_to_ref(base)
        
print(base.top)
base.save('BDNA_E.h5')
base.save('BDNA_E.pdb')
view = nv.show_mdtraj(base)
view

base_type E
"C1'"
[array([10])]
N9
[array([11])]
C4
[array([20])]
<mdtraj.Topology with 1 chains, 1 residues, 32 atoms, 34 bonds>




NGLWidget()

### Canonical Bases

In [18]:
pdb = md.load(mdna.utils.get_data_file_path(f"atomic/ddd/1bna.pdb")).remove_solvent()
letters = ['A','T','G','C','U']
A = False
G = False
C = False
T = False
for res in pdb.top._residues[2:]:
    
    if str(res.name) == 'DA' and not A:
        print(res)
        A = True
        indices = [at.index for at in res.atoms]
        base = pdb.atom_slice(indices)
        base = align_to_ref(base)
        print(base.top)
        base.save(f'BDNA_A.h5')
        base.save(f'BDNA_A.pdb')
        test = md.load_hdf5(f'BDNA_A.h5')
        print(test.top)
    elif str(res.name) == 'DT' and not T:
        print(res)
        T = True
        indices = [at.index for at in res.atoms]
        base = pdb.atom_slice(indices)
        base = align_to_ref(base)
        print(base.top)
        base.save(f'BDNA_T.h5')
        base.save(f'BDNA_T.pdb')
        test = md.load_hdf5(f'BDNA_T.h5')
        print(test.top)
    elif str(res.name) == 'DG' and not T:
        print(res)
        G = True
        indices = [at.index for at in res.atoms]
        base = pdb.atom_slice(indices)
        base = align_to_ref(base)
        print(base.top)
        base.save(f'BDNA_G.h5')
        base.save(f'BDNA_G.pdb')
        test = md.load_hdf5(f'BDNA_G.h5')
        print(test.top)
    elif str(res.name) == 'DC' and not T:
        print(res)
        C = True
        indices = [at.index for at in res.atoms]
        base = pdb.atom_slice(indices)
        base = align_to_ref(base)
        print(base.top)
        base.save(f'BDNA_C.h5')
        base.save(f'BDNA_C.pdb')
        test = md.load_hdf5(f'BDNA_C.h5')
        print(test.top)
    


DC3
<mdtraj.Topology with 1 chains, 1 residues, 19 atoms, 20 bonds>
<mdtraj.Topology with 1 chains, 1 residues, 19 atoms, 20 bonds>
DG4
<mdtraj.Topology with 1 chains, 1 residues, 22 atoms, 24 bonds>
<mdtraj.Topology with 1 chains, 1 residues, 22 atoms, 24 bonds>
DA5
<mdtraj.Topology with 1 chains, 1 residues, 21 atoms, 23 bonds>
<mdtraj.Topology with 1 chains, 1 residues, 21 atoms, 23 bonds>
DT7
<mdtraj.Topology with 1 chains, 1 residues, 20 atoms, 21 bonds>
<mdtraj.Topology with 1 chains, 1 residues, 20 atoms, 21 bonds>


In [21]:
view = nv.show_mdtraj(pdb)
view.clear()
view.add_representation('licorice')
view

NGLWidget()

In [12]:
letters = ['A','T','G','C','U']
bases = [md.load(mdna.utils.get_data_file_path(f"atomic/BDNA_{letter}.pdb")).remove_solvent() for letter in letters]

for _,base in enumerate(bases):
    base = align_to_ref(base)
    print(base.top)
    base.save(f'BDNA_{letters[_]}.h5')
    base.save(f'BDNA_{letters[_]}.pdb')
    test = md.load_hdf5(f'BDNA_{letters[_]}.h5')
    print(test.top)


base_type A
"C1'"
[array([10])]
N9
[array([11])]
C4
[array([20])]
<mdtraj.Topology with 1 chains, 1 residues, 21 atoms, 23 bonds>
<mdtraj.Topology with 1 chains, 1 residues, 21 atoms, 23 bonds>
base_type T
"C1'"
[array([10])]
N1
[array([11])]
C2
[array([12])]
<mdtraj.Topology with 1 chains, 1 residues, 20 atoms, 21 bonds>
<mdtraj.Topology with 1 chains, 1 residues, 20 atoms, 21 bonds>
base_type G
"C1'"
[array([10])]
N9
[array([11])]
C4
[array([21])]
<mdtraj.Topology with 1 chains, 1 residues, 22 atoms, 24 bonds>
<mdtraj.Topology with 1 chains, 1 residues, 22 atoms, 24 bonds>
base_type C
"C1'"
[array([10])]
N1
[array([11])]
C2
[array([12])]
<mdtraj.Topology with 1 chains, 1 residues, 19 atoms, 20 bonds>
<mdtraj.Topology with 1 chains, 1 residues, 19 atoms, 20 bonds>
base_type U
"C1'"
[array([10])]
N1
[array([11])]
C2
[array([12])]
<mdtraj.Topology with 1 chains, 1 residues, 19 atoms, 20 bonds>
<mdtraj.Topology with 1 chains, 1 residues, 19 atoms, 20 bonds>
