# Base-linker generation

In [34]:
import pandas as pd
from biopandas.mol2 import PandasMol2
import fluordynamics as fd
import os

In [35]:
import importlib
importlib.reload(fd.ff)

<module 'fluordynamics.ff' from '/mnt/c/Users/fsteffen/Github/gromacs-tools/fluordynamics/ff.py'>

In [4]:
cmd_gui = fd.jupyter.connect2pymol()

## Labeling on C5 of deoxythymidine

In [407]:
names_methylene = ['C7','H01','H02']

base_resn = ('deoxythymidine', 'DTM')
#base_resn = ('deoxythymidine', 'RUM') # use deoxythymidine to make RNA fragment mimic (after labeling the RNA sugar of the target will be retained) 

cmd_gui.reinitialize()
cmd_gui.load('../fragments/bases/out/{}.mol2'.format(base_resn[0]))
cmd_gui.load('../fragments/linkers/MLE/out/MLE_capped_resp.mol2')
cmd_gui.remove('MLE_capped_resp and name {}'.format('+'.join(str(i) for i in names_methylene)))
cmd_gui.remove('{} and (name H71 or name H72)'.format(base_resn[0]))
cmd_gui.fuse('{} and name C7'.format(base_resn[0]), 'MLE_capped_resp and name C8 and resn MLE')
cmd_gui.delete('{}'.format(base_resn[0]))
cmd_gui.alter('all', 'type="ATOM"')
cmd_gui.alter('all', 'elem=""') # PyMOL struggles with atom type definitions in mol2 files, therfore let PyMOL guess the elements itself
cmd_gui.set_name('MLE_capped_resp', base_resn[1])
cmd_gui.set_title('MLE',1,base_resn[1])
cmd_gui.unbond('resn DTM and name C8', 'resn DT and name C7')
cmd_gui.bond('resn DTM and name C8', 'resn DT and name C7', 2)

In [408]:
fd.ff.pymol_savemol2('../fragments/base_linkers/{}.mol2'.format(base_resn[1]), base_resn[1], overwrite=True)

In [409]:
dyes = ['C3W', 'C5W']
for dye in dyes:
    fd.ff.couple_dye2baselinker(dye, base_resn[1], ['O91', 'C99', 'C27'], ['O98', 'C16', 'C17'], ['O98', 'C16', 'C17', 'H95', 'H96', 'H97'])
    fd.ff.save_molecule('../fluorlabel/dyes/{}_{}.pdb'.format(dye, base_resn[1]), '{}_{}'.format(dye, base_resn[1]), 'pdb', overwrite=True)

## Labeling at 5'-end

In [1002]:
names_methyl = ['C01','H01','H02','H03']
names_phosphate = ['P','O1P','O5\'','O2P']

base_resn = ('deoxythymidine', 'DTP')
#base_resn = ('deoxyadenosine', 'DAP')
#base_resn = ('deoxyguanosine', 'DGP')
#base_resn = ('deoxycytidine', 'DCP')
#base_resn = ('uridine', 'RUP')
#base_resn = ('adenosine', 'RAP')
#base_resn = ('guanosine', 'RGP')
#base_resn = ('cytidine', 'RCP')

cmd_gui.reinitialize()
cmd_gui.load('../fragments/bases/out/{}.mol2'.format(base_resn[0]))
cmd_gui.load('../fragments/linkers/POS/out/POS_capped_resp_5prime.mol2')
cmd_gui.align('POS_capped_resp_5prime and name {}'.format('+'.join(str(i) for i in names_phosphate)), '{} and (name P or name O1P or name O2P or name O5\')'.format(base_resn[0]))
cmd_gui.remove('POS_capped_resp_5prime and name {}'.format('+'.join(str(i) for i in names_methyl+names_phosphate)))
cmd_gui.create(base_resn[1], 'POS_capped_resp_5prime or {}'.format(base_resn[0]))
cmd_gui.delete(base_resn[0])
cmd_gui.delete('POS_capped_resp_5prime')
cmd_gui.bond('{} and name P'.format(base_resn[1]), '{} and name O01'.format(base_resn[1]))
cmd_gui.alter('all', 'type="ATOM"')
cmd_gui.alter('all', 'elem=""')
cmd_gui.h_add('name N99')
cmd_gui.set_title(base_resn[1],1,base_resn[1])


In [1003]:
fd.ff.pymol_savemol2('../fragments/base_linkers/{}.mol2'.format(base_resn[1]), base_resn[1], overwrite=True)

In [1004]:
dyes = ['C3W', 'C5W']
for dye in dyes:
    fd.ff.couple_dye2baselinker(dye, base_resn[1], ['O91', 'C99', 'C27'], ['O98', 'C16', 'C17'], ['O98', 'C16', 'C17', 'H95', 'H96', 'H97'])
    fd.ff.save_molecule('../fluorlabel/dyes/{}_{}.pdb'.format(dye, base_resn[1]), '{}_{}'.format(dye, base_resn[1]), 'pdb', overwrite=True)

## Labeling at 3'-end

In [1026]:
names_methyl = ['C01','H01','H02','H03']

base_resn = ('deoxythymidine', 'DTO')
#base_resn = ('deoxyadenosine', 'DAO')
#base_resn = ('deoxyguanosine', 'DGO')
#base_resn = ('deoxycytidine', 'DCO')
#base_resn = ('uridine', 'RUO')
#base_resn = ('adenosine', 'RAO')
#base_resn = ('guanosine', 'RGO')
#base_resn = ('cytidine', 'RCO')

cmd_gui.reinitialize()
cmd_gui.load('../fragments/bases/out/{}.mol2'.format(base_resn[0]))
cmd_gui.load('../fragments/linkers/POS/out/POS_capped_resp_3prime.mol2')
cmd_gui.alter('POS_capped_resp_3prime and name P','name="P1"')
cmd_gui.alter('POS_capped_resp_3prime and name O1P','name="O3P"')
cmd_gui.alter('POS_capped_resp_3prime and name O2P','name="O4P"')
cmd_gui.pair_fit('POS_capped_resp_3prime and name O5\'','{} and name O3\''.format(base_resn[0]), 
                 'POS_capped_resp_3prime and name C01', '{} and name C3\''.format(base_resn[0]), 
                 'POS_capped_resp_3prime and name H03', '{} and name H3\''.format(base_resn[0]))
cmd_gui.remove('POS_capped_resp_3prime and (name {} or name O5\')'.format('+'.join(str(i) for i in names_methyl)))
cmd_gui.create(base_resn[1], 'POS_capped_resp_3prime or {}'.format(base_resn[0]))
cmd_gui.delete(base_resn[0])
cmd_gui.delete('POS_capped_resp_3prime')
cmd_gui.bond('{} and name P and resn POS'.format(base_resn[1]), '{} and name O3\''.format(base_resn[1]))
cmd_gui.alter('all', 'type="ATOM"')
cmd_gui.alter('all', 'elem=""')
cmd_gui.h_add('name N99')
cmd_gui.set_title(base_resn[1],1,base_resn[1])

In [1027]:
fd.ff.pymol_savemol2('../fragments/base_linkers/{}.mol2'.format(base_resn[1]), base_resn[1], overwrite=True)

In [1028]:
dyes = ['C3W', 'C5W']
for dye in dyes:
    fd.ff.couple_dye2baselinker(dye, base_resn[1], ['O91', 'C99', 'C27'], ['O98', 'C16', 'C17'], ['O98', 'C16', 'C17', 'H95', 'H96', 'H97'])
    fd.ff.save_molecule('../fluorlabel/dyes/{}_{}.pdb'.format(dye, base_resn[1]), '{}_{}'.format(dye, base_resn[1]), 'pdb', overwrite=True)

## Forcefield parameterization for fragments

Read force field of amberdyes

In [1072]:
amberdyes_ff = fd.ff.Parameters.read_amberdyes(['../forcefields/amberdyes/ffbonded_amberdyes.itp', '../forcefields/amberdyes/ffnonbonded_amberdyes.itp'])

internal DTM and RUM

```
cd ../fragments/acpype

base_linker=DTM
base=DT

linker=MLE
filename=../base_linkers/"$base_linker".mol2
sed "s/${base}/${base_linker}/g" "$filename" | sed "s/${linker}/${base_linker}/g" > "$base_linker"_ff.mol2
acpype -i "$base_linker"_ff.mol2 -o gmx -n -1 -a amber -c user
```

In [1073]:
baselinkers_itp = {}
baselinkers_ff = {}

In [1074]:
moleculetypes = ['DTM']

for mol in moleculetypes:
    baselinkers_itp[mol] = fd.ff.Molecule.read_molecule('../fragments/acpype/{}_ff.acpype/{}_ff_GMX.itp'.format(mol,mol), 'FLUOR-DYNAMICS')
    baselinkers_itp[mol].change_type('O3\'', 'OS') # residue is internal not terminal
    for a in ['O98', 'C16', 'C17', 'H95', 'H96', 'H97']:
        baselinkers_itp[mol].remove_atom(a)
        baselinkers_ff[mol] = fd.ff.Parameters.read_frcmod('../fragments/acpype/{}_ff.acpype/{}_ff_AC.frcmod'.format(mol,mol), baselinkers_itp[mol].atomtypes_molecule)
        amberdyes_ff.append(baselinkers_ff[mol])

In [1075]:
atoms_amberdyes = {'bondtypes' : [['ng', 'cg']],
                   'angletypes': [['c3g', 'ng', 'cg'],
                                  ['hng', 'ng', 'cg'],
                                  ['ng', 'cg', 'og'],
                                  ['ng', 'cg', 'c3g']],
                   'propertypes' : [['c3g', 'c3g', 'cg', 'ng'],
                                    ['hcg', 'c3g', 'cg', 'ng'],
                                    ['c3g', 'cg', 'ng', 'hng'],
                                    ['og', 'cg', 'ng', 'hng'],
                                    ['c3g', 'cg', 'ng', 'c3g'],
                                    ['og', 'cg', 'ng', 'c3g']]}

atoms_linker = {'bondtypes': [['N', 'cg']],
                'angletypes': [['CT', 'N', 'cg'],
                               ['H', 'N', 'cg'],
                               ['N', 'cg', 'og'],
                               ['N', 'cg', 'c3g']],
                'propertypes': [['c3g', 'c3g', 'cg', 'N'],
                                ['hcg', 'c3g', 'cg', 'N'],
                                ['c3g', 'cg', 'N', 'H'],
                                ['og', 'cg', 'N', 'H'],
                                ['c3g', 'cg', 'N', 'CT'],
                                ['og', 'cg', 'N', 'CT']]}

In [1076]:
specialbond_ff = fd.ff.Parameters.read_specialbond(amberdyes_ff, atoms_amberdyes, atoms_linker)
amberdyes_ff.append(specialbond_ff)

### Rational for linker RESP charge derivation at 3'/5'-ends

Reference: https://ambermd.org/tutorials/advanced/tutorial1/section1.htm

5'-end nucleotide: -0.3079 (DNA)
3'-end nucleotide: -0.6921 (DNA)

5'-end nucleotide will become an internal one (the phosphate of the linker will be transferred to the nucleotide) and linker will be the 5'-terminus (i.e. have a charnge of 0.3079). To achieve a net charge of -0.3079 when the phosphate of the linker is removed the PO2-O-CH3 cap must be constrained to must sum up to -0.6921, since -1-(-0.6921)=-0.6921.

to make the fragment neutral for Acpype make dinucleotide

In [1070]:
end = {'base':'DA', '5':'DAP', '3':'DAO'}
#end = {'base':'DG', '5':'DGP', '3':'DGO'}
#end = {'base':'DC', '5':'DCP', '3':'DCO'}
#end = {'base':'DT', '5':'DTP', '3':'DTO'}
#end = {'base':'RA', '5':'RAP', '3':'RAO'}
#end = {'base':'RG', '5':'RGP', '3':'RGO'}
#end = {'base':'RC', '5':'RCP', '3':'RCO'}
#end = {'base':'RU', '5':'RUP', '3':'RUO'}

cmd_gui.reinitialize()
cmd_gui.load('../fragments/base_linkers/{}.mol2'.format(end['5']))
cmd_gui.load('../fragments/base_linkers/{}.mol2'.format(end['3']))

# reassigning residue numbers and segment ids preserves the atom numbering after running through acpype
cmd_gui.alter('resn POS and {}'.format(end['5']), 'resi="1"')
cmd_gui.alter('resn POS and {}'.format(end['5']), 'segi="1"')
cmd_gui.alter('resn {}'.format(end['base']), 'resi="2"')
cmd_gui.alter('resn {}'.format(end['base']), 'segi="2"')
cmd_gui.alter('resn POS and {}'.format(end['3']), 'resi="3"')
cmd_gui.alter('resn POS and {}'.format(end['3']), 'segi="3"')
cmd_gui.remove('{} and resn {}'.format(end['3'], end['base']))
cmd_gui.create('{}_{}'.format(end['5'],end['3']), '{} or {}'.format(end['5'],end['3']))
cmd_gui.bond('resn {} and name O3\''.format(end['base']), 'resn POS and name P*')
cmd_gui.alter('resn POS', 'resn="{}"'.format(end['5']))
cmd_gui.alter('resn {}'.format(end['base']), 'resn="{}"'.format(end['5']))

60

In [1071]:
fd.ff.pymol_savemol2('../fragments/acpype/{}_{}_ff.mol2'.format(end['5'], end['3']), '{}_{}'.format(end['5'], end['3']), overwrite=True)

```
cd ../fragments/acpype/
fusion=DAP_DAO
fusion=DGP_DGO
fusion=DCP_DCO
fusion=DTP_DTO
fusion=RAP_RAO
fusion=RGP_RGO
fusion=RCP_RCO
fusion=RUP_RUO
acpype -i "$fusion"_ff.mol2 -o gmx -n -1 -a amber -c user
```

In [1077]:
moleculetypes = [{'base':'DA', '5':'DAP', '3':'DAO'},
       {'base':'DG', '5':'DGP', '3':'DGO'},
       {'base':'DC', '5':'DCP', '3':'DCO'},
       {'base':'DT', '5':'DTP', '3':'DTO'},
       {'base':'RA', '5':'RAP', '3':'RAO'},
       {'base':'RG', '5':'RGP', '3':'RGO'},
       {'base':'RC', '5':'RCP', '3':'RCO'},
       {'base':'RU', '5':'RUP', '3':'RUO'}]

#moleculetypes = [{'base':'A', '5':'RAP', '3':'RAO'}]

for end in moleculetypes:
    fusion_itp = fd.ff.Molecule.read_molecule('../fragments/acpype/{}_{}_ff.acpype/{}_{}_ff_GMX.itp'.format(end['5'],end['3'],end['5'],end['3']), 'FLUOR-DYNAMICS')
    fusion_itp.change_type('O3\'', 'OS') # residue is internal not terminal
    baselinkers_ff = fd.ff.Parameters.read_frcmod('../fragments/acpype/{}_{}_ff.acpype/{}_{}_ff_AC.frcmod'.format(end['5'],end['3'],end['5'],end['3']), fusion_itp.atomtypes_molecule)
    amberdyes_ff.append(baselinkers_ff)
        
    ff_mol2 = PandasMol2().read_mol2('../fragments/acpype/{}_{}_ff.mol2'.format(end['5'],end['3']))
    atoms5 = fusion_itp.atoms[(ff_mol2.df['subst_id']==1) | (ff_mol2.df['subst_id']==2)]
    atoms3 = fusion_itp.atoms[(ff_mol2.df['subst_id']==2) | (ff_mol2.df['subst_id']==3)]
    bonds5 = fusion_itp.bonds[fusion_itp.bonds['i'].isin(atoms5['nr']) & fusion_itp.bonds['j'].isin(atoms5['nr'])]
    bonds3 = fusion_itp.bonds[fusion_itp.bonds['i'].isin(atoms3['nr']) & fusion_itp.bonds['j'].isin(atoms3['nr'])]
    impropers5 = fusion_itp.impropers[fusion_itp.impropers['i'].isin(atoms5['nr']) & fusion_itp.impropers['j'].isin(atoms5['nr']) & fusion_itp.impropers['k'].isin(atoms5['nr']) & fusion_itp.impropers['l'].isin(atoms5['nr'])]
    impropers3 = fusion_itp.impropers[fusion_itp.impropers['i'].isin(atoms3['nr']) & fusion_itp.impropers['j'].isin(atoms3['nr']) & fusion_itp.impropers['k'].isin(atoms3['nr']) & fusion_itp.impropers['l'].isin(atoms3['nr'])]
    
    baselinkers_itp[end['5']] = fd.ff.Molecule(end['5'], atoms5, bonds5, None, None, impropers5)
    baselinkers_itp[end['3']] = fd.ff.Molecule(end['3'], atoms3, bonds3, None, None, impropers3)
    for a in ['O98', 'C16', 'C17', 'H95', 'H96', 'H97']:
        baselinkers_itp[end['5']].remove_atom(a)
        baselinkers_itp[end['3']].remove_atom(a)

In [1078]:
amberdyes_ff.add2ff('ff14sb', '../forcefields/fluordyes/')

In [1079]:
fd.ff.write_rtp('../forcefields/fluordyes/fluordyes.rtp', [baselinkers_itp[mol] for mol in baselinkers_itp.keys()])

In [40]:
i = 0
for resi in ['DAO', 'DGO', 'DCO', 'DTO', 'DAP', 'DGP', 'DCP', 'DTP', 'DTM']:
    if i == 0:
        fd.ff.update_residuetypes('{} DNA'.format(resi), '../forcefields/amberdyes/residuetypes_amberdyes.dat', '../forcefields/fluordyes/residuetypes.dat', overwrite=True)
    else:
        fd.ff.update_residuetypes('{} DNA'.format(resi), '../forcefields/fluordyes/residuetypes.dat', '../forcefields/fluordyes/residuetypes.dat', overwrite=True)
    i += 1
    
for resi in ['RAO', 'RGO', 'RCO', 'RUO', 'RAP', 'RGP', 'RCP', 'RUP']:
    fd.ff.update_residuetypes('{} RNA'.format(resi), '../forcefields/fluordyes/residuetypes.dat', '../forcefields/fluordyes/residuetypes.dat', overwrite=True)
    i += 1
    
for resi in ['C3W', 'C5W']:
    fd.ff.update_residuetypes('{} DNA'.format(resi), '../forcefields/fluordyes/residuetypes.dat', '../forcefields/fluordyes/residuetypes.dat', overwrite=True)
    i += 1
print('{} new entries in residuetypes.dat'.format(i))

19 new entries in residuetypes.dat


In [41]:
i = 0
for resi1 in ['C3W', 'C5W']:
    for resi2 in ['DAO', 'DGO', 'DCO', 'DTO', 'DAP', 'DGP', 'DCP', 'DTP', 'RAO', 'RGO', 'RCO', 'RUO', 'RAP', 'RGP', 'RCP', 'RUP', 'DTM']:
        if i == 0:
            fd.ff.update_specbond('{} C99 1 {} N99 1 0.132 {} {}'.format(resi1, resi2, resi1, resi2), '../forcefields/amberdyes/specbond_amberdyes.dat', '../forcefields/fluordyes/specbond.dat', overwrite=True)
        else:
            fd.ff.update_specbond('{} C99 1 {} N99 1 0.132 {} {}'.format(resi1, resi2, resi1, resi2), '../forcefields/fluordyes/specbond.dat', '../forcefields/fluordyes/specbond.dat', overwrite=True)
        i += 1
        
for resi1 in ['DA', 'DG', 'DC', 'DT']:
    for resi2 in ['DAO', 'DGO', 'DCO', 'DTO']:
        fd.ff.update_specbond(' {} O3\' 1 {} P 1 0.155 {} {}'.format(resi1, resi2, resi1, resi2), '../forcefields/fluordyes/specbond.dat', '../forcefields/fluordyes/specbond.dat', overwrite=True)
        i += 1
    for resi2 in ['DAP', 'DGP', 'DCP', 'DTP']:
        fd.ff.update_specbond('{} O3\' 1  {} P 1 0.155 {} {}'.format(resi2, resi1, resi2, resi1), '../forcefields/fluordyes/specbond.dat', '../forcefields/fluordyes/specbond.dat', overwrite=True)
        i += 1
    for resi2 in ['DTM']:
        fd.ff.update_specbond('{} O3\' 1  {} P 1 0.155 {} {}'.format(resi2, resi1, resi2, resi1), '../forcefields/fluordyes/specbond.dat', '../forcefields/fluordyes/specbond.dat', overwrite=True)
        fd.ff.update_specbond(' {} O3\' 1 {} P 1 0.155 {} {}'.format(resi1, resi2, resi1, resi2), '../forcefields/fluordyes/specbond.dat', '../forcefields/fluordyes/specbond.dat', overwrite=True)    
        i += 1
        
for resi1 in ['RA', 'RG', 'RC', 'RU']:
    for resi2 in ['RAO', 'RGO', 'RCO', 'RUO']:
        fd.ff.update_specbond(' {} O3\' 1 {} P 1 0.155 {} {}'.format(resi1, resi2, resi1, resi2), '../forcefields/fluordyes/specbond.dat', '../forcefields/fluordyes/specbond.dat', overwrite=True)
        i += 1
    for resi2 in ['RAP', 'RGP', 'RCP', 'RUP']:
        fd.ff.update_specbond('{} O3\' 1  {} P 1 0.155 {} {}'.format(resi2, resi1, resi2, resi1), '../forcefields/fluordyes/specbond.dat', '../forcefields/fluordyes/specbond.dat', overwrite=True)
        i += 1
    for resi2 in ['DTM']:
        fd.ff.update_specbond('{} O3\' 1  {} P 1 0.155 {} {}'.format(resi2, resi1, resi2, resi1), '../forcefields/fluordyes/specbond.dat', '../forcefields/fluordyes/specbond.dat', overwrite=True)
        fd.ff.update_specbond(' {} O3\' 1 {} P 1 0.155 {} {}'.format(resi1, resi2, resi1, resi2), '../forcefields/fluordyes/specbond.dat', '../forcefields/fluordyes/specbond.dat', overwrite=True)    
        i += 1
print('{} new entries in specbond.dat'.format(i))

106 new entries in specbond.dat
