In [78]:
# Enable IPython's autoreload extension; mode 2 reloads modules before each cell runs,
# so edits to imported project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [79]:
import pandas as pd 
import nupack as nu
from hdna import *
from tqdm import tqdm

# Experiment name handed to HDNA below.
EXPNAME = 'STACKING_z6e7s3e7_120180_sf3_2nd'

# Read the Hertel experimental data with explicit column names and discard the
# row labelled 0 (presumably the file's own header line -- verify against the CSV).
expdata = pd.read_csv(
    './data/herteldata.csv',
    names=['sequences', 'experimental'],
).drop(0)
# Re-render every experimental value as a scientific-notation string.
expdata['experimental'] = expdata['experimental'].map(
    lambda value: '{:e}'.format(float(value))
)

# Work on a copy of the first `limit` rows (currently all of them).
limit = len(expdata)
data = expdata.iloc[:limit].copy()
# Keep the original row labels in an 'index' column, then index the frame by
# sequence while leaving the 'sequences' column in place.
data['index'] = data.index
data = data.set_index(data['sequences'])

# Model configuration: stacking option, sliding filter and geometry angles.
MOD = Model(stacking='stacking')
MOD.setparams(sliding_filter=3)
MOD.setgeometry(theta=120, phi=180)

H = HDNA(data, EXPNAME, model=MOD)
# Optimisation / single-run calls, currently disabled:
# bounds = [(2e7, 2e8), (2e6, 2e8)]
# results = dual_annealing(H.run, bounds, maxiter=5, initial_temp=500)

# H.run([3e7, 2e7])

In [80]:
# Inspect the NUPACK model object held by MOD.
MOD.nupack

<nupack.model.Model at 0x7f300294e7c0>

In [81]:
# Two 10-nt strands; B is built from the reversed 'GGTTTTGGTT' string.
A = nu.Strand('CCAAAACCAA', name='a')
B = nu.Strand('GGTTTTGGTT'[::-1], name='b')

# Partition function of the two-strand complex under MOD's NUPACK model.
C = nu.Complex([A, B])
Z = nu.pfunc(C, MOD.nupack)

# Dot-parens-plus structure containing a single base pair:
# position 2 of strand a with the last position of strand b.
s1 = '.(........+.........)'

# Equilibrium probability and free energy of that structure.
p1 = nu.structure_probability(strands=[A, B], structure=s1, model=MOD.nupack)
dg1 = nu.structure_energy(strands=[A, B], structure=s1, model=MOD.nupack)

print(dg1)

-3.260451097333486


In [82]:
# Total state space of two sequences L1 and L2
import numpy as np 


# Length used for the sums below.
L = 14

# One term per n in 1..L-1, i.e. the values L, L-1, ..., 2.
spans = [L - n + 1 for n in range(1, L)]

# Gamma is the sum of the terms, Gamma2 the sum of their squares.
Gamma = sum(spans)
Gamma2 = sum(np.power(span, 2) for span in spans)

Zipoff = Gamma2 - Gamma

In [83]:
# Show Zipoff (Gamma2 - Gamma from the previous cell).
Zipoff

910

In [84]:
import pandas as pd
# NOTE(review): absolute, machine-specific path; the notebook will not run elsewhere
# without editing it. This also rebinds `data`, which held the experimental
# DataFrame in an earlier cell, to a plain string.
data = '/home/marco/Desktop/HDNAback/hDNA/results/sumofinchandpk33/run_1/simulationdata.csv'
# Read the CSV at that path into df.
df = pd.read_csv(data)

In [85]:
# df.drop('Unnamed: 5', axis=1, inplace=True)
# df.drop('sequences.1', axis=1, inplace=True)

# Working table: the seven count columns cast to float, followed by the
# sequence, rate and index columns copied unchanged from df.
wk = df[[
    'singlestranded',
    'duplex',
    'zipping',
    'on_nucleation',
    'off_nucleation',
    'backfray',
    'sliding',
]].astype(float).assign(
    sequences=df['sequences'],
    computational=df['computational'],
    experimental=df['experimental'],
    index=df['index'],
)

In [86]:
# Display the assembled working table.
wk

Unnamed: 0,singlestranded,duplex,zipping,on_nucleation,off_nucleation,backfray,sliding,sequences,computational,experimental
0,1.0,1.0,44.0,10.0,20.0,0.0,4.0,GTTCGGTCTA,5060009.0,1145950.0
1,1.0,1.0,44.0,10.0,42.0,14.0,10.0,CCAAAACCAA,4494166.0,1668154.0
2,1.0,1.0,44.0,10.0,42.0,10.0,10.0,AAACCACACA,5065145.0,1809459.0
3,1.0,1.0,44.0,10.0,40.0,10.0,8.0,ACCAAACCAC,5090068.0,2167675.0
4,1.0,1.0,44.0,10.0,40.0,4.0,10.0,ACACCAAACC,3146038.0,2178513.0
5,1.0,1.0,44.0,10.0,42.0,14.0,10.0,AAAAACCCAC,3529451.0,2364640.0
6,1.0,1.0,44.0,10.0,40.0,4.0,10.0,CAACACCCAA,2269859.0,2511513.0
7,1.0,1.0,44.0,10.0,40.0,8.0,8.0,AAACCCACCA,3599345.0,2527035.0
8,1.0,1.0,44.0,10.0,40.0,8.0,14.0,ACAACACCAC,2869091.0,2559279.0
9,1.0,1.0,44.0,10.0,40.0,8.0,10.0,CAAAACCCCA,4531013.0,2651666.0


In [87]:
import pandas as pd 

# Model instance shared by every per-sequence simulator below.
M = Model(standard=True)
M.setgeometry(theta=120,phi=270)

# One accumulated value per sequence in wk, in row order.
ETINI = []

for seq in wk['sequences']:
    # Build the strand, its complementary() partner and a simulator for the pair.
    A = Strand(M, seq)
    B = A.complementary()
    S = Simulator(M, A, B)

    # Occurrence count of each distinct entry of S.kinet.etaoff, with the entry
    # for the value 0 removed (raises KeyError if 0 never occurs).
    etaoffss = pd.Series(S.kinet.etaoff).value_counts().drop(0)
    # For every count c add 2**2 + 3**2 + ... + c**2 (nothing when c < 2).
    etaoff = sum(
        (count - step + 1) ** 2
        for count in etaoffss
        for step in range(1, count)
    )
    ETINI.append(etaoff)

initial fwd 4.972e+07
pseudoknotting overlap: 0.29633852194593613
fwdpseudoknot 3.951e+06
sum fwd 106178.4695927915
..((......+..))...... 1.062e+05 0.000e+00 -1.907
initial fwd 5.603e+07
pseudoknotting overlap: 0.15360490848242084
fwdpseudoknot 2.048e+06
sum fwd 73813.98779765613
((........+...))..... 7.381e+04 0.000e+00 -2.049
initial fwd 5.603e+07
pseudoknotting overlap: 0.15360490848242084
fwdpseudoknot 2.048e+06
sum fwd 73813.98779765613
.....((...+........)) 7.381e+04 0.000e+00 -2.049
initial fwd 4.972e+07
pseudoknotting overlap: 0.29633852194593613
fwdpseudoknot 3.951e+06
sum fwd 106178.4695927915
......((..+......)).. 1.062e+05 0.000e+00 -1.907
initial fwd 9.898e+07
pseudoknotting overlap: 0.18337464387262115
fwdpseudoknot 2.445e+06
sum fwd 29111661.91830162
..(((.....+....)))... 2.911e+07 0.000e+00 -2.725
initial fwd 3.474e+07
pseudoknotting overlap: 0.2658796227527963
fwdpseudoknot 9.236e+06
sum fwd 439749.0535286364
..((......+....)).... 4.397e+05 0.000e+00 -1.480
initial fwd

In [88]:
# Attach the per-sequence ETINI values as column 'ETAZ'. Assigning a Series aligns on
# index labels, so this relies on wk having a default 0..n-1 index -- TODO confirm.
wk['ETAZ'] = pd.Series(ETINI)

In [121]:
# Correction ratio per sequence: gamma = (backfray + sliding) / (0.5 * ETAZ).
# Computed column-wise instead of row by row with iterrows(): the result stays
# aligned with wk's own index (the list -> pd.Series round trip only lined up
# when wk had a default 0..n-1 index) and avoids the per-row Python loop.
gamma = (wk['backfray'] + wk['sliding']) / (0.5 * wk['ETAZ'])
wk['gamma'] = gamma
# Computational rate scaled down by the correction ratio.
wk['corrected'] = wk['computational'] / (1 + gamma)
wk['index'] = df['index']

# Plain-list copies kept under the names this cell has always defined.
ratios = wk['gamma'].tolist()
corrected = wk['corrected'].tolist()

In [122]:
# Plot wk via valplot (not defined in this notebook; presumably provided by
# `from hdna import *` -- verify).
valplot(wk, corr=True, log=True, name='correction')