In [18]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
import sys 
sys.path.append('..')

In [101]:
import pandas as pd 
import numpy as np 
from hdna import *

OPTDIR = 'optimization'

# Import experimental data from Hertel 
expdata = pd.read_csv('../data/herteldata.csv', names=['seq', 'expvalue'])
# Clean the dataframe 
expdata = expdata.drop(0)
expdata['expvalue'] = ['{:e}'.format(float(e)) for e in expdata['expvalue']]

limit = len(expdata)
torun = expdata.copy().iloc[:limit]
torun.set_index(torun['seq'], inplace=True, drop=True)

sequences = torun['seq']
expvalues = torun['expvalue']

data = [(s, e) for s, e in zip(sequences, expvalues)]


In [104]:
torun['sequence', 'expvalue']

KeyError: ('sequence', 'expvalue')

In [92]:
DF = pd.DataFrame(data, columns=['sequence', 'experimental'])
DF = DF.set_index('sequence', drop=False)

In [93]:
diz = {'a': 3, 'b': 5, 'c': 9}

In [96]:
 DF.loc['ACACCACCAC']

sequence          ACACCACCAC
experimental    5.613147e+06
a                        3.0
b                        5.0
c                        9.0
Name: ACACCACCAC, dtype: object

In [95]:
for col, val in zip(diz.keys(), diz.values()):
    print(col, val)
    DF.loc['ACACCACCAC', col] = val

a 3
b 5
c 9


In [129]:
for a in DF[['experimental']].iterrows():
    print(a[1]['experimental'])

1.145950e+06
1.668154e+06
1.809459e+06
2.167675e+06
2.178513e+06
2.364640e+06
2.511513e+06
2.527035e+06
2.559279e+06
2.651666e+06
2.655670e+06
2.695608e+06
2.765864e+06
2.825047e+06
3.843880e+06
3.865948e+06
3.960894e+06
4.159347e+06
5.448523e+06
5.613147e+06
1.042173e+06
1.186657e+06
1.779672e+06
1.831343e+06
1.892296e+06
2.124654e+06
2.304253e+06
2.433452e+06
2.745623e+06
2.975581e+06
3.186916e+06
3.242444e+06
3.629903e+06
3.659749e+06
3.763039e+06
3.805400e+06
3.961922e+06
4.062258e+06
4.442077e+06
4.818114e+06
5.208461e+06


In [42]:
DF.loc[0, 'mod'] = 123

In [41]:
pd.read_csv('../results/goodmorning-5/simulationdata.csv')

Unnamed: 0,seq,seq.1,expvalue,index,computed,Unnamed: 5,zipping,on_nucleation,off_nucleation,backfray,sliding,failed,fail%
0,GTTCGGTCTA,GTTCGGTCTA,1145950.0,1,18479200.0,0.0,44.0,10.0,0.0,0.0,0.0,0.0,0.0
1,CCAAAACCAA,CCAAAACCAA,1668154.0,2,4040170.0,0.0,44.0,10.0,8.0,10.0,2.0,0.0,0.0
2,AAACCACACA,AAACCACACA,1809459.0,3,9536979.0,0.0,44.0,10.0,10.0,18.0,2.0,0.0,0.0
3,ACCAAACCAC,ACCAAACCAC,2167675.0,4,5120656.0,0.0,44.0,10.0,8.0,10.0,2.0,0.0,0.0
4,ACACCAAACC,ACACCAAACC,2178513.0,5,18092840.0,0.0,44.0,10.0,0.0,0.0,0.0,0.0,0.0
5,AAAAACCCAC,AAAAACCCAC,2364640.0,6,8644970.0,0.0,44.0,10.0,12.0,28.0,2.0,0.0,0.0
6,CAACACCCAA,CAACACCCAA,2511513.0,7,19024210.0,0.0,44.0,10.0,0.0,0.0,0.0,0.0,0.0
7,AAACCCACCA,AAACCCACCA,2527035.0,8,19068740.0,0.0,44.0,10.0,0.0,0.0,0.0,0.0,0.0
8,ACAACACCAC,ACAACACCAC,2559279.0,9,18771020.0,0.0,44.0,10.0,0.0,0.0,0.0,0.0,0.0
9,CAAAACCCCA,CAAAACCCCA,2651666.0,10,18227570.0,0.0,44.0,10.0,0.0,0.0,0.0,0.0,0.0


In [8]:
a = 1
b = 2
c = 3

H(a, b, c)

1
2
3


In [9]:
import sys
sys.path.append('..')

In [10]:
from hdna import * 
import nupack as nu
import networkx as nx 
import numpy as np
from fitter import Fitter

In [11]:
HP = {
    #model free parameters  
    'minimum_nucleation': 1,
    'sliding_cutoff':     200,
    'sliding_filter':     4,
    'zipping_rate':       2e7,
    'sliding_rate':       5e6,
    #temperature
    'temperature':        25,       #### HERTEL EXPERIMENTAL TEMPERATURE 
    #angles
    'azimutal_angle':     120,
    'longitudinal_angle': 270,
}

OPT = {
    #simulation options
    'runtime': 4e-6,
    'N_simul': 1000,
    'trajstosave': 35,
    #nupack options
    'stacking': 'nostacking'
}

SOPT = {    
    #datasaving options 
    'G_saving': 'strand_folder'
}

# Actual computation 
MOD = Model('dna', '3D', 
        min_nucleation=HP['minimum_nucleation'], 
        sliding_cutoff=HP['sliding_cutoff'],
        sliding_filter=HP['sliding_filter'],
        stacking=OPT['stacking'],
        sliding=HP['sliding_rate'],
        zipping=HP['zipping_rate'],
        celsius=HP['temperature'])

In [12]:
s1 = Strand(MOD, "AAAAAAAAA")
s2 = s1.complementary()
C = Chamber(MOD, s1, s2)
N = Kinetwork(MOD, s1, s2, Geometry(120,270))
OZZ = Options(Nsim=10000)
S = Simulator(MOD, N, OZZ)

In [13]:
Graph = S.BSGraph()
matrix = nx.to_numpy_array(Graph)

In [14]:
a = 6983958.142024048
'{:.2e}'.format(a)

'6.98e+06'

In [15]:
for s, data in Graph.nodes.data():
    print(s)
    for key in data.keys():
        try: Graph.nodes[s][key] = '{:.3f}'.format(data[key])
        except: print('data type not numerical')
    print()

for e1, e2, data in Graph.edges.data():
    print(e1, e2)
    for key in data.keys():
        try: Graph[e1][e2][key] = '{:.3e}'.format(float(data[key]))
        except ValueError: pass

.........+.........
data type not numerical
data type not numerical

(........+...).....
data type not numerical
data type not numerical
data type not numerical

.(.......+..)......
data type not numerical
data type not numerical
data type not numerical

..(......+.).......
data type not numerical
data type not numerical
data type not numerical

...(.....+)........
data type not numerical
data type not numerical
data type not numerical

.....(...+........)
data type not numerical
data type not numerical
data type not numerical

......(..+.......).
data type not numerical
data type not numerical
data type not numerical

.......(.+......)..
data type not numerical
data type not numerical
data type not numerical

........(+.....)...
data type not numerical
data type not numerical
data type not numerical

(........+....)....
data type not numerical
data type not numerical
data type not numerical

.(.......+...).....
data type not numerical
data type not numerical
data type not numerical

.

In [16]:
pd.DataFrame(matrix).to_csv('./mbare.csv')

NameError: name 'pd' is not defined

In [None]:
sim = S.ensemble()

100%|██████████| 10000/10000 [00:00<00:00, 14205.92it/s]


In [None]:
tau = S.fpts(sim)

0 simulations didn't produce a duplex.
That's 0.0% of simulations


In [None]:
import pandas as pd 

In [None]:
pd.Series(tau).to_csv('./tau.csv')

In [None]:
from scipy.stats import gamma

In [None]:
mp = S.mfpt(sim)
fit_alpha, fit_loc, fit_beta = gamma.fit(tau)

0 simulations didn't produce a duplex.
That's 0.0% of simulations


In [None]:
print('alpha ', fit_alpha)
print('loc   ', fit_loc)
print('beta  ', fit_beta)

alpha  1.9339518854908637
loc    6.805693265495691e-09
beta   1.2455848111976055e-07


In [None]:
1/np.mean(tau)

4037221.8218196044

In [None]:
1/gamma.mean(a=fit_alpha, loc=fit_loc, scale=fit_beta)

4037210.1148929577

In [None]:
from hdna.reporting import histotime as ht

In [None]:
rv = gamma(a=fit_alpha, loc=fit_loc, scale=fit_beta)

In [None]:
mod = 1/rv.mean()

In [None]:
ht(tau, rv, OZZ.runtime, exp=8,mod=mod)

In [None]:
from hdna.reporting import percomplot
percomplot(tau, theme='dark')

In [None]:
import plotly.graph_objects as go
import numpy as np 
nbins = 150
X = np.linspace(0,OZZ.runtime,500)
# Y = [gamma.pdf(j, a=fit_alpha, scale=fit_beta) for j in X]
data1 = go.Scatter(x=X, y=gamma.pdf(X, a=fit_alpha, scale=fit_beta))
data2 = go.Histogram(x=tau, histnorm='probability density', nbinsx=nbins)

layout = go.Layout(xaxis=dict(tickformat=".0e"),
                   yaxis=dict(tickformat=".0e"))

fig = go.Figure(data=[data1, data2], layout=layout)
fig.update_traces(opacity=0.85)
fig.show()

In [None]:
np.var(tau), np.average(tau)

(3.138292878541905e-14, 2.476950844254807e-07)

In [None]:
1/np.min(tau), 1/np.max(tau)

(144050613.38394898, 664972.5032762754)