In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys; sys.path.append('..')

In [3]:
import os 
import sys 
import numpy as np 
import pandas as pd
from hdna import *

In [4]:
# Import experimental data from Hertel 
expdata = pd.read_csv('../data/herteldata.csv', names=['seq', 'expvalue'])
# Clean the dataframe 
expdata = expdata.drop(0)
expdata['expvalue'] = ['{:e}'.format(float(e)) for e in expdata['expvalue']]

limit = 3
torun = expdata.copy().iloc[:limit]
torun.set_index(torun['seq'], inplace=True)

In [5]:

EXPNAME = 'jupyter_OW2'
RESULTS_DIR = f"results/{EXPNAME}"

if os.path.isdir(RESULTS_DIR): 
    i = 0
    while True: 
        i += 1
        permission = input('Folder already exists, do you want to overwrite old experiments? [Y,N]')
        if permission.lower().startswith('y'):
            print('>>>> overwriting old simulations')
            break
        elif permission.lower().startswith('n') or i == 3:
            print(">>>> stopping the program")
            sys.exit()
        print("yes or not?") 
else:
    os.makedirs(RESULTS_DIR)


HP = {
    
    #model free parameters  
    'minimum_nucleation': 4,
    'zipping_rate':       2e9,
    'sliding_rate':       2e7,
    
    #temperature
    'temperature':        25,       #### HERTEL EXPERIMENTAL TEMPERATURE 
    
    #angles
    'azimutal_angle':     120,
    'longitudinal_angle': 270,
}

OPT = {

    #simulation options
    'runtime': 5e-6,
    'N_simul': 3000
}

hyperparams = pd.DataFrame.from_dict([dict(**HP,**OPT)]).T
hyperparams.rename(columns={np.int64(0):'values'}, inplace=True)
hyperparams.index.rename('hyperparameters', inplace=True)
hyperparams.to_csv(f'{RESULTS_DIR}/hyperparameters.csv')

# Actual computation 
rates = []
model = Model('dna', '3D', celsius=HP['temperature'])

for i, (seq, exp) in enumerate(zip(torun['seq'], torun['expvalue'])):
    print(f'Strand number {i}: {seq}')
    print(f'Creating network from sequence...')
    A = Strand(model, seq)
    B = A.complementary()
    kinet = Kinetwork(model, A, B, HP['minimum_nucleation'])
    geo = Geometry(HP['azimutal_angle'], HP['longitudinal_angle'])
    K = Kinetics(model, kinet, geo)
    K.set_slidingrate(HP['sliding_rate'])
    K.set_zippingrate(HP['zipping_rate'])
    opts = Options(method='direct', runtime=OPT['runtime'], Nsim=OPT['N_simul'], results_dir=RESULTS_DIR, stranditer=i)
    print('embedding network into biosimulator network model...')
    simulatore = Simulator(model, kinet, K, options=opts)
    print('start running simulations...')
    results = simulatore.ensemble()
    mfpt = simulatore.mfpts(results)
    rates.append(1/mfpt)

    df = pd.DataFrame.from_dict([simulatore.overview])
    df.drop(['duplex','singlestranded'], axis=1, inplace=True)
    newcols = list(df.columns)
    newvals = list(df.loc[0,df.columns])
    torun.loc[seq, newcols] = newvals

    print(f"experimental rate: {'{:e}'.format(float(exp))}")
    print(f"computed rate:     {'{:e}'.format(1/mfpt)}", '\n')
    del results

torun['computed'] = rates
torun.to_csv(f"{RESULTS_DIR}/simulationdata.csv")
valplot(torun, EXPNAME, writepath=RESULTS_DIR, theme='dark')


#TODO
""" Also for error checking put some csv with nodes and edges of the 
    network and with the same information but from the biosim model. 
"""

>>>> overwriting old simulations
Strand number 0: GTTCGGTCTA
Creating network from sequence...
embedding network into biosimulator network model...
start running simulations...


100%|██████████| 3000/3000 [00:02<00:00, 1117.79it/s]


39 simulations didn't produce a duplex.
That's 1.3% of simulations
experimental rate: 1.145950e+06
computed rate:     2.169387e+06 

Strand number 1: CCAAAACCAA
Creating network from sequence...
embedding network into biosimulator network model...
start running simulations...


100%|██████████| 3000/3000 [00:01<00:00, 2314.36it/s]


2 simulations didn't produce a duplex.
That's 0.06666666666666667% of simulations
experimental rate: 1.668154e+06
computed rate:     5.162517e+06 

Strand number 2: AAACCACACA
Creating network from sequence...
embedding network into biosimulator network model...
start running simulations...


100%|██████████| 3000/3000 [00:17<00:00, 176.42it/s]


0 simulations didn't produce a duplex.
That's 0.0% of simulations
experimental rate: 1.809459e+06
computed rate:     2.098110e+07 



' Also for error checking put some csv with nodes and edges of the \n    network and with the same information but from the biosim model. \n'

In [6]:
torun;;;;;;;;;

SyntaxError: invalid syntax (1613367156.py, line 1)

In [None]:
tt = torun.copy()

In [None]:
df = pd.DataFrame.from_dict([simulatore.overview])
df

Unnamed: 0,singlestranded,off_register,on_register,zipping,duplex,failed,fail%
0,1,2,7,14,1,0,0.0


In [None]:
df.drop(['duplex','singlestranded'], axis=1, inplace=True)

In [None]:
newcols = list(df.columns)
newvals = list(df.loc[0,df.columns])

In [None]:
tt.set_index(tt['seq'], inplace=True)

In [None]:
tt.loc['GTTCGGTCTA', newcols] = newvals

In [None]:
tt.index

Index(['GTTCGGTCTA', 'CCAAAACCAA', 'AAACCACACA'], dtype='object', name='seq')

In [None]:
tt

Unnamed: 0_level_0,seq,expvalue,off_register,on_register,zipping,failed,fail%
seq,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
GTTCGGTCTA,GTTCGGTCTA,1145950.0,2.0,7.0,14.0,0.0,0.0
CCAAAACCAA,CCAAAACCAA,1668154.0,,,,,
AAACCACACA,AAACCACACA,1809459.0,,,,,
