In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from tqdm import tqdm
from scipy.optimize import curve_fit

In [2]:
df_fusion = pd.read_excel('real_snare_new_linker_lp_05_27aug24.xlsx')
df_fusion.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   N_snare               120 non-null    int64  
 1   seed                  120 non-null    int64  
 2   lp                    120 non-null    float64
 3   t_fus (nframe)        120 non-null    int64  
 4   t_hfus_1 (nframe)     120 non-null    int64  
 5   t_hfus_last (nframe)  120 non-null    int64  
 6   nstalks_rev           120 non-null    int64  
 7   tension (pN/nm)       120 non-null    float64
 8   no_fusion             120 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 8.6 KB


In [3]:
df_fusion.head()

Unnamed: 0,N_snare,seed,lp,t_fus (nframe),t_hfus_1 (nframe),t_hfus_last (nframe),nstalks_rev,tension (pN/nm),no_fusion
0,9,114,0.5,48,10,10,0,0.05,0
1,9,172,0.5,230,111,111,0,0.05,0
2,9,802,0.5,95,24,24,0,0.05,0
3,9,843,0.5,105,85,85,0,0.05,0
4,9,894,0.5,1110,1064,1064,0,0.05,0


In [4]:
def resampling(fusion_record, n_snare, lp, n_sample):
    select_n_snare = fusion_record[fusion_record['N_snare']==n_snare]
    select_lp = select_n_snare[select_n_snare['lp']==lp]
    print('Number of fusion events:', len(select_lp))
    resampled_runs = []
    for _ in range(n_sample):
        resampled_runs.append(select_lp.sample(n=len(select_lp), replace = True))
    return resampled_runs

In [5]:
def get_survival_curve(sample, mode, n_total_frame = 1500):
    if mode == 0: # calculate first hemifus time
        chara_times = sample['t_hfus_1 (nframe)']
    elif mode == 1: # calculate fus time
        chara_times = sample['t_fus (nframe)']
    elif mode == 2: # calculate irrev hemifus to fus time
        sample = sample[sample['no_fusion'] == 0]
        chara_times = sample['t_fus (nframe)'] - sample['t_hfus_last (nframe)']
    elif mode == 3: # calculate irrev hemifus time
        #sample = sample[sample['no_fusion'] == 0]
        chara_times = sample['t_hfus_last (nframe)']
    survivals = np.ones((n_total_frame, len(sample)))
    for i in range(n_total_frame):
        survivals[i] = chara_times > i
    if len(sample) > 0:
        return survivals.sum(axis = 1)/len(sample) # survival curve starts from 1
    else:
        return np.ones((n_total_frame))*np.nan

In [6]:
nsnare_arr = [9,7,6,5,4,3]
lp_arr = [0.5]

In [7]:
rev_stalks_all = []
for lp in lp_arr:
    for n_snare in nsnare_arr:
        samples = resampling(df_fusion, n_snare, lp, 200)
        all_survivals = []
        rev_stalks = []
        for sample_idx in tqdm(range(len(samples))):
            surv_a_sample = []
            nstalk_sample = samples[sample_idx]['nstalks_rev']
            rev_stalks.append(np.mean(nstalk_sample))
            for mode in [0,1,2,3]:
                surv_s = get_survival_curve(samples[sample_idx], mode, n_total_frame = 1500)
                surv_a_sample.append(surv_s)
            all_survivals.append(surv_a_sample)
        all_survivals = np.array(all_survivals)
        np.save('all_survivals_n_snare_%d_lp_%.1f_27aug24'%(n_snare, lp),all_survivals)
        rev_stalks_all.append(np.mean(rev_stalks))
        rev_stalks_all.append(np.std(rev_stalks))
rev_stalks_all = np.array(rev_stalks_all)
np.savetxt('rev_stalks_all_27aug24.txt',rev_stalks_all)

Number of fusion events: 20


100%|██████████| 200/200 [01:46<00:00,  1.89it/s]


Number of fusion events: 20


100%|██████████| 200/200 [00:48<00:00,  4.14it/s]


In [8]:
for lp in lp_arr:
    for n_snare in nsnare_arr:
        surv_curves = np.load('all_survivals_n_snare_%d_lp_%.1f_27aug24.npy'%(n_snare, lp))
        print(n_snare, surv_curves.shape)
        
        mean_surv_curves = np.nanmean(surv_curves, axis=0)#surv_curves.mean(axis=0)
        surv_curves_low = np.nanquantile(surv_curves, 0.025, axis=0)
        surv_curves_high = np.nanquantile(surv_curves, 0.975, axis=0)
        surv_curves_all_info = np.concatenate([mean_surv_curves,surv_curves_low,surv_curves_high])
        print(surv_curves_all_info.shape)
        np.savetxt('statistics_survivals_n_snare_%d_lp_%.1f_27aug24.txt'%(n_snare, lp),surv_curves_all_info)

9 (200, 4, 1500)
(12, 1500)
7 (200, 4, 1500)
(12, 1500)


In [9]:
def exponential_fit(survival_curve):
    def exponential_func(x, b):
        return np.exp(b * x)
    x_data = np.arange(1500) *1.36/1000 # time in ms
    y_data = survival_curve
    if np.any(np.isnan(y_data)):
        return np.nan
    else:
        popt, pcov = curve_fit(exponential_func, x_data, y_data)
    if popt[0] > 0:
        print('positive fit!')
        return np.nan
    else: 
        return -1/popt[0]

In [10]:
for lp in lp_arr:
    for n_snare in nsnare_arr:
        print(n_snare, lp)
        surv_curves = np.load('all_survivals_n_snare_%d_lp_%.1f_27aug24.npy'%(n_snare, lp))
        print(np.shape(surv_curves))
        all_cha = []
        for sample_s in surv_curves:
            cha = []
            for sample_mode in sample_s:
                try:
                    cha.append(exponential_fit(sample_mode))
                except:
                    continue
            #print(cha)
            all_cha.append(cha)
        print(all_cha)
        all_cha = np.array(all_cha)
        
        np.save('all_chara_time_n_snare_%d_lp_%.1f'%(n_snare, lp),all_cha)
        mean_chara_time = np.nanmean(all_cha,axis=0)
        std_chara_time = np.nanstd(all_cha,axis=0)
        statis_chara_time = np.concatenate([mean_chara_time,std_chara_time])
        np.savetxt('statistics_chara_time_n_snare_%d_lp_%.1f.txt'%(n_snare, lp),statis_chara_time)
            

9 0.5
(200, 4, 1500)
[[0.41686270847939544, 0.6314793519876887, 0.0752945136202286, 0.5553768461354933], [0.22133313598583942, 0.32933721423003104, 0.07691660690283453, 0.23979557216629868], [0.43796175564150935, 0.7033248305408913, 0.06406792716129411, 0.635638169438527], [0.430679801262091, 0.712604655339393, 0.07028716800445661, 0.6407449750315906], [0.2898360106159646, 0.42444893816988327, 0.08383108845322994, 0.33195517900204385], [0.389194081744623, 0.6293252308448919, 0.06307110527321559, 0.5635655079152941], [0.3001655930963682, 0.4306352010118766, 0.07399145907648005, 0.35703062084572756], [0.4079206603160946, 0.7542890158955406, 0.07403480020277287, 0.6806065808824481], [0.5342740477678469, 0.849326695970829, 0.0695575890835912, 0.7728174434360018], [0.30130568471627756, 0.474873886688718, 0.0626641812443928, 0.4056374294674071], [0.5658510388721445, 0.8292721641679934, 0.05759462483139582, 0.7618876717664916], [0.5573639872310568, 0.8498488491633127, 0.06031807683338387, 0.7