In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from tqdm import tqdm
from scipy.optimize import curve_fit

In [2]:
# Load the fusion records from the Excel sheet into a DataFrame.
# Presumably one row per simulation run (each row carries a `seed`) - TODO confirm.
df_fusion = pd.read_excel('real_snare_linker_scan_27aug24.xlsx')
# Print column names, dtypes and non-null counts as a quick integrity check.
df_fusion.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 0 to 44
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   N_snare               45 non-null     int64  
 1   seed                  45 non-null     int64  
 2   Nunzip                45 non-null     int64  
 3   lp                    45 non-null     float64
 4   t_fus (nframe)        45 non-null     int64  
 5   t_hfus_1 (nframe)     0 non-null      float64
 6   t_hfus_last (nframe)  0 non-null      float64
 7   no_fusion             45 non-null     int64  
dtypes: float64(3), int64(5)
memory usage: 2.9 KB


In [3]:
# Preview the first rows to sanity-check the parsed columns.
df_fusion.head()

Unnamed: 0,N_snare,seed,Nunzip,lp,t_fus (nframe),t_hfus_1 (nframe),t_hfus_last (nframe),no_fusion
0,7,18,7,0.3,5,,,0
1,7,77,7,0.3,4,,,0
2,7,262,7,0.3,8,,,0
3,7,335,7,0.3,8,,,0
4,7,567,7,0.3,9,,,0


In [4]:
def resampling(fusion_record, nunz, lp, n_sample, random_state=None):
    """Draw bootstrap resamples of the rows belonging to one (Nunzip, lp) condition.

    Parameters
    ----------
    fusion_record : pandas.DataFrame
        Table with at least the columns 'Nunzip' and 'lp'.
    nunz : int
        Value of the 'Nunzip' column to select.
    lp : float
        Value of the 'lp' column to select (compared with a small tolerance).
    n_sample : int
        Number of bootstrap resamples to draw.
    random_state : int, numpy.random.RandomState or None, optional
        Seed (or generator) for reproducible resampling. With the default
        ``None`` the draws are left to pandas' default source, exactly as
        when the argument did not exist.

    Returns
    -------
    list of pandas.DataFrame
        ``n_sample`` frames, each drawn with replacement from the selected
        rows and having the same number of rows as the selection.
    """
    # `lp` is a float column: exact `==` silently drops rows whose stored value
    # differs from the requested one only by rounding, so compare with a tolerance.
    lp_matches = np.isclose(fusion_record['lp'].to_numpy(dtype=float), lp)
    select_lp = fusion_record[(fusion_record['Nunzip'] == nunz) & lp_matches]

    # One generator shared by all draws: the resamples differ from each other
    # but the whole list is reproducible for a given seed.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    return [
        select_lp.sample(n=len(select_lp), replace=True, random_state=rng)
        for _ in range(n_sample)
    ]

In [5]:
def get_survival_curve(sample, mode, n_total_frame = 299):
    """Compute the survival curve of a characteristic time over a set of runs.

    For every frame ``i`` in ``0 .. n_total_frame - 1`` the curve holds the
    fraction of rows whose characteristic time is strictly greater than ``i``.

    Parameters
    ----------
    sample : pandas.DataFrame
        Rows to evaluate; must contain the columns read by the chosen mode.
    mode : int
        Which characteristic time to use:
        0 - first hemifusion time, 't_hfus_1 (nframe)';
        1 - fusion time, 't_fus (nframe)';
        2 - irreversible hemifusion to fusion time,
            't_fus (nframe)' - 't_hfus_last (nframe)', restricted to rows
            with 'no_fusion' == 0;
        3 - irreversible hemifusion time, 't_hfus_last (nframe)'.
    n_total_frame : int, optional
        Number of frames (length of the returned curve).

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``n_total_frame``. All NaN when no rows
        remain after the mode-specific selection.

    Raises
    ------
    ValueError
        If ``mode`` is not one of 0, 1, 2, 3.
    """
    if mode == 0: # first hemifusion time
        chara_times = sample['t_hfus_1 (nframe)']
    elif mode == 1: # fusion time
        chara_times = sample['t_fus (nframe)']
    elif mode == 2: # irreversible hemifusion to fusion time
        sample = sample[sample['no_fusion'] == 0]
        chara_times = sample['t_fus (nframe)'] - sample['t_hfus_last (nframe)']
    elif mode == 3: # irreversible hemifusion time
        chara_times = sample['t_hfus_last (nframe)']
    else:
        # Previously an unknown mode fell through and failed later with an
        # UnboundLocalError on `chara_times`.
        raise ValueError('mode must be 0, 1, 2 or 3, got %r' % (mode,))

    n_runs = len(sample)
    if n_runs == 0:
        return np.full(n_total_frame, np.nan)

    times = chara_times.to_numpy(dtype=float)
    frames = np.arange(n_total_frame)[:, None]
    # Broadcast (n_total_frame, 1) against (1, n_runs) instead of looping over frames.
    # NaN times compare as False, i.e. such rows count as "not surviving" from
    # frame 0 on - NOTE(review): confirm this is the intended treatment of missing times.
    with np.errstate(invalid='ignore'):
        still_surviving = times[None, :] > frames
    return still_surviving.sum(axis=1) / n_runs # survival curve starts from 1

In [6]:
# Presumably the number of SNAREs in the scan - TODO confirm. Not referenced by the
# cells visible here: the file names below hard-code "n_snare_7".
nsnare = 7
# Values matched against the 'lp' column when selecting runs.
lp_arr = [0.3,0.5,0.7]
# Values matched against the 'Nunzip' column when selecting runs.
nunz_arr = [7,10,14]

In [7]:
# Seed NumPy's global RNG so the bootstrap below (and therefore the saved .npy
# files) is reproducible under Restart & Run All. `resampling` calls
# DataFrame.sample without a random_state, which draws from this global state.
np.random.seed(0)

for lp in lp_arr:
    for nunz in nunz_arr:
        # 200 bootstrap resamples of the runs for this (Nunzip, lp) condition.
        samples = resampling(df_fusion, nunz, lp, 200)
        all_survivals = []
        for sample in tqdm(samples):
            surv_a_sample = []
            # Only mode 1 (fusion time) is evaluated; the list keeps the mode axis
            # so the saved array has shape (n_bootstrap, n_modes, n_frames).
            for mode in [1]:
                surv_s = get_survival_curve(sample, mode, n_total_frame = 299)
                surv_a_sample.append(surv_s)
            all_survivals.append(surv_a_sample)
        all_survivals = np.array(all_survivals)
        # np.save appends the '.npy' extension to this name.
        np.save('all_survivals_n_snare_7_Nunz_%d_lp_%.1f_27aug24'%(nunz, lp),all_survivals)

100%|██████████| 200/200 [00:04<00:00, 45.00it/s]
100%|██████████| 200/200 [00:04<00:00, 43.69it/s]
100%|██████████| 200/200 [00:04<00:00, 45.24it/s]
100%|██████████| 200/200 [00:05<00:00, 38.17it/s]
100%|██████████| 200/200 [00:10<00:00, 18.89it/s]
100%|██████████| 200/200 [00:11<00:00, 16.85it/s]
100%|██████████| 200/200 [00:11<00:00, 17.79it/s]
100%|██████████| 200/200 [00:08<00:00, 23.74it/s]
100%|██████████| 200/200 [00:04<00:00, 45.44it/s]


In [8]:
for lp in lp_arr:
    for nunz in nunz_arr:
        # Bootstrap survival curves saved earlier for this (Nunzip, lp) condition.
        surv_curves = np.load('all_survivals_n_snare_7_Nunz_%d_lp_%.1f_27aug24.npy'%(nunz, lp))
        print(nunz, surv_curves.shape)

        # Reduce over the bootstrap axis (axis 0): NaN-aware mean plus the
        # 2.5 % / 97.5 % quantiles, both quantiles obtained from a single call.
        mean_surv_curves = np.nanmean(surv_curves, axis=0)
        surv_curves_low, surv_curves_high = np.nanquantile(surv_curves, [0.025, 0.975], axis=0)

        # Stack the rows as mean, lower bound, upper bound and write them as text.
        surv_curves_all_info = np.concatenate(
            (mean_surv_curves, surv_curves_low, surv_curves_high), axis=0
        )
        print(surv_curves_all_info.shape)
        np.savetxt('statistics_survivals_n_snare_7_Nunz_%d_lp_%.1f_27aug24.txt'%(nunz, lp),surv_curves_all_info)

7 (200, 1, 299)
(3, 299)
10 (200, 1, 299)
(3, 299)
14 (200, 1, 299)
(3, 299)
7 (200, 1, 299)
(3, 299)
10 (200, 1, 299)
(3, 299)
14 (200, 1, 299)
(3, 299)
7 (200, 1, 299)
(3, 299)
10 (200, 1, 299)
(3, 299)
14 (200, 1, 299)
(3, 299)


In [9]:
def exponential_fit(survival_curve, frame_dt=5*1.36/1000):
    """Fit ``exp(b * t)`` to a survival curve and return the decay time ``-1/b``.

    Parameters
    ----------
    survival_curve : array-like
        1-D survival curve, one value per frame.
    frame_dt : float, optional
        Time between consecutive frames. The default is the constant the
        function previously hard-coded (labelled as milliseconds there).

    Returns
    -------
    float
        The characteristic time ``-1/b`` in the units of ``frame_dt``, or NaN
        when the curve is empty, contains NaN, or the fitted rate ``b`` is not
        negative (no decay).

    Notes
    -----
    Exceptions raised by ``scipy.optimize.curve_fit`` (e.g. RuntimeError when
    the fit does not converge) are propagated to the caller.
    """
    y_data = np.asarray(survival_curve, dtype=float)
    if y_data.size == 0 or np.any(np.isnan(y_data)):
        return np.nan

    def exponential_func(x, b):
        return np.exp(b * x)

    # Build the time axis from the curve's own length rather than a fixed 299
    # frames, so curves of any length can be fitted.
    x_data = np.arange(len(y_data)) * frame_dt
    popt, pcov = curve_fit(exponential_func, x_data, y_data)

    rate = popt[0]
    if rate > 0:
        print('positive fit!')
        return np.nan
    if rate == 0:
        # A perfectly flat fit has no finite decay time; avoid dividing by zero.
        return np.nan
    return -1/rate

In [11]:
for lp in lp_arr:
    # NOTE(review): the loop variable is named `n_snare` but iterates over the
    # Nunzip values in `nunz_arr`; it fills the 'Nunz_%d' slot of the input name
    # and the 'n_snare_%d' slot of the output names. The output names are kept
    # unchanged so existing consumers of these files keep working.
    for n_snare in nunz_arr:
        print(n_snare, lp)
        surv_curves = np.load('all_survivals_n_snare_7_Nunz_%d_lp_%.1f_27aug24.npy'%(n_snare, lp))
        print(np.shape(surv_curves))
        all_cha = []
        for sample_s in surv_curves:
            cha = []
            for sample_mode in sample_s:
                try:
                    cha.append(exponential_fit(sample_mode))
                except (RuntimeError, ValueError):
                    # A failed fit is recorded as NaN rather than skipped, so every
                    # row keeps one entry per mode and `all_cha` stays rectangular.
                    cha.append(np.nan)
            all_cha.append(cha)
        all_cha = np.array(all_cha)

        # np.save appends the '.npy' extension to this name.
        np.save('all_chara_time_n_snare_%d_lp_%.1f'%(n_snare, lp),all_cha)
        # NaN-aware statistics over the bootstrap axis (axis 0).
        mean_chara_time = np.nanmean(all_cha,axis=0)
        # Spread of the bootstrap estimates; this should be the bootstrap
        # standard error of the characteristic time - TODO confirm.
        std_chara_time = np.nanstd(all_cha,axis=0)
        statis_chara_time = np.concatenate([mean_chara_time,std_chara_time])
        np.savetxt('statistics_chara_time_n_snare_%d_lp_%.1f.txt'%(n_snare, lp),statis_chara_time)
            

7 0.3
(200, 1, 299)
10 0.3
(200, 1, 299)
14 0.3
(200, 1, 299)
positive fit!
positive fit!
positive fit!
positive fit!


  popt, pcov = curve_fit(exponential_func, x_data, y_data)


positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
7 0.5
(200, 1, 299)
10 0.5
(200, 1, 299)
14 0.5
(200, 1, 299)
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!


  mean_chara_time = np.nanmean(all_cha,axis=0)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
14 0.7
(200, 1, 299)
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!
positive fit!