### Imports

In [1]:
# Import time
import time

# Numpy imports:    
import numpy as np

# Pandas for csv 
import pandas as pd

# for extracting filenames 
import glob

#Matplotlib imports
import matplotlib.pyplot as plt

# skimage submodules we need
import skimage.io

#Scipy imports
import scipy
from scipy import optimize
from scipy.optimize import curve_fit

import os

import atp_hydro
atp_hydro.pboc_style_mpl()
# show images in viridis by default (pboc is fire?)
plt.rcParams['image.cmap'] = 'viridis'

# Import seaborn for aesthetic plots 
import seaborn as sns

from tqdm.notebook import tqdm

### Find Files

Set the imaging parameters below so that frame indices are converted into the correct time steps.

In [2]:
# --- Acquisition / analysis settings ---
frame_int = 20   # s
Motconc = 1      # uM, NCD Motors
skip_int = 5     # stride used when subsampling the data frames

# Root directory under which the data is stored
datapath = '/Volumes/Najma/'

# Folders to iterate over. Available: 'ATP/', 'ADP/', 'Phosphate/'.
# Picking one datafolder at a time makes processing easier.
datafolders = ['ATP/']

Recursively search a root directory and collect the folders that contain TIFF files. This makes it easier to locate the subfolders that hold the image data.

In [3]:
# Locate every folder that holds tiff files, so the image subfolders are easy to find.
def find_file_paths(root_dir, file_name, result=None):
    """Recursively collect the directories under ``root_dir`` that contain a matching file.

    A directory is reported once when at least one regular file directly inside
    it has ``file_name`` as a substring of its name (e.g. ``".tif"``). Every
    subdirectory is searched, including those that sit next to matching files.

    Parameters
    ----------
    root_dir : str
        Directory at which the search starts.
    file_name : str
        Substring looked for in each file name.
    result : list of str, optional
        List to which matches are appended. A new list is created when omitted.

    Returns
    -------
    list of str
        ``result`` with the matching directory paths appended. Entries are
        visited in sorted name order, so the output order is reproducible.
    """
    if result is None:
        result = []

    found_here = False

    # os.listdir returns entries in arbitrary order; sort for a stable result.
    for item in sorted(os.listdir(root_dir)):
        item_path = os.path.join(root_dir, item)

        if os.path.isfile(item_path):
            # Record the containing directory only once, but keep scanning so
            # that sibling subdirectories are still searched.
            if not found_here and file_name in item:
                result.append(os.path.dirname(item_path))
                found_here = True

        elif os.path.isdir(item_path):
            find_file_paths(item_path, file_name, result)

    return result

In [4]:
# Gather, across every selected data folder, the subfolders that hold tiff files.
target_file_name = ".tif"
tiff_folders = []
for folder in datafolders:
    root_directory = datapath + folder
    file_paths = find_file_paths(root_directory, target_file_name)
    # Extend keeps tiff_folders flat (one entry per folder path).
    tiff_folders.extend(file_paths)

# NOTE: the count printed here is the number of folders found, one per experiment.
print('Number of tiff files: ', len(tiff_folders))

Number of tiff files:  17


### Analyze Data

In [5]:
# Function that calls automated data processing function from atp hydro packages

def process_folder(datafolder, cal_params=None, p0=None):
    """Run ``atp_hydro.analyze_hydrolysis`` on the tiff images of one folder.

    The folder is globbed for ``*405*.tif`` (bound channel) and ``*480*.tif``
    (unbound channel) images. Each sorted file list is subsampled with a stride
    of ``skip_int`` before being handed to the analysis routine. The
    notebook-level settings ``frame_int``, ``skip_int`` and ``Motconc`` are
    read from the enclosing namespace.

    Parameters
    ----------
    datafolder : str
        Path of the folder containing the tiff images.
    cal_params : sequence of float, optional
        Calibration parameters forwarded as ``cal_params``. Defaults to the
        calibration used for this data set.
    p0 : sequence of float, optional
        Forwarded as ``p0``. Defaults to ``[2000, 500, 5]``.

    Returns
    -------
    tuple
        The twelve values unpacked from ``atp_hydro.analyze_hydrolysis``, in
        order: ``(linear_params, linear_r2, exp_params, rate, exp_r2, times,
        ratio_hydro_uM, ratio_hydro, bound_hydro, unbound_hydro,
        linear_data_regime, exponential_fit_start_time)``.
    """
    if cal_params is None:
        # to do: Try recalibration with exponent = 1.
        cal_params = [67.60201128, 3.36417414, 1.06783864, 1.17289855]
    if p0 is None:
        p0 = [2000, 500, 5]

    #--------- Read in Files -----------#
    # bound Images
    included_bound = '*405*.tif'
    bound_files = np.sort(glob.glob(datafolder + '/' + included_bound))[::skip_int]

    # unbound Images
    included_unbound = '*480*.tif'
    unbound_files = np.sort(glob.glob(datafolder + '/' + included_unbound))[::skip_int]

    # ------------------ Process file ------------------ #
    (
        linear_params,
        linear_r2,
        exp_params,
        rate,
        exp_r2,
        times,
        ratio_hydro_uM,
        ratio_hydro,
        bound_hydro,
        unbound_hydro,
        linear_data_regime,
        exponential_fit_start_time,
    ) = atp_hydro.analyze_hydrolysis(
        bound_files,
        unbound_files,
        frame_int,
        skip_int,
        cal_params=cal_params,
        p0=p0,
        Motconc=Motconc,
    )

    return (
        linear_params,
        linear_r2,
        exp_params,
        rate,
        exp_r2,
        times,
        ratio_hydro_uM,
        ratio_hydro,
        bound_hydro,
        unbound_hydro,
        linear_data_regime,
        exponential_fit_start_time,
    )

In [6]:
# Per-experiment result stores: entry k of every list belongs to tiff_folders[k].
linear_parameters_list = []
linear_r2_list = []
exponential_parameters_list = []
exponential_hydrolysis_rates_list = []
exp_r2_list = []
exponential_fit_start_time_list = []
linear_data_regime_list = []
ATP_data_list = []
Bound_data_list = []
Unbound_data_list = []
Ratio_list = []
time_list = []

for i, folder in enumerate(tiff_folders):
    print(f"File {i+1} of {len(tiff_folders)}")

    # Analyze one folder and unpack its twelve results.
    (
        linear_params,
        linear_r2,
        exp_params,
        rate,
        exp_r2,
        times,
        ratio_hydro_uM,
        ratio_hydro,
        bound_hydro,
        unbound_hydro,
        linear_data_regime,
        exponential_fit_start_time,
    ) = process_folder(folder)

    # File each result in its matching store.
    for store, value in (
        (linear_parameters_list, linear_params),
        (linear_r2_list, linear_r2),
        (exponential_parameters_list, exp_params),
        (exponential_hydrolysis_rates_list, rate),
        (exp_r2_list, exp_r2),
        (linear_data_regime_list, linear_data_regime),
        (exponential_fit_start_time_list, exponential_fit_start_time),
        (ATP_data_list, ratio_hydro_uM),
        (Ratio_list, ratio_hydro),
        (Bound_data_list, bound_hydro),
        (Unbound_data_list, unbound_hydro),
        (time_list, times),
    ):
        store.append(value)

File 1 of 17


  return a * ((c - array) / (array - b)) ** (1/d)


File 2 of 17
File 3 of 17
File 4 of 17
File 5 of 17
File 6 of 17




File 7 of 17
File 8 of 17
File 9 of 17


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  return 1 - ssres/sstot


File 10 of 17
File 11 of 17
File 12 of 17
File 13 of 17
File 14 of 17
File 15 of 17
File 16 of 17
File 17 of 17


### Saving in CSV files

In [7]:
#--------Collect conditions from file names ------#

ATP_list = []
ADP_list = []
Phosphate_list = []
# Declared for completeness; nothing in this cell fills the lists below.
A81D_list = []
ExposureTime_405_list = []
ExposureTime_480_list = []
FrameInterval_list = []


def _leading_int(token, n_suffix_chars):
    """Drop the last ``n_suffix_chars`` characters of ``token`` and parse the rest as an int."""
    # presumably the stripped characters are a unit/species suffix such as 'uMATP' -- TODO confirm
    return int(token[:-n_suffix_chars])


def _parse_conditions(folder_path):
    """Return ``(ATP, ADP, phosphate)`` concentrations parsed from a data folder path.

    The path is split relative to ``datapath`` so that the parsing does not
    depend on how many directories deep ``datapath`` itself is. The first
    relative component selects the experiment type ('ATP', 'ADP' or 'Phosphate').

    Raises
    ------
    ValueError
        If the first relative component is not a known experiment type, or a
        concentration token cannot be parsed as an integer.
    """
    parts = os.path.relpath(folder_path, datapath).split(os.sep)
    condition = parts[0]

    # For ATP folder
    if condition == 'ATP':
        atp = _leading_int(parts[-1].split('_')[0], 5)
        return atp, 0, 0

    # For ADP folder
    if condition == 'ADP':
        adp = _leading_int(parts[-1].split('_')[0], 5)
        atp = _leading_int(parts[1].split('_')[-1], 5)
        return atp, adp, 0

    # For Phosphate folder
    if condition == 'Phosphate':
        atp = _leading_int(parts[1].split('_')[-1], 5)
        # When the level below the ATP folder is a 'Nikon_...' folder, the
        # phosphate token sits one level deeper.
        p_level = 3 if parts[2].split('_')[0] == "Nikon" else 2
        phosphate = _leading_int(parts[p_level].split('_')[0], 4)
        return atp, 0, phosphate

    raise ValueError(
        f"Cannot determine experiment type for folder {folder_path!r}: "
        f"expected it to live under 'ATP', 'ADP' or 'Phosphate' inside {datapath!r}"
    )


for file in tiff_folders:
    # Parse everything first, then append, so the three lists always stay
    # aligned with tiff_folders even if parsing fails part-way.
    ATP_conc, ADP_conc, P_conc = _parse_conditions(file)
    ATP_list.append(ATP_conc)
    ADP_list.append(ADP_conc)
    Phosphate_list.append(P_conc)

In [8]:
# Info below taken from file names (eg: Nikon_10X_bin1_20sFrameInterval_100ms480_150ms405_50uMATP_1uMmicro_1400nM_A81D_2). Stays constant for all experiments.
FrameInterval = 20  # seconds
Channel480ExposureTime = 100/1e3  # seconds
Channel405ExposureTime = 150/1e3  # seconds
A81D_conc = 1400  # nM

# Every per-experiment constant is repeated once per analyzed folder.
n_experiments = len(tiff_folders)

# Same calibration for each experiment
cal_params_used = [67.60201128, 3.36417414, 1.06783864, 1.17289855]

# One row per analyzed folder: fit results, experimental conditions and the raw curves.
df_anal = pd.DataFrame({
    'Data Location': tiff_folders,
    'ATP Concentration (uM)': ATP_list,
    'ADP Concentration (uM)': ADP_list,
    'P Concentration (uM)': Phosphate_list,
    # Same motor concentration for this set of experiments; taken from Motconc
    # so it agrees with the value used to normalize the linear rate below.
    'NCD Micro Motor Concentration (uM)': [Motconc]*n_experiments,
    'r-squared for exponential fit': exp_r2_list,
    'Tau (s)': [params[0] for params in exponential_parameters_list],
    'A0 (uM)': [params[1] for params in exponential_parameters_list],
    'Ainf (uM)': [params[2] for params in exponential_parameters_list],
    'Exponential Fitting Start Time (seconds)': exponential_fit_start_time_list,
    'Hydrolysis Rate (uM/s/motor) from Exponential Curve': exponential_hydrolysis_rates_list,
    'Linear Data Regime (start and end time in seconds)': linear_data_regime_list,
    # NOTE(review): the label says -abs(Slope) but the value is -slope/Motconc;
    # they agree only if the fitted slope is non-negative -- confirm intent.
    'Hydrolysis Rate (uM/s/motor) from Linear Fitting (-abs(Slope)/Motconc)': [-lparam[0]/Motconc for lparam in linear_parameters_list],
    'Y-intercept of Linear Curve from Linear Fitting': [lparam[1] for lparam in linear_parameters_list],
    'r-squared for linear fit': linear_r2_list,
    'Cal_Param [Km, Rmax, Rmin, n]': [cal_params_used]*n_experiments,
    # Use the constant declared above instead of repeating the literal 20.
    'Frame Interval (s)': [FrameInterval]*n_experiments,
    '480 Channel Exposure Time (s)': [Channel480ExposureTime]*n_experiments,
    '405 Channel Exposure Time (s)': [Channel405ExposureTime]*n_experiments,
    'A81D Concentration (nM)': [A81D_conc]*n_experiments,
    'Time Array (s)': [list(time) for time in time_list],
    'ATP Curve (uM)': [list(ATP) for ATP in ATP_data_list],
    'Bound Curve': [list(bound_array) for bound_array in Bound_data_list],
    'Unbound Curve': [list(unbound_array) for unbound_array in Unbound_data_list],
    'Ratio (A.U.)': [list(ratio) for ratio in Ratio_list],
})

df_anal.to_csv('../../analyzed_data/atp-hydro/ATP.csv')