## Import required packages

In [1]:
from typing import Dict, List, Tuple

import tqdm

import numpy as np
import pandas as pd

import scipy
import scipy.stats

import seaborn as sns
import matplotlib.pyplot as plt

import pathlib
import tttrlib

import os

from feda_tools import twodim_hist as tdh
from feda_tools import utilities as utils
from feda_tools import analysis as an

from decimal import Decimal, getcontext

import numpy.ma as ma
from scipy.stats import norm
from scipy.stats import halfnorm

import pickle

## Load the target PTU file

-  **repo_path** is the path to the feda_tools repository. It must be updated to reflect the location of the repo on the system running this notebook before continuing with the analysis. <br>
-  **dir** is the directory in which the target PTU file is located. <br>
- **file_ptu** is the target PTU file on which the analysis will be performed. <br>


In [3]:
# Load PTU Files
# file_path = pathlib.Path('//130.127.188.19/projects/FoxP_FKH-DNA/20220314_FoxP1_data_all_NK/20220310_V78C_monomer_NK/V78C_monomer_FoxP1_1hr/burstwise_All 0.2027#30')
# bid_path = pathlib.Path('//130.127.188.19/projects/FoxP_FKH-DNA/20220314_FoxP1_data_all_NK/20220310_V78C_monomer_NK/V78C_monomer_FoxP1_1hr/burstwise_All 0.2027#30/BIDs_30ph')

### for testing purposes ###
# file_path = pathlib.Path('C:/Users/2administrator/Documents/source/repos/feda_tools/test data/2022/03_02_22_Troubleshooting_detection_efficiencies/Combined_old_thresholds/Split_After_Adjust_HF_54000s_pinhole6-000000.ptu')

# #total time 816.9 seconds for this file
# dir = 'C:/Users/2administrator/Documents/source/repos/feda_tools/test data/2022/03_02_22_Troubleshooting_detection_efficiencies/'
# file_ptu = dir + 'Combined_old_thresholds/Split_After_Adjust_HF_54000s_pinhole6-000000.ptu'
# file_irf = dir + 'H2O_300s_adjust_thresholds.ptu'
# file_bkg = dir + 'PBS_300s_adjust_thresholds.ptu'


In [45]:
# Absolute path to the repository
repo_path = '/Users/frankie/Documents/source/repos/feda_tools/'

# Load PTU Files
# file_path = pathlib.Path('//130.127.188.19/projects/FoxP_FKH-DNA/20220314_FoxP1_data_all_NK/20220310_V78C_monomer_NK/V78C_monomer_FoxP1_1hr/burstwise_All 0.2027#30')
# bid_path = pathlib.Path('//130.127.188.19/projects/FoxP_FKH-DNA/20220314_FoxP1_data_all_NK/20220310_V78C_monomer_NK/V78C_monomer_FoxP1_1hr/burstwise_All 0.2027#30/BIDs_30ph')

### for testing purposes ###
# file_path = pathlib.Path('C:/Users/2administrator/Documents/source/repos/feda_tools/test data/2022/03_02_22_Troubleshooting_detection_efficiencies/Combined_old_thresholds/Split_After_Adjust_HF_54000s_pinhole6-000000.ptu')

# get all PTU files in the target directory.
dir = repo_path + '/test data/2022/03_02_22_Troubleshooting_detection_efficiencies/Combined_old_thresholds/'
ptu_files = utils.get_ptu_files(dir)
ptu_files.sort()
file_ptu = ptu_files[0]
# file_irf = dir + 'H2O_300s_adjust_thresholds.ptu'
# file_bkg = dir + 'PBS_300s_adjust_thresholds.ptu'

if pathlib.Path(file_ptu).exists():
    print("The path exists.")
else:
    print("The path does not exist.")


The path does not exist.


## Initialize tttrlib data and extract important global data

In [22]:
# define analysis window for subset of PTU
min_event = 0
max_event = 300000

data_ptu = tttrlib.TTTR(file_ptu, 'PTU')
routing_channels =  data_ptu.routing_channels

# total duration in seconds
all_macro_times = data_ptu.macro_times
micro_res = data_ptu.get_header().micro_time_resolution
macro_res =data_ptu.get_header().macro_time_resolution
total_duration = all_macro_times[-1] * macro_res

# data_irf = tttrlib.TTTR(file_irf, 'PTU')
# all_macro_times_irf = data_irf.macro_times
# all_micro_times_irf = data_irf.micro_times
# routing_channels_irf =  data_irf.routing_channels

# data_bkg = tttrlib.TTTR(file_bkg, 'PTU')
# all_macro_times_bkg = data_bkg.macro_times
# all_micro_times_bkg = data_bkg.micro_times
# routing_channels_bkg =  data_bkg.routing_channels

## Determine analysis settings for bur, bg4, by4, and br4 calculations.

In [23]:
# photon count threshold for burst selection
min_photon_count = 60

# this window changes for br4 and by4
# bg4 parameters
bg4_micro_time_min = 0
bg4_micro_time_max = 12499

# flourescence anisotropy parameters
g_factor = 0.4
l1_japan_corr = 0.0308
l2_japan_corr = 0.0368

# # bkg signals required for r Scatter calculations
bg4_bkg_para = 0
bg4_bkg_perp = 0


# MLE parameters
num_bins = 128
bin_width = macro_res/micro_res/num_bins/1000 # in nanoseconds

## Burst Selection: Calculate Interphoton Arrival Time 

- Each detected photon has a time of detection encoded by the macro time + the micro time. **all_macro_times** and **all_micro_times** are arrays whose index is represents the detected photons in order of detection, while the value represents the associated macro or micro time for each photon.
- **macro_res** and **micro_res** represent the resolution of the macro and micro times in seconds.
- The **macro time** indicates the time in units of **macro_res** that the excitation laser was last fired directly before this photon was detected.
- The **micro time** indicates the amount of time in units of **micro_res** that has elapsed since the excitation laser was last fired at which the photon was detected, i.e. it's the amount of time elapsed from the macro time at which the photon was detected.
- The interphoton arrival time is calculated by iterating through **all_macro_times** and **all_micro_times** and calculating the time elapsed between each photon detection event.

In [24]:
photon_time_intervals = an.calc_interphoton_arrival_times(data_ptu)

In [26]:
print(photon_time_intervals[-1])

2.3073616332567326


## Burst Selection: Calculate the Log of the Running Average

In [27]:
# Set the window size for the running average
window_size = 30

# Calculate the running average
running_avg = an.calc_running_average(photon_time_intervals, window_size)

# Create x axis array to match the data.
xarr = np.arange(window_size - 1, len(photon_time_intervals))

# Calculate the base 10 log of the running average
logrunavg = np.log10(running_avg)

## Burst Selection: Visualize the Photon Events with an Interactive Plot

In [29]:
# plot the running average as a 2D histogram with 1D histograms on the margins
%matplotlib qt
bins = {"x":141, "y": 141}
xrange = {"min" : min_event, "max" : max_event}
yrange = {"min" : -6, "max" : 2}
fig, ax, twodimdata = tdh.make_plot(xarr, logrunavg, "x", "y",xrange ,yrange, bins)

## Burst Selection: Estimate the Mean of the Gaussian Background Noise 


Check that the mean is estimated well by the max counts. The data on the right-half (blue/purple) estimates the right half of the Gaussian noise. The left-most bin of the right-half data is the estimated mean. When the estimated mean is well aligned with the peak, then you may continue to the next step.

In [31]:
# Bin the log of the running avg interphoton arrival times to find the overall data profile.
counts_logrunavg, bins_logrunavg = np.histogram(logrunavg, bins = bins['y'])

# Find the index of the maximum counts value.
index_of_max = np.argmax(counts_logrunavg)

# Use the index of the max counts to find the corresponding interphoton time bin.
mean_est = bins_logrunavg[index_of_max]

# Compress the filtered data to remove the masked values for plotting
filtered_logrunavg = ma.masked_less(logrunavg, mean_est).compressed()

# Plot to inspect the result
counts_logrunavg, bins_logrunavg, _ = plt.hist(logrunavg, bins = bins['y'], alpha=0.6, color='r')
plt.hist(filtered_logrunavg, bins = bins_logrunavg, alpha=0.6, color='b')
plt.show()

## Burst Selection: Fit a Half-Norm to the Right Tail of the Data and Extract the Std. Dev.

In [33]:
# fit with halfnorm. visualize for best fit testing. get mu and std dev. consider finding max and setting location as mean
mu, std = halfnorm.fit(filtered_logrunavg)

# counts_logrunavg, bins_logrunavg, _ = plt.hist(logrunavg, bins = bins['y'], density= True, alpha=0.6, color='r')
plt.hist(filtered_logrunavg, bins = bins['y'], density = True, alpha=0.6, color='r')

# Plot the PDF.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = halfnorm.pdf(x, mu, std)

plt.plot(x, p, 'k', linewidth=2)
title = "Fit Values: {:.2f} and {:.2f}".format(mu, std)
plt.title(title)

# Display the plot
plt.show()

## Burst Selection: Filter Out the Noise and Plot to Inspect
Set the threshold to 4sigma to the left of the mean, effectively isolating the protien dynamics.

In [34]:
### Using std from halfnorm fit, set the threshold for filtering out noise. Then, filter out noise. Raise 10 to threshold later for burst selection
threshold_value = mu - 4*std #raise 10 to the power of this threshold to obtain the threshold in Paris.
filtered_values = ma.masked_greater(logrunavg, threshold_value)

In [35]:
# visualize the log running average and the threshold values
plt.plot(xarr, logrunavg, label='Running Average', linestyle='None', marker = 'o', markersize = 5)
plt.plot(xarr, filtered_values, label='Threshold Values', linestyle='None', marker = '.', markersize = 5)
plt.xlabel('Photon Event #')
plt.ylabel('log(Photon Interval Time)')
plt.legend()
plt.xlim(min_event, max_event)
plt.show()

  el.exec() if hasattr(el, "exec") else el.exec_()


## Burst Selection: Create the Burst Index By Filtering with the Threshold.
- The **burst_index** will contain a burst at each index. A burst is a collection of photon events that we hope contains protien dynamics information!!!

In [47]:
### Get a burst index. Each list is a burst, and each list contains the indices of 
### the photon events in the original data.
burst_index = an.extract_unmasked_indices(filtered_values)

# Store result in the burst dict.
burst_dict = {file_ptu : burst_index}

## Burst Selection: Generate the Burst Indices for the Remaining PTU Files in the Directory
Using the parameters for the first PTU file, batch process the remaining PTU files.

In [49]:
for file in ptu_files[1:]:
    print(file)
    filtered_values = ma.masked_greater(logrunavg, threshold_value)
    burst_index = an.extract_unmasked_indices(filtered_values)
    burst_dict[file] = burst_index

Split_After_Adjust_HF_54000s_pinhole6-000001.ptu
Split_After_Adjust_HF_54000s_pinhole6-000002.ptu
Split_After_Adjust_HF_54000s_pinhole6-000003.ptu
Split_After_Adjust_HF_54000s_pinhole6-000004.ptu
Split_After_Adjust_HF_54000s_pinhole6-000005.ptu
Split_After_Adjust_HF_54000s_pinhole6-000006.ptu
Split_After_Adjust_HF_54000s_pinhole6-000007.ptu
Split_After_Adjust_HF_54000s_pinhole6-000008.ptu
Split_After_Adjust_HF_54000s_pinhole6-000009.ptu
Split_After_Adjust_LF_7200s_pinhole6-000000.ptu
Split_After_Adjust_LF_7200s_pinhole6-000001.ptu
Split_After_Adjust_LF_7200s_pinhole6-000002.ptu
Split_After_Adjust_LF_7200s_pinhole6-000003.ptu
Split_After_Adjust_LF_7200s_pinhole6-000004.ptu
Split_After_Adjust_LF_7200s_pinhole6-000005.ptu
Split_After_Adjust_LF_7200s_pinhole6-000006.ptu
Split_After_Adjust_LF_7200s_pinhole6-000007.ptu


In [50]:
with open( dir + 'burst_dict.pkl', 'wb') as file:
    pickle.dump(burst_dict, file)