## Import required packages

In [1]:
import os
import pathlib
import pickle
import time
from decimal import Decimal, getcontext
from typing import Dict, List, Tuple

import matplotlib.pyplot as plt
import numpy as np
import numpy.ma as ma
import pandas as pd
import scipy
import scipy.stats
import seaborn as sns
import tqdm
import tttrlib
from scipy.stats import halfnorm
from scipy.stats import norm

from feda_tools import analysis as an
from feda_tools import fileselector as fs
from feda_tools import twodim_hist as tdh
from feda_tools import utilities as utils

ModuleNotFoundError: No module named 'PyQt5'

## Load the target PTU file

-  Run the next cell to be prompted to select your files.


In [28]:
# Ask the user to pick the PTU files through feda_tools' file selector, then
# sort the selection in place. Later cells treat ptu_files[0] as the reference
# file used to tune the burst-selection parameters.
# NOTE(review): assumes gui_fname() returns a mutable list of path strings and
# that at least one file was selected — confirm behaviour on cancel.
ptu_files = fs.gui_fname()
ptu_files.sort()

In [13]:
# Don't run this, for testing purposes only.

# Absolute path to the repository. Set the FEDA_TOOLS_REPO environment variable
# to point at a checkout on another machine; the original developer path is
# only the fallback.
repo_path = os.environ.get(
    'FEDA_TOOLS_REPO',
    '/Users/frankie/Documents/source/repos/feda_tools/',
)

# get all PTU files in the target directory.
# os.path.join avoids the doubled separator that plain concatenation produced
# (repo_path already ends in '/'). The trailing '' keeps a trailing separator so
# that `dir + file_ptu` below still forms a valid path.
# NOTE: `dir` shadows the builtin dir(); the name is kept because other cells
# (in commented-out alternatives) refer to it.
dir = os.path.join(
    repo_path,
    'test data',
    '2022',
    '03_02_22_Troubleshooting_detection_efficiencies',
    'Combined_old_thresholds',
    '',
)
ptu_files = utils.get_ptu_files(dir)
ptu_files.sort()

# Fail with a clear message instead of an IndexError on ptu_files[0].
if len(ptu_files) == 0:
    raise FileNotFoundError("No PTU files found in " + dir)

file_ptu = ptu_files[0]
# file_irf = dir + 'H2O_300s_adjust_thresholds.ptu'
# file_bkg = dir + 'PBS_300s_adjust_thresholds.ptu'

# Confirm that the first PTU file resolves to a real path before continuing.
if pathlib.Path(dir+file_ptu).exists():
    print("The path exists.")
else:
    print("The path does not exist.")


The path exists.


## Initialize tttrlib data and extract important global data

In [2]:
# Photon-event window for a subset of the PTU file; later cells use these as
# the x-axis limits of the inspection plots.
min_event, max_event = 0, 300000

# Open the first selected PTU file.
# data_ptu = tttrlib.TTTR(dir + file_ptu, 'PTU')
data_ptu = tttrlib.TTTR(ptu_files[0], 'PTU')
routing_channels = data_ptu.routing_channels

# Time resolutions come from the file header; fetch the header once.
ptu_header = data_ptu.get_header()
micro_res = ptu_header.micro_time_resolution
macro_res = ptu_header.macro_time_resolution

# Total duration in seconds: last macro time scaled by the macro resolution.
all_macro_times = data_ptu.macro_times
total_duration = all_macro_times[-1] * macro_res

NameError: name 'ptu_files' is not defined

## Determine analysis settings for bur, bg4, by4, and br4 calculations.

In [5]:
# Tunable analysis settings used by the later bur/bg4/by4/br4 calculations.

# photon count threshold for burst selection
min_photon_count = 60

# MLE parameters
num_bins = 128
# NOTE(review): macro_res / micro_res is a unitless ratio when both resolutions
# are in seconds, so the "nanoseconds" unit below is not evident from this
# expression — confirm the intended unit and the /1000 factor.
bin_width = macro_res/micro_res/num_bins/1000 # in nanoseconds

## Burst Selection: Calculate Interphoton Arrival Time 

- Each detected photon has a time of detection encoded by the macro time + the micro time. **all_macro_times** and **all_micro_times** are arrays whose index represents the detected photons in order of detection, while the value represents the associated macro or micro time for each photon.
- **macro_res** and **micro_res** represent the resolution of the macro and micro times in seconds.
- The **macro time** indicates the time in units of **macro_res** that the excitation laser was last fired directly before this photon was detected.
- The **micro time** indicates the amount of time, in units of **micro_res**, that elapsed between the most recent firing of the excitation laser (the macro time) and the detection of the photon.
- The interphoton arrival time is calculated by iterating through **all_macro_times** and **all_micro_times** and calculating the time elapsed between each photon detection event.

In [6]:
# Interphoton arrival times for the loaded PTU data, as computed by
# feda_tools.analysis.calc_interphoton_arrival_times.
# NOTE(review): units and exact length relative to the photon count are
# determined by the helper — confirm before relying on them.
photon_time_intervals = an.calc_interphoton_arrival_times(data_ptu)

In [7]:
# Sanity check: show the last interphoton arrival time.
print(photon_time_intervals[-1])

0.4766245069731667


## Burst Selection: Calculate the Log of the Running Average

In [8]:
# Number of consecutive interphoton intervals averaged together.
window_size = 30

# Smooth the intervals with a running average, then move to log10 space.
running_avg = an.calc_running_average(photon_time_intervals, window_size)
logrunavg = np.log10(running_avg)

# x-axis values to plot the running average against. They start at
# window_size - 1, presumably the first index with a complete window —
# confirm against calc_running_average.
n_intervals = len(photon_time_intervals)
xarr = np.arange(window_size - 1, n_intervals)

## Burst Selection: Visualize the Photon Events with an Interactive Plot

In [24]:
# plot the running average as a 2D histogram with 1D histograms on the margins
%matplotlib qt
bins = {"x":141, "y": 141}
xrange = {"min" : min_event, "max" : max_event}
yrange = {"min" : -6, "max" : 2}
fig, ax, twodimdata = tdh.make_plot(xarr, logrunavg, "Photon Event #",r"$\Delta T_{photon events}$" ,xrange ,yrange, bins)

## Burst Selection: Estimate the Mean of the Gaussian Background Noise 


Check that the mean is estimated well by the max counts. The data on the right-half (blue/purple) estimates the right half of the Gaussian noise. The left-most bin of the right-half data is the estimated mean. When the estimated mean is well aligned with the peak, then you may continue to the next step.

In [23]:
# Histogram the log running average to expose the overall data profile.
counts_logrunavg, bins_logrunavg = np.histogram(logrunavg, bins=bins['y'])

# The left edge of the most populated bin is taken as the estimated mean.
index_of_max = counts_logrunavg.argmax()
mean_est = bins_logrunavg[index_of_max]

# Mask everything below the estimated mean, then drop the masked entries so
# only the right-hand half of the distribution remains.
right_half = ma.masked_less(logrunavg, mean_est)
filtered_logrunavg = right_half.compressed()

# Inspect: full data in red, right-half data in blue on the same bin edges.
counts_logrunavg, bins_logrunavg, _ = plt.hist(
    logrunavg, bins=bins['y'], alpha=0.6, color='r'
)
plt.hist(filtered_logrunavg, bins=bins_logrunavg, alpha=0.6, color='b')
plt.xlabel(r"$\Delta T_{photon events}$")
plt.ylabel("Photon Event Counts")
plt.show()

## Burst Selection: Fit a Half-Norm to the Right Tail of the Data and Extract the Std. Dev.

In [19]:
# Fit a half-normal distribution to the right-hand half of the data and plot
# the fitted PDF over a normalised histogram for visual inspection.
# halfnorm.fit returns (loc, scale); they are kept as mu and std because the
# following cells use those names to build the threshold.
# Possible refinement: pin the location to the histogram maximum instead of
# letting the fit choose it.
mu, std = halfnorm.fit(filtered_logrunavg)

plt.hist(filtered_logrunavg, bins = bins['y'], density = True, alpha=0.6, color='r')

# Plot the PDF across the current x-limits of the histogram.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = halfnorm.pdf(x, mu, std)

plt.plot(x, p, 'k', linewidth=2)
# Raw string: "\m" and "\s" are invalid escape sequences in a normal string
# literal and trigger a SyntaxWarning on recent Python versions.
title = r"Fit Values: $\mu = $ {:.2f} and $\sigma = $ {:.2f}".format(mu, std)
plt.title(title)
plt.xlabel(r"$\Delta T_{photon events}$")
# density=True normalises the histogram, so the y-axis is a density, not counts.
plt.ylabel("Probability Density")
# Display the plot
plt.show()

## Burst Selection: Filter Out the Noise and Plot to Inspect
Set the threshold to $4\sigma$ to the left of the mean, effectively isolating the protein dynamics.

In [20]:
### Place the noise threshold a fixed number of standard deviations (from the
### halfnorm fit) to the left of the fitted mean, then mask everything above it.
### The threshold is in log10 space: raise 10 to the power of threshold_value
### to obtain the threshold in Paris.
sigma_multiplier = 4
threshold_value = mu - sigma_multiplier * std
filtered_values = ma.masked_greater(logrunavg, threshold_value)

In [21]:
# Overlay the values that survive the threshold on the full log running average.
point_style = dict(linestyle='None', markersize=5)
plt.plot(xarr, logrunavg, label='Running Average', marker='o', **point_style)
plt.plot(xarr, filtered_values, label='Threshold Values', marker='.', **point_style)
plt.xlim(min_event, max_event)
plt.xlabel('Photon Event #')
plt.ylabel('log(Photon Interval Time)')
plt.legend()
plt.show()

## Burst Selection: Create the Burst Index By Filtering with the Threshold.
- The **burst_index** will contain a burst at each index. A burst is a collection of photon events that we hope contains protein dynamics information!

In [26]:
### Build the burst index from the thresholded data. Each entry is one burst:
### a list of the indices of its photon events in the original data.
burst_index = an.extract_unmasked_indices(filtered_values)

# Start the per-file burst dictionary, keyed by PTU file path, with the first file.
burst_dict = {}
burst_dict[ptu_files[0]] = burst_index

## Burst Selection: Generate the Burst Indices for the Remaining PTU Files in the Directory
Using the parameters for the first PTU file, batch process the remaining PTU files.

In [39]:
# Batch-process every remaining PTU file with the parameters tuned on the first
# file: window_size and threshold_value come from the cells above, so the batch
# cannot silently diverge from the values that were inspected.
# NOTE: the loop rebinds the notebook-level names data_ptu,
# photon_time_intervals, running_avg, xarr, logrunavg, filtered_values and
# burst_index. When it finishes they describe the LAST file processed.

# perf_counter is a monotonic clock intended for measuring elapsed time.
start_time = time.perf_counter()
for file in ptu_files[1:]:
    # basename honours the platform's path separator, unlike split("/").
    filename = os.path.basename(file)
    print("Calculating bursts for " + filename)
    data_ptu = tttrlib.TTTR(file, 'PTU')
    photon_time_intervals = an.calc_interphoton_arrival_times(data_ptu)
    running_avg = an.calc_running_average(photon_time_intervals, window_size)
    # Kept in step with logrunavg so the plotting cells can be re-run as-is.
    xarr = np.arange(window_size - 1, len(photon_time_intervals))
    logrunavg = np.log10(running_avg)
    filtered_values = ma.masked_greater(logrunavg, threshold_value)
    burst_index = an.extract_unmasked_indices(filtered_values)
    burst_dict[file] = burst_index

# Record the end time
end_time = time.perf_counter()

# Calculate the duration
duration = end_time - start_time

# Print the duration in seconds
print(f"The loop took {duration} seconds to run.")

Calculating bursts for Split_After_Adjust_LF_7200s_pinhole6-000001.ptu
Calculating bursts for Split_After_Adjust_LF_7200s_pinhole6-000002.ptu
Calculating bursts for Split_After_Adjust_LF_7200s_pinhole6-000003.ptu
Calculating bursts for Split_After_Adjust_LF_7200s_pinhole6-000004.ptu
Calculating bursts for Split_After_Adjust_LF_7200s_pinhole6-000005.ptu
Calculating bursts for Split_After_Adjust_LF_7200s_pinhole6-000006.ptu
Calculating bursts for Split_After_Adjust_LF_7200s_pinhole6-000007.ptu
The loop took 166.49379777908325 seconds to run.


In [40]:
# Save the burst dictionary next to the first PTU file.
# os.path.dirname returns '' for a bare filename; concatenating '/burst_dict.pkl'
# onto that would target the filesystem root, so fall back to the current
# directory instead.
directory = os.path.dirname(ptu_files[0]) or '.'
burst_dict_path = os.path.join(directory, 'burst_dict.pkl')
with open(burst_dict_path, 'wb') as file:
    pickle.dump(burst_dict, file)

In [35]:
# test that the loaded burst is identical to the pickled burst.
with open(directory + '/burst_dict.pkl', 'rb') as file:
    loaded_burst_dict = pickle.load(file)

In [36]:
# Round-trip check: displays True when the unpickled dictionary equals the
# in-memory burst_dict.
loaded_burst_dict == burst_dict

True