# Pipeline walkthrough

## Data import

In [1]:
from preprocessing.import_data import import_save_dataset

import napari

# Passed straight through to `import_save_dataset` below.
trim_series = True
# Two example dataset paths; only `lsm_test_name` is used in this cell.
lif_test_name = "test_data/2021-06-14/p2pdpwt"
lsm_test_name = "test_data/2023-04-07/p2pdp_zld-sites-ctrl_fwd_1"

# Import the dataset. The call returns the per-channel data plus two pairs of
# global/frame metadata ("original" and "export").
# NOTE(review): `mode="tiff"` presumably also writes the collated channels to TIFF
# (the cell output shows `imsave(..., plugin="tifffile")` warnings) — confirm.
(
    channels_full_dataset,
    original_global_metadata,
    original_frame_metadata,
    export_global_metadata,
    export_frame_metadata,
) = import_save_dataset(lsm_test_name, trim_series=trim_series, mode="tiff")

  warn('Due to an issue with JPype 0.6.0, reading is slower. '
  imsave(collated_data_path, channel_data, plugin="tifffile")
  imsave(collated_data_path, channel_data, plugin="tifffile")


In [2]:
# Index 1 is used as the nuclear channel in this notebook (index 0 is used for the
# transcription channel in the spot-analysis section).
nuclear_channel_metadata = export_frame_metadata[1]
nuclear_channel = channels_full_dataset[1]

In [3]:
# Open the nuclear channel in a napari viewer; `viewer` is reused by later cells
# to add label, track and image layers.
viewer = napari.view_image(nuclear_channel, name="Nuclear Channel")
# NOTE(review): `napari.run()` starts the napari event loop; depending on the
# Jupyter GUI integration it may block until the window is closed — confirm.
napari.run()

## Starting a Dask Client for parallelization

In [4]:
from nuclear_analysis import segmentation
from tracking import track_features, detect_mitosis

import numpy as np
from dask.distributed import LocalCluster, Client

In [5]:
# Local Dask cluster used by the `*_parallel` calls below: 12 single-threaded
# worker processes, each with a 4 GB memory limit (shown as 3.73 GiB per worker
# in the client summary).
# NOTE(review): the scheduler port is pinned to 8786; behaviour when that port is
# already in use should be confirmed.
cluster = LocalCluster(
    host="localhost",
    scheduler_port=8786,
    threads_per_worker=1,
    n_workers=12,
    memory_limit="4GB",
)

In [6]:
# Connect a client to the local cluster; `client` is passed to the parallelized
# pipeline calls in the cells below.
client = Client(cluster)

In [7]:
# Display the client summary (dashboard address, workers, threads, memory).
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 12
Total threads: 12,Total memory: 44.70 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:8786,Workers: 12
Dashboard: http://127.0.0.1:8787/status,Total threads: 12
Started: Just now,Total memory: 44.70 GiB

0,1
Comm: tcp://127.0.0.1:33621,Total threads: 1
Dashboard: http://127.0.0.1:37051/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:42861,
Local directory: /tmp/dask-scratch-space/worker-4t6f21zm,Local directory: /tmp/dask-scratch-space/worker-4t6f21zm

0,1
Comm: tcp://127.0.0.1:43055,Total threads: 1
Dashboard: http://127.0.0.1:46395/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:34137,
Local directory: /tmp/dask-scratch-space/worker-2zp2ttgl,Local directory: /tmp/dask-scratch-space/worker-2zp2ttgl

0,1
Comm: tcp://127.0.0.1:45633,Total threads: 1
Dashboard: http://127.0.0.1:36345/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:41591,
Local directory: /tmp/dask-scratch-space/worker-9ekskksm,Local directory: /tmp/dask-scratch-space/worker-9ekskksm

0,1
Comm: tcp://127.0.0.1:38667,Total threads: 1
Dashboard: http://127.0.0.1:40173/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:46173,
Local directory: /tmp/dask-scratch-space/worker-evl6emna,Local directory: /tmp/dask-scratch-space/worker-evl6emna

0,1
Comm: tcp://127.0.0.1:41289,Total threads: 1
Dashboard: http://127.0.0.1:37617/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:44123,
Local directory: /tmp/dask-scratch-space/worker-xc8v15db,Local directory: /tmp/dask-scratch-space/worker-xc8v15db

0,1
Comm: tcp://127.0.0.1:37961,Total threads: 1
Dashboard: http://127.0.0.1:35713/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:34219,
Local directory: /tmp/dask-scratch-space/worker-9_5y24xz,Local directory: /tmp/dask-scratch-space/worker-9_5y24xz

0,1
Comm: tcp://127.0.0.1:41533,Total threads: 1
Dashboard: http://127.0.0.1:39025/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:35419,
Local directory: /tmp/dask-scratch-space/worker-8__oe_gg,Local directory: /tmp/dask-scratch-space/worker-8__oe_gg

0,1
Comm: tcp://127.0.0.1:34427,Total threads: 1
Dashboard: http://127.0.0.1:33479/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:33225,
Local directory: /tmp/dask-scratch-space/worker-y9glu6m2,Local directory: /tmp/dask-scratch-space/worker-y9glu6m2

0,1
Comm: tcp://127.0.0.1:39719,Total threads: 1
Dashboard: http://127.0.0.1:39633/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:46033,
Local directory: /tmp/dask-scratch-space/worker-762en263,Local directory: /tmp/dask-scratch-space/worker-762en263

0,1
Comm: tcp://127.0.0.1:44775,Total threads: 1
Dashboard: http://127.0.0.1:36509/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:43799,
Local directory: /tmp/dask-scratch-space/worker-gpmghdok,Local directory: /tmp/dask-scratch-space/worker-gpmghdok

0,1
Comm: tcp://127.0.0.1:33233,Total threads: 1
Dashboard: http://127.0.0.1:46837/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:35157,
Local directory: /tmp/dask-scratch-space/worker-6wysuqw5,Local directory: /tmp/dask-scratch-space/worker-6wysuqw5

0,1
Comm: tcp://127.0.0.1:39075,Total threads: 1
Dashboard: http://127.0.0.1:40593/status,Memory: 3.73 GiB
Nanny: tcp://127.0.0.1:42657,
Local directory: /tmp/dask-scratch-space/worker-fhs66sn5,Local directory: /tmp/dask-scratch-space/worker-fhs66sn5


## Nuclear segmentation and tracking

In [8]:
%%time

# Step 1: denoise the nuclear channel (Gaussian, sigma 3) on the Dask cluster.
# Besides the denoised movie, the call returns futures for the denoised data and
# for the input movie; both sets of futures are fed to the later steps.
(
    denoised,
    denoised_futures,
    nuclear_channel_futures,
) = segmentation.denoise_movie_parallel(
    nuclear_channel,
    denoising="gaussian",
    denoising_sigma=3,
    client=client,
)

# Step 2: binarize the denoised movie with a global Otsu threshold; the closing
# footprint is an ellipsoid built by `segmentation.ellipsoid(3, 3)`.
mask, mask_futures, _ = segmentation.binarize_movie_parallel(
    denoised_futures,
    thresholding="global_otsu",
    closing_footprint=segmentation.ellipsoid(3, 3),
    client=client,
    futures_in=False,
)

# Step 3: place one marker per nucleus from the raw movie and the binary mask.
# `low_sigma`/`high_sigma` are the band-pass kernel widths discussed in the note
# below this cell.
# NOTE(review): the three-element sigmas are presumably ordered (z, y, x) — confirm.
markers, markers_futures, _ = segmentation.mark_movie_parallel(
    *nuclear_channel_futures,  # Wrapped in list from previous parallel run, needs unpacking
    mask_futures,
    low_sigma=[3, 5.5, 5.5],
    high_sigma=[10, 14.5, 14.5],
    max_footprint=((1, 25), segmentation.ellipsoid(3, 3)),
    max_diff=1,
    client=client,
    futures_in=False,
)

# Coordinates of every non-zero marker voxel, one row per voxel. Not used again in
# this cell.
marker_coords = np.array(np.nonzero(markers)).T

# Step 4: segment nuclei from the denoised movie, the markers and the mask
# (`watershed_method="raw"`, `min_size=200`).
labels, labels_futures, _ = segmentation.segment_movie_parallel(
    denoised_futures,
    markers_futures,
    mask_futures,
    watershed_method="raw",
    min_size=200,
    client=client,
    futures_in=False,
)

# NOTE(review): presumably maps nuclear cycle -> (min, max) number of nuclei
# expected in the field of view, used to assign nuclear cycles — confirm.
num_nuclei_per_fov = {12: (30, 50), 13: (70, 100), 14: (140, 220)}

# Step 5: build the per-nucleus feature table from the label movie; the call also
# returns the detected division frames and the nuclear cycle assignment.
segmentation_dataframe, division_frames, nuclear_cycle = track_features.segmentation_df(
    labels,
    nuclear_channel,
    nuclear_channel_metadata,
    num_nuclei_per_fov=num_nuclei_per_fov,
    division_peak_height=0.1,
    min_time_between_divisions=10,
)

# Step 6: link nuclei across frames using their (x, y) positions. Linking uses the
# "frame_reverse" column as its time axis.
tracked_dataframe = track_features.link_df(
    segmentation_dataframe,
    search_range=15,
    # adaptive_stop=1,
    # adaptive_step=0.99,
    memory=0,
    pos_columns=["x", "y"],
    t_column="frame_reverse",
    velocity_predict=True,
    velocity_averaging=2,
    reindex=True,
)

# Unique integer (frame - 1, z, y, x) rows, one per tracked nucleus position.
# Not used again in this cell.
centroids = np.unique(
    np.array(
        [
            [row["frame"] - 1, int(row["z"]), int(row["y"]), int(row["x"])]
            for _, row in tracked_dataframe.iterrows()
        ]
    ),
    axis=0,
)

# Step 7: detect mitoses and construct lineages from the linked tracks.
# NOTE(review): `image_dimensions=[256, 512]` is hard-coded; presumably the (y, x)
# pixel size of this particular dataset — confirm before reusing on other data.
mitosis_dataframe = detect_mitosis.construct_lineage(
    tracked_dataframe,
    pos_columns=["y", "x"],
    search_range_mitosis=35,
    # adaptive_stop=0.05,
    # adaptive_step=0.99,
    antiparallel_coordinate="collision",
    antiparallel_weight=None,
    min_track_length=3,
    image_dimensions=[256, 512],
    exclude_border=0.02,
    minimum_age=5,
)

# Step 8: relabel the segmentation using `mitosis_dataframe`. `futures_out=True`
# keeps futures for the relabelled movie, which the spot filtering step consumes
# as `nuclear_labels`.
reordered_labels, reordered_labels_futures, _ = track_features.reorder_labels_parallel(
    labels_futures,
    mitosis_dataframe,
    client=client,
    futures_in=False,
    futures_out=True,
)

Frame 2: 1 trajectories present.
CPU times: user 24.4 s, sys: 30.8 s, total: 55.2 s
Wall time: 3min 5s


Using the rules of thumb $r \approx \sigma \sqrt{2}$ (2D) and $r \approx \sigma \sqrt{3}$ (3D), which relate the radius $r$ of a feature to the kernel width $\sigma$, as rough bounds for the kernels used for band-pass filtering seems to net a perfect segmentation.

In [9]:
# Overlay the relabelled nuclear segmentation on the nuclear channel in napari.
viewer.add_labels(reordered_labels)

<Labels layer 'reordered_labels' at 0x7fdd386904c0>

In [10]:
# Send the tracks in `mitosis_dataframe` to the viewer under the name
# "nuclear_tracks"; the return value is discarded.
_ = detect_mitosis.tracks_to_napari(
    viewer, mitosis_dataframe, name="nuclear_tracks", output=False
)

## Spot segmentation and fitting

In [11]:
# Index 0 is used as the transcription (spot) channel; index 1 was used as the
# nuclear channel in the data import section.
transcription_channel_metadata = export_frame_metadata[0]
transcription_channel = channels_full_dataset[0]

In [12]:
# Add the transcription channel to the existing napari viewer as an image layer.
viewer.add_image(transcription_channel, name="Transcription Channel")

<Image layer 'Transcription Channel' at 0x7fdddb46c1f0>

In [13]:
from spot_analysis import detection, fitting

In [14]:
%%time

# Detect candidate spots in the transcription channel (band-pass sigmas below,
# "triangle" threshold) and gather them into `spot_dataframe`.
# NOTE(review): `return_bandpass=False` while three values are still unpacked, so
# `bandpassed_movie` is presumably a placeholder here — confirm.
spot_dataframe, spot_mask, bandpassed_movie = detection.detect_and_gather_spots(
    transcription_channel,
    frame_metadata=transcription_channel_metadata,
    low_sigma=[0.1, 0.5, 0.5],
    high_sigma=[3, 1.5, 1.5],
    threshold="triangle",
    min_size=6,
    connectivity=1,
    span=[5, 11, 11],
    pos_columns=["z", "y", "x"],
    return_bandpass=False,
    return_spot_mask=True,
    drop_reverse_time=True,
    client=client,
)

# Fit the detected spots using the initial sigma guesses below. The return value
# is not captured.
# NOTE(review): with `inplace=True` the fit results are presumably written into
# `spot_dataframe` itself — confirm.
fitting.add_fits_spots_dataframe_parallel(
    spot_dataframe,
    sigma_x_y_guess=1.5,
    sigma_z_guess=2,
    client=client,
    amplitude_guess=None,
    offset_guess=None,
    method="trf",
    inplace=True,
)

CPU times: user 8.36 s, sys: 10.8 s, total: 19.2 s
Wall time: 42.4 s


## Spot filtering and tracking

In [15]:
from spot_analysis import track_filtering

In [16]:
%%time

# Filter the fitted spots by the sigma bounds below and track them, using the
# relabelled nuclear segmentation (as futures) as `nuclear_labels`. With
# `choose_by="amplitude"` and `min_or_max="maximize"`, selection between competing
# spots is made on amplitude.
# NOTE(review): the return value is not captured, so `spot_dataframe` is
# presumably updated in place — confirm.
track_filtering.track_and_filter_spots(
    spot_dataframe,
    nuclear_labels=reordered_labels_futures,
    sigma_x_y_bounds=(0, 2.5),
    sigma_z_bounds=(0.35, 2),
    expand_distance=2,
    search_range=20,
    memory=2,
    pos_columns=["y", "x"],
    t_column="frame_reverse",
    velocity_predict=True,
    velocity_averaging=None,
    min_track_length=3,
    choose_by="amplitude",
    min_or_max="maximize",
    client=client,
)

Frame 167: 17 trajectories present.
CPU times: user 9.05 s, sys: 5.38 s, total: 14.4 s
Wall time: 31 s


In [17]:
%%time

# Relabel the spot mask using `spot_dataframe`, with the same helper used for the
# nuclear labels. The third return value is discarded.
(
    reordered_spot_labels,
    reordered_spot_labels_futures,
    _,
) = track_features.reorder_labels_parallel(
    spot_mask,
    spot_dataframe,
    client=client,
    futures_in=False,
    futures_out=True,
)

CPU times: user 2.02 s, sys: 6.91 s, total: 8.93 s
Wall time: 13.8 s


In [18]:
# Overlay the relabelled spot mask in the napari viewer.
viewer.add_labels(reordered_spot_labels)

<Labels layer 'reordered_spot_labels' at 0x7fdddbc45bd0>

Upcoming functions:
- ~~Filter by sigmas~~
- ~~Perform IoU over trackpy linking by culling short tracks~~
- ~~Handle multiple spots in single nucleus~~
- ~~Reconstruct mask (or just use point labeling)~~
- ~~Quantify spots by weighing pixel values by Gaussian envelope~~
- Extract traces

It can be shown for an xy-symmetric Gaussian that
$$
\int_{\mathbb{R}^3} A e^{- \frac{x^2 + y^2}{2 \sigma_{xy}^2} - \frac{z^2}{2 \sigma_z^2}} \ dx \ dy \ dz = 2 \sqrt{2} A \pi^{3/2} \sigma_{xy}^2 \sigma_z
$$
where we have used notation consistent with that used in `spot_analysis.fitting`. We can therefore use simple algebraic manipulation of the fit parameters to estimate the spot intensities.

In [19]:
import matplotlib.pyplot as plt
import numpy as np

In [20]:
from spot_analysis import compile_data

In [51]:
# Compile the spot table into one row per particle, pulling the "nuclear_cycle"
# and "division_time" columns from the nuclear tracking table.
compiled_dataframe = compile_data.compile_traces(
    spot_dataframe,
    nuclear_tracking_dataframe=mitosis_dataframe,
    compile_columns_nuclear=["nuclear_cycle", "division_time"],
)

In [52]:
# Display the compiled table: one row per particle, with per-trace vectors stored
# in the "frame", "t_s" and "intensity_from_fit" columns.
compiled_dataframe

Unnamed: 0,particle,frame,t_s,intensity_from_fit,nuclear_cycle,division_time
0,1,"[82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 9...","[1429.7821395514761, 1446.4017570046917, 1463....","[25540.338393366874, 68682.3590293864, 121900....",14,1299.679660
1,2,"[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 3...","[390.20823597341115, 406.9279614150481, 423.28...","[41555.49991897737, 114601.00635528714, 179446...",13,332.753653
2,4,"[92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102...","[1598.5656637979707, 1615.927142259131, 1632.6...","[23380.82522561254, 60235.688018575114, 55347....",14,1185.900772
3,6,"[26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 3...","[499.9692761500573, 517.3482964728448, 533.544...","[35871.837332101866, 60289.60311456024, 76190....","[12, 13, 14]",9.421215
4,8,"[100, 101, 103, 105, 107, 108, 109, 110, 111, ...","[1733.18599874769, 1748.9085463180804, 1781.92...","[26624.67039676178, 20809.43588216069, 43080.8...",14,1188.338628
...,...,...,...,...,...,...
195,455,"[87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 9...","[1513.4246101351587, 1531.2467132503139, 1548....","[22546.621533274738, 48187.48857872356, 51003....",14,1201.047424
196,457,"[88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 9...","[1530.0629198457405, 1547.7252729781246, 1564....","[28779.05915647891, 31838.767187735084, 76679....",14,1220.247020
197,458,"[86, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 9...","[1496.7371247414553, 1531.482723240012, 1548.6...","[26757.798012918483, 44905.02240988416, 55353....",14,1316.725237
198,459,"[88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 9...","[1530.1277743388719, 1547.1990889675494, 1563....","[54133.726812839195, 61480.46563198308, 115757...",14,1348.960424


In [72]:
def _generate_trace_plot(particle_index, compiled_dataframe):
    """
    Generates a tuple of the time and intensity vectors for a given particle, and the
    particle ID and assigned nuclear cycle of the trace.
    """
    time = (
        compiled_dataframe.loc[particle_number, "t_s"]
        - compiled_dataframe.at[particle_number, "division_time"]
    )
    intensity = compiled_dataframe.loc[particle_number, "intensity_from_fit"]
    
    nc = compiled_dataframe.at[particle_number, "nuclear_cycle"]
    particle = compiled_dataframe.at[particle_number, "particle"]

    return (time, intensity, particle, nc)


def generate_trace_plot_list(compiled_dataframe):
    """
    Collects the plotting tuple of every trace in the compiled DataFrame.

    Each row label of `compiled_dataframe` is passed in turn to
    `_generate_trace_plot`, and the results are returned in index order.

    :param compiled_dataframe: DataFrame of compiled data indexed by particle.
    :type compiled_dataframe: pandas DataFrame
    :return: List of tuples of the form `(time, intensity, particle, nc)` where `time`
        and `intensity` are vectors corresponding to the trace for `particle`.
    :rtype: List of tuples
    """
    return [
        _generate_trace_plot(row_label, compiled_dataframe)
        for row_label in compiled_dataframe.index.values
    ]

In [84]:
# This is taken from https://stackoverflow.com/questions/18390461/scroll-backwards-and-forwards-through-matplotlib-plots

# One `(time, intensity, particle, nc)` tuple per row of the compiled table.
traces = generate_trace_plot_list(compiled_dataframe)

# Index into `traces` of the trace currently shown; updated by `key_event`.
curr_pos = 0

def key_event(e):
    """
    Matplotlib key-press callback that pages through `traces`.

    The right/left arrow keys move to the next/previous trace, wrapping around at
    either end of the list, and redraw the shared axes. Any other key is ignored.

    Relies on the module-level names `traces`, `ax`, `fig` and `curr_pos`, and
    updates `curr_pos`.

    :param e: Key-press event; only its `key` attribute is read.
    """
    global curr_pos

    if e.key == "right":
        step = 1
    elif e.key == "left":
        step = -1
    else:
        return
    # Modulo keeps the cursor inside the list when stepping past either end.
    curr_pos = (curr_pos + step) % len(traces)

    # Read the labels from the selected trace tuple; `curr_pos` itself is an
    # int and cannot be subscripted.
    time, intensity, particle, nc = traces[curr_pos]

    ax.cla()
    ax.plot(time, intensity, '.')
    ax.set_xlabel("time (s)")
    ax.set_ylabel("Spot intensity (AU)")
    ax.set_title(f"Particle {particle}, NC {nc}")
    fig.canvas.draw()

# Build the figure and its single axes, then register the arrow-key handler so
# the left/right keys page through `traces`.
fig = plt.figure()
ax = fig.add_subplot(111)
fig.canvas.mpl_connect("key_press_event", key_event)

# Draw the trace at the starting cursor position.
particle = traces[curr_pos][2]
nc = traces[curr_pos][3]
ax.plot(traces[curr_pos][0], traces[curr_pos][1], ".")
ax.set(
    xlabel="time (s)",
    ylabel="Spot intensity (AU)",
    title=f"Particle {particle}, NC {nc}",
)

plt.show()

<IPython.core.display.Javascript object>

In [85]:
# Row label in `compiled_dataframe` of the trace to plot. This is the index
# label, not the particle ID; the ID is read from the "particle" column below.
particle_number = 100
# Time axis re-zeroed at the division time assigned to this trace.
time = (
    compiled_dataframe.loc[particle_number, "t_s"]
    - compiled_dataframe.at[particle_number, "division_time"]
)
intensity = compiled_dataframe.loc[particle_number, "intensity_from_fit"]
# Frame numbers of the trace; kept for inspection, not plotted.
frame = compiled_dataframe.loc[particle_number, "frame"]
nc = compiled_dataframe.at[particle_number, "nuclear_cycle"]
particle = compiled_dataframe.at[particle_number, "particle"]

plt.plot(time, intensity, ".")
# The x values are times in seconds, not frame numbers, so label them as such
# (same label as the interactive trace browser).
plt.xlabel("time (s)")
plt.ylabel("Intensity (AU)")
plt.title(f"Particle {particle}, NC {nc}")
plt.show()

<IPython.core.display.Javascript object>

In [53]:
# Count the traces whose "nuclear_cycle" entry holds more than one value (for
# example, the row listing [12, 13, 14] in the table above).
# NOTE(review): relies on every entry exposing `.size` — confirm for other inputs.
compiled_dataframe["nuclear_cycle"].apply(lambda x: x.size > 1).sum()

13

In [71]:
# Row labels of the compiled table; these are the values iterated over by
# `generate_trace_plot_list`.
compiled_dataframe.index.values

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18