## Prepare Dataset and Specify Parameters (please only edit cells in this section)


In [None]:
# Parameters to specify (user-editable analysis settings)

# Frame number at which NC14 starts in this movie
nc14_start_frame = 400

# Minimum trace length: any trace with fewer than min_frames frames will be filtered out
min_frames = 40

# Number of bins to split the full embryo into
num_bins = 42

In [14]:
# Dataset directory.
# The root keeps its trailing slash because the relative dataset paths below
# are appended to it by plain string concatenation.
dataset_folder = '/mnt/Data1/Nick/transcription_pipeline/'

# Dataset folders, given relative to dataset_folder.
RBSPWM_datasets = [
    "test_data/2024-02-26/Halo-RBSPWM_embryo01",
    "test_data/2024-02-26/Halo-RBSPWM_embryo02",
    "test_data/2024-05-07/Halo552-RBSPWM_embryo01",
    "test_data/2024-05-07/Halo552-RBSPWM_embryo02",
    "test_data/2024-05-09/Halo552-RBSPWM_embryo01",
]

RBSVar2_datasets = [
    "test_data/2024-07-23/Halo673_RBSVar2_embryo01",
    "test_data/2024-07-25/Halo673_RBSVar2_embryo01",
    "test_data/2024-10-10/Halo673_RBSVar2_embryo01",
    "test_data/2024-10-10/Halo673_RBSVar2_embryo02",
]

MCP_mSG_datasets = [
    "test_data/2024-10-31/MCP-mSG_ParB-mScar_RBSPWM_embryo01",
    "test_data/2024-10-31/MCP-mSG_ParB-mScar_RBSPWM_embryo02",
]

# Full path of the dataset analysed in the rest of the notebook.
test_dataset_name = f"{dataset_folder}{RBSVar2_datasets[1]}"
print(f"Dataset Path: {test_dataset_name}")

Dataset Path: /mnt/Data1/Nick/transcription_pipeline/test_data/2024-07-25/Halo673_RBSVar2_embryo01


In [15]:
# Import pipeline
from transcription_pipeline import nuclear_pipeline
from transcription_pipeline import preprocessing_pipeline

from transcription_pipeline import spot_pipeline
from transcription_pipeline import fullEmbryo_pipeline

from transcription_pipeline.spot_analysis import compile_data
from transcription_pipeline.utils import plottable

import os
import matplotlib.pyplot as plt
import matplotlib as mpl

In [16]:
# Choose how plots are displayed:
#   - keep mpl.use('TkAgg') when running from PyCharm
#   - use the `%matplotlib widget` magic (commented out below) when running in a browser

mpl.use('TkAgg')
# %matplotlib widget

## Import Dataset

### Import MS2 Dataset

Detect whether the dataset has already been converted into `zarr` files, i.e. whether previously processed data exists. If so, load the previous results.

In [17]:
# True when the dataset folder already contains a 'collated_dataset' directory
ms2_import_previous = os.path.isdir(f"{test_dataset_name}/collated_dataset")
ms2_import_previous

True

In [18]:
# Import the MS2 dataset from the dataset folder. `import_previous` carries the
# result of the 'collated_dataset' directory check, so previous results are
# reused when that directory exists.
dataset = preprocessing_pipeline.DataImport(
    name_folder=test_dataset_name,
    trim_series=True,
    working_storage_mode='zarr',
    import_previous=ms2_import_previous, 
)

### Import FullEmbryo Dataset

In [19]:
# Import the full-embryo images from the same dataset folder.
FullEmbryo_dataset = preprocessing_pipeline.FullEmbryoImport(
    name_folder=test_dataset_name,
    #import_previous=True
)
# Known issue (reported to Yovan): loading a FullEmbryo dataset does not work
# at the moment -- only the last channel is read in.
# NOTE(review): presumably this refers to re-loading saved data, which would be
# why `import_previous=True` above and the save() call below are commented out -- confirm.
# FullEmbryo_dataset.save()

## Starting a Dask Client for Parallel Processing

In [None]:
from dask.distributed import LocalCluster, Client

# Single definition of the scheduler port so that the "start a new cluster"
# and "reconnect to an existing cluster" paths below cannot drift apart.
scheduler_port = 37763

try:
    # Start a local cluster on the fixed scheduler port and connect to it.
    cluster = LocalCluster(
        host="localhost",
        scheduler_port=scheduler_port,
        threads_per_worker=1,
        n_workers=14,
        memory_limit="6GB",
    )

    client = Client(cluster)
except Exception as cluster_error:
    # Catch Exception rather than using a bare `except:` so that a keyboard /
    # kernel interrupt is not swallowed. The start-up error is reported
    # because the failure is only assumed to mean a cluster is already
    # listening on the port; if it is not, the reconnect below will fail too.
    print("Cluster already running")
    print(f"(LocalCluster start-up error: {cluster_error!r})")
    client = Client(f"localhost:{scheduler_port}")

print(client)

In [None]:
# Ask the Dask client to restart (see `Client.restart` in the Dask documentation).
client.restart()

In [None]:
# Show the client object as this cell's output.
client

## Nuclear Tracking

Detect whether nuclear tracking has already been run for this dataset. If so, load the previous results.

In [20]:
# True when the dataset folder already contains a 'nuclear_analysis_results' directory
nuclear_tracking_previous = os.path.isdir(f"{test_dataset_name}/nuclear_analysis_results")
nuclear_tracking_previous

True

In [21]:
if nuclear_tracking_previous:
    # Load nuclear tracking results saved by an earlier run
    print('Load from previous nuclear tracking results')

    nuclear_tracking = nuclear_pipeline.Nuclear()
    nuclear_tracking.read_results(name_folder=test_dataset_name)

else:
    # Do nuclear tracking and save the results.
    # This branch needs `client`, i.e. the Dask client cell must have been run.
    print('Do nuclear tracking for the dataset')

    nuclear_tracking = nuclear_pipeline.Nuclear(
        data=dataset.channels_full_dataset[0],
        global_metadata=dataset.export_global_metadata[0],
        frame_metadata=dataset.export_frame_metadata[0],
        series_splits=dataset.series_splits,
        series_shifts=dataset.series_shifts,
        search_range_um=1.5,
        stitch=False,
        stitch_max_distance=4,
        stitch_max_frame_distance=2,
        client=client,
        keep_futures=False,
    )

    nuclear_tracking.track_nuclei(
        working_memory_mode="zarr",
        working_memory_folder=test_dataset_name,
        # Join with a path separator: test_dataset_name has no trailing slash,
        # so plain concatenation would put the log at '<dataset>trackpy_log'
        # next to the dataset folder instead of inside it.
        trackpy_log_path=os.path.join(test_dataset_name, "trackpy_log"),
    )

    # Saves tracked nuclear mask as a zarr, and pickles dataframes with segmentation and
    # tracking information.
    nuclear_tracking.save_results(name_folder=test_dataset_name, save_array_as=None)

Load from previous nuclear tracking results


## Spot Tracking

Detect whether spot tracking has already been run for this dataset. If so, load the previous results.

In [22]:
# True when the dataset folder already contains a 'spot_analysis_results' directory
spot_tracking_previous = os.path.isdir(f"{test_dataset_name}/spot_analysis_results")
spot_tracking_previous

True

In [23]:
%%time

if not spot_tracking_previous:
    # No saved results: run spot tracking from scratch, then save the results.
    # This branch needs `client`, i.e. the Dask client cell must have been run.
    print('Do spot tracking for the dataset')

    spot_tracking = spot_pipeline.Spot(
        data=dataset.channels_full_dataset[1],
        global_metadata=dataset.export_global_metadata[1],
        frame_metadata=dataset.export_frame_metadata[1],
        labels=None,  # nuclear_tracking.reordered_labels is deliberately not passed
        expand_distance=3,
        search_range_um=4.2,
        retrack_search_range_um=4.5,
        threshold_factor=1.3,
        memory=3,
        retrack_after_filter=False,
        stitch=True,
        min_track_length=0,
        series_splits=dataset.series_splits,
        series_shifts=dataset.series_shifts,
        keep_bandpass=False,
        keep_futures=False,
        keep_spot_labels=False,
        evaluate=True,
        retrack_by_intensity=True,
        client=client,
    )

    # NOTE(review): retrack_after_filter is False in the constructor above but
    # True in this call -- confirm which value is intended to take effect.
    spot_tracking.extract_spot_traces(
        working_memory_folder=test_dataset_name,
        stitch=True,
        retrack_after_filter=True,
        trackpy_log_path=f"{test_dataset_name}/trackpy_log",
    )

    # Saves tracked spot mask as a zarr, and pickles dataframes with spot fitting and
    # quantification information.
    spot_tracking.save_results(name_folder=test_dataset_name, save_array_as=None)

else:
    # Saved results exist: load them instead of recomputing.
    print('Load from spot tracking results')

    spot_tracking = spot_pipeline.Spot()
    spot_tracking.read_results(name_folder=test_dataset_name)

Load from spot tracking results
CPU times: user 1.07 s, sys: 37.5 ms, total: 1.11 s
Wall time: 1.26 s


### Make Compiled Dataframe

In [24]:
# Spot-level dataframe held by the spot tracking object
spot_df = spot_tracking.spot_dataframe

# Keep only rows whose particle label is non-zero (0 marks spots that were not detected)
detected_spots = spot_df.loc[spot_df["particle"] != 0]

# Spot columns to carry over into the compiled traces
columns_to_compile = [
    "frame",
    "t_s",
    "intensity_from_neighborhood",
    "intensity_std_error_from_neighborhood",
    "x",
    "y",
]

# Compile traces, without a nuclear tracking dataframe
compiled_dataframe = compile_data.compile_traces(
    detected_spots,
    compile_columns_spot=columns_to_compile,
    nuclear_tracking_dataframe=None,
)

compiled_dataframe.head()

Unnamed: 0,particle,frame,t_s,intensity_from_neighborhood,intensity_std_error_from_neighborhood,x,y
0,1,"[506, 507, 511, 512, 513, 514, 515, 517, 518, ...","[2300.7059993743896, 2304.7430000305176, 2322....","[72.1570804597701, 53.93129347826087, 95.00423...","[25.268212723555116, 24.193128325434877, 23.27...","[425.48808831836254, 423.8314928296911, 425.13...","[221.00383393743027, 221.58308686421765, 221.5..."
1,2,"[460, 461, 462, 463, 464, 465, 466, 467, 468, ...","[2091.4230003356934, 2095.882999420166, 2100.1...","[57.47963945578232, 38.66103448275862, 90.2138...","[27.24615631798166, 25.318037332850935, 24.569...","[122.58614821001362, 123.76488548423376, 126.0...","[17.180038487563998, 17.208933264743397, 18.12..."
2,3,"[527, 528, 529, 530, 531, 532, 533, 534, 535, ...","[2394.573999404907, 2399.2339992523193, 2403.4...","[67.31827607361961, 10.19521568627451, 67.1475...","[26.219966228377146, 26.957198706766793, 25.65...","[588.016012440591, 586.074051884851, 588.18053...","[48.20524698088067, 48.37057827336589, 48.2503..."
3,4,"[572, 573, 574, 576, 577, 578, 580, 581, 582, ...","[2601.6180000305176, 2605.8780002593994, 2610....","[131.19976923076925, 114.73455625, 112.1208231...","[23.632369088123415, 26.48124363284603, 24.361...","[247.29672998614768, 245.9633552657412, 245.16...","[119.09766185700249, 119.2585299587735, 118.06..."
4,5,"[486, 489, 490, 491, 492, 493, 494, 495, 496, ...","[2211.6900005340576, 2225.287000656128, 2229.7...","[49.39227659574468, 71.93047777777778, 75.6659...","[23.603968369418688, 23.202337971412053, 23.80...","[473.56080767748307, 471.83975365873124, 471.4...","[127.7748944142374, 128.7753637411546, 128.167..."


## Full Embryo Analysis

In [25]:
# Side-by-side preview of the surface and mid full-embryo images
# (element [0] of each channel list, index 0 along its leading axis).
fig, (surf_ax, mid_ax) = plt.subplots(1, 2, figsize=(12, 6))

surf_ax.imshow(FullEmbryo_dataset.channels_full_dataset_surf[0][0, :, :], cmap='gray')
surf_ax.set_title('Full Embryo Surf')

mid_ax.imshow(FullEmbryo_dataset.channels_full_dataset_mid[0][0, :, :], cmap='gray')
mid_ax.set_title('Full Embryo Mid')

fig.tight_layout()
plt.show()

In [26]:
# Build the full-embryo analysis object from the full-embryo import and the MS2 dataset import.
# NOTE(review): his_channel=0 presumably selects the histone (nuclear) channel -- confirm.
fullEmbryo = fullEmbryo_pipeline.FullEmbryo(FullEmbryo_dataset, dataset, his_channel=0)

In [27]:
# Determine the embryo's AP axis; make_plots=True presumably shows diagnostic plots -- confirm.
fullEmbryo.find_ap_axis(make_plots=True)

In [28]:
# Convert spot x/y positions to embryo-axis coordinates; the recorded output
# shows the result gaining 'ap' and 'ap90' columns.
# NOTE(review): this rebinds compiled_dataframe to the converted frame, so
# re-running the cell feeds the already-converted frame back into xy_to_ap --
# confirm that this is safe.
compiled_dataframe = fullEmbryo.xy_to_ap(compiled_dataframe)
compiled_dataframe.head()

Unnamed: 0,particle,frame,t_s,intensity_from_neighborhood,intensity_std_error_from_neighborhood,x,y,ap,ap90
0,1,"[506, 507, 511, 512, 513, 514, 515, 517, 518, ...","[2300.7059993743896, 2304.7430000305176, 2322....","[72.1570804597701, 53.93129347826087, 95.00423...","[25.268212723555116, 24.193128325434877, 23.27...","[425.48808831836254, 423.8314928296911, 425.13...","[221.00383393743027, 221.58308686421765, 221.5...","[0.3313042994154892, 0.33061939863823764, 0.33...","[0.12989276429317786, 0.13046486912365637, 0.1..."
1,2,"[460, 461, 462, 463, 464, 465, 466, 467, 468, ...","[2091.4230003356934, 2095.882999420166, 2100.1...","[57.47963945578232, 38.66103448275862, 90.2138...","[27.24615631798166, 25.318037332850935, 24.569...","[122.58614821001362, 123.76488548423376, 126.0...","[17.180038487563998, 17.208933264743397, 18.12...","[0.20607289824970965, 0.20656023392145914, 0.2...","[-0.07141580498769587, -0.07138726677781301, -..."
2,3,"[527, 528, 529, 530, 531, 532, 533, 534, 535, ...","[2394.573999404907, 2399.2339992523193, 2403.4...","[67.31827607361961, 10.19521568627451, 67.1475...","[26.219966228377146, 26.957198706766793, 25.65...","[588.016012440591, 586.074051884851, 588.18053...","[48.20524698088067, 48.37057827336589, 48.2503...","[0.39849964210332955, 0.3976967603478775, 0.39...","[-0.04077345373697253, -0.0406101626655149, -0..."
3,4,"[572, 573, 574, 576, 577, 578, 580, 581, 582, ...","[2601.6180000305176, 2605.8780002593994, 2610....","[131.19976923076925, 114.73455625, 112.1208231...","[23.632369088123415, 26.48124363284603, 24.361...","[247.29672998614768, 245.9633552657412, 245.16...","[119.09766185700249, 119.2585299587735, 118.06...","[0.25763308598055856, 0.25708181719549955, 0.2...","[0.029244134641654648, 0.029403017599059004, 0..."
4,5,"[486, 489, 490, 491, 492, 493, 494, 495, 496, ...","[2211.6900005340576, 2225.287000656128, 2229.7...","[49.39227659574468, 71.93047777777778, 75.6659...","[23.603968369418688, 23.202337971412053, 23.80...","[473.56080767748307, 471.83975365873124, 471.4...","[127.7748944142374, 128.7753637411546, 128.167...","[0.35117942468222907, 0.3504678742505178, 0.35...","[0.037814288423547136, 0.038802411760013165, 0..."


## RateExtraction Analysis

### Fit and Average

In [29]:
from transcription_pipeline.RateExtraction import FitAndAverage

In [33]:
# Fit every particle trace. Pass the notebook-level `min_frames` parameter
# instead of repeating its value as the literal 40, so that editing the
# parameters cell takes effect here.
faadata = FitAndAverage(compiled_dataframe, nc14_start_frame, min_frames, num_bins, test_dataset_name)

No previous particle trace fit checking results detected. Do particle trace fitting for the dataframe.


  lnpdiff = f + nlp - state.log_prob[j]
  lnpdiff = f + nlp - state.log_prob[j]


Failed to find derivative sign change for trace 301


  lnpdiff = f + nlp - state.log_prob[j]
  lnpdiff = f + nlp - state.log_prob[j]
  lnpdiff = f + nlp - state.log_prob[j]
  lnpdiff = f + nlp - state.log_prob[j]
  lnpdiff = f + nlp - state.log_prob[j]


Failed to fit trace 225
Failed to fit trace 61


  lnpdiff = f + nlp - state.log_prob[j]
  lnpdiff = f + nlp - state.log_prob[j]
  lnpdiff = f + nlp - state.log_prob[j]
  lnpdiff = f + nlp - state.log_prob[j]


Failed to fit trace 183


  lnpdiff = f + nlp - state.log_prob[j]


Failed to fit trace 130


  lnpdiff = f + nlp - state.log_prob[j]
  lnpdiff = f + nlp - state.log_prob[j]


Number of traces: 137
Number of traces with valid fits: 132
Number of traces with invalid fits: 5


In [41]:
# Review the per-particle fits (presumably an interactive accept/reject step -- confirm).
faadata.check_particle_fits()

In [40]:
# Save the checked particle fits; prints "Checked particle fits saved" on success.
faadata.save_checked_particle_fits()

Checked particle fits saved


In [36]:
# Average the particle fits (presumably per bin -- confirm). The trailing
# semicolon suppresses the display of the call's return value.
# NOTE(review): the recorded output of this cell is
# "ValueError: zero-size array to reduction operation maximum which has no identity".
# Its execution count (In [36]) is lower than those of the check (In [41]) and
# save (In [40]) cells, so it may have run before any fits were checked --
# confirm by re-running the cells in order.
faadata.average_particle_fits();

ValueError: zero-size array to reduction operation maximum which has no identity

### Average and Fit

In [None]:
from transcription_pipeline.RateExtraction import AverageAndFit

In [None]:
# Time bin width: the 't_s' entry at index [1, 0] of the first channel's frame metadata.
# NOTE(review): presumably this is the timestamp of the second frame, used as the
# frame interval on the assumption that the first frame is at t = 0 -- confirm.
time_bin_width = dataset.export_frame_metadata[0]['t_s'][1, 0]
aafdata = AverageAndFit(compiled_dataframe, nc14_start_frame, time_bin_width, num_bins, test_dataset_name)

In [None]:
# Review the per-bin fits (presumably an interactive accept/reject step -- confirm).
aafdata.check_bin_fits()

In [None]:
# Display the bin_average_fit_dataframe attribute as this cell's output.
aafdata.bin_average_fit_dataframe

In [None]:
# Save the checked bin fits.
aafdata.save_checked_bin_fits()

In [None]:
# Plot the bin fits.
aafdata.plot_bin_fits()