## Prepare Dataset and Specify Parameters (please only edit cells in this section)

In [116]:
# User-tunable analysis parameters

# Frame index at which nuclear cycle 14 (NC14) begins
nc14_start_frame: int = 0

# Minimum trace length: traces with fewer frames than this are filtered out
min_frames: int = 40

# How many bins the full embryo is divided into
num_bins: int = 42

In [117]:
# Root folder that every dataset path below is relative to (note the trailing slash,
# which the concatenation further down relies on)
dataset_folder = '/mnt/Data1/Josh/transcription_pipeline/'

# Var2 embryos
embryo_list = [
    'test_data/NSPARC/2025-03-31/MCP-mSG_His-RFP_Var2(001)_embryo01',
    'test_data/NSPARC/2025-03-31/MCP-mSG_His-RFP_Var2(001)_embryo02',
    'test_data/NSPARC/2025-04-01/MCP-mSG_His-RFP_Var2(001)_embryo20',
    'test_data/NSPARC/2025-04-01/MCP-mSG_His-RFP_Var2(001)_embryo38',
    'test_data/NSPARC/2025-04-14/MCP-mSG_His-RFP_Var2(001)_embryo28',
    'test_data/NSPARC/2025-04-15/MCP-mSG_His-RFP_Var2(001)_embryo01',
    'test_data/NSPARC/2025-04-22/MCP-mSG_His-RFP_Var2(001)_embryo01_22.8C',
]

# PWM embryos (uncomment this list to use it in place of the Var2 one)
# embryo_list = [
#     'test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo01',
#     'test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo02',
#     'test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo03',
# ]

# The first entry of the active list is the dataset processed by this notebook
test_dataset_name = ''.join((dataset_folder, embryo_list[0]))

#test_dataset_name = '/mnt/Data1/Josh/transcription_pipeline/' + 'test_data/2025-03-18/MCP-mSG_His-RFP_RBSPWM(003)_embryo03'
print(f'Dataset Path: {test_dataset_name}')

Dataset Path: /mnt/Data1/Josh/transcription_pipeline/test_data/NSPARC/2025-03-31/MCP-mSG_His-RFP_Var2(001)_embryo01


In [118]:
# Import pipeline
from transcription_pipeline import nuclear_pipeline
from transcription_pipeline import preprocessing_pipeline

from transcription_pipeline import spot_pipeline
from transcription_pipeline import fullEmbryo_pipeline

from transcription_pipeline.spot_analysis import compile_data
from transcription_pipeline.utils import plottable

import os
import matplotlib.pyplot as plt

In [119]:
# Choose how plots are displayed: use the TkAgg backend when running from PyCharm,
# or switch to the `%matplotlib widget` magic below when running in a browser.
import matplotlib as mpl
import numpy as np

mpl.use('TkAgg')
#%matplotlib widget

## Import Dataset

### Import MS2 Dataset

Check whether the dataset has already been converted into `zarr` files, i.e. whether previously processed data exists. If so, load the previous results.

In [120]:
# True when a 'collated_dataset' directory already exists inside the dataset folder
ms2_import_previous = os.path.isdir(test_dataset_name + '/collated_dataset')
ms2_import_previous

True

In [121]:
# Import the MS2 dataset; `import_previous` is driven by the directory check in the
# previous cell, so existing results are reused when present.
dataset = preprocessing_pipeline.DataImport(
    name_folder=test_dataset_name,
    trim_series=True,
    working_storage_mode='zarr',
    import_previous=ms2_import_previous, 
)

# Only a fresh import is saved; a previously saved dataset is left as it is on disk.
if not ms2_import_previous:
    dataset.save()

### Import FullEmbryo Dataset

In [122]:
# True when a 'preprocessed_full_embryo' directory already exists inside the dataset folder
fullembryo_import_previous = os.path.isdir(test_dataset_name + '/preprocessed_full_embryo')
fullembryo_import_previous

True

In [123]:
# Import the full-embryo images from the same dataset folder, reusing previous
# results when the directory check in the previous cell found them.
FullEmbryo_dataset = preprocessing_pipeline.FullEmbryoImport(
    name_folder=test_dataset_name,
    import_previous=fullembryo_import_previous
)

# Only a fresh import is saved.
if not fullembryo_import_previous:
    FullEmbryo_dataset.save()

## Starting a Dask Client for Parallel Processing

In [124]:
from dask.distributed import LocalCluster, Client

# Fixed scheduler port, used both to start a new cluster and to reconnect to one
# that is already listening (e.g. after re-running this cell).
dask_scheduler_port = 37763

try:
    # Try to start a fresh local cluster on the fixed port.
    cluster = LocalCluster(
        host="localhost",
        scheduler_port=dask_scheduler_port,
        threads_per_worker=1,
        n_workers=14,
        memory_limit="6GB",
    )

    client = Client(cluster)
except Exception as cluster_startup_error:
    # `except Exception` rather than a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate. The startup error is printed so that a failure
    # other than "port already in use" is not silently mistaken for a running cluster.
    print("Cluster already running")
    print(f"(LocalCluster startup failed with: {cluster_startup_error!r})")
    # NOTE(review): the recorded output of this cell shows a client/scheduler package
    # version mismatch after attaching to the existing scheduler. Make sure that
    # scheduler was started from this same environment.
    client = Client(f'localhost:{dask_scheduler_port}')

print(client)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 46207 instead


Cluster already running
<Client: 'tcp://127.0.0.1:37763' processes=14 threads=14, memory=78.23 GiB>



+---------+----------+-----------+----------+
| Package | Client   | Scheduler | Workers  |
+---------+----------+-----------+----------+
| dask    | 2023.5.1 | 2025.4.0  | 2025.4.0 |
| msgpack | 1.0.8    | 1.0.7     | 1.0.7    |
| numpy   | 1.24.4   | 1.26.4    | 1.26.4   |
| toolz   | 0.12.1   | 0.12.0    | 0.12.0   |
| tornado | 6.4.1    | 6.3.3     | 6.3.3    |
+---------+----------+-----------+----------+
2025-05-05 17:28:30,184 - distributed.client - ERROR - 'adjust-heartbeat-interval'
Traceback (most recent call last):
  File "/mnt/Data1/Josh/miniforge3/envs/transcription_pipeline/lib/python3.10/site-packages/distributed/client.py", line 1573, in _handle_report
    handler = self._stream_handlers[op]
KeyError: 'adjust-heartbeat-interval'
2025-05-05 17:28:35,186 - distributed.client - ERROR - 'adjust-heartbeat-interval'
Traceback (most recent call last):
  File "/mnt/Data1/Josh/miniforge3/envs/transcription_pipeline/lib/python3.10/site-packages/distributed/client.py", line 1573,

In [125]:
# client.restart()

In [126]:
# client.shutdown()

In [127]:
# Print the URL of the Dask dashboard for the connected client
print(client.dashboard_link)

http://localhost:36103/status


## Nuclear Tracking

Check whether nuclear tracking has already been done for this dataset. If so, load the previous results.

In [128]:
# True when a 'nuclear_analysis_results' directory already exists inside the dataset folder
nuclear_tracking_previous = os.path.isdir(test_dataset_name + '/nuclear_analysis_results')
nuclear_tracking_previous

True

In [129]:
# Reuse saved nuclear tracking results when they exist; otherwise run the tracking
# on channel 0 of the imported dataset and save the results.
if nuclear_tracking_previous:
    # Load nuclear tracking results
    print('Load from previous nuclear tracking results')

    nuclear_tracking = nuclear_pipeline.Nuclear()
    nuclear_tracking.read_results(name_folder=test_dataset_name)

else:
    # Do nuclear tracking and save the results
    print('Do nuclear tracking for the dataset')

    # NOTE(review): stitch_max_distance / stitch_max_frame_distance are passed even
    # though stitch=False -- presumably unused in that case; confirm against Nuclear.
    nuclear_tracking = nuclear_pipeline.Nuclear(
        data=dataset.channels_full_dataset[0],
        global_metadata=dataset.export_global_metadata[0],
        frame_metadata=dataset.export_frame_metadata[0],
        series_splits=dataset.series_splits,
        series_shifts=dataset.series_shifts,
        search_range_um=1.5,
        stitch=False,
        stitch_max_distance=4,
        stitch_max_frame_distance=2,
        client=client,
        keep_futures=False,
    )

    nuclear_tracking.track_nuclei(
        working_memory_mode="zarr",
        working_memory_folder=test_dataset_name,
        # Join with a path separator so the log is written inside the dataset folder.
        # Plain string concatenation produced "<dataset>trackpy_log" next to the
        # folder, because test_dataset_name has no trailing slash.
        trackpy_log_path=os.path.join(test_dataset_name, "trackpy_log"),
    )

    # Saves tracked nuclear mask as a zarr, and pickles dataframes with segmentation and
    # tracking information.
    nuclear_tracking.save_results(name_folder=test_dataset_name, save_array_as=None)

Load from previous nuclear tracking results


## Spot Tracking

Check whether spot tracking has already been done for this dataset. If so, load the previous results.

In [130]:
# True when a 'spot_analysis_results' directory already exists inside the dataset folder
spot_tracking_previous = os.path.isdir(test_dataset_name + '/spot_analysis_results')
spot_tracking_previous

True

In [131]:
%%time

# Run spot detection and tracking only when no saved results exist for this dataset;
# otherwise reload what is already on disk.
if not spot_tracking_previous:
    # Do spot tracking and save the results
    print('Do spot tracking for the dataset')

    # Spots are read from channel 1 of the imported dataset.
    # NOTE(review): `stitch` and `retrack_after_filter` are given both here and in
    # extract_spot_traces() below, and `retrack_after_filter` has conflicting values
    # (False here, True there). Which one takes effect depends on Spot -- confirm.
    spot_tracking = spot_pipeline.Spot(
        data=dataset.channels_full_dataset[1],
        global_metadata=dataset.export_global_metadata[1],
        frame_metadata=dataset.export_frame_metadata[1],
        labels=None,  # nuclear_tracking.reordered_labels
        expand_distance=3,
        search_range_um=4.2,
        retrack_search_range_um=4.5,
        threshold_factor=1.3,
        memory=3,
        retrack_after_filter=False,
        stitch=True,
        min_track_length=0,
        series_splits=dataset.series_splits,
        series_shifts=dataset.series_shifts,
        keep_bandpass=False,
        keep_futures=False,
        keep_spot_labels=False,
        evaluate=True,
        retrack_by_intensity=True,
        client=client,
    )

    # The trackpy log is written inside the dataset folder
    spot_trackpy_log_path = test_dataset_name + '/trackpy_log'
    spot_tracking.extract_spot_traces(
        working_memory_folder=test_dataset_name,
        stitch=True,
        retrack_after_filter=True,
        trackpy_log_path=spot_trackpy_log_path,
    )

    # Saves tracked spot mask as a zarr, and pickles dataframes with spot fitting and
    # quantification information.
    spot_tracking.save_results(name_folder=test_dataset_name, save_array_as=None)

else:
    # Load spot tracking results
    print('Load from spot tracking results')

    spot_tracking = spot_pipeline.Spot()
    spot_tracking.read_results(name_folder=test_dataset_name)

Load from spot tracking results
CPU times: user 427 ms, sys: 32.8 ms, total: 460 ms
Wall time: 782 ms


### Make Compiled Dataframe

In [132]:
# Load spot tracking dataframe
spot_df = spot_tracking.spot_dataframe

# Remove spots that were not detected
detected_spots = spot_df[spot_df["particle"] != 0]

# Compile traces
compiled_dataframe = compile_data.compile_traces(
    detected_spots,
    compile_columns_spot=[
        "frame",
        "t_s",
        "intensity_from_neighborhood",
        "intensity_std_error_from_neighborhood",
        "x",
        "y"
    ],
    nuclear_tracking_dataframe=None,
)

compiled_dataframe.head()

Unnamed: 0,particle,frame,t_s,intensity_from_neighborhood,intensity_std_error_from_neighborhood,x,y
0,1,[242],[507.2349823000431],[70.90065841584158],[27.458556279819902],[150.09488025015824],[11.94217819569382]
1,2,[275],[574.167991300106],[4.406658291457286],[34.113741671182694],[346.62643190514643],[10.219247688087197]
2,3,"[317, 393]","[658.9601913000345, 812.5342713000774]","[8.576612244897959, 103.77140776699031]","[35.798972133482096, 31.398385925085883]","[382.15574038539876, 399.0733026873408]","[8.072623765543039, 11.954605195396592]"
3,4,[70],[159.62662730002404],[50.825],[37.11608145075052],[456.01391779864167],[6.426530039523591]
4,5,"[71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 8...","[161.12761130011083, 165.17358230006695, 167.1...","[46.77967525773196, 78.12756190476189, 55.7352...","[34.83654078079327, 28.837275934709975, 31.350...","[694.7749615335287, 692.9882547256797, 693.058...","[17.517689444184782, 17.461846448495784, 17.77..."


## Full Embryo Analysis

In [133]:
# Side-by-side preview of channel 1 (index 0 along its first axis) from the
# surface and mid-plane full-embryo images.
embryo_fig, (surf_ax, mid_ax) = plt.subplots(1, 2, figsize=(12, 6))

surf_ax.imshow(FullEmbryo_dataset.channels_full_dataset_surf[1][0, :, :], cmap='gray')
surf_ax.set_title('Full Embryo Surf')

mid_ax.imshow(FullEmbryo_dataset.channels_full_dataset_mid[1][0, :, :], cmap='gray')
mid_ax.set_title('Full Embryo Mid')

embryo_fig.tight_layout()
plt.show()

can't invoke "event" command: application has been destroyed
    while executing
"event generate $w <<ThemeChanged>>"
    (procedure "ttk::ThemeChanged" line 6)
    invoked from within
"ttk::ThemeChanged"


In [134]:
# Build the full-embryo analysis object from the full-embryo import and the MS2 import.
# NOTE(review): his_channel=1 presumably selects the histone channel index -- confirm
# against FullEmbryo.
fullEmbryo = fullEmbryo_pipeline.FullEmbryo(FullEmbryo_dataset, dataset, his_channel=1)

In [135]:
# Run the AP-axis step with its plots enabled (presumably locates the embryo's
# anterior-posterior axis -- see FullEmbryo.find_ap_axis).
fullEmbryo.find_ap_axis(make_plots=True)

invalid command name "131084109401728process_stream_events"
    while executing
"131084109401728process_stream_events"
    ("after" script)
can't invoke "event" command: application has been destroyed
    while executing
"event generate $w <<ThemeChanged>>"
    (procedure "ttk::ThemeChanged" line 6)
    invoked from within
"ttk::ThemeChanged"


In [136]:
# Convert spot x/y positions to AP coordinates; the result shown below gains the
# `ap` and `ap90` columns. Note that this rebinds `compiled_dataframe`, so re-running
# the cell feeds the already-converted frame back into xy_to_ap.
compiled_dataframe = fullEmbryo.xy_to_ap(compiled_dataframe)
compiled_dataframe.head()

Unnamed: 0,particle,frame,t_s,intensity_from_neighborhood,intensity_std_error_from_neighborhood,x,y,ap,ap90
0,1,[242],[507.2349823000431],[70.90065841584158],[27.458556279819902],[150.09488025015824],[11.94217819569382],[1.3229430781863758],[-0.8864577073662293]
1,2,[275],[574.167991300106],[4.406658291457286],[34.113741671182694],[346.62643190514643],[10.219247688087197],[1.4246816150359325],[-0.9347434935712711]
2,3,"[317, 393]","[658.9601913000345, 812.5342713000774]","[8.576612244897959, 103.77140776699031]","[35.798972133482096, 31.398385925085883]","[382.15574038539876, 399.0733026873408]","[8.072623765543039, 11.954605195396592]","[1.4428646765358193, 1.452082281899122]","[-0.9454435853260534, -0.9452716228662169]"
3,4,[70],[159.62662730002404],[50.825],[37.11608145075052],[456.01391779864167],[6.426530039523591],[1.4809849094308016],[-0.9646622558838587]
4,5,"[71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 8...","[161.12761130011083, 165.17358230006695, 167.1...","[46.77967525773196, 78.12756190476189, 55.7352...","[34.83654078079327, 28.837275934709975, 31.350...","[694.7749615335287, 692.9882547256797, 693.058...","[17.517689444184782, 17.461846448495784, 17.77...","[1.6060888260782038, 1.6051557416945763, 1.605...","[-1.0091637813895908, -1.0088016020546218, -1...."


## RateExtraction Analysis

### Fit and Average

In [137]:
from transcription_pipeline.RateExtraction import FitAndAverage

In [138]:
# Set up the fit-and-average analysis with the parameters from the first section.
# The recorded output shows it loading earlier fit-checking results from
# "particle_fits_checked.pkl" when they exist.
faadata = FitAndAverage(compiled_dataframe, nc14_start_frame, min_frames, num_bins, test_dataset_name)

Load previous particle trace fit checking results from "particle_fits_checked.pkl"


In [139]:
# Display the compiled traces, now including the AP coordinates
compiled_dataframe

Unnamed: 0,particle,frame,t_s,intensity_from_neighborhood,intensity_std_error_from_neighborhood,x,y,ap,ap90
0,1,[242],[507.2349823000431],[70.90065841584158],[27.458556279819902],[150.09488025015824],[11.94217819569382],[1.3229430781863758],[-0.8864577073662293]
1,2,[275],[574.167991300106],[4.406658291457286],[34.113741671182694],[346.62643190514643],[10.219247688087197],[1.4246816150359325],[-0.9347434935712711]
2,3,"[317, 393]","[658.9601913000345, 812.5342713000774]","[8.576612244897959, 103.77140776699031]","[35.798972133482096, 31.398385925085883]","[382.15574038539876, 399.0733026873408]","[8.072623765543039, 11.954605195396592]","[1.4428646765358193, 1.452082281899122]","[-0.9454435853260534, -0.9452716228662169]"
3,4,[70],[159.62662730002404],[50.825],[37.11608145075052],[456.01391779864167],[6.426530039523591],[1.4809849094308016],[-0.9646622558838587]
4,5,"[71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 8...","[161.12761130011083, 165.17358230006695, 167.1...","[46.77967525773196, 78.12756190476189, 55.7352...","[34.83654078079327, 28.837275934709975, 31.350...","[694.7749615335287, 692.9882547256797, 693.058...","[17.517689444184782, 17.461846448495784, 17.77...","[1.6060888260782038, 1.6051557416945763, 1.605...","[-1.0091637813895908, -1.0088016020546218, -1...."
...,...,...,...,...,...,...,...,...,...
457,970,[65],[149.33749430012702],[160.55646875],[27.371145959910876],[1000.0533213626323],[168.5216238661509],[1.7816584196243799],[-0.9191188081808113]
458,975,"[57, 58, 59, 60, 61, 62]","[132.9795963001251, 135.00251730012894, 137.19...","[28.708403846153843, 83.97516042780748, 76.620...","[27.712175177726326, 26.847480726097665, 25.36...","[1000.8930492287077, 1001.6971543326819, 1000....","[168.0501033243174, 169.58116239985725, 169.88...","[1.7820401577291263, 1.7826319287689107, 1.782...","[-0.9198236158504431, -0.918369285975899, -0.9..."
459,983,[57],[132.80557430005072],[124.62180288461538],[26.853760354775826],[969.6851191216887],[141.66620036584422],[1.7628429201499898],[-0.9407856333394138]
460,1000,[5],[27.95743730008602],[57.01442288557214],[27.789361405607664],[1011.1425851104433],[168.08586832973262],[1.787360371626505],[-0.9222069094800084]


In [140]:
# Display the per-particle fit table (adds fit_results, modified_fit_results,
# tv_denoised_trace and approval_status columns to the compiled traces)
faadata.compiled_dataframe_fits

Unnamed: 0,index,particle,frame,t_s,intensity_from_neighborhood,intensity_std_error_from_neighborhood,x,y,ap,ap90,fit_results,modified_fit_results,tv_denoised_trace,approval_status
0,0,475,"[137, 138, 139, 141, 143, 144, 145, 146, 147, ...","[294.4716593000889, 296.6687213001251, 298.691...","[69.07539999999999, 78.28566666666666, 88.6253...","[28.72770709332717, 27.290419953203767, 27.034...","[31.19923032741879, 30.331994417943854, 30.933...","[16.490438292957748, 15.768851401633807, 16.72...","[0.5192055803706052, 0.5194569449457892, 0.519...","[0.19807115946769502, 0.1987209582896305, 0.19...","[[294.4716593000889, 296.6687213001251, 298.69...",,"[170.60238572722847, 171.1209788046907, 172.25...",1
1,1,28,"[138, 143, 146, 147, 148, 150, 152, 153, 154, ...","[296.1467853000164, 306.26173430001734, 312.50...","[96.64624154589372, 51.5746, 98.04436585365853...","[27.424096427339755, 27.36933676288119, 27.839...","[87.82511449221663, 88.72772047740536, 86.7548...","[60.148016634620916, 61.95294149906148, 59.691...","[0.5026697456663506, 0.5024456347320423, 0.502...","[0.1583892089735454, 0.156875910444479, 0.1588...","[[296.1467853000164, 306.26173430001734, 312.5...",,"[81.7567820131067, 81.80030008561718, 82.17989...",-1
2,2,8,"[164, 166, 167, 169, 170, 171, 172, 173, 174, ...","[348.7447383000851, 352.7908193000555, 354.813...","[147.39123557692307, 135.8680684210526, 232.19...","[31.067960979128955, 32.111491728217565, 31.79...","[212.06515729579513, 212.61103919735123, 212.4...","[26.21108096142722, 25.341293853597556, 25.496...","[0.46177279899111806, 0.46156746055217923, 0.4...","[0.17434971988889514, 0.17499219661098794, 0.1...","[[348.7447383000851, 352.7908193000555, 354.81...",,"[192.74235473455062, 193.03509658739472, 193.7...",-1
3,3,300,"[184, 189, 191, 192, 194, 195, 196, 197, 198, ...","[389.726818300128, 399.66785930001737, 403.539...","[54.736969696969695, 60.58798387096774, 52.001...","[31.153920354159656, 35.163501210363385, 33.86...","[238.5248759924355, 238.1593576702546, 238.114...","[232.56662144809536, 232.2923384505139, 232.74...","[0.4606636715179005, 0.46077067762079843, 0.46...","[0.008124374860581781, 0.00837454412848265, 0....","[[389.726818300128, 399.66785930001737, 403.53...",,"[76.33251948661947, 76.40812890858439, 76.5656...",-1
4,4,122,"[103, 107, 109, 110, 111, 112, 114, 115, 116, ...","[225.8635953000784, 233.7816293001175, 237.827...","[34.048178217821786, 98.96533333333335, 148.89...","[31.513865209235135, 31.121977731929892, 29.00...","[251.95027895277858, 250.91422121918504, 249.9...","[250.19328898687343, 249.41455520391898, 249.9...","[0.4570021270773454, 0.45730538911139285, 0.45...","[-0.0070622188183835895, -0.006352098912553642...","[[225.8635953000784, 233.7816293001175, 237.82...",,"[109.7099452061057, 110.0170140738951, 110.637...",1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,102,682,"[55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 6...","[128.7594783000946, 130.608505300045, 132.6315...","[37.87826086956522, 46.861730569948186, 28.913...","[27.187626483686017, 28.811606698328752, 28.66...","[962.8689426863385, 962.9453761684821, 963.168...","[198.06152001839513, 198.3473332413109, 198.75...","[0.22803965781145616, 0.22802541193016496, 0.2...","[-0.028555901917200005, -0.028789652586352278,...","[None, None, None, None, None]","[[128.7594783000946, 130.608505300045, 132.631...","[75.0007435579677, 77.50014491199872, 82.40145...",2
103,103,397,"[269, 271, 272, 273, 274, 275, 276, 278, 279, ...","[561.5080383000374, 565.5540873000622, 567.576...","[1206.909648780488, 1166.762873015873, 1201.86...","[54.00725576774427, 55.640914332071674, 58.039...","[958.9567519393917, 959.2206129399825, 957.718...","[195.79718066227022, 195.32737916665087, 194.2...","[0.22920884952289208, 0.22910783867686074, 0.2...","[-0.026411481374623343, -0.026061716344049336,...","[None, None, None, None, None]",,"[1168.7749544314972, 1167.7206992801566, 1165....",-1
104,104,778,"[62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 7...","[143.09458230006695, 144.94360830008983, 146.9...","[34.3458679245283, 109.32432160804021, 69.5245...","[29.843062532140678, 30.89349864434946, 29.101...","[973.8599507474794, 973.4794070245891, 971.167...","[228.966975979639, 228.3399189608266, 228.7991...","[0.22562834867422937, 0.22572760053800842, 0.2...","[-0.054072932291576524, -0.05354126596761547, ...","[[143.09458230006695, 144.94360830008983, 146....",,"[92.07516984138431, 93.96713148918708, 97.5907...",1
105,105,738,"[79, 80, 81, 82, 85, 88, 89, 90, 91, 95, 102, ...","[176.96357430005074, 178.9866213001013, 181.00...","[175.02309574468086, 209.1499411764706, 233.81...","[29.50885235253587, 33.30846613100039, 31.7699...","[983.089789181343, 982.6778938489308, 981.8271...","[66.39350106971867, 66.11601528555268, 67.3499...","[0.2168943074481172, 0.21701601498868814, 0.21...","[0.07422386160794861, 0.074480677516973, 0.073...","[[176.96357430005074, 178.9866213001013, 181.0...",,"[318.80637710862993, 321.4769972564965, 327.42...",1


In [141]:
# Review the particle fits (presumably an interactive step in the plotting backend;
# it warns when no particle has been left unchecked).
faadata.check_particle_fits()

  warn('No particle has been left unchecked')
invalid command name "131084097315200process_stream_events"
    while executing
"131084097315200process_stream_events"
    ("after" script)
can't invoke "event" command: application has been destroyed
    while executing
"event generate $w <<ThemeChanged>>"
    (procedure "ttk::ThemeChanged" line 6)
    invoked from within
"ttk::ThemeChanged"


In [142]:
# Save the fit-checking results (reports when no changes were made)
faadata.save_checked_particle_fits()

No changes made to the particle fit checking results


In [143]:
# Average the particle fits and unpack the five returned values; the per-bin entries
# shown in the next cell carry the bin AP position, particle ids, rates, mean rate and SE.
# The trailing semicolon suppresses the cell's output.
ap_positions, mean_fit_rates, SE_fit_rates, bin_counts, bin_particles_rates = faadata.average_particle_fits();

invalid command name "131084136155776process_stream_events"
    while executing
"131084136155776process_stream_events"
    ("after" script)
can't invoke "event" command: application has been destroyed
    while executing
"event generate $w <<ThemeChanged>>"
    (procedure "ttk::ThemeChanged" line 6)
    invoked from within
"ttk::ThemeChanged"


In [144]:
# Display the per-bin results: an array holding 0 for some bins and a dict of
# particle ids, rates, mean rate and SE for others
bin_particles_rates

array([0, 0, 0, 0, 0, 0, 0, 0, 0,
       {'bin': 10, 'bin_ap_position': 0.21428571428571427, 'bin_particle_counts': 7.0, 'particles': array([624, 519, 162, 789, 682, 778, 738]), 'rates': array([330.54685105, 364.35903028, 486.53103405, 544.14154759,
              357.17971548, 389.19931858, 394.37902184]), 'mean_rate': 409.4766455522843, 'SE_rate': 26.996448527811836}                                                                                               ,
       {'bin': 11, 'bin_ap_position': 0.23809523809523808, 'bin_particle_counts': 9.0, 'particles': array([117, 189,  96,  31, 412, 376, 329, 375, 467]), 'rates': array([443.47862877, 436.7446199 , 402.54766162, 378.9573963 ,
              399.05475907, 218.20142844, 495.00954822, 188.06274663,
              458.99945631]), 'mean_rate': 380.11736058354813, 'SE_rate': 33.44587103242748}                                                                                                                                     ,
       {'b