In [10]:
# Import pipeline
from transcription_pipeline import nuclear_pipeline
from transcription_pipeline import preprocessing_pipeline

from transcription_pipeline import spot_pipeline
from transcription_pipeline import fullEmbryo_pipeline

from transcription_pipeline.spot_analysis import compile_data
from transcription_pipeline.utils import plottable

import os
import matplotlib.pyplot as plt
import matplotlib as mpl

# Importing the dataset

In [11]:
# Root folder that holds every embryo dataset. The trailing slash matters: dataset
# paths in this notebook are built by plain string concatenation.
# NOTE(review): this is an absolute, machine-specific path.
dataset_folder = '/mnt/Data1/Josh/transcription_pipeline/'

# Var2 embryos, given as paths relative to `dataset_folder`.
embryo_list = [
    'test_data/NSPARC/2025-03-31/MCP-mSG_His-RFP_Var2(001)_embryo01', # 25C embryos
    'test_data/NSPARC/2025-03-31/MCP-mSG_His-RFP_Var2(001)_embryo02',
    'test_data/NSPARC/2025-04-01/MCP-mSG_His-RFP_Var2(001)_embryo20',
    'test_data/NSPARC/2025-04-01/MCP-mSG_His-RFP_Var2(001)_embryo38',
    'test_data/NSPARC/2025-04-14/MCP-mSG_His-RFP_Var2(001)_embryo28',
    'test_data/NSPARC/2025-04-15/MCP-mSG_His-RFP_Var2(001)_embryo01',
    'test_data/NSPARC/2025-04-22/MCP-mSG_His-RFP_Var2(001)_embryo01_22.8C',
]

# PWM embryos: alternative selection, uncomment to use it instead of the Var2 list.
# embryo_list = [
#     'test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo01', # 22C embryos
#     'test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo02',
#     'test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo03',
# ]


# Full path of the first embryo, echoed as a quick sanity check of the configuration.
test_dataset_name = f'{dataset_folder}{embryo_list[0]}'
print(f'Dataset Path: {test_dataset_name}')

Dataset Path: /mnt/Data1/Josh/transcription_pipeline/test_data/NSPARC/2025-03-31/MCP-mSG_His-RFP_Var2(001)_embryo01


In [12]:
def import_dataset(test_dataset_name):
    """Import one dataset folder, reusing a previous import when one is on disk.

    A previous import is detected by the presence of the directory
    ``<test_dataset_name>/collated_dataset``. The result of that check is passed
    to ``preprocessing_pipeline.DataImport`` as ``import_previous``; when no
    previous import exists the freshly imported dataset is saved.

    Parameters
    ----------
    test_dataset_name : str
        Path of the dataset folder (no trailing slash expected, since the
        sub-folder name is appended with a leading ``/``).

    Returns
    -------
    The ``preprocessing_pipeline.DataImport`` object for this folder.
    """
    collated_dir = test_dataset_name + '/collated_dataset'
    already_imported = os.path.isdir(collated_dir)

    print(
        'Reading previous imported dataset'
        if already_imported
        else 'No previous dataset import found; importing from scratch'
    )

    imported = preprocessing_pipeline.DataImport(
        name_folder=test_dataset_name,
        trim_series=True,
        working_storage_mode='zarr',
        import_previous=already_imported,
    )

    # Nothing to write back when the data came from an existing import.
    if already_imported:
        return imported

    imported.save()
    return imported

def import_fullEmbryo_dataset(test_dataset_name):
    """Import the full-embryo data of one dataset folder, reusing a previous import.

    A previous import is detected by the presence of the directory
    ``<test_dataset_name>/preprocessed_full_embryo``. The result of that check is
    passed to ``preprocessing_pipeline.FullEmbryoImport`` as ``import_previous``;
    when no previous import exists the freshly imported object is saved.

    Parameters
    ----------
    test_dataset_name : str
        Path of the dataset folder.

    Returns
    -------
    The ``preprocessing_pipeline.FullEmbryoImport`` object for this folder.
    """
    preprocessed_dir = test_dataset_name + '/preprocessed_full_embryo'
    already_imported = os.path.isdir(preprocessed_dir)

    print(
        'Reading previous imported FullEmbryo dataset'
        if already_imported
        else 'No previous FullEmbryo dataset import found; importing from scratch'
    )

    full_embryo_import = preprocessing_pipeline.FullEmbryoImport(
        name_folder=test_dataset_name,
        import_previous=already_imported,
    )

    # Nothing to write back when the data came from an existing import.
    if already_imported:
        return full_embryo_import

    full_embryo_import.save()
    return full_embryo_import

def track_import_nuclei(test_dataset_name, dataset, nuclear_channel, spot_channel, retrack=False):
    """Load cached nuclear tracking results, or (re)track the nuclei and save them.

    Cached results are detected by the presence of the directory
    ``<test_dataset_name>/nuclear_analysis_results``. When it exists and
    ``retrack`` is false, an empty ``nuclear_pipeline.Nuclear`` is created and
    filled through ``read_results``. In every other case the nuclei are tracked
    from channel ``nuclear_channel`` of ``dataset`` and the results are written
    with ``save_results(name_folder=test_dataset_name, save_array_as=None)``.

    Parameters
    ----------
    test_dataset_name : str
        Path of the dataset folder; also used as working-memory folder and as
        the location of the trackpy log.
    dataset
        Imported dataset; ``channels_full_dataset``, ``export_global_metadata``,
        ``export_frame_metadata``, ``series_splits`` and ``series_shifts`` are read.
    nuclear_channel : int
        Index of the nuclear channel in the per-channel attributes of ``dataset``.
    spot_channel : int
        Not used by this function; kept so the signature mirrors
        ``track_import_spots``.
    retrack : bool, default False
        Track again even if cached results exist.

    Returns
    -------
    The ``nuclear_pipeline.Nuclear`` object holding the loaded or newly computed
    results.

    Notes
    -----
    When tracking is run, the notebook-level name ``client`` is read (it is not a
    parameter), so the Dask client cell must have been executed first.
    """
    import_previous_nuclear = os.path.isdir(test_dataset_name + '/nuclear_analysis_results')

    if import_previous_nuclear and not retrack:
        print(f'Reading previous nuclear tracking results (retrack={retrack})')
        nuclear_tracking = nuclear_pipeline.Nuclear()
        nuclear_tracking.read_results(name_folder=test_dataset_name)
        return nuclear_tracking

    # From here on the nuclei are tracked; the two situations only differ in the
    # message shown to the user.
    if import_previous_nuclear:
        print(f'Previous nuclear tracking detected. Retracking nuclei (retrack={retrack})')
    else:
        print(f'No previous nuclear tracking results found; importing from scratch (retrack={retrack})')

    nuclear_tracking = nuclear_pipeline.Nuclear(
        data=dataset.channels_full_dataset[nuclear_channel],
        global_metadata=dataset.export_global_metadata[nuclear_channel],
        frame_metadata=dataset.export_frame_metadata[nuclear_channel],
        series_splits=dataset.series_splits,
        series_shifts=dataset.series_shifts,
        search_range_um=1.5,
        stitch=False,
        stitch_max_distance=4,
        stitch_max_frame_distance=2,
        client=client,
        keep_futures=False,
    )
    nuclear_tracking.track_nuclei(
        working_memory_mode="zarr",
        working_memory_folder=test_dataset_name,
        # Joined with a path separator so the log is written inside the dataset
        # folder, the same place track_import_spots uses for its trackpy log.
        trackpy_log_path=os.path.join(test_dataset_name, "trackpy_log"),
    )
    # Persist the tracking results under the dataset folder so the next call can
    # take the read_results path above.
    nuclear_tracking.save_results(name_folder=test_dataset_name, save_array_as=None)
    return nuclear_tracking

def track_import_spots(test_dataset_name, dataset, nuclear_channel, spot_channel, retrack=False, use_nuclear_tracking=True):
    """Load cached spot tracking results, or (re)track the spots and save them.

    Cached results are detected by the presence of the directory
    ``<test_dataset_name>/spot_analysis_results``. When it exists and ``retrack``
    is false, an empty ``spot_pipeline.Spot`` is created and filled through
    ``read_results``; nuclear tracking is not touched in that case because the
    nuclear labels are only an input of spot detection. In every other case the
    spots are tracked from channel ``spot_channel`` of ``dataset`` and saved with
    ``save_results(name_folder=test_dataset_name, save_array_as=None)``.

    Re-tracking over existing results and tracking from scratch deliberately use
    different settings (kept exactly as before):

    ======================  ==========  ============
    setting                 re-track    from scratch
    ======================  ==========  ============
    search_range_um         2           4.2
    threshold_factor        1.3         1.55
    extract: stitch         False       True
    extract: retrack_after  False       True
    extract: verbose        (not set)   True
    ======================  ==========  ============

    Parameters
    ----------
    test_dataset_name : str
        Path of the dataset folder; also used as working-memory folder and as
        the parent of the trackpy log.
    dataset
        Imported dataset; ``channels_full_dataset``, ``export_global_metadata``,
        ``export_frame_metadata``, ``series_splits`` and ``series_shifts`` are read.
    nuclear_channel : int
        Forwarded to ``track_import_nuclei`` when nuclear labels are needed.
    spot_channel : int
        Index of the spot channel in the per-channel attributes of ``dataset``.
    retrack : bool, default False
        Track the spots again even if cached spot results exist. Nuclear
        tracking is always requested with ``retrack=False``.
    use_nuclear_tracking : bool, default True
        When true, ``reordered_labels`` of the nuclear tracking is passed to
        ``spot_pipeline.Spot`` as ``labels``; otherwise ``labels`` is ``None``.

    Returns
    -------
    The ``spot_pipeline.Spot`` object holding the loaded or newly computed results.

    Notes
    -----
    When tracking is run, the notebook-level name ``client`` is read (it is not a
    parameter), so the Dask client cell must have been executed first.
    """
    import_previous_spot = os.path.isdir(test_dataset_name + '/spot_analysis_results')

    if import_previous_spot and not retrack:
        print(f'Load from spot tracking results (retrack={retrack})')
        spot_tracking = spot_pipeline.Spot()
        spot_tracking.read_results(name_folder=test_dataset_name)
        return spot_tracking

    # Nuclear labels are only consumed by spot detection, so they are fetched
    # here rather than before the cached-results shortcut above.
    if use_nuclear_tracking:
        nuclear_tracking = track_import_nuclei(
            test_dataset_name,
            dataset,
            nuclear_channel=nuclear_channel,
            spot_channel=spot_channel,
            retrack=False,
        )
        labels = nuclear_tracking.reordered_labels
    else:
        labels = None

    if import_previous_spot:
        print(f'Previous spot tracking detected. Retracking spots (retrack={retrack})')
        search_range_um = 2
        threshold_factor = 1.3
        extract_options = dict(stitch=False, retrack_after_filter=False)
    else:
        print(f'No previous spot tracking results found; importing from scratch (retrack={retrack})')
        search_range_um = 4.2
        threshold_factor = 1.55
        extract_options = dict(stitch=True, retrack_after_filter=True, verbose=True)

    spot_tracking = spot_pipeline.Spot(
        data=dataset.channels_full_dataset[spot_channel],
        global_metadata=dataset.export_global_metadata[spot_channel],
        frame_metadata=dataset.export_frame_metadata[spot_channel],
        labels=labels,
        expand_distance=3,
        search_range_um=search_range_um,
        retrack_search_range_um=4.5,
        threshold_factor=threshold_factor,
        memory=3,
        retrack_after_filter=False,
        stitch=True,
        min_track_length=0,
        series_splits=dataset.series_splits,
        series_shifts=dataset.series_shifts,
        keep_bandpass=False,
        keep_futures=False,
        keep_spot_labels=False,
        evaluate=True,
        retrack_by_intensity=True,
        client=client,
    )

    spot_tracking.extract_spot_traces(
        working_memory_folder=test_dataset_name,
        trackpy_log_path=test_dataset_name + '/trackpy_log',
        **extract_options,
    )

    # Persist the results under the dataset folder so the next call can take the
    # read_results path above.
    spot_tracking.save_results(name_folder=test_dataset_name, save_array_as=None)

    return spot_tracking


# Spot and Nuclear Tracking

In [13]:
from dask.distributed import LocalCluster, Client

# Re-running this cell would otherwise start another 14-worker cluster while the
# one from the previous run is still alive (Dask then reports that the dashboard
# port is already taken). Shut down whatever an earlier run left in the notebook
# namespace first: the client before the cluster it is connected to.
for _stale_name in ("client", "cluster"):
    _stale = globals().pop(_stale_name, None)
    if _stale is not None:
        _stale.close()

# Local cluster of single-threaded worker processes; memory_limit applies to
# each worker, not to the cluster as a whole.
cluster = LocalCluster(
    host="localhost",
    #scheduler_port=37763,
    threads_per_worker=1,
    n_workers=14,
    memory_limit="6GB",
)

# `client` is read as a notebook-level name by the tracking helpers, so this
# cell has to run before they are called.
client = Client(cluster)

print(client)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 42267 instead


<Client: 'tcp://127.0.0.1:33603' processes=14 threads=14, memory=78.23 GiB>




In [14]:
# Print the URL of the Dask dashboard served for `client`, to monitor the workers.
print(client.dashboard_link)

http://127.0.0.1:42267/status


In [15]:
# Restart the workers of the Dask cluster before the tracking loops are run.
# The call is the last expression of the cell, so its return value is displayed.
client.restart()

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:42267/status,

0,1
Dashboard: http://127.0.0.1:42267/status,Workers: 14
Total threads: 14,Total memory: 78.23 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:33603,Workers: 14
Dashboard: http://127.0.0.1:42267/status,Total threads: 14
Started: Just now,Total memory: 78.23 GiB

0,1
Comm: tcp://127.0.0.1:35377,Total threads: 1
Dashboard: http://127.0.0.1:35113/status,Memory: 5.59 GiB
Nanny: tcp://127.0.0.1:43441,
Local directory: /tmp/dask-scratch-space-1003/worker-li26mxl1,Local directory: /tmp/dask-scratch-space-1003/worker-li26mxl1

0,1
Comm: tcp://127.0.0.1:35085,Total threads: 1
Dashboard: http://127.0.0.1:37897/status,Memory: 5.59 GiB
Nanny: tcp://127.0.0.1:45263,
Local directory: /tmp/dask-scratch-space-1003/worker-_s_9r9kk,Local directory: /tmp/dask-scratch-space-1003/worker-_s_9r9kk

0,1
Comm: tcp://127.0.0.1:45907,Total threads: 1
Dashboard: http://127.0.0.1:38323/status,Memory: 5.59 GiB
Nanny: tcp://127.0.0.1:43013,
Local directory: /tmp/dask-scratch-space-1003/worker-slkl0upt,Local directory: /tmp/dask-scratch-space-1003/worker-slkl0upt

0,1
Comm: tcp://127.0.0.1:43455,Total threads: 1
Dashboard: http://127.0.0.1:33195/status,Memory: 5.59 GiB
Nanny: tcp://127.0.0.1:43295,
Local directory: /tmp/dask-scratch-space-1003/worker-x4gtjpa9,Local directory: /tmp/dask-scratch-space-1003/worker-x4gtjpa9

0,1
Comm: tcp://127.0.0.1:42283,Total threads: 1
Dashboard: http://127.0.0.1:44935/status,Memory: 5.59 GiB
Nanny: tcp://127.0.0.1:45977,
Local directory: /tmp/dask-scratch-space-1003/worker-gxjxiy4q,Local directory: /tmp/dask-scratch-space-1003/worker-gxjxiy4q

0,1
Comm: tcp://127.0.0.1:41005,Total threads: 1
Dashboard: http://127.0.0.1:44693/status,Memory: 5.59 GiB
Nanny: tcp://127.0.0.1:38109,
Local directory: /tmp/dask-scratch-space-1003/worker-x51euw8r,Local directory: /tmp/dask-scratch-space-1003/worker-x51euw8r

0,1
Comm: tcp://127.0.0.1:45233,Total threads: 1
Dashboard: http://127.0.0.1:35901/status,Memory: 5.59 GiB
Nanny: tcp://127.0.0.1:41741,
Local directory: /tmp/dask-scratch-space-1003/worker-qs4tut8f,Local directory: /tmp/dask-scratch-space-1003/worker-qs4tut8f

0,1
Comm: tcp://127.0.0.1:36165,Total threads: 1
Dashboard: http://127.0.0.1:39541/status,Memory: 5.59 GiB
Nanny: tcp://127.0.0.1:37717,
Local directory: /tmp/dask-scratch-space-1003/worker-1ot9n5yp,Local directory: /tmp/dask-scratch-space-1003/worker-1ot9n5yp

0,1
Comm: tcp://127.0.0.1:35401,Total threads: 1
Dashboard: http://127.0.0.1:34241/status,Memory: 5.59 GiB
Nanny: tcp://127.0.0.1:40809,
Local directory: /tmp/dask-scratch-space-1003/worker-zxklq811,Local directory: /tmp/dask-scratch-space-1003/worker-zxklq811

0,1
Comm: tcp://127.0.0.1:42651,Total threads: 1
Dashboard: http://127.0.0.1:44175/status,Memory: 5.59 GiB
Nanny: tcp://127.0.0.1:45223,
Local directory: /tmp/dask-scratch-space-1003/worker-be276vl0,Local directory: /tmp/dask-scratch-space-1003/worker-be276vl0

0,1
Comm: tcp://127.0.0.1:42749,Total threads: 1
Dashboard: http://127.0.0.1:41947/status,Memory: 5.59 GiB
Nanny: tcp://127.0.0.1:46723,
Local directory: /tmp/dask-scratch-space-1003/worker-6_sl6v54,Local directory: /tmp/dask-scratch-space-1003/worker-6_sl6v54

0,1
Comm: tcp://127.0.0.1:41253,Total threads: 1
Dashboard: http://127.0.0.1:46361/status,Memory: 5.59 GiB
Nanny: tcp://127.0.0.1:38225,
Local directory: /tmp/dask-scratch-space-1003/worker-tqwnjw6w,Local directory: /tmp/dask-scratch-space-1003/worker-tqwnjw6w

0,1
Comm: tcp://127.0.0.1:36903,Total threads: 1
Dashboard: http://127.0.0.1:33341/status,Memory: 5.59 GiB
Nanny: tcp://127.0.0.1:44925,
Local directory: /tmp/dask-scratch-space-1003/worker-370xvmco,Local directory: /tmp/dask-scratch-space-1003/worker-370xvmco

0,1
Comm: tcp://127.0.0.1:44937,Total threads: 1
Dashboard: http://127.0.0.1:44033/status,Memory: 5.59 GiB
Nanny: tcp://127.0.0.1:34677,
Local directory: /tmp/dask-scratch-space-1003/worker-4wv2kffy,Local directory: /tmp/dask-scratch-space-1003/worker-4wv2kffy


In [16]:
# Channel indices passed to the tracking helpers, which use them to index the
# per-channel attributes of the imported dataset (e.g. channels_full_dataset).
# NOTE(review): going by the dataset names, channel 1 is presumably His-RFP
# (nuclei) and channel 0 MCP-mSG (spots) — confirm against the acquisition metadata.
nuclear_channel = 1
spot_channel = 0

In [17]:
import gc

# Import every embryo and make sure nuclear and spot tracking results exist on disk.
# After the loop the notebook-level names (test_dataset_name, dataset,
# FullEmbryo_dataset, nuclear_tracking, spot_tracking) refer to the last embryo.
for embryo_subpath in embryo_list:
    # Define the dataset path
    test_dataset_name = dataset_folder + embryo_subpath
    print('Dataset Path: ' + test_dataset_name)

    # The garbage collector is switched off only while the datasets are imported.
    # NOTE(review): presumably a workaround for slow or failing imports — confirm
    # it is still needed. try/finally guarantees the collector is switched back
    # on even if an import raises, so a failure cannot leave it disabled for the
    # rest of the kernel session.
    gc.disable()
    try:
        # Load the dataset
        dataset = import_dataset(test_dataset_name)

        # Load the full embryo dataset
        FullEmbryo_dataset = import_fullEmbryo_dataset(test_dataset_name)
    finally:
        gc.enable()

    # Track and import nuclei
    nuclear_tracking = track_import_nuclei(
        test_dataset_name,
        dataset,
        nuclear_channel=nuclear_channel,
        spot_channel=spot_channel,
        retrack=False,
    )

    # Track and import spots
    spot_tracking = track_import_spots(
        test_dataset_name,
        dataset,
        nuclear_channel=nuclear_channel,
        spot_channel=spot_channel,
        retrack=False,
        use_nuclear_tracking=True,
    )


Dataset Path: /mnt/Data1/Josh/transcription_pipeline/test_data/NSPARC/2025-03-31/MCP-mSG_His-RFP_Var2(001)_embryo01
Reading previous imported dataset
Reading previous imported FullEmbryo dataset
Reading previous nuclear tracking results (retrack=False)
Reading previous nuclear tracking results (retrack=False)
Load from spot tracking results (retrack=False)
Dataset Path: /mnt/Data1/Josh/transcription_pipeline/test_data/NSPARC/2025-03-31/MCP-mSG_His-RFP_Var2(001)_embryo02
Reading previous imported dataset
Reading previous imported FullEmbryo dataset
Reading previous nuclear tracking results (retrack=False)
Reading previous nuclear tracking results (retrack=False)
Load from spot tracking results (retrack=False)
Dataset Path: /mnt/Data1/Josh/transcription_pipeline/test_data/NSPARC/2025-04-01/MCP-mSG_His-RFP_Var2(001)_embryo20
Reading previous imported dataset
Reading previous imported FullEmbryo dataset
Reading previous nuclear tracking results (retrack=False)
Reading previous nuclear trac

In [18]:
# For every embryo: load the cached spot tracking, compile per-particle traces,
# find the AP axis of the embryo and pickle the compiled dataframe with AP positions.
#
# Plotting backend: uncomment the magic below for inline figures.
# %matplotlib inline
# Switch to interactive plotting (TkAgg opens the figures in separate windows).
# NOTE(review): the recorded output of this cell contains Tk errors
# ("application has been destroyed") — presumably from closing those windows; confirm.
mpl.use('TkAgg')
for i in range(len(embryo_list)):
    # Define the dataset path
    test_dataset_name = dataset_folder + embryo_list[i]
    print('Dataset Path: ' + test_dataset_name)
    # Load the dataset
    dataset = import_dataset(test_dataset_name)

    # Load the full embryo dataset
    FullEmbryo_dataset = import_fullEmbryo_dataset(test_dataset_name)

    # Load the spot tracking results saved in the dataset folder. This reads them
    # unconditionally, so spot tracking must already have been run for this embryo.
    spot_tracking = spot_pipeline.Spot()
    spot_tracking.read_results(name_folder=test_dataset_name)
    # Load spot tracking dataframe
    spot_df = spot_tracking.spot_dataframe

    # Keep only rows whose "particle" value is non-zero.
    # NOTE(review): presumably particle == 0 marks spots that were not assigned
    # to a trace — confirm against the spot pipeline.
    detected_spots = spot_df[spot_df["particle"] != 0]

    # Compile the listed spot columns into traces; no nuclear tracking dataframe
    # is supplied.
    compiled_dataframe = compile_data.compile_traces(
        detected_spots,
        compile_columns_spot=[
            "frame",
            "t_s",
            "intensity_from_neighborhood",
            "intensity_std_error_from_neighborhood",
            "x",
            "y"
        ],
        nuclear_tracking_dataframe=None,
    )
    # Build the full-embryo analysis object and find the AP axis. With
    # load_previous=True previously saved AP points are reused when available.
    # NOTE(review): in the recorded run the cell aborted with an IndexError right
    # after "No previous AP points found. Calculating AP points." for the third
    # embryo, so the remaining embryos were not processed — investigate.
    fullEmbryo = fullEmbryo_pipeline.FullEmbryo(test_dataset_name, FullEmbryo_dataset, dataset, his_channel=nuclear_channel)
    fullEmbryo.find_ap_axis(make_plots=True, ap_method='minf90', sigma=10, radius=5,
                            load_previous=True, save_results=True)

    # Convert the x/y positions of the compiled traces to AP positions
    # (the result is inspected below through its 'ap' column).
    compiled_dataframe = fullEmbryo.xy_to_ap(compiled_dataframe)
    # Save compiled dataframe
    print('Save compiled dataframe')
    compiled_dataframe.to_pickle(test_dataset_name + '/compiled_dataframe.pkl')


Dataset Path: /mnt/Data1/Josh/transcription_pipeline/test_data/NSPARC/2025-03-31/MCP-mSG_His-RFP_Var2(001)_embryo01
Reading previous imported dataset
Reading previous imported FullEmbryo dataset
Previous AP points loaded.
AP angle:  192.40741852740075


invalid command name "125144824953600process_stream_events"
    while executing
"125144824953600process_stream_events"
    ("after" script)
can't invoke "event" command: application has been destroyed
    while executing
"event generate $w <<ThemeChanged>>"
    (procedure "ttk::ThemeChanged" line 6)
    invoked from within
"ttk::ThemeChanged"


Save compiled dataframe
Dataset Path: /mnt/Data1/Josh/transcription_pipeline/test_data/NSPARC/2025-03-31/MCP-mSG_His-RFP_Var2(001)_embryo02
Reading previous imported dataset
Reading previous imported FullEmbryo dataset
Previous AP points loaded.
AP angle:  188.9726266148964
Save compiled dataframe
Dataset Path: /mnt/Data1/Josh/transcription_pipeline/test_data/NSPARC/2025-04-01/MCP-mSG_His-RFP_Var2(001)_embryo20
Reading previous imported dataset
Reading previous imported FullEmbryo dataset
No previous AP points found. Calculating AP points.


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [59]:
# Show the 'ap' column for the row with index label 1 (.loc is label-based) of
# whatever `compiled_dataframe` currently holds in the kernel.
compiled_dataframe.loc[1, 'ap']

array([-0.01373128, -0.01811509, -0.01802702, -0.01784189, -0.01801626,
       -0.01737649, -0.01736353, -0.01751206, -0.01757781, -0.01746008,
       -0.01732768, -0.01726716, -0.01744071, -0.01803175, -0.01800162,
       -0.01812322, -0.01752222, -0.01730723, -0.01789551, -0.01704556,
       -0.01752568, -0.01789089, -0.01798086, -0.01838821, -0.01814657,
       -0.0186135 , -0.01870752, -0.01885052, -0.0187848 , -0.01921219,
       -0.01835497, -0.01781661, -0.00831965, -0.0179796 , -0.01744603,
       -0.01694815, -0.01224719, -0.01713874, -0.01655792, -0.01631108,
       -0.01623546, -0.01611911, -0.01620972, -0.01550117, -0.01545769,
       -0.01559578, -0.0127761 , -0.01552968, -0.01562264, -0.01567371,
       -0.01546777, -0.01600136, -0.01598895, -0.01579279, -0.00905151,
       -0.01672056, -0.01640281, -0.01645903])