In [2]:
# Import pipeline
from transcription_pipeline import nuclear_pipeline
from transcription_pipeline import preprocessing_pipeline

from transcription_pipeline import spot_pipeline
from transcription_pipeline import fullEmbryo_pipeline

from transcription_pipeline.spot_analysis import compile_data
from transcription_pipeline.utils import plottable

import os
import matplotlib.pyplot as plt
import matplotlib as mpl

# Importing the dataset

In [3]:
# Root folder that every embryo path below is relative to.
# NOTE: absolute, machine-specific path; edit it when running on another machine.
dataset_folder = '/mnt/Data1/Nick/transcription_pipeline/'

# Named groups of embryo folders (relative to dataset_folder). Switch the analysed
# group by changing `selected_embryo_set` instead of (un)commenting list literals.
embryo_sets = {
    # 25C embryos
    '25C': [
        'test_data/NSPARC/2025-03-31/MCP-mSG_His-RFP_Var2(001)_embryo01',
        'test_data/NSPARC/2025-03-31/MCP-mSG_His-RFP_Var2(001)_embryo02',
        'test_data/NSPARC/2025-04-01/MCP-mSG_His-RFP_Var2(001)_embryo20',
        'test_data/NSPARC/2025-04-01/MCP-mSG_His-RFP_Var2(001)_embryo38',
        'test_data/NSPARC/2025-04-14/MCP-mSG_His-RFP_Var2(001)_embryo28',
        'test_data/NSPARC/2025-04-15/MCP-mSG_His-RFP_Var2(001)_embryo01',
    ],
    # 23C embryos from 2025-04-22
    '23C_2025-04-22': [
        'test_data/NSPARC/2025-04-22/MCP-mSG_His-RFP_Var2(001)_embryo01_22.8C',
        'test_data/NSPARC/2025-04-22/MCP-mSG_His-RFP_Var2(001)_embryo02_23C',
    ],
    # ~22C embryos from 2025-04-29
    '22C_2025-04-29': [
        'test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo01',
        'test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo02',
        'test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo03',
        'test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo04',
        'test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo05',
    ],
    # Single embryo from 2025-04-14
    '2025-04-14_embryo31': [
        'test_data/NSPARC/2025-04-14/MCP-mSG_His-RFP_Var2(001)_embryo31',
    ],
}

# Key of the group processed by the rest of the notebook.
selected_embryo_set = '22C_2025-04-29'
embryo_list = embryo_sets[selected_embryo_set]

# Full path of the first embryo of the selected group, printed as a sanity check.
test_dataset_name = dataset_folder + embryo_list[0]
print('Dataset Path: ' + test_dataset_name)

Dataset Path: /mnt/Data1/Nick/transcription_pipeline/test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo01


In [4]:
def import_dataset(test_dataset_name):
    """Return the ``DataImport`` object for one embryo folder.

    The presence of a ``collated_dataset`` sub-directory inside
    ``test_dataset_name`` is used as the marker of an earlier import: when it
    exists the import is created with ``import_previous=True``; otherwise the
    data is imported from scratch and ``save()`` is called on the result.

    Parameters
    ----------
    test_dataset_name : str
        Path of the embryo folder (no trailing separator is added or removed).

    Returns
    -------
    The ``preprocessing_pipeline.DataImport`` instance.
    """
    marker_dir = test_dataset_name + '/collated_dataset'
    already_imported = os.path.isdir(marker_dir)

    print(
        'Reading previous imported dataset'
        if already_imported
        else 'No previous dataset import found; importing from scratch'
    )

    import_options = {
        'name_folder': test_dataset_name,
        'trim_series': True,
        'working_storage_mode': 'zarr',
        'import_previous': already_imported,
    }
    dataset = preprocessing_pipeline.DataImport(**import_options)

    # Only a fresh import needs to be written back to disk.
    if already_imported:
        return dataset
    dataset.save()
    return dataset

def import_fullEmbryo_dataset(test_dataset_name):
    """Return the ``FullEmbryoImport`` object for one embryo folder.

    A ``preprocessed_full_embryo`` sub-directory inside ``test_dataset_name``
    marks an earlier import: when it exists the import is created with
    ``import_previous=True``; otherwise the import runs from scratch and
    ``save()`` is called on the result.

    Parameters
    ----------
    test_dataset_name : str
        Path of the embryo folder.

    Returns
    -------
    The ``preprocessing_pipeline.FullEmbryoImport`` instance.
    """
    marker_dir = test_dataset_name + '/preprocessed_full_embryo'
    already_imported = os.path.isdir(marker_dir)

    print(
        'Reading previous imported FullEmbryo dataset'
        if already_imported
        else 'No previous FullEmbryo dataset import found; importing from scratch'
    )

    import_options = {
        'name_folder': test_dataset_name,
        'import_previous': already_imported,
    }
    FullEmbryo_dataset = preprocessing_pipeline.FullEmbryoImport(**import_options)

    # Only a fresh import needs to be written back to disk.
    if already_imported:
        return FullEmbryo_dataset
    FullEmbryo_dataset.save()
    return FullEmbryo_dataset

def track_import_nuclei(test_dataset_name, dataset, nuclear_channel, spot_channel, retrack=False):
    """Load saved nuclear tracking results, or (re)run nuclear tracking and save them.

    A ``nuclear_analysis_results`` sub-directory inside ``test_dataset_name``
    marks earlier results. When it exists and ``retrack`` is false the results
    are read back from disk; in every other case the nuclei are tracked from
    the imported data and the results are saved into ``test_dataset_name``.

    Tracking relies on the notebook-level ``client`` variable (the Dask client
    cell), which must have been created before a tracking branch runs.

    Parameters
    ----------
    test_dataset_name : str
        Path of the embryo folder.
    dataset
        Imported dataset providing ``channels_full_dataset``,
        ``export_global_metadata``, ``export_frame_metadata``,
        ``series_splits`` and ``series_shifts``.
    nuclear_channel : int
        Index used to pick the nuclear channel out of ``dataset``.
    spot_channel : int
        Unused here; kept so the signature parallels ``track_import_spots``.
    retrack : bool, default False
        Force tracking even when saved results exist.

    Returns
    -------
    The ``nuclear_pipeline.Nuclear`` instance (loaded or freshly tracked).
    """
    import_previous_nuclear = os.path.isdir(test_dataset_name + '/nuclear_analysis_results')

    if import_previous_nuclear and not retrack:
        print(f'Reading previous nuclear tracking results (retrack={retrack})')
        nuclear_tracking = nuclear_pipeline.Nuclear()
        # Load nuclear tracking results
        nuclear_tracking.read_results(name_folder=test_dataset_name)
        return nuclear_tracking

    # Both remaining cases run exactly the same tracking; only the message differs.
    if import_previous_nuclear:
        print(f'Previous nuclear tracking detected. Retracking nuclei (retrack={retrack})')
    else:
        print(f'No previous nuclear tracking results found; importing from scratch (retrack={retrack})')

    nuclear_tracking = nuclear_pipeline.Nuclear(
        data=dataset.channels_full_dataset[nuclear_channel],
        global_metadata=dataset.export_global_metadata[nuclear_channel],
        frame_metadata=dataset.export_frame_metadata[nuclear_channel],
        series_splits=dataset.series_splits,
        series_shifts=dataset.series_shifts,
        search_range_um=1.5,
        stitch=False,
        stitch_max_distance=4,
        stitch_max_frame_distance=2,
        client=client,
        keep_futures=False,
    )
    nuclear_tracking.track_nuclei(
        working_memory_mode="zarr",
        working_memory_folder=test_dataset_name,
        # Keep the log inside the embryo folder, like the spot tracking does;
        # plain concatenation without a separator put it next to the folder.
        trackpy_log_path=os.path.join(test_dataset_name, "trackpy_log"),
    )
    # Saves tracked nuclear mask as a zarr, and pickles dataframes with segmentation and
    # tracking information.
    nuclear_tracking.save_results(name_folder=test_dataset_name, save_array_as=None)
    return nuclear_tracking

def track_import_spots(test_dataset_name, dataset, nuclear_channel, spot_channel, retrack=False, use_nuclear_tracking=True):
    """Load saved spot tracking results, or (re)run spot tracking and save them.

    A ``spot_analysis_results`` sub-directory inside ``test_dataset_name``
    marks earlier results. Three cases follow:

    * results exist and ``retrack`` is false: read them back from disk;
    * results exist and ``retrack`` is true: track again with the "retrack"
      parameter set (``search_range_um=2``, ``threshold_factor=1.3``, traces
      extracted without stitching or post-filter retracking);
    * no results: track from scratch (``search_range_um=4.2``,
      ``threshold_factor=1.55``, traces extracted with stitching, post-filter
      retracking and ``verbose=True``).

    When ``use_nuclear_tracking`` is true the nuclear tracking is obtained
    first through ``track_import_nuclei`` (always with ``retrack=False``) and
    its ``reordered_labels`` are passed to the spot tracking as ``labels``;
    this happens in all three cases. Tracking relies on the notebook-level
    ``client`` variable.

    Parameters
    ----------
    test_dataset_name : str
        Path of the embryo folder.
    dataset
        Imported dataset providing the channel data and metadata.
    nuclear_channel : int
        Channel index forwarded to ``track_import_nuclei``.
    spot_channel : int
        Index used to pick the spot channel out of ``dataset``.
    retrack : bool, default False
        Force spot tracking even when saved results exist.
    use_nuclear_tracking : bool, default True
        Whether to obtain nuclear labels for the spot tracking.

    Returns
    -------
    The ``spot_pipeline.Spot`` instance (loaded or freshly tracked).
    """
    has_saved_spots = os.path.isdir(test_dataset_name + '/spot_analysis_results')

    Labels = None
    if use_nuclear_tracking:
        nuclei = track_import_nuclei(
            test_dataset_name,
            dataset,
            nuclear_channel=nuclear_channel,
            spot_channel=spot_channel,
            retrack=False,
        )
        Labels = nuclei.reordered_labels

    # Case 1: reuse what is on disk.
    if has_saved_spots and not retrack:
        print(f'Load from spot tracking results (retrack={retrack})')
        spot_tracking = spot_pipeline.Spot()
        # Load spot tracking results
        spot_tracking.read_results(name_folder=test_dataset_name)
        return spot_tracking

    # Cases 2 and 3 share most settings; only the values below differ.
    if has_saved_spots:
        print(f'Previous spot tracking detected. Retracking spots (retrack={retrack})')
        detection_overrides = {'search_range_um': 2, 'threshold_factor': 1.3}
        trace_overrides = {'stitch': False, 'retrack_after_filter': False}
    else:
        print(f'No previous spot tracking results found; importing from scratch (retrack={retrack})')
        detection_overrides = {'search_range_um': 4.2, 'threshold_factor': 1.55}
        trace_overrides = {'stitch': True, 'retrack_after_filter': True, 'verbose': True}

    spot_tracking = spot_pipeline.Spot(
        data=dataset.channels_full_dataset[spot_channel],
        global_metadata=dataset.export_global_metadata[spot_channel],
        frame_metadata=dataset.export_frame_metadata[spot_channel],
        labels=Labels,
        expand_distance=3,
        retrack_search_range_um=4.5,
        memory=3,
        retrack_after_filter=False,
        stitch=True,
        min_track_length=0,
        series_splits=dataset.series_splits,
        series_shifts=dataset.series_shifts,
        keep_bandpass=False,
        keep_futures=False,
        keep_spot_labels=False,
        evaluate=True,
        retrack_by_intensity=True,
        client=client,
        **detection_overrides,
    )

    spot_tracking.extract_spot_traces(
        working_memory_folder=test_dataset_name,
        trackpy_log_path=test_dataset_name + '/trackpy_log',
        **trace_overrides,
    )

    # Saves tracked spot mask as a zarr, and pickles dataframes with spot fitting and
    # quantification information.
    spot_tracking.save_results(name_folder=test_dataset_name, save_array_as=None)
    return spot_tracking


# Spot and Nuclear Tracking

In [None]:
from dask.distributed import Client, LocalCluster

# Settings of the local Dask cluster whose client the tracking helpers
# (track_import_nuclei / track_import_spots) pick up through the global `client`.
cluster_settings = {
    "host": "localhost",
    # "scheduler_port": 37763,
    "threads_per_worker": 1,
    "n_workers": 14,
    "memory_limit": "6GB",
}
cluster = LocalCluster(**cluster_settings)

# Client connected to the cluster above; printed as a quick status check.
client = Client(cluster)
print(client)

In [None]:
# Print the dashboard link exposed by the Dask client created in the previous cell.
print(client.dashboard_link)

In [None]:
# Manual step: restart the Dask client (presumably to reset worker state between
# tracking runs — TODO confirm). Requires the cluster cell to have been run first.
client.restart()

In [5]:
# Channel indices used by the tracking helpers to index dataset.channels_full_dataset
# and the matching export_global_metadata / export_frame_metadata entries.
nuclear_channel = 1  # channel given to the nuclear tracking (also passed to FullEmbryo as his_channel)
spot_channel = 0  # channel given to the spot tracking

In [None]:
import gc

# Import, then track nuclei and spots, for every embryo of the selected list.
for embryo in embryo_list:
    # Define the dataset path
    test_dataset_name = dataset_folder + embryo
    print('Dataset Path: ' + test_dataset_name)

    # Garbage collection is switched off while the datasets are imported
    # (presumably to speed up the import — TODO confirm). The try/finally makes
    # sure it is switched back on even when an import raises; otherwise a single
    # failure would leave the collector disabled for the rest of the kernel session.
    gc.disable()
    try:
        # Load the dataset
        dataset = import_dataset(test_dataset_name)

        # Load the full embryo dataset
        FullEmbryo_dataset = import_fullEmbryo_dataset(test_dataset_name)
    finally:
        gc.enable()

    # Track and import nuclei
    nuclear_tracking = track_import_nuclei(
        test_dataset_name,
        dataset,
        nuclear_channel=nuclear_channel,
        spot_channel=spot_channel,
        retrack=False,
    )

    # Track and import spots. With use_nuclear_tracking=True this call obtains the
    # nuclear tracking again through track_import_nuclei (retrack=False), which at
    # this point reads the results saved by the step above.
    spot_tracking = track_import_spots(
        test_dataset_name,
        dataset,
        nuclear_channel=nuclear_channel,
        spot_channel=spot_channel,
        retrack=False,
        use_nuclear_tracking=True,
    )


In [7]:
# Switch to inline plotting
# %matplotlib inline
# Switch to interactive plotting
mpl.use('TkAgg')

# For every embryo: reload the saved spot tracking, compile the traces, attach
# AP positions and pickle the result inside the embryo folder.
for i, embryo_path in enumerate(embryo_list):
    # Full path of the embryo folder
    test_dataset_name = dataset_folder + embryo_path
    print(f'Dataset Path: {test_dataset_name}')

    # Imported movie and full-embryo datasets (read back from disk when present)
    dataset = import_dataset(test_dataset_name)
    FullEmbryo_dataset = import_fullEmbryo_dataset(test_dataset_name)

    # Saved spot tracking results and their dataframe
    spot_tracking = spot_pipeline.Spot()
    spot_tracking.read_results(name_folder=test_dataset_name)
    spot_df = spot_tracking.spot_dataframe

    # Keep only rows whose "particle" value is non-zero (spots that were detected)
    is_detected = spot_df["particle"] != 0
    detected_spots = spot_df[is_detected]

    # Compile traces from the detected spots; no nuclear tracking dataframe is used
    trace_columns = [
        "frame",
        "t_s",
        "intensity_from_neighborhood",
        "intensity_std_error_from_neighborhood",
        "x",
        "y",
    ]
    compiled_dataframe = compile_data.compile_traces(
        detected_spots,
        compile_columns_spot=trace_columns,
        nuclear_tracking_dataframe=None,
    )

    # Build the FullEmbryo object and find the AP axis, reusing and saving
    # previous results (load_previous=True, save_results=True)
    fullEmbryo = fullEmbryo_pipeline.FullEmbryo(
        test_dataset_name,
        FullEmbryo_dataset,
        dataset,
        his_channel=nuclear_channel,
    )
    fullEmbryo.find_ap_axis(
        make_plots=True,
        ap_method='minf90',
        sigma=10,
        radius=5,
        load_previous=True,
        save_results=True,
    )

    # Pass the compiled traces through xy_to_ap (x/y positions -> AP position)
    compiled_dataframe = fullEmbryo.xy_to_ap(compiled_dataframe)

    # Save compiled dataframe
    print('Save compiled dataframe')
    compiled_dataframe.to_pickle(f'{test_dataset_name}/compiled_dataframe.pkl')


Dataset Path: /mnt/Data1/Nick/transcription_pipeline/test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo01
Reading previous imported dataset
Reading previous imported FullEmbryo dataset
Previous AP points loaded.
Save compiled dataframe
Dataset Path: /mnt/Data1/Nick/transcription_pipeline/test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo02
Reading previous imported dataset
Reading previous imported FullEmbryo dataset
Previous AP points loaded.
Save compiled dataframe
Dataset Path: /mnt/Data1/Nick/transcription_pipeline/test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo03
Reading previous imported dataset
Reading previous imported FullEmbryo dataset
Previous AP points loaded.
Save compiled dataframe
Dataset Path: /mnt/Data1/Nick/transcription_pipeline/test_data/NSPARC/2025-04-29/MCP-mSG_His-RFP_RBSPWM(003)_embryo04
Reading previous imported dataset
Reading previous imported FullEmbryo dataset
Previous AP points loaded.
Save compiled dataframe
Dataset 

In [None]:
# Show the 'ap' column for the row with index label 1.
# compiled_dataframe is the one left over from the last embryo of the loop above.
compiled_dataframe.loc[1, 'ap']

In [None]:
# Re-run find_ap_axis for the embryo left over from the loop above
# (test_dataset_name, FullEmbryo_dataset and dataset hold the last embryo processed).
# The dataset folder is passed as first argument, matching the FullEmbryo call in the
# compile loop above; this cell previously omitted it.
fullEmbryo = fullEmbryo_pipeline.FullEmbryo(test_dataset_name, FullEmbryo_dataset, dataset, his_channel=nuclear_channel)
fullEmbryo.find_ap_axis(make_plots=True, remove_small_objects=False, ap_method='minf90', sigma=10, radius=5)

In [None]:
%matplotlib inline
# Histogram of the 'sigma_x_y' column of spot_df
# (spot_df is the spot dataframe of the last embryo processed by the loop above).
fig_xy, ax_xy = plt.subplots()
ax_xy.hist(spot_df['sigma_x_y'], bins=100)
ax_xy.set_xlabel('sigma_x_y')

# Histogram of the 'sigma_z' column; the view is limited to the 0-10 range
fig_z, ax_z = plt.subplots()
ax_z.hist(spot_df['sigma_z'], bins=10000)
ax_z.set_xlim(0, 10)
ax_z.set_xlabel('sigma_z')