## Prepare Dataset and Specify Parameters (please only edit cells in this section)

In [11]:
# Switches for the optional pipeline stages executed by the processing loop.
# Set a switch to False to skip that stage for every selected dataset.

nuclear_tracking = True  # run (or reload) the nuclear tracking step

spot_tracking = True  # run (or reload) the spot tracking step

In [12]:
import os

# Root directory that contains one sub-folder per dataset.
dataset_folder = '/mnt/Data1/Nick/transcription_pipeline/test_data/2025-03-18/'

# Names of the sub-directories found directly under the root, sorted so that
# the positions used to select datasets further down are reproducible.
with os.scandir(dataset_folder) as entries:
    folder_list = sorted(entry.name for entry in entries if entry.is_dir())
folder_list

['Dl-Dendra2_fullEmbryo',
 'MCP-Halo552_His-BFP_r1close(002)_StillImages',
 'MCP-mSG_His-RFP_RBSPWM(003)_embryo01',
 'MCP-mSG_His-RFP_RBSPWM(003)_embryo02',
 'MCP-mSG_His-RFP_RBSPWM(003)_embryo03',
 'MCP-mSG_His-RFP_RBSPWM(003)_embryo04',
 'MCP-mSG_His-RFP_RBSPWM(003)_embryo05']

### Specify the data sets you want to analyze (edit the cell below)

In [13]:
# Specify here what data sets you want to analyze: positions refer to the
# `folder_list` printed above (start is inclusive, stop is exclusive).
first_index, stop_index = 4, 7
dataset_paths = folder_list[first_index:stop_index]
dataset_paths

['MCP-mSG_His-RFP_RBSPWM(003)_embryo03',
 'MCP-mSG_His-RFP_RBSPWM(003)_embryo04',
 'MCP-mSG_His-RFP_RBSPWM(003)_embryo05']

In [14]:
# Import pipeline
from transcription_pipeline import nuclear_pipeline
from transcription_pipeline import preprocessing_pipeline

from transcription_pipeline import spot_pipeline
from transcription_pipeline import fullEmbryo_pipeline

from transcription_pipeline.spot_analysis import compile_data
from transcription_pipeline.utils import plottable

import os
import matplotlib.pyplot as plt

## Start a Dask client for parallel processing

In [15]:
from dask.distributed import LocalCluster, Client

# The scheduler address is fixed so that re-running this cell can reconnect to
# a cluster started by an earlier run instead of failing on the occupied port.
SCHEDULER_HOST = "localhost"
SCHEDULER_PORT = 37763

try:
    cluster = LocalCluster(
        host=SCHEDULER_HOST,
        scheduler_port=SCHEDULER_PORT,
        threads_per_worker=1,
        n_workers=14,
        memory_limit="6GB",
    )

    client = Client(cluster)
except Exception:
    # `except Exception` rather than a bare `except`, so that interrupting the
    # kernel (KeyboardInterrupt) or SystemExit is not mistaken for
    # "cluster already running" and silently swallowed.
    # Any start-up failure is treated as "a cluster already owns this port";
    # if that is not the case, the connection attempt below raises instead.
    print("Cluster already running")
    client = Client(f"{SCHEDULER_HOST}:{SCHEDULER_PORT}")

print(client)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 44557 instead


Cluster already running
<Client: 'tcp://127.0.0.1:37763' processes=14 threads=14, memory=78.23 GiB>


In [16]:
# Restart the connected cluster's workers before the heavy processing starts
# (see Dask `Client.restart`).
client.restart()
# To stop the cluster altogether, call client.shutdown() instead.
dashboard_url = client.dashboard_link
print(dashboard_url)

http://localhost:8787/status


## Loop over the selected datasets

In [None]:
# Process every selected dataset: import the MS2 movie and the full-embryo
# images, then run (or reload) nuclear tracking and spot tracking.
# Each step is best-effort: a failure is reported and the loop moves on, so one
# broken dataset does not abort the whole batch.

# Latch the on/off switches from the configuration cell once, before the loop.
# The loop rebinds `nuclear_tracking` / `spot_tracking` to pipeline objects, so
# re-testing those names on later iterations would test the truthiness of an
# object instead of the user's setting.
# NOTE(review): the names are still rebound below so that any later cell that
# expects the pipeline objects under these names keeps working; re-run the
# configuration cell before re-running this one.
run_nuclear_tracking = bool(nuclear_tracking)
run_spot_tracking = bool(spot_tracking)

for dataset_path in dataset_paths:

    test_dataset_name = os.path.join(dataset_folder, dataset_path)
    print('Process data for: ' + test_dataset_name)

    # Reset per-dataset state. Without this, a failed import would leave the
    # PREVIOUS dataset's data in scope and the tracking steps below would
    # silently process it and save the results into the current folder.
    dataset = None
    FullEmbryo_dataset = None

    #------------------------------
    try:
        # Import MS2 dataset
        # Detect whether the MS2 dataset has already been converted into `zarr` files
        ms2_import_previous = os.path.isdir(
            os.path.join(test_dataset_name, 'collated_dataset')
        )
        print('Import MS2 from previous zarr files: ' + str(ms2_import_previous))

        dataset = preprocessing_pipeline.DataImport(
            name_folder=test_dataset_name,
            trim_series=True,
            working_storage_mode='zarr',
            import_previous=ms2_import_previous,
        )

        if not ms2_import_previous:
            dataset.save()

    except Exception as e:
        print(f'Error during MS2 import: {type(e).__name__}: {e}')


    #------------------------------
    try:
        # Import FullEmbryo dataset
        # Detect whether the FullEmbryo dataset has already been converted into `zarr` files
        fullembryo_import_previous = os.path.isdir(
            os.path.join(test_dataset_name, 'preprocessed_full_embryo')
        )
        print('Import full embryo from previous zarr files: ' + str(fullembryo_import_previous))

        FullEmbryo_dataset = preprocessing_pipeline.FullEmbryoImport(
            name_folder=test_dataset_name,
            import_previous=fullembryo_import_previous
        )

        if not fullembryo_import_previous:
            FullEmbryo_dataset.save()

    except Exception as e:
        print(f'Error during full embryo import: {type(e).__name__}: {e}')


    #------------------------------
    try:
        # Nuclear Tracking
        if run_nuclear_tracking:
            # Detect whether the nuclear tracking has been done "previously." If so, load the previous results.
            nuclear_tracking_previous = os.path.isdir(
                os.path.join(test_dataset_name, 'nuclear_analysis_results')
            )

            if nuclear_tracking_previous:
                # Load nuclear tracking results
                print('Load from previous nuclear tracking results')

                nuclear_tracking = nuclear_pipeline.Nuclear()
                nuclear_tracking.read_results(name_folder=test_dataset_name)

            else:
                # Do nuclear tracking and save the results
                print('Do nuclear tracking for the dataset')

                if dataset is None:
                    raise RuntimeError(
                        'MS2 dataset import failed; cannot run nuclear tracking'
                    )

                nuclear_tracking = nuclear_pipeline.Nuclear(
                    data=dataset.channels_full_dataset[0],
                    global_metadata=dataset.export_global_metadata[0],
                    frame_metadata=dataset.export_frame_metadata[0],
                    series_splits=dataset.series_splits,
                    series_shifts=dataset.series_shifts,
                    search_range_um=1.5,
                    stitch=False,
                    stitch_max_distance=4,
                    stitch_max_frame_distance=2,
                    client=client,
                    keep_futures=False,
                )

                nuclear_tracking.track_nuclei(
                    working_memory_mode="zarr",
                    working_memory_folder=test_dataset_name,
                    # Joined with a path separator so the log is written inside
                    # the dataset folder, the same location the spot step uses.
                    trackpy_log_path=os.path.join(test_dataset_name, 'trackpy_log'),
                )
                # Saves tracked nuclear mask as a zarr, and pickles dataframes with segmentation and
                # tracking information.
                nuclear_tracking.save_results(
                    name_folder=test_dataset_name, save_array_as=None
                )

    except Exception as e:
        print(f'Error during nuclear tracking: {type(e).__name__}: {e}')


    #------------------------------
    try:
        # Spot Tracking
        if run_spot_tracking:
            # Detect whether spot tracking results already exist. If so, load them.
            spot_tracking_previous = os.path.isdir(
                os.path.join(test_dataset_name, 'spot_analysis_results')
            )

            if spot_tracking_previous:
                # Load spot tracking results
                print('Load from spot tracking results')

                spot_tracking = spot_pipeline.Spot()
                spot_tracking.read_results(name_folder=test_dataset_name)

            else:
                # Do spot tracking and save the results
                print('Do spot tracking for the dataset')

                if dataset is None:
                    raise RuntimeError(
                        'MS2 dataset import failed; cannot run spot tracking'
                    )

                # NOTE(review): `retrack_after_filter` is False here but True in
                # the `extract_spot_traces` call below - confirm which one is
                # intended.
                spot_tracking = spot_pipeline.Spot(
                    data=dataset.channels_full_dataset[1],
                    global_metadata=dataset.export_global_metadata[1],
                    frame_metadata=dataset.export_frame_metadata[1],
                    labels=None,#nuclear_tracking.reordered_labels,
                    expand_distance=3,
                    search_range_um=4.2,
                    retrack_search_range_um=4.5,
                    threshold_factor=1.3,
                    memory=3,
                    retrack_after_filter=False,
                    stitch=True,
                    min_track_length=0,
                    series_splits=dataset.series_splits,
                    series_shifts=dataset.series_shifts,
                    keep_bandpass=False,
                    keep_futures=False,
                    keep_spot_labels=False,
                    evaluate=True,
                    retrack_by_intensity=True,
                    client=client,
                )

                spot_tracking.extract_spot_traces(
                    working_memory_folder=test_dataset_name,
                    stitch=True,
                    retrack_after_filter=True,
                    trackpy_log_path=os.path.join(test_dataset_name, 'trackpy_log'),
                )

                # Saves tracked spot mask as a zarr, and pickles dataframes with spot fitting and
                # quantification information.
                spot_tracking.save_results(name_folder=test_dataset_name, save_array_as=None)

    except Exception as e:
        print(f'Error during spot tracking: {type(e).__name__}: {e}')

    print('\n')

Process data for: /mnt/Data1/Nick/transcription_pipeline/test_data/2025-03-18/MCP-mSG_His-RFP_RBSPWM(003)_embryo03
Import MS2 from previous zarr files: True
Import full embryo from previous zarr files: True
Load from previous nuclear tracking results
Load from spot tracking results


Process data for: /mnt/Data1/Nick/transcription_pipeline/test_data/2025-03-18/MCP-mSG_His-RFP_RBSPWM(003)_embryo04
Import MS2 from previous zarr files: True
Import full embryo from previous zarr files: False
Error: list index out of range
Do nuclear tracking for the dataset


