## Prepare Dataset and Specify Parameters (please only edit cells in this section)


In [1]:
# User-tunable analysis parameters

# Number of bins the full embryo is split into
num_bins: int = 42

# Traces with fewer frames than this value are filtered out
min_frames: int = 40

# Frame at which NC14 starts
nc14_start_frame: int = 400

In [2]:
# Dataset Directory

import os  # imported here because this cell runs before the main import cell

# Root folder that contains every dataset listed below
dataset_folder = '/mnt/Data1/Nick/transcription_pipeline/'

# Dataset paths relative to dataset_folder, grouped by construct
RBSPWM_datasets = [
    "test_data/2024-02-26/Halo-RBSPWM_embryo01",
    "test_data/2024-02-26/Halo-RBSPWM_embryo02",
    "test_data/2024-05-07/Halo552-RBSPWM_embryo01",
    "test_data/2024-05-07/Halo552-RBSPWM_embryo02",
    "test_data/2024-05-09/Halo552-RBSPWM_embryo01",
]

RBSVar2_datasets = [
    "test_data/2024-07-23/Halo673_RBSVar2_embryo01",
    "test_data/2024-07-25/Halo673_RBSVar2_embryo01",
    "test_data/2024-10-10/Halo673_RBSVar2_embryo01",
    "test_data/2024-10-10/Halo673_RBSVar2_embryo02",
]

MCP_mSG_datasets = [
    "test_data/2024-10-31/MCP-mSG_ParB-mScar_RBSPWM_embryo01",
    "test_data/2024-10-31/MCP-mSG_ParB-mScar_RBSPWM_embryo02",
]

# Dataset analysed by the rest of the notebook. os.path.join produces the same
# path whether or not dataset_folder ends with a '/', unlike plain concatenation.
test_dataset_name = os.path.join(dataset_folder, RBSPWM_datasets[4])
print('Dataset Path: ' + test_dataset_name)

Dataset Path: /mnt/Data1/Nick/transcription_pipeline/test_data/2024-05-09/Halo552-RBSPWM_embryo01


In [3]:
# Import pipeline
from transcription_pipeline import nuclear_pipeline
from transcription_pipeline import preprocessing_pipeline

from transcription_pipeline import spot_pipeline
from transcription_pipeline import fullEmbryo_pipeline

from transcription_pipeline.spot_analysis import compile_data
from transcription_pipeline.utils import plottable

import os
import matplotlib.pyplot as plt
import matplotlib as mpl

`JAVA_HOME` environment variable set to /mnt/Data1/Nick/miniforge3/envs/transcription_pipeline


In [4]:
# Choose how plots are displayed:
#   - keep mpl.use('TkAgg') when running from PyCharm
#   - when running in a browser, comment out mpl.use(...) and uncomment the
#     `%matplotlib widget` magic below instead

mpl.use('TkAgg')
# %matplotlib widget

## Import Dataset

### Import MS2 Dataset

Detect whether the dataset has already been converted into `zarr` files, i.e. whether a `collated_dataset` folder from a previous run exists in the dataset directory. If so, load the previous results instead of importing the raw data again.

In [5]:
# True when a 'collated_dataset' folder already exists inside the dataset folder
collated_dir = f"{test_dataset_name}/collated_dataset"
ms2_import_previous = os.path.isdir(collated_dir)
ms2_import_previous

True

In [6]:
# Import the MS2 dataset, reusing the earlier import when one was detected above
dataset = preprocessing_pipeline.DataImport(
    name_folder=test_dataset_name,
    import_previous=ms2_import_previous,
    working_storage_mode="zarr",
    trim_series=True,
)

### Import FullEmbryo Dataset

In [None]:
# Import the full-embryo images stored in the same dataset folder.
FullEmbryo_dataset = preprocessing_pipeline.FullEmbryoImport(
    name_folder=test_dataset_name,
    #import_previous=True
)
# Known issue (reported to Yovan): loading the FullEmbryo dataset does not work
# correctly at the moment because only the last channel is read in. The save call
# below is left commented out.
# FullEmbryo_dataset.save()

## Starting a Dask Client for parallel processing

In [None]:
from dask.distributed import LocalCluster, Client

# Scheduler port shared by the cluster start-up and the fallback connection below
scheduler_port = 37763

try:
    # Start a local cluster on a fixed port so a later run can reconnect to it
    cluster = LocalCluster(
        host="localhost",
        scheduler_port=scheduler_port,
        threads_per_worker=1,
        n_workers=14,
        memory_limit="6GB",
    )

    client = Client(cluster)
except Exception as err:
    # Catch Exception rather than using a bare `except:` so that KeyboardInterrupt
    # and SystemExit still propagate, and report the underlying error instead of
    # hiding it. The usual cause is that a cluster already occupies the port, in
    # which case we connect to that existing scheduler.
    print("Cluster already running")
    print(f"(LocalCluster could not be started: {err!r})")
    client = Client(f"localhost:{scheduler_port}")

print(client)

In [None]:
# Restart the Dask workers; run this cell only when the cluster needs a clean reset.
client.restart()

In [None]:
# Display the client summary as the cell output.
client

## Nuclear Tracking

Detect whether the nuclear tracking has been done "previously." If so, load the previous results.

In [7]:
# True when a 'nuclear_analysis_results' folder already exists inside the dataset folder
nuclear_results_dir = f"{test_dataset_name}/nuclear_analysis_results"
nuclear_tracking_previous = os.path.isdir(nuclear_results_dir)
nuclear_tracking_previous

False

In [None]:
if nuclear_tracking_previous:
    # Load nuclear tracking results saved by an earlier run
    print('Load from previous nuclear tracking results')

    nuclear_tracking = nuclear_pipeline.Nuclear()
    nuclear_tracking.read_results(name_folder=test_dataset_name)

else:
    # Do nuclear tracking and save the results
    print('Do nuclear tracking for the dataset')

    # Index 0 of the imported dataset's channel and metadata lists is used as the
    # nuclear channel here.
    nuclear_tracking = nuclear_pipeline.Nuclear(
        data=dataset.channels_full_dataset[0],
        global_metadata=dataset.export_global_metadata[0],
        frame_metadata=dataset.export_frame_metadata[0],
        series_splits=dataset.series_splits,
        series_shifts=dataset.series_shifts,
        search_range_um=1.5,
        stitch=False,
        stitch_max_distance=4,
        stitch_max_frame_distance=2,
        client=client,
        keep_futures=False,
    )

    nuclear_tracking.track_nuclei(
        working_memory_mode="zarr",
        working_memory_folder=test_dataset_name,
        # Write the trackpy log inside the dataset folder, as the spot tracking
        # cell does. The previous "".join(...) had no path separator and produced
        # '<dataset folder>trackpy_log' next to the dataset folder instead.
        trackpy_log_path=test_dataset_name + '/trackpy_log',
    )

    # Saves tracked nuclear mask as a zarr, and pickles dataframes with segmentation and
    # tracking information.
    nuclear_tracking.save_results(
        name_folder=test_dataset_name, save_array_as=None
    )

## Spot Tracking

Detect whether the spot tracking has been done "previously." If so, load the previous results.

In [8]:
# True when a 'spot_analysis_results' folder already exists inside the dataset folder
spot_results_dir = f"{test_dataset_name}/spot_analysis_results"
spot_tracking_previous = os.path.isdir(spot_results_dir)
spot_tracking_previous

True

In [9]:
%%time

if spot_tracking_previous:
    # Reuse the spot tracking results saved by an earlier run
    print('Load from spot tracking results')

    spot_tracking = spot_pipeline.Spot()
    spot_tracking.read_results(name_folder=test_dataset_name)

else:
    # No saved results were found: run spot tracking now and save the outcome
    print('Do spot tracking for the dataset')

    # Index into the imported dataset's channel and metadata lists used for spots
    spot_channel = 1

    spot_tracking = spot_pipeline.Spot(
        # Input data and metadata
        data=dataset.channels_full_dataset[spot_channel],
        global_metadata=dataset.export_global_metadata[spot_channel],
        frame_metadata=dataset.export_frame_metadata[spot_channel],
        series_splits=dataset.series_splits,
        series_shifts=dataset.series_shifts,
        labels=None,  # nuclear_tracking.reordered_labels
        # Detection and tracking settings
        threshold_factor=1.3,
        expand_distance=3,
        search_range_um=4.2,
        retrack_search_range_um=4.5,
        memory=3,
        min_track_length=0,
        stitch=True,
        retrack_after_filter=False,
        retrack_by_intensity=True,
        # Execution and bookkeeping
        client=client,
        evaluate=True,
        keep_bandpass=False,
        keep_futures=False,
        keep_spot_labels=False,
    )

    # NOTE(review): the constructor above receives retrack_after_filter=False while
    # this call passes retrack_after_filter=True -- confirm which value is intended.
    spot_tracking.extract_spot_traces(
        working_memory_folder=test_dataset_name,
        trackpy_log_path=test_dataset_name + '/trackpy_log',
        stitch=True,
        retrack_after_filter=True,
    )

    # Store the tracked spot mask as a zarr and pickle the dataframes holding the
    # spot fitting and quantification information.
    spot_tracking.save_results(name_folder=test_dataset_name, save_array_as=None)

Load from spot tracking results
CPU times: user 1.28 s, sys: 283 ms, total: 1.56 s
Wall time: 1.56 s


### Make Compiled Dataframe

In [10]:
# Load spot tracking dataframe
spot_df = spot_tracking.spot_dataframe

# Remove spots that were not detected
detected_spots = spot_df[spot_df["particle"] != 0]

# Compile traces
compiled_dataframe = compile_data.compile_traces(
    detected_spots,
    compile_columns_spot=[
        "frame",
        "t_s",
        "intensity_from_neighborhood",
        "intensity_std_error_from_neighborhood",
        "x",
        "y"
    ],
    nuclear_tracking_dataframe=None,
)

compiled_dataframe.head()

Unnamed: 0,particle,frame,t_s,intensity_from_neighborhood,intensity_std_error_from_neighborhood,x,y
0,2,"[608, 609, 610, 613, 614, 615, 616, 617, 618, ...","[2981.271999359131, 2985.51900100708, 2989.767...","[67.20298245614036, 62.61358125, 92.1778863636...","[54.07608080882359, 53.08599141703332, 48.6846...","[706.3077781092006, 705.0800225329217, 704.078...","[132.22746686990476, 133.33576781054558, 131.9..."
1,3,"[579, 580, 581, 582, 583, 584, 585, 586, 587, ...","[2857.3950004577637, 2861.64400100708, 2865.89...","[368.71108000000004, 162.58609316770185, 207.0...","[48.961304741944936, 48.764177034702456, 49.24...","[814.23756413008, 814.0490753702337, 812.66835...","[165.32374456695854, 165.4022049366289, 164.15..."
2,4,"[640, 641, 642, 643, 644, 645, 647, 648, 649, ...","[3117.5319995880127, 3121.579999923706, 3126.1...","[137.8392151898734, 246.30097297297297, 162.72...","[50.04719298970737, 48.033152885448544, 53.299...","[610.5256101215496, 610.5078280423238, 610.170...","[200.09244408786026, 199.6949788757095, 200.19..."
3,5,"[613, 614, 615, 616, 617, 618, 619, 620, 621, ...","[3002.3190002441406, 3006.1590003967285, 3010....","[223.98077931034484, 149.44997701149424, 102.1...","[52.019684703514685, 46.46238563874822, 49.753...","[789.4493998034413, 790.1299534943532, 790.754...","[111.42483044437226, 110.54379569293525, 110.3..."
4,6,"[583, 585, 586, 588, 590, 591, 592, 593, 594, ...","[2874.9950008392334, 2883.492000579834, 2887.7...","[56.80923595505618, 58.452866666666665, 56.477...","[45.10765132358713, 45.86984351271528, 48.0794...","[830.0205022942665, 828.8195400854084, 827.835...","[152.07172254602511, 153.15739986339034, 152.5..."


## Full Embryo Analysis

In [None]:
# Show the 'Surf' and 'Mid' full-embryo images side by side (first channel, first plane)
fig, (ax_surf, ax_mid) = plt.subplots(1, 2, figsize=(12, 6))

ax_surf.imshow(FullEmbryo_dataset.channels_full_dataset_surf[0][0, :, :], cmap='gray')
ax_surf.set_title('Full Embryo Surf')

ax_mid.imshow(FullEmbryo_dataset.channels_full_dataset_mid[0][0, :, :], cmap='gray')
ax_mid.set_title('Full Embryo Mid')

fig.tight_layout()
plt.show()

In [None]:
# Build the full-embryo analysis object from the full-embryo import and the MS2
# dataset; his_channel=0 presumably selects the histone (nuclear) channel -- confirm.
fullEmbryo = fullEmbryo_pipeline.FullEmbryo(FullEmbryo_dataset, dataset, his_channel=0)

In [None]:
# Determine the AP axis of the embryo; make_plots=True requests diagnostic plots.
fullEmbryo.find_ap_axis(make_plots=True)

In [None]:
# Pass the compiled traces through xy_to_ap (presumably adds AP positions derived
# from x/y -- confirm).
# NOTE(review): this rebinds compiled_dataframe to the call's result, so re-running
# the cell feeds the already-converted dataframe back in -- confirm that is safe.
compiled_dataframe = fullEmbryo.xy_to_ap(compiled_dataframe)
compiled_dataframe.head()

## RateExtraction Analysis

### Fit and Average

In [11]:
from transcription_pipeline.RateExtraction import FitAndAverage

In [12]:
# Fit individual particle traces. The third argument now comes from the `min_frames`
# parameter defined in the parameters cell (currently 40) instead of a hard-coded 40,
# so editing that cell actually takes effect here.
# NOTE(review): assumes the third positional argument is the minimum trace length -- confirm.
faadata = FitAndAverage(compiled_dataframe, nc14_start_frame, min_frames, num_bins, test_dataset_name)

No previous particle trace fit checking results detected. Do particle trace fitting for the dataframe.




Failed to fit trace 547
Failed to fit trace 606
Failed to fit trace 310
Failed to fit trace 150
Failed to fit trace 465
Failed to fit trace 38
Failed to fit trace 360
Failed to find derivative sign change for trace 20
Failed to find derivative sign change for trace 450
Failed to fit trace 623
Failed to fit trace 173
Failed to fit trace 218
Failed to fit trace 30
Failed to find derivative sign change for trace 143
Failed to find derivative sign change for trace 32
Failed to find derivative sign change for trace 690
Failed to find derivative sign change for trace 604
Failed to fit trace 24
Failed to find derivative sign change for trace 46
Failed to fit trace 82
Failed to find derivative sign change for trace 939
Failed to fit trace 580
Failed to fit trace 367
Failed to find derivative sign change for trace 389
Failed to find derivative sign change for trace 724
Failed to find derivative sign change for trace 343
Failed to fit trace 694
Failed to fit trace 64
Failed to find derivative si

In [13]:
# Review the individual particle trace fits (presumably an interactive check -- confirm).
faadata.check_particle_fits()

In [None]:
# Save the checked particle fits; presumably these are the "previous particle trace
# fit checking results" that FitAndAverage looks for at construction -- confirm.
faadata.save_checked_particle_fits()

In [None]:
# Average the particle fits; the trailing ';' suppresses the cell's output repr.
faadata.average_particle_fits();

### Average and Fit

In [None]:
from transcription_pipeline.RateExtraction import AverageAndFit

In [None]:
# Take the time-bin width from the 't_s' frame metadata of the first channel
# NOTE(review): element [1, 0] is presumably the time of the second frame -- confirm.
t_s_metadata = dataset.export_frame_metadata[0]['t_s']
time_bin_width = t_s_metadata[1, 0]

aafdata = AverageAndFit(
    compiled_dataframe,
    nc14_start_frame,
    time_bin_width,
    num_bins,
    test_dataset_name,
)

In [None]:
# Review the per-bin fits (presumably an interactive check -- confirm).
aafdata.check_bin_fits()

In [None]:
# Display the dataframe of bin-averaged fit results as the cell output.
aafdata.bin_average_fit_dataframe

In [None]:
# Save the checked bin fits (presumably under the dataset folder -- confirm).
aafdata.save_checked_bin_fits()

In [None]:
# Plot the fits for each bin.
aafdata.plot_bin_fits()