In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tifffile as tiff

In [None]:
# Load the full tracking table from CSV (path is relative to the working directory)
full_df = pd.read_csv("nuclei_membrane_tracking/full_manual_dataset_cropped.csv")

In [3]:
# Keep every row whose cell_type is anything other than the literal 'unknown'
full_df2 = full_df.loc[full_df['cell_type'].ne('unknown')]

In [4]:
# Rows of the filtered table at timepoint 359
full_df2_final = full_df2.loc[full_df2['t'].eq(359)]

In [None]:
full_df2_final.columns.to_list()

In [None]:
full_df2_final['ID'].value_counts()

In [5]:
def construct_lineages(df, complete_df):
    """Trace each cell present at the last timepoint of ``df`` back through its sources.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``t``, ``ID``, ``cell_type`` and
        ``Spot track ID relabelled``. Rows at ``df['t'].max()`` are the
        starting points.
    complete_df : pandas.DataFrame
        Table used to follow the links; needs ``ID``, ``mem_label`` and
        ``Spot source ID``. When an ID occurs more than once, its first row
        is the one that is used.

    Returns
    -------
    dict
        Maps each starting ID to a dict with:
        ``cell_type`` and ``track_id`` taken from the starting row,
        ``lineage_ids`` (starting ID first, then each successive
        ``Spot source ID``), and ``n_mem_labels`` (number of visited IDs
        found in ``complete_df`` whose ``mem_label`` is neither missing nor 0).

    Notes
    -----
    Tracing stops at a missing source ID, at an ID that is absent from
    ``complete_df`` (that ID is still recorded in ``lineage_ids``), or when an
    ID would be visited a second time (guards against cyclic links).
    """
    # Starting points: every row at the last timepoint of `df`
    last_timepoint_df = df[df['t'] == df['t'].max()]
    last_ids = last_timepoint_df[['ID', 'cell_type', 'Spot track ID relabelled']]

    # Build the ID lookups once instead of scanning the whole table at every
    # step; keep='first' mirrors taking the first matching row.
    first_rows = complete_df.drop_duplicates(subset='ID', keep='first')
    source_of = dict(zip(first_rows['ID'], first_rows['Spot source ID']))
    mem_label_of = dict(zip(first_rows['ID'], first_rows['mem_label']))

    lineages = {}

    for _, row in last_ids.iterrows():
        start_id = row['ID']
        lineage_ids = []
        visited = set()
        n_mem_labels = 0

        print(f'Processing id {start_id}')

        current_id = start_id
        while not pd.isna(current_id) and current_id not in visited:
            visited.add(current_id)
            lineage_ids.append(current_id)

            if current_id not in source_of:
                # Unknown ID: it is recorded above, but there is nothing to follow
                break

            mem_label = mem_label_of[current_id]
            if not pd.isna(mem_label) and mem_label != 0:
                n_mem_labels += 1

            # Step to the parent spot
            current_id = source_of[current_id]

        lineages[start_id] = {
            'cell_type': row['cell_type'],
            'lineage_ids': lineage_ids,
            'track_id': row['Spot track ID relabelled'],
            'n_mem_labels': n_mem_labels
        }

    return lineages

# Starting cells come from the filtered table (cell_type != 'unknown');
# source links are followed in the unfiltered table.
lineages = construct_lineages(full_df2, full_df)



Processing id 1902
Processing id 1903
Processing id 1905
Processing id 36440
Processing id 36441
Processing id 36442
Processing id 36443
Processing id 36445
Processing id 36446
Processing id 36448
Processing id 36449
Processing id 36450
Processing id 36452
Processing id 36453
Processing id 36454
Processing id 36455
Processing id 36456
Processing id 36457
Processing id 36458
Processing id 36459
Processing id 36460
Processing id 36461
Processing id 36462
Processing id 36463
Processing id 36464
Processing id 36465
Processing id 36466
Processing id 36467
Processing id 36468
Processing id 36469
Processing id 36470
Processing id 36471
Processing id 36472
Processing id 36474
Processing id 36475
Processing id 36476
Processing id 36477
Processing id 36479
Processing id 36480
Processing id 36481
Processing id 36482
Processing id 36483
Processing id 36484
Processing id 36485
Processing id 36487
Processing id 36488
Processing id 36489
Processing id 36491
Processing id 36492
Processing id 36493
Pro

In [None]:
lineages

In [6]:
def find_top_lineages_and_subset_df(df, lineages, top_n=10):
    """Collect the rows of ``df`` that belong to the best-labelled lineages per cell type.

    NOTE: a later cell defines a function with this same name (adding a
    per-track de-duplication step); once that cell runs, this version is shadowed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``ID`` plus the eight output columns listed below.
    lineages : dict
        Maps an ID to a dict holding ``cell_type``, ``n_mem_labels`` and
        ``lineage_ids``.
    top_n : int
        Number of lineages kept per cell type, ranked by ``n_mem_labels``
        in descending order.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` whose ``ID`` is in a selected lineage, concatenated
        lineage by lineage with a fresh index.
    """
    wanted_columns = [
        'Spot track ID relabelled', 'Track ID_y', 'cell_type', 't',
        'mem_label', 'mem_x', 'mem_y', 'mem_z',
    ]

    # One record per lineage
    records = []
    for start_id, info in lineages.items():
        records.append({
            'ID': start_id,
            'cell_type': info['cell_type'],
            'n_mem_labels': info['n_mem_labels'],
            'lineage_ids': info['lineage_ids'],
        })

    # Rank inside each cell type and keep the first `top_n`
    ranked = pd.DataFrame(records).sort_values(
        by=['cell_type', 'n_mem_labels'], ascending=[True, False]
    )
    selected = ranked.groupby('cell_type').head(top_n)

    # Pull the matching rows of `df` for each selected lineage, in ranking order
    pieces = [
        df.loc[df['ID'].isin(ids), wanted_columns]
        for ids in selected['lineage_ids']
    ]

    return pd.concat(pieces, ignore_index=True)


In [7]:
def find_top_lineages_and_subset_df(df, lineages, top_n=10):
    """Collect the rows of ``df`` for the best-labelled lineages, one lineage per track.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``ID`` plus the eight output columns listed below.
    lineages : dict
        Maps an ID to a dict holding ``cell_type``, ``n_mem_labels``,
        ``lineage_ids`` and ``track_id``.
    top_n : int
        Number of lineages kept per cell type, ranked by ``n_mem_labels``
        in descending order.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` whose ``ID`` is in a selected lineage, concatenated
        lineage by lineage with a fresh index. If ``lineages`` is empty or
        nothing is selected, an empty frame with the same columns is returned.
    """
    output_columns = [
        'Spot track ID relabelled', 'Track ID_y', 'cell_type', 't',
        'mem_label', 'mem_x', 'mem_y', 'mem_z',
    ]

    def _empty_subset():
        # Same columns as a normal result, zero rows
        return df.loc[:, output_columns].iloc[0:0].reset_index(drop=True)

    if not lineages:
        # An empty record list would give a column-less frame and a KeyError on sort
        return _empty_subset()

    # Convert lineages dictionary to a dataframe
    lineages_df = pd.DataFrame([
        {
            'ID': key,
            'cell_type': value['cell_type'],
            'n_mem_labels': value['n_mem_labels'],
            'lineage_ids': value['lineage_ids'],
            'track_id': value['track_id']
        }
        for key, value in lineages.items()
    ])

    # For each track_id, keep the row with the highest n_mem_labels
    lineages_df = (
        lineages_df.sort_values('n_mem_labels', ascending=False)
        .drop_duplicates(subset=['track_id'], keep='first')
    )

    # Find the top `top_n` IDs for each cell_type by n_mem_labels
    top_lineages = (
        lineages_df.sort_values(['cell_type', 'n_mem_labels'], ascending=[True, False])
        .groupby('cell_type')
        .head(top_n)
    )

    # Rows of `df` for each selected lineage; .loc avoids chained indexing
    rows = [
        df.loc[df['ID'].isin(lineage_ids), output_columns]
        for lineage_ids in top_lineages['lineage_ids']
    ]

    if not rows:
        # pd.concat raises on an empty list
        return _empty_subset()

    return pd.concat(rows, ignore_index=True)

In [8]:
# Keep the 5 lineages with the most membrane labels per cell type and gather their rows
subset_df = find_top_lineages_and_subset_df(full_df2, lineages, top_n=5)

In [None]:
subset_df['cell_type'].value_counts()

In [9]:
def remap_track_ids_simple(subset_df):
    """Add a ``Track ID_remapped`` column that numbers tracks within each cell type.

    Inside every ``cell_type`` group the values of ``Spot track ID relabelled``
    are replaced by their categorical codes. The column is written onto the
    frame that was passed in, and that same frame is returned.
    """
    def _codes(track_ids):
        # Categorical codes of the track IDs seen in one cell-type group
        return pd.Categorical(track_ids).codes

    tracks_by_type = subset_df.groupby('cell_type')['Spot track ID relabelled']
    subset_df['Track ID_remapped'] = tracks_by_type.transform(_codes)
    return subset_df

# Adds the 'Track ID_remapped' column to subset_df (written in place) and rebinds the name
subset_df = remap_track_ids_simple(subset_df)

In [None]:
subset_df['Track ID_remapped'].max()

In [None]:
subset_df

In [None]:
full_mem_array = tiff.imread('seg_membrane_timelapses_fixed/timelapse_sixth_dataset.tif')

In [11]:
# Add 3 zero-filled slices before and after axis 1 (the Z axis); the other
# three axes are left as they are. np.pad defaults to constant zero padding.
padded_mem_array = np.pad(full_mem_array, [(0, 0), (3, 3), (0, 0), (0, 0)])

In [12]:
del full_mem_array

In [13]:
import gc
gc.collect()

48

In [14]:
def _centered_label_crop(volume, label, center, size):
    """Return a boolean window of shape ``size`` showing ``volume == label`` around ``center``.

    The window spans ``center - size // 2`` up to ``+ size`` on every axis.
    Parts of the window that fall outside ``volume`` stay False, so the centre
    voxel keeps the same position in the window even at the image border.
    """
    window = np.zeros(size, dtype=bool)
    src, dst = [], []
    for mid, length, extent in zip(center, size, volume.shape):
        start = mid - length // 2
        lo, hi = max(0, start), min(extent, start + length)
        if lo >= hi:
            # Window lies completely outside the volume on this axis
            return window
        src.append(slice(lo, hi))
        # Offset inside the window = how far the window start was clipped
        dst.append(slice(lo - start, hi - start))
    window[tuple(dst)] = volume[tuple(src)] == label
    return window


def generate_croppings(df, image_array, dims=(5, 5, 360, 10, 150, 150)):
    """Cut a fixed-size boolean mask around every tracked cell at every timepoint.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``cell_type``, ``Track ID_remapped``, ``t``, ``mem_label``,
        ``mem_x``, ``mem_y`` and ``mem_z``.
    image_array : numpy.ndarray
        Label image indexed as ``[t, z, y, x]``.
    dims : tuple of int
        ``(C, N, T, Z, Y, X)`` of the output: number of cell types, tracks per
        cell type, timepoints, and the crop size. Defaults to the values that
        were previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Boolean array of shape ``dims``. ``out[c, n, t]`` is True where the
        image equals the row's ``mem_label`` inside the window centred on
        ``(mem_z, mem_y, mem_x)``; it stays all False when the track has no row
        at ``t``, when any of the four membrane values is missing, or when
        ``t`` is outside the image / output time range.

    Notes
    -----
    Cell types are indexed in order of first appearance in ``df``; tracks are
    indexed by the position of their ``Track ID_remapped`` value in
    ``df['Track ID_remapped'].unique()``. When several rows share a timepoint,
    the first one is used.
    """
    C, N, T, Z, Y, X = dims
    croppings = np.zeros((C, N, T, Z, Y, X), dtype=bool)

    # Index maps, built once (order of first appearance)
    cell_type_to_idx = {cell_type: i for i, cell_type in enumerate(df['cell_type'].unique())}
    track_to_idx = {track_id: i for i, track_id in enumerate(df['Track ID_remapped'].unique())}

    n_frames = min(T, image_array.shape[0])
    value_columns = ['mem_label', 'mem_x', 'mem_y', 'mem_z']

    # Process each group of cell_type and Track ID
    for (cell_type, track_id), group in df.groupby(['cell_type', 'Track ID_remapped']):
        print(f'Processing cell type: {cell_type}, Track ID: {track_id}')
        c_idx = cell_type_to_idx[cell_type]
        n_idx = track_to_idx[track_id]

        # One row per timepoint (the first), instead of probing all T timepoints
        for _, row in group.drop_duplicates(subset='t', keep='first').iterrows():
            t = row['t']
            if pd.isna(t) or t != int(t) or not 0 <= t < n_frames:
                continue
            if row[value_columns].isna().any():
                # No usable membrane label / centre at this timepoint
                continue

            t = int(t)
            mem_label, mem_x, mem_y, mem_z = (int(row[col]) for col in value_columns)

            croppings[c_idx, n_idx, t] = _centered_label_crop(
                image_array[t], mem_label, (mem_z, mem_y, mem_x), (Z, Y, X)
            )

    return croppings

In [None]:
subset_df['Spot track ID relabelled'].value_counts()

In [15]:
# Build the (cell type, track, t, z, y, x) boolean crops from the Z-padded label image.
# NOTE(review): padded_mem_array has 3 zero slices prepended on Z (np.pad cell above),
# while generate_croppings uses mem_z unshifted as the crop centre — confirm whether
# mem_z should be offset by 3 for this array.
croppings_array = generate_croppings(subset_df, padded_mem_array)

Processing cell type: basal, Track ID: 0
Processing cell type: basal, Track ID: 1
Processing cell type: basal, Track ID: 2
Processing cell type: basal, Track ID: 3
Processing cell type: basal, Track ID: 4
Processing cell type: goblet, Track ID: 0
Processing cell type: goblet, Track ID: 1
Processing cell type: goblet, Track ID: 2
Processing cell type: goblet, Track ID: 3
Processing cell type: goblet, Track ID: 4
Processing cell type: ic, Track ID: 0
Processing cell type: ic, Track ID: 1
Processing cell type: ic, Track ID: 2
Processing cell type: ic, Track ID: 3
Processing cell type: ic, Track ID: 4
Processing cell type: mcc, Track ID: 0
Processing cell type: mcc, Track ID: 1
Processing cell type: mcc, Track ID: 2
Processing cell type: mcc, Track ID: 3
Processing cell type: mcc, Track ID: 4
Processing cell type: ssc, Track ID: 0
Processing cell type: ssc, Track ID: 1
Processing cell type: ssc, Track ID: 2
Processing cell type: ssc, Track ID: 3
Processing cell type: ssc, Track ID: 4


In [16]:
del padded_mem_array

gc.collect()

0

In [None]:
len(croppings_array[croppings_array == True])

In [17]:
# First-axis index of the croppings array -> cell type name.
# NOTE(review): generate_croppings assigns indices in order of first appearance
# of cell_type in its input frame; this mapping assumes that order is
# basal, goblet, ic, mcc, ssc — confirm.
celltype_dict = {0 : 'basal', 1 : 'goblet', 2 : 'ic', 3 : 'mcc', 4 : 'ssc'}

In [None]:
croppings_array.shape

In [18]:
%%capture
#import ncolor
# `mask` is the name the screenshot loop in a later cell reads from
mask = croppings_array
#mask_nc = ncolor.label(mask,max_depth=20)

import napari
# Open the crops as a labels layer; the layer is looked up as 'mask' below
# (presumably napari names the layer after the variable — confirm).
viewer = napari.view_labels(mask)
voxel_size_x = 0.691 # um
voxel_size_y = 0.691 # um
voxel_size_z = 2 # um

# Scale factors are expressed relative to the X voxel size
reference_size = voxel_size_x

factor_z = voxel_size_z / reference_size
factor_y = voxel_size_y / reference_size
factor_x = voxel_size_x / reference_size

# NOTE(review): mask has six axes but only three scale values are given;
# confirm napari applies them to the trailing (Z, Y, X) axes.
viewer.layers['mask'].scale = [factor_z, factor_y, factor_x] # Z, Y, X order
viewer.dims.ndisplay = 3
#viewer.camera.center = [s//2 for s in mask.shape]
viewer.camera.zoom=5
viewer.camera.angles=(10.90517458968619, -20.777067798396835, 58.04311170773853)
viewer.camera.perspective=0.0
viewer.camera.interactive=True


In [13]:
# Camera pose used for the exported frames; the screenshot cell that follows
# sets these same four values again before rendering.
viewer.camera.zoom=5
viewer.camera.angles=(-29.987043790992097, 49.57065942474976, -41.85955990611221)
viewer.camera.perspective=0.0
viewer.camera.interactive=True

In [21]:
import os

from PIL import Image

# Fixed camera pose for every exported frame
viewer.camera.zoom=5
viewer.camera.angles=(-29.987043790992097, 49.57065942474976, -41.85955990611221)
viewer.camera.perspective=0.0
viewer.camera.interactive=True

# Root folder of the exported frames: <root>/<cell type>/<n>_t<t>.png
output_root = 'D:/Mari_Sixth_Dataset_Analysis/animation2'

# Loop extents come from the array itself: (cell types, tracks, timepoints, ...)
n_celltypes, n_cells, n_timepoints = mask.shape[:3]

for c in range(n_celltypes):
    print(f'processing celltype {celltype_dict[c]}')
    celltype_dir = f'{output_root}/{celltype_dict[c]}'
    # image.save does not create missing folders
    os.makedirs(celltype_dir, exist_ok=True)

    for n in range(n_cells):
        print(f'processing cell {n}')
        if not mask[c, n].any():
            # Track slot is completely empty: nothing to export
            continue

        image = None  # last rendered frame; reused for empty timepoints
        for t in range(n_timepoints):
            # t == 0 is always rendered so that there is a frame to fall back on
            if t == 0 or mask[c, n, t].any():
                # Update viewer to display the current subarray
                viewer.dims.current_step = (c, n, t, 1, 74, 74)
                img = viewer.screenshot(size=(200, 300), scale=1, canvas_only=True, flash=False)
                # Convert the NumPy array to a Pillow Image
                image = Image.fromarray(img, mode="RGBA")
            else:
                print(f'Subarray at (c={c}, n={n}, t={t}) is all zeros, using last valid image.')

            # Save the (new or repeated) frame as a PNG
            image.save(f'{celltype_dir}/{n}_t{t}.png', format="PNG")

processing celltype basal
processing cell 0
Subarray at (c=0, n=0, t=71) is all zeros, using last valid image.
Subarray at (c=0, n=0, t=72) is all zeros, using last valid image.
Subarray at (c=0, n=0, t=155) is all zeros, using last valid image.
Subarray at (c=0, n=0, t=263) is all zeros, using last valid image.
Subarray at (c=0, n=0, t=288) is all zeros, using last valid image.
Subarray at (c=0, n=0, t=340) is all zeros, using last valid image.
processing cell 1
Subarray at (c=0, n=1, t=166) is all zeros, using last valid image.
Subarray at (c=0, n=1, t=192) is all zeros, using last valid image.
Subarray at (c=0, n=1, t=193) is all zeros, using last valid image.
Subarray at (c=0, n=1, t=195) is all zeros, using last valid image.
Subarray at (c=0, n=1, t=197) is all zeros, using last valid image.
Subarray at (c=0, n=1, t=198) is all zeros, using last valid image.
Subarray at (c=0, n=1, t=344) is all zeros, using last valid image.
processing cell 2
Subarray at (c=0, n=2, t=62) is all ze

In [22]:
import os

In [None]:
for cell_type in ['basal', 'goblet', 'ic', 'mcc', 'ssc']:
    # Folder holding this cell type's frames, and the folder the combined stack goes to
    chosen_folder = f"animation2/{cell_type}"
    parent_folder = "animation2/"

    # List the folder once; every `n` filters this same listing
    folder_files = os.listdir(chosen_folder)

    # Initialize a list to store the stacks for all `n`
    all_n_stacks = []

    # Process each `n`
    for n in range(10):
        print(f"Processing n={n}")
        # Collect all images for the current `n` in ascending `t` order
        image_files = sorted(
            [f for f in folder_files if f.startswith(f"{n}_t") and f.endswith(".png")],
            key=lambda x: int(x.split("_t")[1].split(".")[0])  # Sort by t
        )

        if image_files:
            # Load and stack the images for this `n`
            stack = []
            for img_file in image_files:
                img_path = os.path.join(chosen_folder, img_file)
                # Read the pixels inside the context manager so the file is closed right away
                with Image.open(img_path) as frame:
                    img = np.array(frame)
                stack.append(img)

            # Stack the frames along a new leading (time) axis
            stack = np.stack(stack, axis=0)  # Shape: (frames, height, width, channels)
            print(f"n={n}, stack shape: {stack.shape}")

            # Append the stack to the list of all `n` stacks
            all_n_stacks.append(stack)

    if not all_n_stacks:
        # np.concatenate raises on an empty list
        print(f"No frames found for {cell_type}, skipping.")
        continue

    # Concatenate all `n` stacks horizontally (on the width dimension)
    final_combined_stack = np.concatenate(all_n_stacks, axis=2)  # Shape: (frames, height, total_width, channels)
    print(f"Final combined stack shape: {final_combined_stack.shape}")

    # Save the final stack as a single multi-page TIFF
    output_path = os.path.join(parent_folder, f"{cell_type}_combined_stack.tif")

    # Convert the NumPy array back to Pillow Images for saving (one page per frame)
    final_image = [Image.fromarray(slice_) for slice_ in final_combined_stack]
    final_image[0].save(
        output_path, 
        save_all=True, 
        append_images=final_image[1:], 
        compression="tiff_deflate"
    )

    print(f"Combined stack saved to {output_path}")

Processing n=0
n=0, stack shape: (360, 200, 300, 4)
Processing n=1
n=1, stack shape: (360, 200, 300, 4)
Processing n=2
n=2, stack shape: (360, 200, 300, 4)
Processing n=3
n=3, stack shape: (360, 200, 300, 4)
Processing n=4
n=4, stack shape: (360, 200, 300, 4)
Processing n=5
Processing n=6
Processing n=7
Processing n=8
Processing n=9
Final combined stack shape: (360, 200, 1500, 4)
Combined stack saved to D:/Mari_Sixth_Dataset_Analysis/animation2/basal_combined_stack.tif
Processing n=0
n=0, stack shape: (360, 200, 300, 4)
Processing n=1
n=1, stack shape: (360, 200, 300, 4)
Processing n=2
n=2, stack shape: (360, 200, 300, 4)
Processing n=3
n=3, stack shape: (360, 200, 300, 4)
Processing n=4
n=4, stack shape: (360, 200, 300, 4)
Processing n=5
Processing n=6
Processing n=7
Processing n=8
Processing n=9
Final combined stack shape: (360, 200, 1500, 4)
Combined stack saved to D:/Mari_Sixth_Dataset_Analysis/animation2/goblet_combined_stack.tif
Processing n=0
n=0, stack shape: (360, 200, 300, 4)

In [None]:
img.shape

In [None]:
# Render one frame and show it inline as a quick visual check.
# NOTE(review): index 6 on the second axis is beyond the 5 track slots that
# generate_croppings allocates by default — presumably left over from a run
# with more tracks; confirm.
viewer.dims.current_step = (0, 6, 357, 1, 74, 74)
img = viewer.screenshot(size=(200,300),scale=1,canvas_only=True,flash=False)
plt.figure(figsize=(3,3),frameon=False)
plt.imshow(img)
plt.axis('off')
plt.show()

In [None]:
print(viewer.camera.angles)

In [None]:
print(viewer.dims.current_step)

In [26]:
# Turn the mask into 8-bit intensities (cast to uint8, then multiply by 255)
croppings_scaled = np.multiply(croppings_array.astype(np.uint8), 255)

# Merge the first two axes (cell type, track) into a single one
croppings_flattened = croppings_scaled.reshape((-1,) + croppings_array.shape[2:])  # Shape: (C * N, T, Z, Y, X)



In [None]:
len(croppings_scaled[croppings_scaled != 0])

In [None]:
# Move the merged (cell type x track) axis into the channel slot:
# (C * N, T, Z, Y, X) -> (T, Z, C, Y, X)
croppings_imagej_ready = croppings_flattened.transpose(1, 2, 0, 3, 4)

# Write an ImageJ-flavoured TIFF and declare the axis order in its metadata
tiff.imwrite(
    'croppings_array.tif',
    croppings_imagej_ready,
    imagej=True,
    metadata=dict(axes='TZCYX'),
)

In [None]:
# The permutation (0, 1, 2, 3, 4) is the identity, so the axis order is unchanged
croppings_napari_ready = np.transpose(croppings_flattened, (0, 1, 2, 3, 4))  # Shape: (C * N, T, Z, Y, X)

# Save as a plain TIFF (no ImageJ flag or metadata is passed here)
tiff.imwrite('croppings_array_napari.tif', croppings_napari_ready)

In [None]:
croppings_napari_ready.shape

In [None]:
croppings_napari_ready = tiff.imread('croppings_array_napari.tif')

In [3]:
# Split the merged leading axis back into (5 cell types, tracks per type).
# The track count is inferred (-1) from the file, so this works for stacks
# written with 5 tracks per type as well as with 10; the remaining axes are
# taken from the loaded array. The values are the stored 8-bit intensities,
# not booleans.
croppings_array = croppings_napari_ready.reshape(5, -1, *croppings_napari_ready.shape[1:])