In [None]:
import os
import sys
import numpy as np
import pandas as pd
from scipy.spatial import distance_matrix
pd.set_option("display.max_columns", 200)
pd.set_option("display.max_rows", 50)
pd.set_option('display.max_colwidth', 150)
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()
try:
    from cellacdc import cca_functions
    from cellacdc import myutils
except FileNotFoundError:
    # Check if user has developer version --> add the Cell_ACDC/cellacdc
    # folder to path and import from there
    sys.path.insert(0, '../cellacdc/')
    from cellacdc import cca_functions
    from cellacdc import myutils

In [None]:
%load_ext autoreload
%autoreload 2

# configurations
- follow the file selection dialog:
    - select microscopy folder in first step
    - select positions of the selected folder in second step
- repeat to add more positions to the analysis
- positions selected within one iteration of the dialog will be pooled together in the following analyses

In [None]:
# Open the file selection dialog. `app` is kept bound to a name although it is
# not used again in this cell.
data_dirs, positions, app = cca_functions.configuration_dialog()

# One entry per selected microscopy folder: its folder name and the 'Images'
# directory of every position selected inside it.
file_names = [os.path.split(path)[-1] for path in data_dirs]
image_folders = [
    [os.path.join(data_dir, pos_str, 'Images') for pos_str in pos_list]
    for pos_list, data_dir in zip(positions, data_dirs)
]

# Determine the available channels from the first(!) selected position of EACH
# microscopy folder. Pairing every folder with its own position list avoids
# probing later folders with the first folder's position name, which fails
# whenever that position was not selected/does not exist there.
# TODO: warn the user if a channel is not available for some positions.
first_pos_dirs = [
    os.path.join(data_dir, pos_list[0], 'Images')
    for data_dir, pos_list in zip(data_dirs, positions)
]
first_pos_files = [myutils.listdir(first_pos_dir) for first_pos_dir in first_pos_dirs]

# Query each folder once and split the result afterwards: element 0 is used as
# the channels, element 1 as the basename.
channels_and_basenames = [
    cca_functions.find_available_channels(fpf, fpd)
    for fpf, fpd in zip(first_pos_files, first_pos_dirs)
]
channels = [found[0] for found in channels_and_basenames]
basenames = [found[1] for found in channels_and_basenames]
segm_endnames = [
    cca_functions.get_segm_endname(fpd, bn)
    for fpd, bn in zip(first_pos_dirs, basenames)
]


# load data and perform all needed calculations on image data

In [None]:
# Display the configuration gathered by the previous cell as one tuple so the
# selection can be checked before any data is loaded.
data_dirs, positions, file_names, image_folders, segm_endnames, channels

In [None]:
# Load the ACDC output of every selected position; the result is the table all
# following cells work on.
overall_df = cca_functions.load_acdc_output_only(file_names, image_folders, positions, segm_endnames)

# Flag read by the cell-cycle cell below before it computes per-phase quantities.
is_timelapse_data = True  # Maybe not needed

In [None]:
# Print the (rows, columns) shape, then let the notebook render the table
# itself (truncated according to the pandas display options set at the top).
print(overall_df.shape)
overall_df

In [None]:
# Only when cell cycle annotations were made in ACDC: extend the table with the
# data of each cell's relative cell.
if 'cell_cycle_stage' in overall_df.columns:
    overall_df_with_rel = cca_functions.calculate_relatives_data(overall_df, channels)

    # Keys used both to compute the per-phase quantities and to merge them back.
    group_cols = [
        'Cell_ID',
        'generation_num',
        'cell_cycle_stage',
        'relationship',
        'position',
        'file',
        'max_frame_pos',
        'selection_subset',
        'max_t',
    ]

    # Per-phase quantities are only computed for timelapse data, i.e. when the
    # 'max_t' column is present.
    if is_timelapse_data and 'max_t' in overall_df_with_rel.columns:
        phase_grouped = cca_functions.calculate_per_phase_quantities(
            overall_df_with_rel, group_cols, channels
        )
        # Left merge: every row keeps its place and gains the per-phase columns.
        overall_df_with_rel = overall_df_with_rel.merge(phase_grouped, how='left', on=group_cols)

        # 1-based frame counter within the current phase
        # ('phase_begin' presumably comes from `phase_grouped` - confirm).
        frames_since_phase_begin = overall_df_with_rel['frame_i'] - overall_df_with_rel['phase_begin']
        overall_df_with_rel['time_in_phase'] = frames_since_phase_begin + 1

        # 1-based running row counter per cell and generation (row order is used as is).
        overall_df_with_rel['time_in_cell_cycle'] = (
            overall_df_with_rel
            .groupby(['Cell_ID', 'generation_num', 'position', 'file'])['frame_i']
            .cumcount()
            + 1
        )

# Example plots as starting point

## Selection subset example
(Try if more than one selection subset was selected)

In [None]:
# Preview the first rows to see which columns are available for plotting.
overall_df.head()

In [None]:
# Three-panel overview comparing the selection subsets.
fig, axs = plt.subplots(1, 3, figsize=(15, 5))

# Panel 1: mean cell area per frame with a standard-deviation band.
# `errorbar='sd'` is the replacement for the deprecated `ci='sd'` (seaborn >= 0.12).
sns.lineplot(
    data=overall_df,
    x='frame_i',
    y='cell_area_um2',
    hue='selection_subset',
    errorbar='sd',
    ax=axs[0],
)

# Panel 2: number of rows (one per cell) per frame and subset. There is exactly
# one count per frame/subset, so no error band is drawn. The count gets an
# explicit column name instead of the anonymous `0`.
cell_counts = (
    overall_df
    .groupby(['frame_i', 'selection_subset'])
    .size()
    .reset_index(name='cell_count')
)
sns.lineplot(
    data=cell_counts,
    x='frame_i',
    y='cell_count',
    hue='selection_subset',
    errorbar=None,
    ax=axs[1],
)

# Panel 3: track length (last frame - first frame) per cell. Cell IDs are only
# unique within one position, so 'file' and 'position' are added to the keys
# when those columns exist; otherwise pooled positions would merge different
# cells that happen to share an ID.
track_keys = [
    col for col in ('selection_subset', 'file', 'position', 'Cell_ID')
    if col in overall_df.columns
]
frames_per_track = overall_df.groupby(track_keys)['frame_i']
track_lengths = (frames_per_track.max() - frames_per_track.min()).reset_index(drop=False)
sns.histplot(
    data=track_lengths,
    x='frame_i',
    kde=True,
    ax=axs[2],
    hue='selection_subset',
    multiple='dodge',
)

axs[0].set_title('Mean cell area over time')
axs[0].set_xlabel('Frame')
axs[0].set_ylabel('Mean cell area (µm²)')
axs[1].set_title('Number of cells over time')
axs[1].set_xlabel('Frame')
axs[1].set_ylabel('Number of cells')
axs[2].set_title('Track length distribution')
axs[2].set_xlabel('Track length (frames)')

fig.tight_layout()
plt.show()

## For rest of the figures, use only one selection subset

In [None]:
# Keep only the rows whose `selection_subset` value is 0; the remaining example
# figures are drawn from this subset.
plot_data = overall_df[overall_df['selection_subset'].eq(0)]

## Cell counts over time

In [None]:
# One figure, three panels, each drawn on its own explicit Axes.
fig, (ax_count, ax_mean, ax_total) = plt.subplots(1, 3, figsize=(18, 6))

# First panel: number of cells (rows) per frame.
plot_data.groupby('frame_i').size().plot(kind='line', ax=ax_count)
ax_count.set_xlabel('Frame')
ax_count.set_ylabel('Number of Cells')
ax_count.set_title('Number of Cells per Frame')

# Second panel: mean cell area over time with a standard-deviation band.
# `errorbar='sd'` is the replacement for the deprecated `ci='sd'` (seaborn >= 0.12).
sns.lineplot(data=plot_data, x='frame_i', y='cell_area_um2', errorbar='sd', ax=ax_mean)
ax_mean.set_xlabel('Frame')
ax_mean.set_ylabel('Mean Cell area (µm²)')
ax_mean.set_title('Mean Cell area over Time')

# Third panel: summed area of all cells per frame.
plot_data.groupby('frame_i')['cell_area_um2'].sum().plot(kind='line', ax=ax_total)
ax_total.set_xlabel('Frame')
ax_total.set_ylabel('Total Cell area (µm²)')
ax_total.set_title('Total Cell area over Time')

fig.tight_layout()
plt.show()


## Cell area distribution on first and last frames

In [None]:
# Use the smallest and largest frame index actually present in the data, so the
# "first frame" is not empty when the recording does not start at frame 0.
first_frame_i = plot_data['frame_i'].min()
last_frame_i = plot_data['frame_i'].max()

# Rows belonging to the first and to the last frame.
first_frame_df = plot_data[plot_data['frame_i'] == first_frame_i]
last_frame_df = plot_data[plot_data['frame_i'] == last_frame_i]

# Number of cells (rows) in each of the two frames.
first_frame_total_cells = len(first_frame_df)
last_frame_total_cells = len(last_frame_df)

# Overlay both area distributions. `stat='density'` normalises each histogram,
# which keeps the shapes comparable although the cell counts differ.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=first_frame_df, x='cell_area_um2', kde=True, label='First Frame', stat='density', ax=ax)
sns.histplot(data=last_frame_df, x='cell_area_um2', kde=True, label='Last Frame', stat='density', ax=ax)
ax.set_xlabel('Cell Area (µm²)')
ax.set_ylabel('Density')
# The plotted quantity is `cell_area_um2`, so the title says "Area".
ax.set_title('Relative Area Distribution of Cells')
ax.legend()

# Report the absolute cell counts that the density normalisation hides.
print(f'Cell count first frame: {first_frame_total_cells}')
print(f'Cell count last frame: {last_frame_total_cells}')

plt.show()


## Track lengths before correction

In [None]:
# Calculate track lengths
track_lengths = plot_data.groupby('Cell_ID')['frame_i'].apply(lambda x: x.max() - x.min())

# Plot track length distribution
plt.figure(figsize=(10, 6))
sns.histplot(data=track_lengths, kde=True)
plt.xlabel('Track Length')
plt.ylabel('Count')
plt.title('Distribution of Track Lengths')
plt.show()


## Visualize paths of cells that have been tracked for more than 20 frames

In [None]:
# Tracks must be longer than this (last frame - first frame) to be shown.
MIN_TRACK_LENGTH = 20

# `track_lengths` is indexed by Cell_ID; map it onto the rows and keep the rows
# of sufficiently long tracks. `filtered_df` is reused by the cells below.
filtered_df = plot_data[plot_data['Cell_ID'].map(track_lengths) > MIN_TRACK_LENGTH]

fig, (ax_area, ax_trace) = plt.subplots(1, 2, figsize=(21, 7))

# One line per cell in both panels. A single loop keeps the colour of a cell
# identical in the two panels. No labels are attached: a legend with one entry
# per cell would be unreadable, so none is created.
for cell_id, cell_data in filtered_df.groupby('Cell_ID'):
    ax_area.plot(cell_data['frame_i'], cell_data['cell_area_um2'])
    ax_trace.plot(cell_data['centroid-1'], cell_data['centroid-0'])

# First panel: cell area over time (the plotted column is `cell_area_um2`).
ax_area.set_xlabel('Frame')
ax_area.set_ylabel('Cell Area (µm²)')
ax_area.set_title('Cell Area over Time')

# Second panel: centroid traces ('centroid-1' on x, 'centroid-0' on y).
ax_trace.set_xlabel('X-coordinate')
ax_trace.set_ylabel('Y-coordinate')
ax_trace.set_title('Traces of Cells')

# Same range on both axes, with a margin of 50 beyond the largest centroid
# coordinate. Skipped when no track passed the filter, because the maximum
# would be NaN and cannot be used as an axis limit.
if not filtered_df.empty:
    maxCentroidAll = filtered_df[['centroid-0', 'centroid-1']].max().max()
    ax_trace.set_xlim(0, maxCentroidAll + 50)
    ax_trace.set_ylim(0, maxCentroidAll + 50)

fig.tight_layout()
plt.show()



## Traveled distance vs. cell area and (per frame) movement vs. growth

In [None]:
def frame_by_frame_dist(centroid_series):
    """
    Euclidean distance between the centroids of consecutive rows.

    Parameters
    ----------
    centroid_series : pandas.DataFrame
        Table with the columns 'centroid-1' (used as x) and 'centroid-0'
        (used as y); rows are taken in their given order.

    Returns
    -------
    pandas.Series
        One distance per input row, on the same index. The first entry is
        NaN because the first row has no predecessor.
    """
    # Row-to-row displacement along each axis.
    delta_x = centroid_series['centroid-1'].diff()
    delta_y = centroid_series['centroid-0'].diff()
    return np.sqrt(delta_x.pow(2) + delta_y.pow(2))

In [None]:

fig, (ax_total, ax_step) = plt.subplots(1, 2, figsize=(14, 7))

for cell_id, cell_data in filtered_df.groupby('Cell_ID'):
    # Frame-to-frame differences are only meaningful in temporal order.
    cell_data = cell_data.sort_values('frame_i')
    step_dists = frame_by_frame_dist(cell_data[['centroid-0', 'centroid-1']])

    # Left panel: total traveled distance vs. mean area. The traveled distance
    # is the path length, i.e. the sum of all frame-to-frame steps (the
    # leading NaN is skipped by `sum`). The previous maximum of the pairwise
    # distance matrix measured the largest displacement between any two
    # positions instead, which does not match the axis label.
    ax_total.scatter(cell_data['cell_area_um2'].mean(), step_dists.sum())

    # Right panel: step length vs. area change between consecutive frames.
    # The first step is NaN (no predecessor) and is dropped so that both
    # arrays have the same length.
    ax_step.scatter(step_dists.iloc[1:], np.diff(cell_data['cell_area_um2']), alpha=0.4)

ax_total.set_xlabel('Mean Area [µm²]')
ax_total.set_ylabel('Total Traveled Distance')
ax_total.set_title('Total Traveled Distance vs. Mean Area')

ax_step.set_xlabel('Frame-by-Frame Distance')
ax_step.set_ylabel('Frame-by-Frame Growth [Area in µm²]')
ax_step.set_title('Frame-by-Frame Distance vs. Frame-by-Frame Growth')

fig.tight_layout()
plt.show()
