# Welcome to the Growth Rate Analysis Notebook

In this notebook, we showcase the measurement of cell growth using three different quantities derived from single-cell segmentation. Everything is automated using the [acia](https://pypi.org/project/acia/) framework.

Therefore, we perform the following steps:

1. Install software dependencies
1. Download live-cell imaging data
1. Perform deep-learning based segmentation using [Omnipose](https://github.com/kevinjohncutler/omnipose)
2. Extracting individual cell features
3. Filtering cells based on their features to reduce the number of artifacts
4. Measure growth rates based on cell count (CC), total colony area (TCA), and total single-cell area (TSCA).
4. Visualize all extracted growth rates.

Enjoy the automated analysis and customize the notebook where you need it to introduce custom visualizations or computations. Have fun 🚀

In [None]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are reloaded automatically before code is executed.
%load_ext autoreload
%autoreload 2

# Install software dependencies

In [None]:
import os
if os.environ.get("JYPN_NO_DEP_INSTALL", None) is None:
    # install acia dependencies
    %pip uninstall acia -y
    %pip install acia==0.3.0
    
    # dependencies for Omnipose segmentation
    %pip install torch torchvision torchaudio # --index-url https://download.pytorch.org/whl/cpu
    %pip install omnipose==1.0.6
    %pip install natsort
    %pip install scipy==1.11.4
else:
    print("Running in scaling mode! Do not install requirements!")

In [None]:
import torch
import logging

# Probe for Google Colab by trying to import its helper package.
try:
    import google.colab  # noqa: F401 -- imported only to test availability
    IN_COLAB = True
except ImportError:
    # Only a failed import means "not in Colab"; a bare `except:` would also
    # swallow unrelated problems such as KeyboardInterrupt.
    IN_COLAB = False

# whether PyTorch reports an available CUDA device
cuda = torch.cuda.is_available()

if not cuda:
    logging.warning("You are not using GPU computation. Thus the deep learning segmentation might take a while!")
    if IN_COLAB:
        logging.warning("Please go to 'Runtime > Change runtime type' in order to select a GPU based runtime in colab!")

In [None]:
import os
from pathlib import Path

# get the acia unit registry
from acia import ureg

# id of the image set (with the default value this is the path to a local TIFF file)
image_id = "3803.tif" # change the id if you want to apply the analysis to different image data

# channel of the phase-contrast images (channel ordering starting with 0...)
phase_contrast_channel = 0

# size of a single pixel in the image
pixel_size = 0.072 * ureg.micrometer

# only every `subsampling_factor`-th image is analyzed; values higher than 1 reduce
# the computational burden (especially segmentation)
subsampling_factor = 10

# `logging` is not imported in this cell; it is expected to be available from an earlier cell
if subsampling_factor > 1:
  logging.warning("You are using subsampling. This will accelerate the analysis but can lead to imprecise results")

# time between two analyzed images: one minute multiplied by the subsampling factor
imaging_interval = 1 * ureg.minute * subsampling_factor

# the number of images taken from the start of the recording (counted before subsampling is applied)
num_images = 500

# use current working directory as default storage folder for outputs
storage_folder = os.getcwd()

In [None]:
import os
from pathlib import Path

# create the output directory
output_path = Path(storage_folder) / "output/"
output_path.mkdir(parents=True, exist_ok=True)

# make path relative (advantage in video embedding)
# os.path.relpath also handles a storage folder outside of the current working
# directory, where Path.relative_to would raise a ValueError.
output_path_rel = Path(os.path.relpath(output_path, os.getcwd()))

# from here on the image id is handled as a path object
image_id = Path(image_id)

# Download live-cell imaging data

In [None]:
from pathlib import Path
# download the image data only if the file referenced by `image_id` does not exist yet
if not Path(image_id).exists():
  # NOTE(review): the download is always stored as "3803.tif"; this matches `image_id`
  # only as long as the default image id is used.
  !wget -O 3803.tif https://fz-juelich.sciebo.de/s/Z0LtciSpZX35j9n/download

In [None]:
from pathlib import Path
import tifffile
from acia.segm.local import THWCSequenceSource
import numpy as np
from tqdm.auto import tqdm

# keep the first `num_images` frames of the recording and afterwards every
# `subsampling_factor`-th frame of this selection
leading_frames = slice(None, num_images)
every_nth_frame = slice(None, None, subsampling_factor)
image_stack = np.stack(tifffile.imread(image_id)[leading_frames][every_nth_frame])

# bring the image stack into TxHxWxC (time, height, width, channels) format
source = THWCSequenceSource(image_stack)

# Information about the image stack

In [None]:
import matplotlib.pyplot as plt

# number of time points and channels reported by the image source
T = source.size_t
C = source.size_c

# display markdown
from IPython.display import Video, Markdown, display
display(Markdown("# Image information"))

table = f"""
| Value    | Content |
| --- | --- |
| Image Path | {image_id} |
| T Size | { T } |
| C Size | { C } |
| Channels | {','.join([f"{c}" for c in range(C)])} |
| Imaging Interval | {imaging_interval} |
| Pixel Size | {pixel_size} |
| Phase-Contrast Channel | {phase_contrast_channel} |
| Image dtype | {image_stack.dtype} |
"""

display(Markdown(table))
display(Markdown("## Preview of channels"))

# preview the frame in the middle of the sequence
t = T // 2

image = source.get_frame(t).raw

# squeeze=False always yields a 2D array of axes, so a single-channel image
# needs no special handling
fig, ax = plt.subplots(1, C, figsize=(15, 15), squeeze=False)
for c, loc_ax in enumerate(ax[0]):  # channel indices start at 0
    loc_ax.imshow(image[..., c], cmap="gray")
    loc_ax.set_title(f"Channel {c}, t: {t}")

plt.tight_layout()

# 1. Cell Segmentation

Now we specify the segmentation model, [Omnipose](https://doi.org/10.1101/2021.11.03.467199), and use the phase-contrast channel so that the segmentation is based only on the grayscale phase-contrast images. With the model and the image sequence we kick off the segmentation.

In [None]:
import torch
from acia.segm.processor.omnipose import OmniposeSegmenter

# create the Omnipose segmentation model
model = OmniposeSegmenter()

import warnings
# silence FutureWarning messages from this point on
warnings.simplefilter(action='ignore', category=FutureWarning)

# segment the phase-contrast channel of the image sequence
print("Perform Prediction...")
with torch.no_grad():  # plain inference: gradient tracking is switched off
    phase_contrast_source = source.to_channel(phase_contrast_channel)
    result = model(phase_contrast_source, omnipose_parameters={"batch_size": 30})

To validate the segmentation result, we create a short video:

In [None]:
import acia
from acia.segm.output import renderVideo
from acia.viz import render_segmentation, render_video, render_time, render_scalebar
import numpy as np
from acia import ureg
from IPython.display import Video, Markdown, display
# explicit import instead of `from moviepy.editor import *`, which floods the
# notebook namespace with every public moviepy name
from moviepy.editor import VideoFileClip

# video rendering configuration
video_config = dict(codec="vp9", ffmpeg_params = ["-crf", "30", "-b:v", "0", "-speed", "1"])

# scalebar placement
scalebar_config = dict(
    xy_position=(750, 1050),
    size_of_pixel = pixel_size,
    bar_width=10 * ureg.micrometer, # width of the scalebar
    bar_height="1 micrometer" # height of the scalebar
)

# timestamp placement
time_config = dict(
    xy_position=(800, 50),
    timepoints=np.arange(num_images) * imaging_interval, # timepoints of the individual frames (with correct unit)
    background_color = (0, 0, 0),
)

# framerate of the video
framerate=20

# file name of the rendered video (relative path)
video_file = str(output_path_rel / "segmented.mp4")

# do the different rendering steps sequentially: timestamp, scalebar, segmentation
source_rend = render_time(source.to_rgb(), **time_config)
source_rend = render_scalebar(source_rend, **scalebar_config)
source_rend = render_segmentation(source_rend, result)
render_video(source_rend, filename=video_file, **video_config, framerate=framerate)

# Display the rendered segmentation
display(Markdown("# Your segmentation"))

myvideo =  VideoFileClip(video_file)
myvideo.ipython_display(maxduration=400)

# 2. Extracting individual cell properties

Now that we have the cell segmentation, we can move on and extract individual cell properties such as area, time, and length.
We attach units to them and collect them in a pandas table:

In [None]:
from acia.analysis import ExtractorExecutor, AreaEx, IdEx, FrameEx, TimeEx, LengthEx, FluorescenceEx
import pint

# executor that applies the property extractors to the segmentation result
ex = ExtractorExecutor()

# define the cell properties that you want to extract here
cell_property_extractors = [
    AreaEx(input_unit=pixel_size ** 2),   # area covered by one pixel
    LengthEx(input_unit=pixel_size),      # edge length of one pixel
    FrameEx(),
    TimeEx(input_unit=imaging_interval),  # time between two analyzed images
]

df = ex.execute(result, source, cell_property_extractors)

# show the dataframe
df

# 3. Filtering artifacts in segmentation

In the segmentation, we can often observe artifacts, that is, objects that are mistakenly recognized as cells. To reduce the number of artifacts in our analysis, we can apply a simple area filter: we only keep the objects that have an area between `min_area` and `max_area` as defined in the code below:

In [None]:
import matplotlib.pyplot as plt

min_area = 1  # the minimal area in micrometer ** 2. All smaller objects are dropped
max_area = 10 # the maximal area in micrometer ** 2. All larger objects are dropped

fig, ax = plt.subplots(2, 1, facecolor='white', figsize=(15,10))

area_unit = ex.units['area']

# filter by area: keep the objects strictly between both bounds
cell_areas = df['area']
filtered_df = df[(cell_areas > min_area) & (cell_areas < max_area)]

# area distribution before (top) and after (bottom) filtering
histogram_panels = [
    (ax[0], df, 'Area distribution before filtering'),
    (ax[1], filtered_df, 'Area distribution after filtering'),
]
for hist_ax, cells, panel_title in histogram_panels:
    hist_ax.hist(cells['area'], bins=100)
    hist_ax.set_title(panel_title)
    hist_ax.set_ylabel('Frequency')
    hist_ax.set_xlabel(f'Cell area [${area_unit:~L}$]')

plt.tight_layout()

# export as CSV with ';' as column separator and '.' as decimal mark
filtered_df.to_csv(str(output_path / 'allcells.csv'), decimal='.', sep=';')

print("Done")

And now let's look at the new video with filtered content

In [None]:
from acia.base import Overlay
from acia.segm.formats import gen_simple_segmentation
import gzip
import numpy as np

# index values of the filtered dataframe
# (presumably the object ids, since they are compared with `c.id` below -- TODO confirm)
id_set = set(filtered_df.index)

# overlay that contains only the objects that passed the filter
filtered_overlay = Overlay([cell for cell in result if cell.id in id_set])

# file name of the rendered video (relative path)
video_file = str(output_path_rel / "filter_segmented.mp4")

# render timestamp, scalebar and the filtered segmentation on top of the images
frame_times = np.array(range(num_images)) * imaging_interval
source_time = render_time(source.to_rgb(), (800, 50), timepoints=frame_times)
source_scalebar = render_scalebar(
    source_time,
    xy_position=(750, 1050),
    size_of_pixel=pixel_size,
    bar_width="10 micrometer",
    bar_height="1 micrometer",
)
source_segm = render_segmentation(source_scalebar, filtered_overlay)
render_video(source_segm, filename=video_file, framerate=framerate, **video_config)

# display in markdown
display(Markdown("# Your filtered segmentation"))
myvideo = VideoFileClip(video_file)
myvideo.ipython_display()

# 4. Generating insights into the colony growth

We determine the exponential growth rates of the cell populations based on three quantities:
- cell count (CC): just counting the cells
- total colony area (TCA): the total size of the cell population blob.
- total single-cell area (TSCA): the total amount of detected single-cell area

We fit an exponential model:

$$ Q(t) = Q_0 \cdot \exp(\mu_Q \cdot t),$$

where $Q$ denotes the measured quantity (e.g. CC, TCA, or TSCA), $Q_0$ denotes the initial value of the quantity and $\mu_Q$ denotes the exponential growth rate of the measured quantity. During a linear fit in the log-space of the measured quantities, we determine $Q_0, \mu_Q$ for CC, TCA, and TSCA.

In [None]:
import numpy as np
from sklearn.metrics import r2_score

def linear_regression(x,y):
    """Fit the exponential model ``y = Q_0 * exp(mu * x)`` via a linear fit in log-space.

    Args:
        x: values of the independent variable (e.g. time points)
        y: measured quantity for every entry of ``x``; its logarithm is fitted

    Returns:
        Tuple ``(Q_0, mu, R2)``: the fitted initial value, the fitted exponential
        rate, and the R2 score of the exponential prediction against ``y``.
    """
    # first-degree polynomial fit: log(y) = mu * x + log(Q_0)
    coefficients = np.polyfit(x, np.log(y), 1, full=True)[0]
    mu, log_Q_0 = coefficients

    # transform the intercept back from log-space
    Q_0 = np.exp(log_Q_0)

    # quality of fit is measured on the original (non-logarithmic) scale
    prediction = Q_0 * np.exp(x * mu)

    return Q_0, mu, r2_score(y, prediction)

In [None]:
# collects the fit results of the growth rate estimations; the following cells store them under the keys "A", "B" and "C"
collected_results = {}

## (A) Use Cell count as basis for the growth rate

We use the CC quantity measurements, perform a linear regression on the `log` of the cell count data, derive the exponential growth rate ($\mu_{CC}$), and measure the `R2` quality of fit.

In [None]:
colony = filtered_df

# number of detected cells per time point
counts = filtered_df.groupby("time").size().reset_index(name="count")

# time points of the measurements
# NOTE(review): axis label and growth rate annotations below assume hours -- confirm the output unit of TimeEx
x = counts["time"]
# y is the counts at a certain time
y = counts["count"]

# perform linear regression
N_0, mu, R2 = linear_regression(x, y)

# perform prediction
y_pred = N_0 * np.exp(x * mu)

# plot both
plt.plot(x, y, label="measured CC")
# raw f-string: "\m" is not a valid escape sequence in a regular string literal;
# R^2 is set in math mode and separated from its value by "="
plt.plot(x, y_pred, label=rf"fitted CC ($\mu_{{CC}}={mu:.2f}~1/h$; $R^2$={R2:.4f})")

plt.xlabel("Time [hour]")
plt.ylabel("Cell Count")
plt.grid(True)
plt.yscale("log")

plt.legend()

# keep the results for the combined figures
collected_results["A"] = dict(
    x=x,
    y=y,
    y_pred=y_pred,
    N_0=N_0,
    mu=mu,
    R2=R2,
    Q_0=N_0
)


# display markdown
from IPython.display import Video, Markdown, display
display(Markdown(fr"""
# Linear regression result for cell count (CC)
For cell count growth according to: $N(t) = N_0 \cdot exp(t \cdot \mu_{{CC}})$, we determined\
$\quad N_0={N_0:.3f}$\
$\quad \mu_{{CC}}={mu:.5f} \cdot \text{{h}}^{{-1}}$\
$\quad R^2={R2:.5f}$"""))

## (B) Use the total colony area (TCA), i.e. the area enclosed by the colony outline, as the basis for the growth estimation

First, we compute the colony outline by merging all cell instances together and creating a new overlay.

Then we perform a linear regression to the `log` of the total colony outline area data in order to derive an exponential growth rate ($\mu_{TCA}$) and measure the `R2` quality of fit.

In [None]:
from acia.segm.utils import merge_cells_to_colonies

# merge the filtered single-cell instances into colonies; the result is stored as a new overlay
colony_overlay = merge_cells_to_colonies(filtered_overlay)

In [None]:
# show the number of entries in the merged colony overlay
len(colony_overlay)

In [None]:
# Analyze the colony overlay
# define the colony properties that you want to extract here
colony_property_extractors = [
    AreaEx(input_unit=pixel_size ** 2),   # area covered by one pixel
    LengthEx(input_unit=pixel_size),      # edge length of one pixel
    FrameEx(),
    TimeEx(input_unit=imaging_interval),  # time between two analyzed images
]

colony_df = ex.execute(colony_overlay, source, colony_property_extractors)

# store the colony measurements next to the other outputs
colony_df.to_csv(output_path / "colony.csv")

In [None]:
# sum the area of all colony objects at each time point
areas = colony_df.groupby("time")["area"].agg("sum").reset_index(name="area")

# time points of the measurements
# NOTE(review): axis label and growth rate annotations below assume hours -- confirm the output unit of TimeEx
x = areas["time"]
y = areas["area"]

# fit the exponential model and compute the fitted curve
A_0, mu, R2 = linear_regression(x, y)

y_pred = A_0 * np.exp(x * mu)


plt.plot(x, y, label="measured TCA")
# raw f-string: "\m" is not a valid escape sequence in a regular string literal;
# R^2 is set in math mode like the growth rate
plt.plot(x, y_pred, label=rf"fitted TCA ($\mu_{{TCA}}={mu:.2f}~1/h$; $R^2$={R2:.4f})")

# same time unit as in the growth rate annotation (1/h) and in the cell count plot
plt.xlabel("Time [hour]")
plt.ylabel(r"TCA [$\mu m^2$]")

plt.grid(True)
plt.yscale("log")

plt.legend()

# keep the results for the combined figures
collected_results["B"] = dict(
    x=x,
    y=y,
    y_pred=y_pred,
    A_0=A_0,
    mu=mu,
    R2=R2,
    Q_0=A_0
)

# display markdown
from IPython.display import Video, Markdown, display
display(Markdown(fr"""
# Linear regression result for total colony area (TCA)
For cell area growth according to: $A(t) = A_0 \cdot exp(t \cdot \mu_{{TCA}})$, we have\
$\quad A_0={A_0:.3f}$\
$\quad \mu_{{TCA}}={mu:.5f} \cdot \text{{h}}^{{-1}}$\
$\quad R^2={R2:.5f}$"""))

## (C) Use total single-cell area (TSCA) as basis for the growth estimation

In [None]:
# sum the area of all individual cells at each time point
areas = filtered_df.groupby("time")["area"].agg("sum").reset_index(name="area")

# time points of the measurements
# NOTE(review): axis label and growth rate annotations below assume hours -- confirm the output unit of TimeEx
x = areas["time"]
y = areas["area"]

# fit the exponential model and compute the fitted curve
A_0, mu, R2 = linear_regression(x, y)

y_pred = A_0 * np.exp(x * mu)


plt.plot(x, y, label="measured")
# raw f-string: "\m" is not a valid escape sequence in a regular string literal;
# R^2 is set in math mode like the growth rate
plt.plot(x, y_pred, label=rf"fitted ($\mu_{{TSCA}}={mu:.2f}~1/h$; $R^2$={R2:.4f})")

# same time unit as in the growth rate annotation (1/h) and in the cell count plot
plt.xlabel("Time [hour]")
# area unit is square micrometer (the "m" was missing in the label)
plt.ylabel(r"TSCA [$\mu m^2$]")

plt.grid(True)
plt.yscale("log")

plt.legend()

# keep the results for the combined figures
collected_results["C"] = dict(
    x=x,
    y=y,
    y_pred=y_pred,
    A_0=A_0,
    mu=mu,
    R2=R2,
    Q_0=A_0
)

# display markdown
from IPython.display import Video, Markdown, display
display(Markdown(fr"""
# Linear regression result for total single-cell area (TSCA)
For cell area growth according to: $A(t) = A_0 \cdot exp(t \cdot \mu_{{TSCA}})$, we have\
$\quad A_0={A_0:.3f}$\
$\quad \mu_{{TSCA}}={mu:.5f} \cdot \text{{h}}^{{-1}}$\
$\quad R^2={R2:.5f}$"""))

# Combined view

Visualize all three results side-by-side.

In [None]:
import pandas as pd

fig, axes = plt.subplots(1, 3, figsize=(15, 5), sharex=True)

cr = collected_results

title_font_size = 18

# short names of the three methods (order matches the result keys "A", "B", "C")
# NOTE(review): "CA" is kept because it is written to growth_estimates.csv; the text of this notebook abbreviates the quantity as TCA
method_names = ["CC", "CA", "TSCA"]

growth_estimates = []

for row, method in enumerate("ABC"):
    name = method_names[row]
    fit = cr[method]
    panel = axes[row]

    # measurement and fitted exponential curve
    panel.plot(fit["x"], fit["y"], label=f"measured {name}")
    panel.plot(fit["x"], fit["y_pred"], label=fr"fitted {name} ($\mu_{{{name}}}={fit['mu']:.2f}; R^2$={fit['R2']:.4f})")

    panel.set_yscale("log")

    growth_estimates.append(dict(
        method = name,
        Q_0 = fit["Q_0"],
        mu = fit["mu"],
        R2 = fit["R2"]
    ))

    panel.legend()
    panel.grid(True)

# store the fitted parameters of all three methods
pd.DataFrame(growth_estimates).to_csv(output_path / "growth_estimates.csv")

# raw strings: "\m" is not a valid escape sequence in a regular string literal
axes[0].set_ylabel("Cell count (CC)")
axes[1].set_ylabel(r"Total colony area (TCA) [$\mu m^2$]")
axes[2].set_ylabel(r"Total single-cell area (TSCA) [$\mu m^2$]")

axes[0].set_title("Cell count (CC)", fontsize=title_font_size)
axes[1].set_title("Total Colony area (TCA)", fontsize=title_font_size)
axes[2].set_title("Total single-cell area (TSCA)", fontsize=title_font_size)

for ax in axes:
    ax.set_xlabel("Time [h]")

plt.tight_layout()

plt.savefig(output_path / "summary.svg")

# Complete figure

In [None]:
from acia.viz import render_cell_centers

# 3 rows (CC, TCA, TSCA) x 4 columns (three image snapshots + growth curve)
fig, axes = plt.subplots(3, 4, figsize=(15, 12))

# snapshot frames: image numbers 120, 240 and 480 of the recording mapped to the subsampled sequence
frames = [120//subsampling_factor, 240//subsampling_factor, 480//subsampling_factor]

rgb_source = source.to_rgb()

for j, f in enumerate(frames):
    im = rgb_source.get_frame(f).raw

    frame_overlay = Overlay([c for c in filtered_overlay if c.frame == f])
    frame_colony_overlay = Overlay([c for c in colony_overlay if c.frame == f])

    # one rendering per row: cell centers, colony segmentation, single-cell segmentation
    row_renderings = [
        (render_cell_centers, frame_overlay),
        (render_segmentation, frame_colony_overlay),
        (render_segmentation, frame_overlay),
    ]

    for i, (render, overlay) in enumerate(row_renderings):
        # render on a copy so that every row starts from the unmodified image
        im2 = render([im.copy()], overlay).get_frame(0).raw

        im2 = render_scalebar(THWCSequenceSource(im2[None,...]), xy_position=(750, 1050), size_of_pixel = pixel_size, bar_width="10 micrometer", bar_height="1 micrometer")
        im2 = render_time(im2, (800, 50), timepoints=[f * imaging_interval])
        axes[i, j].imshow(im2.get_frame(0).raw)
        axes[i, j].set_xticks([])
        axes[i, j].set_yticks([])

# last column: measurements and fitted curves of the three methods
for row, method in enumerate("ABC"):
    curve_ax = axes[row, 3]
    curve_ax.scatter(cr[method]["x"], cr[method]["y"], label=f"measured {method_names[row]}", marker="+", color="gray")
    # raw f-string: "\m" is not a valid escape sequence in a regular string literal
    curve_ax.plot(cr[method]["x"], cr[method]["y_pred"], label=rf"fitted ($\mu_{{{method_names[row]}}}$={cr[method]['mu']:.2f}|$R^2$={cr[method]['R2']:.4f})", ls="--")
    curve_ax.legend()
    curve_ax.set_yscale("log")
    curve_ax.grid(True)

axes[0, 3].set_ylabel("CC")
axes[1, 3].set_ylabel(r"TCA [$\mu m^2$]")
axes[2, 3].set_ylabel(r"TSCA [$\mu m^2$]")

axes[2, 3].set_xlabel("Time [h]")

# row titles on the left side of the figure
axes[0, 0].set_ylabel("Cell count (CC)", fontsize=title_font_size)
axes[1, 0].set_ylabel("Total Colony Area (TCA)", fontsize=title_font_size)
axes[2, 0].set_ylabel("Total Single-Cell Area (TSCA)", fontsize=title_font_size)

# time of every snapshot column below the bottom row
for j, f in enumerate(frames):
    axes[2, j].set_xlabel(f"Time={f * imaging_interval.to('h'):~P}", fontsize=title_font_size)

plt.tight_layout()

plt.savefig(output_path / "gr_comparison.png", dpi=300)


# Complete overview (including single-cell size distribution) stacked vertically with a shared x-axis

In [None]:
import matplotlib.gridspec as gridspec
import seaborn as sns

# four panels stacked without vertical spacing
gs = gridspec.GridSpec(4,1, hspace=0)
fig = plt.figure(figsize=(9, 14))

# first plot: cell count (method A)
ax_cc = fig.add_subplot(gs[0])
ax_cc.plot(cr["A"]["x"], cr["A"]["y"], label="measured")
ax_cc.plot(cr["A"]["x"], cr["A"]["y_pred"], label=rf"fitted ($\mu_{{CC}} = {cr['A']['mu']:.2f}/h$, $R^2 = {cr['A']['R2']:.4f}$)")
ax_cc.set_ylabel('Cell count\n(CC)', size =16)
ax_cc.tick_params(
    axis='x',           # changes apply to the x-axis
    labelbottom=False)  # labels along the bottom edge are off (the string 'off' is truthy)
ax_cc.set_yscale("log")
ax_cc.legend()
ax_cc.grid(True)
ax_cc.xaxis.set_ticklabels([])

# second plot: total colony area (method B)
ax_tca = fig.add_subplot(gs[1])
ax_tca.plot(cr["B"]["x"], cr["B"]["y"], label="measured")
ax_tca.plot(cr["B"]["x"], cr["B"]["y_pred"], label=rf"fitted ($\mu_{{TCA}} = {cr['B']['mu']:.2f}/h$, $R^2 = {cr['B']['R2']:.4f}$)")
# "\\mu" keeps the intended line break "\n" while avoiding the invalid escape "\m"
ax_tca.set_ylabel('Colony area\n(CA) [$\\mu m^2$]', size =16)
ax_tca.set_xlabel("Time [h]")
ax_tca.set_yscale("log")
ax_tca.grid(True)
ax_tca.legend()

# third plot: total single-cell area (method C); it previously showed the
# data of method B next to the fit parameters of method C
ax_tsca = fig.add_subplot(gs[2], sharey=ax_tca)
ax_tsca.plot(cr["C"]["x"], cr["C"]["y"], label="measured")
ax_tsca.plot(cr["C"]["x"], cr["C"]["y_pred"], label=rf"fitted ($\mu_{{TSCA}} = {cr['C']['mu']:.2f}/h$, $R^2 = {cr['C']['R2']:.4f}$)")
ax_tsca.set_ylabel('Total single-cell area\n(TSCA) [$\\mu m^2$]', size =16)
ax_tsca.set_xlabel("Time [h]")
ax_tsca.set_yscale("log")
ax_tsca.legend()
ax_tsca.grid(True)

# fourth plot: single-cell area over time
ax_size = fig.add_subplot(gs[3])
ax_size.set_ylabel("Average cell size\n[$\\mu m^2$]", size=16)
ax_size.grid(True)

# draw explicitly into the last panel instead of relying on the current axes
sns.lineplot(data=filtered_df, x="time", y="area", ax=ax_size)

plt.savefig(output_path / "summary_vertical.svg")
plt.savefig(output_path / "summary_vertical.png", dpi=300)


## Reproducibility Information

pip and conda environment details

In [None]:
# list the installed pip packages with their versions
%pip freeze

In [None]:
# export the micromamba environment definition (requires micromamba to be available for this kernel)
%micromamba env export