# Scaling analysis for analyzing alternating oxygen impact on single-cell development

We have developed a notebook to evaluate a single time-lapse sequence and gain insight into the impact of alternating oxygen conditions at the single-cell level. Now we are going to scale this analysis across multiple time-lapse sequences and extract quantitative insights across multiple cell populations.

## 1. Setup

In [None]:
# Enable the autoreload extension in mode 2, so that imported modules are
# reloaded before each cell execution and local source edits take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Install dependencies

# Remove any previously installed acia before installing the pinned release.
%pip uninstall acia -y
%pip install acia==0.3.0

# dependencies for Omnipose segmentation
# (the trailing, commented-out --index-url option points at the CPU wheel index of PyTorch)
%pip install torch torchvision torchaudio # --index-url https://download.pytorch.org/whl/cpu
%pip install omnipose==1.0.6
%pip install natsort
%pip install scipy==1.11.4

## Parameters

In [None]:
from pathlib import Path

# Location of the single-sequence analysis notebook, given relative to the
# current working directory.
notebook_location = Path(
    "../../case_studies/03_SingleCell_Alternating_Oxygen/Single_cell_alternating_oxygen.ipynb"
)

# Keep the absolute, fully resolved location as a plain string.
analysis_script = str(notebook_location.absolute().resolve())

In [None]:
import os

# Show the current working directory; the relative paths used in this
# notebook are interpreted against it.
working_directory = os.getcwd()
print(working_directory)

In [None]:
import os

# place to store the data
dataset_folder = Path("03_OxygenAlternation")

# make sure the data exists (otherwise download)
# NOTE(review): presumably the archive unpacks into `03_OxygenAlternation`;
# if it does not, this check never succeeds and the download is repeated on
# every run — confirm against the archive contents.
if not dataset_folder.is_dir():
    # download the zipped dataset to a fixed local file name
    !wget -O 03_oxygen_alternation.zip https://fz-juelich.sciebo.de/s/f1RCpz0h9gwI3IS/download
    # unpack the archive into the current working directory
    !unzip 03_oxygen_alternation.zip

## 1.2 Specify the analysis script

Now you have to specify the name of the analysis script you want to apply to the image data.

**Note:** If the analysis script is not located in the same folder, you need to specify the path to it.

In [None]:
# Report where the analysis notebook is expected and whether it is present,
# then stop if it is missing.
script_path = Path(analysis_script)
script_found = script_path.exists()
print(script_path.resolve().absolute(), script_found)
assert script_found, f"The notebook '{analysis_script}' does not exist!"

# 2. Information about the underlying data

We summarize the underlying data by listing all image sequences that will be analyzed.

In [None]:
# Collect every TIFF image sequence of the dataset as an absolute path string.
# `Path.glob` yields its matches in arbitrary order, so the list is sorted to
# make the processing order reproducible between runs and machines.
image_ids = sorted(str(p.absolute()) for p in dataset_folder.glob("*.tiff"))

# Make an empty dataset visible right away instead of silently scaling over
# zero image sequences.
if not image_ids:
    print(f"WARNING: no '*.tiff' files found in '{dataset_folder.absolute()}'")

# TODO: give an overview about the data
print(image_ids)

In [None]:
# Uncomment the next line to delete the results of previous executions:
#!rm -r automated_executions

# 3. Scale the analysis script to all image sequences

Now we apply the analysis script to every image sequence individually 🚀! You can lean back and enjoy the working computer 😎 🥂

**Note:** For heavy analysis scripts or for large datasets this process may take a while (from minutes to hours or days). The top-level progress bar indicates the total progress and gives you an estimate of how long it will take. For large image data volumes we recommend running the execution overnight 🌔!

In [None]:
# Set an environment variable for this process (and the processes it starts).
# NOTE(review): presumably read by the executed analysis notebook to skip its
# own dependency installation — confirm against the analysis notebook.
os.environ["JYPN_NO_DEP_INSTALL"] = "True"

In [None]:
from datetime import datetime
from pathlib import Path
from acia.analysis import scale

# set the base path for all results
# NOTE(review): `stem` (the notebook file name without extension) is not used
# by any of the cells visible in this notebook.
stem = Path(analysis_script).stem
output_path = Path("./automated_executions")

# show the absolute location so the results are easy to find afterwards
print(f"Results are stored in: {output_path.absolute()}")

In [None]:
def execution_name_for(image_id):
    """Return the execution name for one image: ``execution_<file stem>``."""
    return f"execution_{Path(image_id).stem}"


# Apply the analysis notebook to every image sequence; the outcome of the
# `scale` call is kept in `result`.
result = scale(
    output_path,
    analysis_script=analysis_script,
    image_ids=image_ids,
    exist_ok=True,
    execution_naming=execution_name_for,
    kernel_name="python3",
)

# 4. Inspect your analysis results


In [None]:
import pandas as pd
from pathlib import Path

# Collect the per-sequence result tables written by the individual executions.
# `Path.glob` yields matches in arbitrary order; sorting keeps the row order of
# the combined table reproducible.
df_paths = sorted(Path("automated_executions").glob("**/interesting_single_cell.csv"))

# Fail early with an explicit message instead of pandas' generic
# "No objects to concatenate" error when no result table exists yet.
if not df_paths:
    raise FileNotFoundError(
        "No 'interesting_single_cell.csv' found below 'automated_executions'. "
        "Run the scaling step first."
    )

# Combine all tables. `ignore_index=True` gives the combined table unique row
# labels; a plain concatenation would repeat the row labels of every file.
df = pd.concat([pd.read_csv(path) for path in df_paths], ignore_index=True)
df_image_groups = df.groupby(["image_id"])

# number of distinct image sequences in the combined table
num_images = len(df_image_groups)

# number of distinct (image_id, label) combinations
print(len(df.groupby(["image_id", "label"])))

In [None]:
# Smallest and largest `area` value of every (image_id, label) group.
area_range = df.groupby(["image_id", "label"])["area"].agg(["min", "max"])

# A group is kept when its largest area exceeds 2.5.
valid_labels = area_range["max"] > 2.5

# (image_id, label) keys of the kept groups, stored as a set for membership tests.
image_label_set = set(valid_labels.index[valid_labels.to_numpy()])

In [None]:
# Keep only the rows whose (image_id, label) pair passed the area filter.
# The membership test is vectorised through a MultiIndex rather than a
# row-wise `DataFrame.apply`, which runs a Python call per row and does not
# produce a boolean mask when `df` has no rows.
row_keys = pd.MultiIndex.from_frame(df[["image_id", "label"]])

# Boolean mask aligned with the rows of `df`.
sel = pd.Series(row_keys.isin(list(image_label_set)), index=df.index)
new_df = df[sel]
new_df

In [None]:
# Regroup the filtered table per image; the plotting cells below iterate over
# these groups.
df_image_groups = new_df.groupby(["image_id"])

In [None]:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import seaborn as sns

# Per-image overview figure: one row per image group, with `area_filtered`
# over time on the left and `deriv_filtered` over time on the right, and one
# line per label.
# NOTE(review): the grid reserves 2 * num_images rows but only the first
# num_images rows are filled — confirm that the empty lower half is intended.
gs = gridspec.GridSpec(2*num_images,2, hspace=0)
fig = plt.figure(figsize=(4, 12))

axes = np.zeros((num_images, 2), dtype=object)

for i, (image_id, df_image) in enumerate(df_image_groups):
    # every row shares its x/y axes with the first row of the same column
    axes[i,0] = fig.add_subplot(gs[i, 0], sharex=axes[0,0] if i > 0 else None, sharey=axes[0,0] if i > 0 else None)
    axes[i,1] = fig.add_subplot(gs[i, 1], sharex=axes[0,1] if i > 0 else None, sharey=axes[0,1] if i > 0 else None)

    # settings common to both columns
    for ax in (axes[i,0], axes[i,1]):
        ax.set_xlim((1.3, 1.7))
        ax.grid(True)
        # dashed vertical marker at t = 1.5
        ax.axvline(1.5, color="gray", ls="--")
        ax.set_xlabel("Time [h]")

    axes[i,0].set_ylim((3, 7.5))
    axes[i,1].set_ylim((0, 14))

    # The LaTeX parts are raw strings: "\m" is not a valid escape sequence and
    # triggers a SyntaxWarning on recent Python versions when written in a
    # normal string literal. The resulting label text is unchanged.
    axes[i,0].set_ylabel("Cell area\n" r"[$\mu m^2$]")
    axes[i,1].set_ylabel(r"[$\frac{\mu m^2}{h}$]")

    # column titles only above the first row
    if i == 0:
        axes[i, 0].set_title("Single-Cell\nArea")
        axes[i, 1].set_title("Single-Cell\nInstant growth rate")

    # one line per label within this image group
    for cell_label, df_label in df_image.groupby("label"):
        sns.lineplot(df_label, x="time", y="area_filtered", ax=axes[i,0])
        sns.lineplot(df_label, x="time", y="deriv_filtered", ax=axes[i, 1])

plt.tight_layout()

# Saved under its own name: the ensemble figure created further below is
# written to "single_cell_ensemble.*" and would otherwise overwrite this one.
plt.savefig("single_cell_per_image.png", dpi=300)
plt.savefig("single_cell_per_image.svg", dpi=300)

In [None]:
# Show the per-label minimum and maximum of every column.
# NOTE: `df_image` is the variable left over from the plotting loop above, so
# this covers only the last image group that was iterated.
df_image.groupby("label").agg(["min", "max"])

In [None]:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import seaborn as sns

# Ensemble figure: all labels of all image groups overlaid in two stacked
# panels (top: `area_filtered`, bottom: `deriv_filtered`) sharing the time axis.
fig, axes = plt.subplots(2, 1, figsize=(3, 7), sharex=True)

cmap = plt.get_cmap('Set1')

# Use a dedicated name here: re-assigning `image_ids` would overwrite the list
# of input image paths that the scaling step reads, breaking a later re-run of
# that cell.
plotted_image_ids = np.unique(df["image_id"])

# One colour per image; currently only printed, since all curves are drawn in
# black. Pass `color_map[...]` as `color=` below to colour curves by image.
palette = cmap(np.linspace(0, 1, len(plotted_image_ids)))
color_map = dict(zip(plotted_image_ids, palette))
print(color_map)

for image_id, df_image in df_image_groups:
    for cell_label, df_label in df_image.groupby("label"):
        axes[0].plot(df_label["time"], df_label["area_filtered"], label=cell_label, color="black", linewidth=0.3)
        axes[1].plot(df_label["time"], df_label["deriv_filtered"], label=cell_label, color="black", linewidth=0.3)

axes[0].set_xlim((1.3, 1.7))
axes[1].set_xlim((1.3, 1.7))

axes[0].set_ylim((3, 7.5))
axes[1].set_ylim((0, 16))

axes[0].grid(True)
axes[1].grid(True)

# Raw strings: "\m" is not a valid escape sequence and triggers a
# SyntaxWarning on recent Python versions in a normal string literal.
axes[0].set_ylabel(r"Single-Cell Area [$\mu m^2$]")
axes[1].set_ylabel(r"Instantaneous Growth Rate [$\mu m^2 \cdot h^{-1}$]")

axes[1].set_xlabel(r"Time [$h$]")

plt.tight_layout()

# Shade the background in alternating colours, starting with green at t = 0.
# NOTE(review): presumably green/red mark the two alternating oxygen
# conditions — confirm which colour corresponds to which condition.
phase_colors = ["green", "red"]
switch_interval = 0.5  # hours, i.e. a switch every 30 minutes
total_time = 3  # hours covered by the shading
num_phases = int(np.ceil(total_time / switch_interval))
for ax in axes:
    for phase in range(num_phases):
        ax.axvspan(
            xmin=phase * switch_interval,
            xmax=(phase + 1) * switch_interval,
            color=phase_colors[phase % 2],
            alpha=0.2,
        )

plt.savefig("single_cell_ensemble.png", dpi=300)
plt.savefig("single_cell_ensemble.svg", dpi=300)

## 🔁 Reproducibility Information

The following cells record the details of the pip and conda environments used to run this notebook.

In [None]:
# List the installed pip packages together with their exact versions.
%pip freeze

In [None]:
# Export the specification of the active conda/mamba environment.
%mamba env export