# Timeseries statistics
This notebook shows how to use the PadAnalyser package to extract colony and cell statistics from time-lapse microscopy data. The colony data frame contains growth-rate data and single-cell statistics.

In [1]:
# Install dependencies with:
# pip install -r requirements.txt

# set up notebook
%load_ext autoreload
%autoreload 2

import os
import shutil

import cv2 as cv
import matplotlib.pyplot as plt
import natsort
import numpy as np
from PIL import Image

from PadAnalyser.MicrocolonySegmenter import ZStack, MKSegmentUtils, DInfo, ColonySegment, CellSegment, Segment
from PadAnalyser import FrameSet
from PadAnalyser import MicrocolonySegmenter
from PadAnalyser.OutputConfig import OutputConfig

# Folder names used to build the input and output paths
IMAGE_FOLDER = 'sample_images' # parent folder of the input image sets
OUTPUT_FOLDER = 'output' # parent folder of the 'work' and 'results' folders
TIMESERIES_FOLDER = 'ecoli_timeseries' # sub-folder of IMAGE_FOLDER holding the time-lapse frames

In [2]:
# set up folder paths
input_folder = os.path.join(IMAGE_FOLDER, TIMESERIES_FOLDER)
work_folder = os.path.join(OUTPUT_FOLDER, 'work')
results_folder = os.path.join(OUTPUT_FOLDER, 'results')

# create folders; exist_ok makes this cell safe to re-run
os.makedirs(results_folder, exist_ok=True)
os.makedirs(work_folder, exist_ok=True)

# clear the work folder so every run starts from an empty directory
for entry in os.listdir(work_folder):
    entry_path = os.path.join(work_folder, entry)
    if os.path.isdir(entry_path) and not os.path.islink(entry_path):
        # os.remove cannot delete directories, so remove sub-folders recursively
        shutil.rmtree(entry_path)
    else:
        os.remove(entry_path)

def get_all_frames_in_folder(path: str) -> list[tuple[str, str]]:
    """List the PNG frames found directly inside ``path``.

    Directory entries are put in natural sort order and only names ending
    in '.png' are kept.

    Args:
        path: Folder to scan.

    Returns:
        A list of ``(file_name, file_path)`` tuples, where ``file_path`` is
        ``path`` joined with ``file_name``.
    """
    frames = []
    for name in natsort.natsorted(os.listdir(path)):
        if name.endswith('.png'):
            frames.append((name, os.path.join(path, name)))
    return frames

# Configure where segmentation results and intermediate files are written

output_config = OutputConfig(
    output_dir = results_folder,
    # NOTE(review): the paths below point into 'tmp/', while work_folder ('output/work') prepared above is never passed here - confirm which location is intended
    work_dir = 'tmp/work',
    debug_dir = 'tmp/debug',
    mask_dir = 'tmp/masks',
    
    logging_file = 'tmp/runlog.log',

    # caching is switched off (presumably so every run recomputes from the images - confirm against OutputConfig)
    cache_segmentation = False,
    cache_dataframe = False,

    clear_dirs = True,
    process_count = 1,
)

# define a frame set from a set of png images
frames = get_all_frames_in_folder(input_folder)
if not frames:
    # fail with a clear message instead of an opaque unpacking error from zip(*[])
    raise FileNotFoundError(f"No .png frames found in '{input_folder}'")
image_filenames, image_filepaths = zip(*frames)

# extract time from the file name (not the full path, so folder names cannot affect the parsing):
# the second-to-last '_'-separated token is expected to be one prefix character followed by
# integer seconds, e.g. a token like 't83' gives 83
times_in_seconds = [int(name.split('_')[-2][1:]) for name in image_filenames]

# Bundle the frame paths and their times into the frame set that is passed to segmentation
frame_set = FrameSet.PngFrameSet(
    label='ecoli',
    metadata={'experiment': 'BE142', 'pad_name': 'A12', 'row': 'A', 'col': 12}, # information that is not used in analysis, but placed as columns into final dataframe
    file_paths=image_filepaths, 
    times_in_seconds=times_in_seconds, 
    frame_labels=None,
)

print(frame_set)

PngFrameSet with 15 frames


In [3]:
# Run segmentation on the frame set; the two returned dataframes (colony and cell) are inspected in the cells below
df_colony, df_cell = MicrocolonySegmenter.segment_frame_set(frame_set=frame_set, output_config=output_config)

Segmenting frame 0 as species None with params {'label': 'ecoli', 'sigma': 1.5, 'threshold': -1000, 'split_factor': 0.3, 'min_mask_size_filter': 60}
Segmenting frame 1 as species None with params {'label': 'ecoli', 'sigma': 1.5, 'threshold': -1000, 'split_factor': 0.3, 'min_mask_size_filter': 60}
Segmenting frame 2 as species None with params {'label': 'ecoli', 'sigma': 1.5, 'threshold': -1000, 'split_factor': 0.3, 'min_mask_size_filter': 60}
Segmenting frame 3 as species None with params {'label': 'ecoli', 'sigma': 1.5, 'threshold': -1000, 'split_factor': 0.3, 'min_mask_size_filter': 60}
Segmenting frame 4 as species None with params {'label': 'ecoli', 'sigma': 1.5, 'threshold': -1000, 'split_factor': 0.3, 'min_mask_size_filter': 60}
Segmenting frame 5 as species None with params {'label': 'ecoli', 'sigma': 1.5, 'threshold': -1000, 'split_factor': 0.3, 'min_mask_size_filter': 60}
Segmenting frame 6 as species None with params {'label': 'ecoli', 'sigma': 1.5, 'threshold': -1000, 'split

In [4]:
# Colony statistics - each row corresponds to a single micro-colony at one time-point, including the single-cell statistics for the cells in that colony.
df_colony.head()

Unnamed: 0,time,time_hours,time_days,round_time,round_time_hours,round_time_days,id,label,labelid,time_index,...,cmax_at_eop,present_at_end,present_at_start,Colony lysis,colony_area_idxmax,cmax_colony_area,cmax_cell_count,cmax_time,labelID,present_for_duration
0,83,0.023056,0.000961,0,0.0,0.0,0,ecoli,ecoli_0,0,...,True,False,True,False,98.0,2053.151744,2053.151744,2.75,ecoli0,2.75
1,83,0.023056,0.000961,0,0.0,0.0,8,ecoli,ecoli_8,0,...,True,False,True,False,73.0,360.134899,362.16636,2.75,ecoli1.0,1.75
2,83,0.023056,0.000961,0,0.0,0.0,4,ecoli,ecoli_4,0,...,True,False,True,False,88.0,2135.173367,2136.024401,2.75,ecoli4,2.25
3,83,0.023056,0.000961,0,0.0,0.0,9,ecoli,ecoli_9,0,...,True,False,True,False,75.0,540.673121,542.858625,2.75,ecoli1.1,1.75
4,83,0.023056,0.000961,0,0.0,0.0,2,ecoli,ecoli_2,0,...,True,False,True,False,100.0,1178.170112,1178.170112,2.75,ecoli2,2.75


In [5]:
# Cell statistics - each row corresponds to a single cell at one time-point.
# The frame set metadata shows up here as the experiment / pad_name / row / col columns.
df_cell.head()

Unnamed: 0,ss_area,ss_aspect_ratio,ss_aspect_ratio_max_width,ss_distance_from_colony_edge,ss_centroid,ss_dist_sums,ss_length,ss_max_width,ss_width,time,ss_count,experiment,pad_name,row,col,label,time_index
0,12.725888,5.831229,6.195671,0.950352,"(1964, 575)",4.788885,8.777409,1.4167,1.505242,83,14,BE142,A12,A,12,ecoli,0
1,4.29632,2.001591,2.30921,1.12,"(1827, 3067)",1.665421,3.103578,1.344,1.550555,83,14,BE142,A12,A,12,ecoli,0
2,4.152064,2.073513,2.305739,1.008,"(1832, 3037)",1.551339,3.098913,1.344,1.494523,83,14,BE142,A12,A,12,ecoli,0
3,4.446848,2.155506,2.427583,1.12,"(1821, 2998)",1.682738,3.262671,1.344,1.513645,83,14,BE142,A12,A,12,ecoli,0
4,3.782016,2.137981,2.230662,1.232,"(1825, 2967)",1.325844,2.99801,1.344,1.402262,83,14,BE142,A12,A,12,ecoli,0
