Process tracks tables and align time points to Reference Time.

This notebook processes, step by step, the track tables obtained from CellProfiler. The functions used to process the tracks are implemented in the module trackprocessor.py.

# Importation

In [7]:
# import necessary packages
import os
import glob
from pathlib import Path
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from tqdm import tqdm

import networkx as nx
from networkx.drawing.nx_agraph import to_agraph 

from skimage import measure

from functools import partial
import pathos.pools as pp

import trackprocessor

# Environment configuration

In [8]:
%matplotlib notebook # maybe not useful

In [9]:
# Turn matplotlib's interactive plotting mode off.
plt.ioff()

In [10]:
# Silence the warning that matplotlib raises when many figures are open at once.
plt.rcParams['figure.max_open_warning'] = 0

# Parameters setting

## Input folder

In [12]:
# Root folder of the input tables; it is searched recursively for .csv files.
# For Linux and macOS, use "/" separators, e.g. "/home/Output/"
# For Windows, use "\" separators, e.g. "D:\_OUTPUT\"
# base_input_path = r"D:\_OUTPUT\_RUNS\RUN_siNuMa\CP_Output"
base_input_path = r"/media/mphan/Data/Perso/Phan/LOB/NucleoTeloTrack/2020-09_RUN3_CP4.0.3"

## Output folder

In [20]:
# Root folder for the files written by this notebook.
# Windows example (no trailing "\": a raw string literal cannot end with a backslash):
# base_output_path = r"D:\_OUTPUT\_RUNS\RUN_siNuMa\Analysis_Output"
base_output_path = r"/media/mphan/Data/Perso/Phan/LOB/NucleoTeloTrack/Output"

## Reading csv files from input folder

In [26]:
# Recursively collect every .csv table below the input folder.
# Sorting makes the file order reproducible (raw glob order depends on the filesystem).
glob_path = Path(base_input_path)
input_files = sorted(str(csv_path) for csv_path in glob_path.glob("**/*.csv"))
print("nb. of files:",len(input_files))

# List each file with its index, shown relative to the input folder.
# relpath is used instead of splitting on base_input_path, which breaks as soon as
# the configured path and the globbed path differ in separators or normalisation.
for i, input_file in enumerate(input_files):
    print(i,":",os.path.relpath(input_file,base_input_path))

nb. of files: 84
0 : /siCTRL_20190422_Pos02_cl7/Image.csv
1 : /siCTRL_20190422_Pos02_cl7/Nuclei.csv
2 : /siCTRL_20190422_Pos02_cl7/Telomere.csv
3 : /siCTRL_20190422_Pos03_cl7/Image.csv
4 : /siCTRL_20190422_Pos03_cl7/Nuclei.csv
5 : /siCTRL_20190422_Pos03_cl7/Telomere.csv
6 : /siCTRL_20190422_Pos04_cl7/Image.csv
7 : /siCTRL_20190422_Pos04_cl7/Nuclei.csv
8 : /siCTRL_20190422_Pos04_cl7/Telomere.csv
9 : /siCTRL_20190422_Pos05_cl7/Image.csv
10 : /siCTRL_20190422_Pos05_cl7/Nuclei.csv
11 : /siCTRL_20190422_Pos05_cl7/Telomere.csv
12 : /siCTRL_20190524_Pos01_cl16/Image.csv
13 : /siCTRL_20190524_Pos01_cl16/Nuclei.csv
14 : /siCTRL_20190524_Pos01_cl16/Telomere.csv
15 : /siCTRL_20190524_Pos02_cl16/Image.csv
16 : /siCTRL_20190524_Pos02_cl16/Nuclei.csv
17 : /siCTRL_20190524_Pos02_cl16/Telomere.csv
18 : /siCTRL_20190524_Pos03_cl16/Image.csv
19 : /siCTRL_20190524_Pos03_cl16/Nuclei.csv
20 : /siCTRL_20190524_Pos03_cl16/Telomere.csv
21 : /siCTRL_20190524_Pos05_cl16/Image.csv
22 : /siCTRL_20190524_Pos05_cl1

In [24]:
# Create the "Movies" subfolder; exist_ok avoids the check-then-create race
# and makes the cell safe to re-run.
base_output_spath = os.path.join(base_output_path,"Movies")
os.makedirs(base_output_spath,exist_ok=True)

# Recursively collect every .csv table below the input folder.
# Sorting makes the file order reproducible (raw glob order depends on the filesystem).
input_files = sorted(glob.glob(os.path.join(base_input_path,"**/*.csv"),recursive=True))
print("nb. of files:",len(input_files))

# List each file with its index, shown relative to the input folder.
for i, input_file in enumerate(input_files):
    print(i,":",os.path.relpath(input_file,base_input_path))

nb. of files: 84
0 : /siCTRL_20190422_Pos02_cl7/Image.csv
1 : /siCTRL_20190422_Pos02_cl7/Nuclei.csv
2 : /siCTRL_20190422_Pos02_cl7/Telomere.csv
3 : /siCTRL_20190422_Pos03_cl7/Image.csv
4 : /siCTRL_20190422_Pos03_cl7/Nuclei.csv
5 : /siCTRL_20190422_Pos03_cl7/Telomere.csv
6 : /siCTRL_20190422_Pos04_cl7/Image.csv
7 : /siCTRL_20190422_Pos04_cl7/Nuclei.csv
8 : /siCTRL_20190422_Pos04_cl7/Telomere.csv
9 : /siCTRL_20190422_Pos05_cl7/Image.csv
10 : /siCTRL_20190422_Pos05_cl7/Nuclei.csv
11 : /siCTRL_20190422_Pos05_cl7/Telomere.csv
12 : /siCTRL_20190524_Pos01_cl16/Image.csv
13 : /siCTRL_20190524_Pos01_cl16/Nuclei.csv
14 : /siCTRL_20190524_Pos01_cl16/Telomere.csv
15 : /siCTRL_20190524_Pos02_cl16/Image.csv
16 : /siCTRL_20190524_Pos02_cl16/Nuclei.csv
17 : /siCTRL_20190524_Pos02_cl16/Telomere.csv
18 : /siCTRL_20190524_Pos03_cl16/Image.csv
19 : /siCTRL_20190524_Pos03_cl16/Nuclei.csv
20 : /siCTRL_20190524_Pos03_cl16/Telomere.csv
21 : /siCTRL_20190524_Pos05_cl16/Image.csv
22 : /siCTRL_20190524_Pos05_cl1

## State transitions

In [None]:
# State labels, listed in the order in which they are numbered
state_labels = ["interphase","prophase","prometaphase","metaphase","anaphase"]

# Numbers are assigned automatically: 1 for the first label, then increasing by one
numbers = np.arange(1, len(state_labels) + 1)

# Lookup table: state label -> state number
state_numbers = pd.Series(numbers, index=state_labels)
print(state_numbers);

## Transition rule graph

In [None]:
# Initialize a directed multigraph that keeps the insertion order of its nodes.
# networkx 3.0 removed OrderedMultiDiGraph; from then on the plain MultiDiGraph
# preserves insertion order, so fall back to it when the Ordered class is missing.
G = getattr(nx, "OrderedMultiDiGraph", nx.MultiDiGraph)()

# Add one node per state
G.add_nodes_from(state_labels);

# Add self transitions (every state may stay in itself)
G.add_edges_from(list(zip(state_labels,state_labels)));

# Define the allowed transitions between different states here
G.add_edges_from([("interphase",item) for item in ["prophase","prometaphase"]]);
G.add_edges_from([("prophase",item) for item in ["interphase","prometaphase","metaphase"]]);
G.add_edges_from([("prometaphase",item) for item in ["interphase","prophase","metaphase"]]);
G.add_edges_from([("metaphase",item) for item in ["prometaphase","anaphase"]]);
G.add_edges_from([("anaphase",item) for item in ["interphase"]]);

In [None]:
# Draw the transition graph on a circular layout and save it as a PNG.
fig = plt.figure(figsize=(10,7))
ax = fig.add_subplot(111)
pos = nx.circular_layout(G)
nx.draw_networkx(G, pos=pos, ax=ax, width=1, arrowsize=20,
                 min_source_margin=50, min_target_margin=50,
                 node_shape="s", node_color="none")
# The layout has to be tightened before savefig, otherwise the adjustment
# never reaches the file written to disk.
fig.tight_layout()
fig.savefig(os.path.join(base_output_path,"transistion_rule.png"));

## Excluded border conditions

In [None]:
# Border-exclusion condition.
# "criterion" can be "bbox" or "circle".
# With "circle" a percentage can be set as well, e.g. "percentage":0.8 means
# take 80% of the circle area.
exclude_borderobjs_conds = dict(criterion="bbox")

## Alignment conditions

1st order: if the track goes through metaphase, use the last metaphase as time 0  
2nd order: if the track starts with anaphase, assign time point 1 to it  
**NOTE:** this alignment can be changed, e.g. to use reversine or prophase as the reference time

In [None]:
# Rules for aligning time points.
# Each list holds one entry per rule, metaphase first and anaphase second:
# which state to look for, which occurrence to use, and the shift to apply.
align_conds = {
    "state_numbers": [state_numbers[label] for label in ("metaphase", "anaphase")],
    "align_modes": ["last", "first"],
    "shifts": [0, 1],
}

## Features

In [None]:
# Features that will be added after alignment, grouped by column prefix.
features = (
    # identifiers
    ["ImageNumber", "ObjectNumber", "TrackObjects_Label"]
    # AreaShape_* columns
    + ["AreaShape_Area",
       "AreaShape_Perimeter",
       "AreaShape_FormFactor"]
    # Intensity_* columns
    + ["Intensity_IntegratedIntensity_H2B_Smooth",
       "Intensity_IntegratedIntensity_TRF1_Smooth",
       "Intensity_MeanIntensity_H2B_Smooth",
       "Intensity_MeanIntensity_TRF1_Smooth"]
    # Mean_Telomere_* columns
    + ["Mean_Telomere_AreaShape_Area",
       "Mean_Telomere_AreaShape_Perimeter",
       "Mean_Telomere_Distance_Minimum_Nuclei",
       "Mean_Telomere_Distance_Centroid_Nuclei",
       "Mean_Telomere_Intensity_IntegratedIntensity_TRF1_Smooth"]
    # Children_* columns
    + ["Children_Telomere_Count"]
)

# Test a specific case

This section is used to test a single file before running all files. Otherwise, go to the next section.

# Parallel computing

In [None]:
# Switch dill's global 'recurse' setting on before the worker pool is created.
# NOTE(review): presumably this controls how dill pickles the globals referenced
# by the functions shipped to the workers -- confirm against the dill documentation.
import dill
dill.settings['recurse'] = True

In [None]:
# Upper bound on the number of worker processes
maxcpu = 18

# One worker per input file, but never more than maxcpu
pool = pp.ProcessPool(nodes=min(maxcpu, len(input_files)))
print("nb. of allocated cpus:",pool.ncpus)

In [None]:
def compact_func(f,base_input_path,base_output_spath,
                 features,transistion_graph,
                 nrows_limit,min_nb_timepoints,
                 exclude_borderobjs_conds,align_conds):
    """Process one input csv file and place its results under the output folder.

    The output location mirrors the position of ``f`` inside ``base_input_path``
    with the file extension removed, e.g.
    ``<base_input_path>/sub/Nuclei.csv`` -> ``<base_output_spath>/sub/Nuclei``.

    Parameters
    ----------
    f : str
        Path of the csv file to process; expected to lie below ``base_input_path``.
    base_input_path : str
        Root folder of the input files.
    base_output_spath : str
        Root folder below which the per-file output path is built.
    features, transistion_graph, nrows_limit, min_nb_timepoints,
    exclude_borderobjs_conds, align_conds
        Forwarded unchanged to ``trackprocessor.process_data``; their meaning is
        defined by that function.

    Returns
    -------
    None
    """
    # Configure the output path.
    # relpath never starts with a separator. Splitting ``f`` on base_input_path
    # leaves a leading separator, which makes os.path.join drop
    # base_output_spath and place the result outside the output folder.
    relative_stem = os.path.splitext(os.path.relpath(f, base_input_path))[0]
    output_path = os.path.join(base_output_spath, relative_stem)

    # The processing functions live in the imported trackprocessor module.
    trackprocessor.process_data(f,output_path,features,transistion_graph,
                                nrows_limit,min_nb_timepoints,
                                exclude_borderobjs_conds,align_conds)

In [None]:
# Freeze every argument of compact_func except the input file, so that the
# resulting callable can be mapped over the list of files.
partial_func = partial(
    compact_func,
    **{
        "base_input_path": base_input_path,
        "base_output_spath": base_output_spath,
        "features": features,
        "transistion_graph": G,
        "nrows_limit": 30,
        "min_nb_timepoints": 5,
        "exclude_borderobjs_conds": exclude_borderobjs_conds,
        "align_conds": align_conds,
    },
)

In [None]:
%%time
# Apply partial_func to every input file through the process pool.
# compact_func has no return statement, so `result` holds one None per file.
result = pool.map(partial_func,input_files)