In [39]:
import pandas as pd
import numpy as np
import yaml
from pathlib import Path

In [65]:
fpath = "/scratch/indikar_root/indikar1/cstansbu/pf-test/tracks/B2.tracks.full.csv"
cpath = "../config/config.yaml"

config = yaml.safe_load(Path(cpath).read_text())

tracks = pd.read_csv(fpath)
tracks.head()

Unnamed: 0,ID,t,x,y,z,parent,root,state,generation,dummy,...,c2_logFoldChange_prepared,c0_pval_prepared,c1_pval_prepared,c2_pval_prepared,c0_logFoldChange_processed,c1_logFoldChange_processed,c2_logFoldChange_processed,c0_pval_processed,c1_pval_processed,c2_pval_processed
0,2753,0,1471.903226,399.935484,0.0,2753,2753,5,0,False,...,-0.106723,7.53028e-16,1.681218e-12,0.861937,3.166707,1.832299,0.389393,2.076077e-16,9.694417e-15,0.001182
1,2919,0,1480.14433,441.546392,0.0,2919,2919,5,0,False,...,-1.989953,7.765547e-15,3.881317e-12,1.0,2.240715,1.29091,-1.493838,1.388693e-16,5.594907e-18,1.0
2,1273,0,488.0375,993.7125,0.0,1273,1273,5,0,False,...,-0.985273,9.768357e-18,1.826176e-15,1.0,3.146824,1.664239,-0.489158,2.101264e-18,4.955055999999999e-19,1.0
3,2156,0,1007.610169,182.661017,0.0,2156,2156,5,0,False,...,0.308312,1.663925e-16,1.505812e-10,0.023106,3.242326,1.878838,0.804427,4.927378000000001e-17,2.251523e-12,3.2e-05
4,2606,0,1381.858974,81.897436,0.0,2606,2606,5,0,False,...,-1.416315,2.386693e-12,4.59488e-10,1.0,2.716708,1.206603,-0.9202,3.79349e-13,1.711148e-15,1.0


In [71]:
def getAhat(tracks, config):
    """A function to compute the corrected aplha
    value using Bonferroni correction """
    alpha = config['phase']['alpha']

    # number of tests performed is one per cell per time for each channel
    t = tracks[['t', 'label']].drop_duplicates()
    ntests = len(t) * 3
    alphaHat = 1 - ((1-alpha) ** (1/ntests))
    print(f"{alpha=} {ntests=} {alphaHat}")
    return alphaHat


def getPhases(tracks, config):
    """A function to threhold pvals """
    a = getAhat(tracks, config)

    testCols = [
        'c0_pval_prepared',
        'c0_pval_processed',
        'c1_pval_prepared',
        'c1_pval_processed',
        'c2_pval_prepared',
        'c2_pval_processed',
    ]

    # threshold all based on corrected alpha
    flags = np.where(tracks[testCols] <= a, 1, 0)
    flags = pd.DataFrame(flags, columns=testCols)
    flags = flags.fillna(0)

    newCols = ['c0', 'c1', 'c2']

    # cell must be significant in both 
    # the processed and raw images
    for c in newCols:
        pairs = [x for x in flags.columns if c in x]
        flags[c] = flags[pairs].min(axis=1)
        # print(c, pairs)

    
    flags = flags[newCols].astype(str)
    flags['val'] = flags.agg(''.join, axis=1)

    # map phases
    phaseMap = config['phase']['phase_map']
    phaseMap_r = dict((v,k) for k,v in phaseMap.items())
    
    tracks['phase'] = flags['val'].map(phaseMap_r)
    tracks['phase'] = tracks['phase'].fillna('NA')
    return tracks.
    
    




phase = getPhases(tracks, config)
phase.head()
    

alpha=0.05 ntests=844374 6.074712488590706e-08


0    G1
1    G1
2    G1
3    G1
4    G1
Name: val, dtype: object

In [73]:
def getPhaseData(tracks, phase):
    """a function to structure the output """ 
    tracks['phase'] = phase
    print(list(tracks.columns))



getPhaseData(tracks, phase)

['ID', 't', 'x', 'y', 'z', 'parent', 'root', 'state', 'generation', 'dummy', 'area_x', 'area_y', 'area_convex', 'area_filled', 'axis_major_length', 'axis_minor_length', 'eccentricity', 'c0_intensity_max', 'c1_intensity_max', 'c2_intensity_max', 'c0_intensity_min', 'c1_intensity_min', 'c2_intensity_min', 'c0_intensity_mean', 'c1_intensity_mean', 'c2_intensity_mean', 'label', 'orientation', 'perimeter', 'solidity', 'circularity', 'c0_logFoldChange_prepared', 'c1_logFoldChange_prepared', 'c2_logFoldChange_prepared', 'c0_pval_prepared', 'c1_pval_prepared', 'c2_pval_prepared', 'c0_logFoldChange_processed', 'c1_logFoldChange_processed', 'c2_logFoldChange_processed', 'c0_pval_processed', 'c1_pval_processed', 'c2_pval_processed', 'phase']


In [74]:
list(tracks.columns)

['ID',
 't',
 'x',
 'y',
 'z',
 'parent',
 'root',
 'state',
 'generation',
 'dummy',
 'area_x',
 'area_y',
 'area_convex',
 'area_filled',
 'axis_major_length',
 'axis_minor_length',
 'eccentricity',
 'c0_intensity_max',
 'c1_intensity_max',
 'c2_intensity_max',
 'c0_intensity_min',
 'c1_intensity_min',
 'c2_intensity_min',
 'c0_intensity_mean',
 'c1_intensity_mean',
 'c2_intensity_mean',
 'label',
 'orientation',
 'perimeter',
 'solidity',
 'circularity',
 'c0_logFoldChange_prepared',
 'c1_logFoldChange_prepared',
 'c2_logFoldChange_prepared',
 'c0_pval_prepared',
 'c1_pval_prepared',
 'c2_pval_prepared',
 'c0_logFoldChange_processed',
 'c1_logFoldChange_processed',
 'c2_logFoldChange_processed',
 'c0_pval_processed',
 'c1_pval_processed',
 'c2_pval_processed',
 'phase']

In [None]:
keepCols = [

]

In [67]:
config['phase']['phase_map']
pfMap = dict((v,k) for k,v in config['phase']['phase_map'].items())
pfMap

{'110': 'G1', '111': 'S', '011': 'G2'}

In [64]:
def getPhaseMap(config):
    channels = config['channels']
    print(channels)

    pfMap = config['phase']['phase_map']

    for k, v in pfMap.items():
        print(k, v)


getPhaseMap(config)
    
    

{'nucleus': 1, 'red': 0, 'green': 2}
red G1
green G2
both S


In [36]:
testCols = [
    'c1_pval_prepared',
    'c0_pval_prepared',
    'c2_pval_prepared',
    'c0_pval_processed',
    'c1_pval_processed',
    'c2_pval_processed',
]

In [38]:
t = tracks[['t', 'label']].drop_duplicates()
len(t)

281458

['c0_pval_prepared',
 'c1_pval_prepared',
 'c2_pval_prepared',
 'c0_pval_processed',
 'c1_pval_processed',
 'c2_pval_processed']

In [None]:
def computeAlphaHat(tracks, alpha)

alpha = 0.001
ntests = len(scores) * 3 # number of times times channels
alphaHat = 1 - ((1-alpha) ** (1/ntests))
print(f"{alpha=} {ntests=} {alphaHat}")