In [1]:
import pandas as pd
import numpy as np
import os
from tqdm.notebook import tqdm
import shutil
from importlib import reload
import skimage.io
import matplotlib 
import matplotlib.pyplot as plt 
import matplotlib.patches as patches

# local code
import sys
sys.path.append("../source/")

In [2]:
# Factor that POSITION_X / POSITION_Y are multiplied by before they are used
# as pixel coordinates into the image stacks.
# NOTE(review): presumably a physical-units-to-pixels calibration — confirm units.
SCALING = 0.7310
# NOTE(review): presumably the acquisition interval between consecutive frames;
# not referenced by the cells visible here — confirm.
MINUTES_PER_FRAME = 15

In [3]:
# DATA PATHS
ROOT_DIR = "/nfs/turbo/umms-indikar/shared/projects/live_cell_imaging/PIP_FUCCI_ANALYSIS/WH18/processed/"
# The annotation output lives inside ROOT_DIR (same value as before, now derived).
OUTPUT_DIR = f"{ROOT_DIR}randomized_annotations/"


# Load every per-movie spot table (*.csv) found one level below ROOT_DIR and
# stack them into a single dataframe `df`.

df_list = []

for subdir in os.listdir(ROOT_DIR):
    subdir_path = os.path.join(ROOT_DIR, subdir)

    # ROOT_DIR also receives plain files written by this notebook
    # (e.g. filtered_spots.csv); calling os.listdir on those would raise
    # NotADirectoryError when the notebook is re-run.
    if not os.path.isdir(subdir_path):
        continue

    for f in os.listdir(subdir_path):
        if not f.endswith(".csv"):
            continue

        f_name = f.split(".")[0]
        f_path = os.path.join(subdir_path, f)

        # Rows 1-3 are skipped; presumably secondary header rows of the
        # tracking export — TODO confirm against the raw files.
        tmp = pd.read_csv(f_path, skiprows=[1, 2, 3])

        # Drop spots that are not part of any track. The explicit copy keeps
        # the column assignments below from writing into a view of the
        # unfiltered frame (SettingWithCopyWarning).
        tmp = tmp[tmp['TRACK_ID'].notna()].copy()

        # Track ids are only unique within one file, so prefix them with the
        # file name to make them unique across the concatenated table.
        tmp['TRACK_ID'] = f"{f_name}_" + tmp['TRACK_ID'].astype(int).astype(str)
        tmp['FILE'] = f_name

        df_list.append(tmp)


df = pd.concat(df_list, ignore_index=True)

print(df.shape)
df.head()

(242115, 47)


Unnamed: 0,LABEL,ID,TRACK_ID,QUALITY,POSITION_X,POSITION_Y,POSITION_Z,POSITION_T,FRAME,RADIUS,...,ELLIPSE_Y0,ELLIPSE_MAJOR,ELLIPSE_MINOR,ELLIPSE_THETA,ELLIPSE_ASPECTRATIO,AREA,PERIMETER,CIRCULARITY,SOLIDITY,FILE
0,ID21504,21504,control_2_127,0.849856,1143.109214,1082.636587,0.0,0.0,0,11.130618,...,0.002268,17.311726,7.242815,-0.396035,2.390193,389.213998,83.191616,0.706707,0.965197,control_2
1,ID21505,21505,control_2_132,0.849028,240.169994,749.805032,0.0,0.0,0,8.645886,...,0.004526,11.584871,6.520448,-1.124865,1.776699,234.838254,63.609402,0.72935,0.954373,control_2
2,ID21506,21506,control_2_130,0.84866,804.217729,933.511026,0.0,0.0,0,7.659594,...,-0.012602,9.474305,6.260665,0.61543,1.513306,184.315283,54.110153,0.791068,0.960976,control_2
3,ID21507,21507,control_2_53,0.848561,1504.883577,986.452002,0.0,0.0,0,7.422642,...,0.022389,8.60392,6.474063,1.197707,1.328983,173.087956,51.374301,0.824109,0.958549,control_2
4,ID21508,21508,control_2_129,0.848051,822.3307,142.416281,0.0,0.0,0,7.009945,...,0.029969,7.706559,6.442779,-0.836012,1.196155,154.375744,53.06515,0.688922,0.942857,control_2


In [4]:
# Load every image stack (*.tif) found one level below ROOT_DIR into `files`,
# keyed by the file name up to its first "." (the same key stored in the FILE
# column of the spot table).

files = {}

for subdir in os.listdir(ROOT_DIR):
    subdir_path = os.path.join(ROOT_DIR, subdir)

    # Skip plain files sitting directly in ROOT_DIR (e.g. CSVs written by
    # this notebook); os.listdir on them would raise NotADirectoryError.
    if not os.path.isdir(subdir_path):
        continue

    for f in os.listdir(subdir_path):
        if not f.endswith(".tif"):
            continue

        f_name = f.split(".")[0]
        f_path = os.path.join(subdir_path, f)
        files[f_name] = skimage.io.imread(f_path)


files.keys()


dict_keys(['control_3', 'control_2', 'control_1', 'control_4', 'myod_4', 'myod_1', 'myod_3', 'myod_2'])

In [5]:
# a few quick exclusions based on size and track length

# NOTE(review): this threshold is compared (strictly) against the largest
# 0-based STEP of a track, so tracks need at least min_track_length + 2 spots
# to be kept — confirm this is the intended meaning of "length".
min_track_length = 10
min_mean_size = 2

print(f"{df.shape=}")
print(f"{df['TRACK_ID'].nunique()=}")

df = df.sort_values(by=['TRACK_ID', 'FRAME'])

# STEP: 0-based position of a spot within its track (in FRAME order).
# MAX_STEP: the last STEP of the track the spot belongs to.
df['STEP'] = df.groupby('TRACK_ID').cumcount()
df['MAX_STEP'] = df.groupby('TRACK_ID')['STEP'].transform('max')


####### perform all filtering on a temporary dataframe
# MAX_STEP already holds the per-track maximum, so the track-length filter is
# a plain row mask; no second groupby is needed.
tmp = df[df['MAX_STEP'] > min_track_length].copy()

print("\nfilter number of frames:")
print(f"\t{tmp.shape=}")
print(f"\t{tmp['TRACK_ID'].nunique()=}")

# keep only tracks whose mean RADIUS exceeds min_mean_size
filtered = tmp.groupby('TRACK_ID')['RADIUS'].mean().reset_index()
filtered = filtered[filtered['RADIUS'] > min_mean_size]
tmp = tmp[tmp['TRACK_ID'].isin(filtered['TRACK_ID'])]

print("\nfilter nucleus size:")
print(f"\t{tmp.shape=}")
print(f"\t{tmp['TRACK_ID'].nunique()=}")


# NOTE: no intensity-based (green channel) filter is applied in this cell;
# only the track-length and mean-radius exclusions above are performed.

df = tmp.copy()
df = df.sort_values(by=['TRACK_ID', 'FRAME', "FILE"])

# NOTE: this file is written directly into ROOT_DIR, the same directory the
# loading cells enumerate with os.listdir.
filename = f"{ROOT_DIR}filtered_spots.csv"

df.to_csv(filename, index=False)
print(f"{filename=}")

df.shape=(242115, 47)
df['TRACK_ID'].nunique()=5676

filter number of frames:
	tmp.shape=(233429, 49)
	tmp['TRACK_ID'].nunique()=4075

filter nucleus size:
	tmp.shape=(233429, 49)
	tmp['TRACK_ID'].nunique()=4075
filename='/nfs/turbo/umms-indikar/shared/projects/live_cell_imaging/PIP_FUCCI_ANALYSIS/WH18/processed/filtered_spots.csv'


In [6]:
def get_images(tiff, frame, xpos, ypos, xwin=50, ywin=50):
    """Crop a window around a position from one frame of a movie.

    Parameters
    ----------
    tiff : array of shape (time, y, x, channels)
        The movie; the docstring of the original states an RGB image with
        3 channels.
    frame : int
        Index along the first (time) axis.
    xpos, ypos : number
        Centre of the window in pixel coordinates (column, row).
    xwin, ywin : int
        Half-width / half-height of the window in pixels.

    Returns
    -------
    img : array
        ``tiff[frame, ymin:ymax, xmin:xmax, :]`` with the window clipped to
        the image bounds. Empty when the window lies entirely outside.
    (spot_x, spot_y) : tuple
        Location of the requested position inside ``img``. This equals
        ``(xwin, ywin)`` unless the window was clipped on the left/top edge.
    """
    _, n_rows, n_cols, _ = tiff.shape

    xmin = int(xpos - xwin)
    xmax = int(xpos + xwin)
    ymin = int(ypos - ywin)
    ymax = int(ypos + ywin)

    # When the window is clipped at the left/top edge the crop starts at 0,
    # so the spot moves towards the origin by the amount that was cut off.
    spot_x = xwin + min(xmin, 0)
    spot_y = ywin + min(ymin, 0)

    # Clamp every bound into the valid index range. The upper bounds must be
    # clamped at 0 as well: a negative stop would be interpreted by the slice
    # as "count from the end" and return an unrelated region instead of an
    # empty crop.
    xmin = max(xmin, 0)
    ymin = max(ymin, 0)
    xmax = min(max(xmax, 0), n_cols)
    ymax = min(max(ymax, 0), n_rows)

    img = tiff[frame, ymin:ymax, xmin:xmax, :]
    return img, (spot_x, spot_y)


In [7]:
# Fraction of all (filtered) tracks to draw for manual annotation.
PROPORTION_OF_TRACKS = 0.025

N_TRACKS = df['TRACK_ID'].nunique()
print(f"{N_TRACKS=}")

# Number of TRACKS (not individual spots) to sample; int() drops the
# fractional part.
SAMPLE_SIZE = int(PROPORTION_OF_TRACKS * N_TRACKS)
print(f"{SAMPLE_SIZE=}\n")

N_TRACKS=4075
SAMPLE_SIZE=101



In [8]:
break
# NOTE(review): `break` outside a loop is a SyntaxError, so this cell always
# fails. Presumably it was left here on purpose to stop "Run All" before the
# following cell, which deletes and regenerates OUTPUT_DIR — confirm, and
# consider replacing it with an explicit opt-in flag.

SyntaxError: 'break' outside loop (<ipython-input-8-6aaf1f276005>, line 1)

In [9]:
# store images for annotation
#
# For a random sample of tracks, write one PNG per spot into OUTPUT_DIR.
# Each PNG shows the crop around the spot three times: all channels,
# channel 1 with the 'Greens' colormap, and channel 0 with 'Reds'.

xwin = 100
ywin = 100
nucleus_size = 30  # radius (in crop pixels) of the red circle drawn at the spot
plt.rcParams['figure.dpi'] = 150
plt.rcParams['font.size'] = 6
plt.rcParams['figure.figsize'] = 4,4

# will overwrite the dir each time
if os.path.exists(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
os.makedirs(OUTPUT_DIR)

# np.random.seed(0)  # uncomment to draw the same sample on every run
# replace=False: np.random.choice samples WITH replacement by default, which
# can pick the same track several times and silently yield fewer than
# SAMPLE_SIZE distinct tracks. The size is capped so the draw stays valid
# when there are fewer tracks than requested.
unique_tracks = df['TRACK_ID'].unique()
tracks = np.random.choice(
    unique_tracks,
    min(SAMPLE_SIZE, len(unique_tracks)),
    replace=False,
)
tmp = df[df['TRACK_ID'].isin(tracks)]

print(f"{tmp.shape=}")

last_track = None
for _, row in tmp.iterrows():
    # spot positions are rescaled before being used as pixel coordinates
    xpos = row['POSITION_X'] * SCALING
    ypos = row['POSITION_Y'] * SCALING

    frame = int(row['FRAME'])
    track = row['TRACK_ID']

    # log each track once, when its first row is reached
    if track != last_track:
        print(f"{track=}")
    last_track = track

    tiff = files[row["FILE"]]

    # image under question
    img, spot = get_images(tiff, frame, xpos, ypos, xwin, ywin)

    # Skip empty crops BEFORE creating a figure, so skipped rows do not leave
    # an unclosed figure behind. `.size` also catches crops that have rows
    # but zero columns, which `len(img)` would miss.
    if img.size == 0:
        continue

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3)

    panels = (
        (ax1, img, None),               # all channels
        (ax2, img[:, :, 1], 'Greens'),  # channel 1
        (ax3, img[:, :, 0], 'Reds'),    # channel 0
    )
    for ax, panel, cmap in panels:
        ax.imshow(panel, cmap=cmap)
        ax.axis('off')
        # a patch can only belong to one axes, so build a new circle per panel
        ax.add_patch(plt.Circle(spot, nucleus_size, color='r', fill=False))

    # same placement as the previous plt.title call (title of the last axes)
    ax3.set_title(f"{track=} {frame=}")

    frame_fname = str(frame).zfill(4)

    filename = f"{track}_frame_{frame_fname}.png"
    save_path = f"{OUTPUT_DIR}{filename}"

    fig.savefig(save_path, bbox_inches='tight')
    plt.close(fig)

print("\nDone.")

tmp.shape=(4954, 49)
track='control_1_180'
track='control_1_195'
track='control_1_199'
track='control_1_232'
track='control_1_250'
track='control_1_263'
track='control_1_264'
track='control_1_357'
track='control_1_361'
track='control_1_455'
track='control_2_141'
track='control_2_143'
track='control_2_156'
track='control_2_208'
track='control_2_216'
track='control_2_269'
track='control_2_30'
track='control_2_340'
track='control_2_428'
track='control_2_508'
track='control_3_150'
track='control_3_187'
track='control_3_312'
track='control_3_341'
track='control_3_373'
track='control_3_5'
track='control_3_609'
track='control_3_713'
track='control_3_831'
track='control_3_835'
track='control_3_93'
track='control_3_948'
track='control_3_994'
track='control_4_1035'
track='control_4_140'
track='control_4_166'
track='control_4_180'
track='control_4_224'
track='control_4_24'
track='control_4_251'
track='control_4_388'
track='control_4_425'
track='control_4_52'
track='control_4_59'
track='control_4_