In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import time, os, sys, random, re, itertools
import PIL.Image
import pims
import skimage.io as imgio
import skimage.filters as skf
import matplotlib.pyplot as plt
import matplotlib as mpl
import scipy.ndimage as ndi
from scipy.ndimage import distance_transform_edt
from scipy import stats
import colorcet as cc
import skimage.filters
from IPython.display import clear_output
import xml.etree.ElementTree as ET
# Raise the default figure resolution to 300 dpi for every figure drawn in this notebook.
mpl.rcParams['figure.dpi'] = 300
# Set Pillow's pixel-count cap to None (i.e. no cap) — presumably so very large
# microscopy images / masks can be opened without a size error; only do this
# for trusted input files.
PIL.Image.MAX_IMAGE_PIXELS = None

In [2]:
# --- Experiment layout -------------------------------------------------------
# Paths below are built by plain string concatenation, so a non-empty eDir
# must end with a path separator.
eDir = ""
imgDir = "raw/"
maskDir = "mask/"
ssTabFile = eDir + "slideset-table.xml"

# --- Slide-set table ---------------------------------------------------------
# The 'Img' column holds one <e> element per image file.
ssTab = ET.parse(ssTabFile)
imgCol = ssTab.find("./col[@name='Img']")
if imgCol is None:
    # Fail early with a readable message instead of an AttributeError on None.
    raise ValueError("No 'Img' column found in " + ssTabFile)
imageFiles = [e.text for e in imgCol.findall("e")]
imageNames = [os.path.basename(f) for f in imageFiles]

# Optional exclusion masks, paired with the images by position.
# NOTE(review): this column is looked up one level deeper ('./SlideSet/col')
# than the 'Img' column ('./col') — confirm that matches the table layout.
maskCol = ssTab.find("./SlideSet/col[@name='Mask image']")
if maskCol is not None:
    excludeMaskFiles = [e.text for e in maskCol.findall("e")]
    excludeMaskDict = dict(zip(imageNames, excludeMaskFiles))
else:
    # No mask column: map every image to the placeholder "x". wrangleMe fails
    # to read that path and substitutes an all-zero exclusion mask.
    # dict.fromkeys (unlike zip_longest against "x") adds no spurious
    # "x" entry when the image list is empty.
    excludeMaskDict = dict.fromkeys(imageNames, "x")

# Destination of the combined per-label table.
csvBigFile = eDir + "pla-table.csv"

In [3]:
def wrangleMe(imageName, minIntensity=4000, k5Ratio=0.75, k6Ratio=1.25):
    """Summarise one image into a table with one row per mask label.

    Loads the raw image, its label mask, the PLA point mask and (if readable)
    the exclusion mask, flattens them pixel-wise into a single frame, and
    aggregates by label value.

    Parameters
    ----------
    imageName : str
        Base name of the image file; must contain ``batchName-<digit>_<digits>``.
        It is also the key into the module-level ``excludeMaskDict``.
    minIntensity : number, default 4000
        Median-intensity cut-off used when assigning the ``Krt`` class.
    k5Ratio : float, default 0.75
        A label is "K5" when ``K6_med < k5Ratio * K5_med`` and
        ``K5_med > minIntensity``.
    k6Ratio : float, default 1.25
        A label is "K6" when ``K6_med > k6Ratio * K5_med`` and
        ``K6_med > minIntensity``.

    Returns
    -------
    pandas.DataFrame
        Columns: ``idx`` (label value), ``size`` (pixel count), ``K5_med``,
        ``K6_med``, ``PLA_med`` (per-label medians of image channels 1, 2, 3),
        ``PLA_points`` (sum of the point mask), ``exclude`` (max of the
        exclusion mask), ``img``, ``group`` and ``Krt``
        ("K5", "K6", "wt" or "?").

    Raises
    ------
    ValueError
        If ``imageName`` does not contain the expected batch pattern.
    """
    clear_output(wait=True)
    print("Now working on: " + imageName)

    # Set up file names
    shortMatch = re.search(r'batchName-\d_\d+', imageName)
    if shortMatch is None:
        raise ValueError(
            "Image name does not contain 'batchName-<digit>_<digits>': " + imageName
        )
    imgShortName = shortMatch.group(0)
    imgFile = eDir + imgDir + imageName
    maskFile = eDir + maskDir + imageName + "_masks-400.png"
    excludeFile = eDir + excludeMaskDict[imageName]
    plaFile = eDir + "plaDetect/" + imgShortName + "_pointMask.png"

    # Load and pre-process the image (may need to adjust this depending on image format).
    # The reader is closed as soon as the first frame has been fetched, so
    # readers do not pile up when this runs over a whole batch.
    with pims.Bioformats(imgFile) as I:
        I.bundle_axes = 'yxc'
        img = I[0]
    masks = imgio.imread(maskFile)
    plaPointMask = imgio.imread(plaFile)
    try:
        excludeMask = imgio.imread(excludeFile)
    except Exception:
        # Best effort: an unreadable/missing exclusion mask means "exclude nothing".
        # (Exception rather than a bare except, so Ctrl-C still interrupts.)
        excludeMask = np.zeros_like(masks)

    # Make the data frame for this image: one row per pixel, row-major order,
    # so all arrays must flatten to the same length.
    df = pd.DataFrame({
        "idx": np.ravel(masks, order='C'),
        "K5": np.ravel(img[:, :, 1], order='C'),
        "K6": np.ravel(img[:, :, 2], order='C'),
        "PLA": np.ravel(img[:, :, 3], order='C'),
        "PLApointMask": np.ravel(plaPointMask, order='C'),
        "exclude": np.ravel(excludeMask, order='C')
    })

    # Collapse to one row per label value. String aggregator names select
    # pandas' built-in implementations and avoid the numpy-callable warning.
    df = df.groupby("idx", as_index=False).agg(
        size=("PLApointMask", "size"),
        K5_med=("K5", "median"),
        K6_med=("K6", "median"),
        PLA_med=("PLA", "median"),
        PLA_points=("PLApointMask", "sum"),
        exclude=("exclude", "max")
    )
    df["img"] = imgShortName
    df['group'] = df['img'].str.extract(r'((?<=batchName-)\d)', expand=False)

    # Classify each label from its median K5/K6 intensities; "?" marks labels
    # that satisfy none of the three rules below.
    k5 = df["K5_med"]
    k6 = df["K6_med"]
    df["Krt"] = "?"
    df.loc[(k6 < k5Ratio * k5) & (k5 > minIntensity), "Krt"] = "K5"
    df.loc[(k6 > k6Ratio * k5) & (k6 > minIntensity), "Krt"] = "K6"
    df.loc[(k5 <= minIntensity) & (k6 <= minIntensity), "Krt"] = "wt"

    return df


In [None]:
# Build one per-label table for each image, in slide-set table order.
dfs = list(map(wrangleMe, imageNames))

# Stack the per-image tables (each keeps its own row index) and write
# the combined table to disk.
df = pd.concat(dfs)
df.to_csv(csvBigFile)