# Processing of 201707 Data

In [1]:
import glob
import json
import re

import numpy as np
import pandas as pd
import scipy.ndimage
import skimage.exposure
import skimage.filters
import skimage.io
import skimage.measure
import skimage.morphology
import tqdm as tq
import xmltodict

import mscl_utils as mscl
# Apply the plotting style provided by the local `mscl_utils` module and keep
# whatever it returns (presumably a color palette -- verify in `mscl_utils`).
colors = mscl.set_plotting_style()
# NOTE(review): `bokeh` is never imported in this cell, so this call raises the
# NameError recorded in the cell output. Import `bokeh.io` above or drop the call.
bokeh.io.output_notebook()

NameError: name 'bokeh' is not defined

The purpose of this notebook is to reprocess all data collected from March through July of 2017 for the MscL project. I rewrote several functions for proper background subtraction and flat-field illumination correction that will be important for the final analysis.

## Functions to be used 

Below are all of the functions to be used in this processing. I am trying to keep these up to date with the functions found in the `mscl_utils` file.

In [3]:
# For background subtraction
def compute_mean_bg(phase_image, fluo_image, method='isodata', obj_dark=True):
    """
    Computes the mean background fluorescence of the inverted segmentation mask.

    The phase image is background subtracted with a wide Gaussian blur,
    thresholded automatically, and the resulting object mask is dilated.
    The mean of all fluorescence pixels *outside* the dilated mask is returned.

    Parameters
    ----------
    phase_image : 2d-array, int or float.
        The phase contrast image used for generating the inverse segmentation
        mask. If any pixel value is greater than 1.0, the image is min-max
        renormalized to the range [0, 1] before processing.
    fluo_image : 2d-array, int
        The fluorescence image used to calculate the mean pixel value. If
        flatfield correction is necessary, it should be done before sending
        the image to this function. Must have the same shape as `phase_image`.
    method : string, ['otsu', 'yen', 'li', 'isodata'], default 'isodata'
        Automated thresholding method to use.
    obj_dark : bool, default True
        If True, objects are taken to be **darker** than the automatically
        generated threshold value. If False, objects are deemed to be brighter.

    Returns
    -------
    mean_bg : float
        The mean background fluorescence of the image.

    Raises
    ------
    KeyError
        If `method` is not one of the supported thresholding methods.
    """

    # Ensure that the image is renormalized.
    if (phase_image > 1.0).any():
        phase_image = (phase_image - phase_image.min()) /\
                      (phase_image.max() - phase_image.min())

    # Perform the background subtraction.
    im_blur = skimage.filters.gaussian(phase_image, sigma=50)
    im_sub = phase_image - im_blur

    # Determine the method to use.
    methods = {'otsu': skimage.filters.threshold_otsu,
               'yen': skimage.filters.threshold_yen,
               'li': skimage.filters.threshold_li,
               'isodata': skimage.filters.threshold_isodata}

    # Determine the threshold value.
    thresh_val = methods[method](im_sub)

    # Generate the segmentation mask. Truthiness is used (rather than
    # `is True`) so that numpy booleans select the intended branch.
    if obj_dark:
        im_thresh = im_sub < thresh_val
    else:
        im_thresh = im_sub > thresh_val

    # Dilate the mask so the halo around each object is excluded as well. The
    # structuring element is passed positionally because its keyword was
    # renamed from `selem` to `footprint` in newer scikit-image releases.
    selem = skimage.morphology.disk(20)
    im_dil = skimage.morphology.dilation(im_thresh, selem)

    # Mask onto the fluorescence image and compute the mean background value.
    mean_bg = np.mean(fluo_image[im_dil < 1])
    return mean_bg

## For flat-field illumination
def median_flatfield(image_stack, medfilter=True, selem='default',
                    return_profile=False):
    """
    Computes an illumination profile from the median of all images
    and corrects each individual image.

    Parameters
    ----------
    image_stack : scikit-image ImageCollection or sequence of 2d-arrays
        Series of images to correct. The illumination profile is the
        pixel-wise median across all images in this collection.
    medfilter : bool, default True
        If True, each individual image is prefiltered using a median
        filter with the given `selem` before the profile is computed.
    selem : string or 2d-array, default 'default'
        Footprint to use for the median filtering. The string 'default'
        selects a 3x3 pixel square.
    return_profile : bool, default False
        If True, the illumination profile image is returned as well.

    Returns
    -------
    ff_ims : list of 2d-array
        Flatfield corrected images, each computed as
        ``image / profile * mean(profile)``.
    med_im : 2d-array
        Illumination profile produced from the median of all images in the
        image stack. Only returned (as ``[ff_ims, med_im]``) if
        `return_profile` is True.
    """

    # Determine if the prefiltering should be performed.
    if medfilter:
        # Compare by value and only for strings: `is 'default'` relied on
        # string interning, and an array footprint must not be compared
        # element-wise against a string.
        if isinstance(selem, str) and selem == 'default':
            # Same footprint as skimage.morphology.square(3).
            selem = np.ones((3, 3), dtype=np.uint8)
        image_stack = [scipy.ndimage.median_filter(im, footprint=selem)
                       for im in image_stack]

    # Compute the pixel-wise median image (the illumination profile).
    med_im = np.median(image_stack, axis=0)

    # Perform the correction; the profile mean is loop invariant.
    profile_mean = np.mean(med_im)
    ff_ims = [(im / med_im) * profile_mean for im in image_stack]

    if return_profile:
        return [ff_ims, med_im]
    return ff_ims
    
## For segmentation
def contour_seg(image, level=0.3, selem='default', perim_bounds=(5, 1E3),
                ip_dist=0.160, ecc_bounds=(0.7, 1), area_bounds=(1, 50),
                return_conts=False, min_int=0.2):
    """
    Identifies contours around dark objects in a phase contrast image.

    Parameters
    ----------
    image : 2d-array
        Phase contrast image of interest. It is min-max normalized internally.
    level : float
        Level at which to draw contours on the black top-hat filtered image.
        Default value is 0.3.
    selem : 2d-array or string
        Structuring element to use for the black top-hat filtering procedure.
        The string 'default' selects `skimage.morphology.disk(20)`.
    perim_bounds : length 2 tuple
        Lower and upper perimeter bounds of approved contours. This should be
        in units of microns. The default values are 5 and 1000 microns for
        the lower and upper bound, respectively.
    ip_dist : float
        Interpixel distance of the image in units of microns per pixel. The
        default value is 0.160 microns per pixel.
    ecc_bounds : tuple of float
        Lower and upper bounds for object eccentricity. Default values are
        0.7 and 1.0.
    area_bounds : tuple of float
        Lower and upper bounds for selected object areas. These should be
        given in units of square microns. Default values are 1 and 50.
    return_conts : bool
        If True, the coordinates of the individual contours will be
        returned. Default value is False.
    min_int : float
        Objects are only kept if their mean intensity in the normalized phase
        image is below this value. Default value is 0.2.

    Returns
    -------
    conts : list of 2d-array
        All contours found at `level` (before any filtering), each as an
        array of (row, column) coordinates. This is only returned if
        `return_conts` is True, in which case the return value is the tuple
        ``(conts, im_lab)``.
    im_lab : 2d-array, int
        Two dimensional image where each individual approved object is
        labeled.
    """

    # Resolve the default structuring element. Only strings are compared
    # against 'default' so an array `selem` is never compared element-wise.
    if isinstance(selem, str) and selem == 'default':
        selem = skimage.morphology.disk(20)

    # Normalize the image.
    image = (image - image.min()) / (image.max() - image.min())

    # Blur and background subtract the image.
    im_blur = skimage.filters.gaussian(image, sigma=5)
    im_sub = image - im_blur

    # Apply the black top-hat filter.
    im_filt = skimage.morphology.black_tophat(im_sub, selem)

    # Find the contours.
    conts = skimage.measure.find_contours(im_filt, level)

    # Make an empty image for adding the approved objects.
    objs = np.zeros_like(image)

    # Draw every contour whose physical perimeter is within bounds.
    for c in conts:
        # Sum of the distances between consecutive contour points.
        seg_lengths = np.sqrt(np.sum(np.diff(c, axis=0)**2, axis=1))
        perim = np.sum(seg_lengths) * ip_dist

        if (perim > perim_bounds[0]) and (perim < perim_bounds[1]):
            # Round the contour to pixel indices and paint it on the image.
            c_int = np.round(c).astype(int)
            objs[c_int[:, 0], c_int[:, 1]] = 1.0

    # Fill and label the objects.
    objs_fill = scipy.ndimage.binary_fill_holes(objs)
    objs_fill = skimage.morphology.remove_small_objects(objs_fill)
    im_lab = skimage.measure.label(objs_fill)

    # Keep only objects passing the area, eccentricity and intensity filters.
    approved_obj = np.zeros(im_lab.shape, dtype=bool)
    props = skimage.measure.regionprops(im_lab, image)
    for prop in props:
        area = prop.area * ip_dist**2
        ecc = prop.eccentricity
        if ((area_bounds[0] < area < area_bounds[1]) and
                (ecc_bounds[0] < ecc < ecc_bounds[1]) and
                (prop.mean_intensity < min_int)):
            approved_obj |= (im_lab == prop.label)
    im_lab = skimage.measure.label(approved_obj)

    if return_conts:
        return conts, im_lab
    return im_lab
    

def marker_parse(fname, type_dict={1: False, 2: True}):
    """
    Parses the XML file produced from the CellCounter ImageJ plugin and
    packages the marker positions and type into a Pandas DataFrame.

    Parameters
    ----------
    fname : str
        Path to the XML file of interest.
    type_dict : dict
        Dictionary mapping the integer marker type to survival. Default is
        assigning type 1 as death (False) and type 2 as survival (True).
        Marker types that are not keys of this dictionary are ignored.

    Returns
    -------
    df : Pandas DataFrame
        Data frame with the columns `survival`, `x_pos` and `y_pos`
        (integer marker positions), one row per marker.

    Raises
    ------
    ValueError
        Raised by `pd.concat` if the file contains no usable markers.
    """
    with open(fname, 'r') as f:
        positions = xmltodict.parse(f.read())

    # Extract only the marker data.
    markers = positions['CellCounter_Marker_File']['Marker_Data']['Marker_Type']

    # xmltodict returns a dict instead of a list when an element occurs only
    # once, so normalize to a list before iterating.
    if isinstance(markers, dict):
        markers = [markers]

    # Build one data frame per marker type.
    dfs = []
    for marker_type in markers:
        # Types without any recorded marker have no 'Marker' entry.
        type_marks = marker_type.get('Marker')
        if type_marks is None:
            continue

        # A single marker is also returned as a dict; without this wrap
        # pandas cannot build the frame and the marker would be lost.
        if isinstance(type_marks, dict):
            type_marks = [type_marks]

        # Skip types that are missing, malformed, or not in `type_dict`.
        try:
            survival = type_dict[int(marker_type['Type'])]
        except (KeyError, TypeError, ValueError):
            continue

        _df = pd.DataFrame(type_marks)
        # Insert a column keeping track of the type.
        _df.insert(0, 'survival', survival)
        dfs.append(_df)

    # Concatenate the data frames ignoring indexing.
    df = pd.concat(dfs, axis=0, ignore_index=True)

    # Clean up the data frame and return.
    df = df.drop(columns='MarkerZ')
    df.columns = ['survival', 'x_pos', 'y_pos']
    df['x_pos'] = df['x_pos'].astype(int)
    df['y_pos'] = df['y_pos'].astype(int)
    return df


def link_markers(markers, seg_mask, fluo_image, ip_dist=0.160,
                 return_coords=False, inplace=False, max_dist=5,
                 position_labels=('x_pos', 'y_pos')):
    """
    Maps markers from one image to centroids of segmented objects from
    another. This assumes a marker belongs to the object with the minimum
    marker-centroid distance.

    Parameters
    ----------
    markers : Pandas DataFrame or str
        DataFrame containing the x and y positions of the markers. If a
        string is given instead, no linking is performed and every segmented
        object is returned with `survival` set to False.
    seg_mask : 2d-array, int
        Labeled segmentation mask. The coordinates of the object centroids
        will be calculated from this image.
    fluo_image : 2d-array, float or int
        The fluorescence image used to extract intensities. The mean
        intensity of each object is returned per square physical distance
        as given by `ip_dist`.
    ip_dist : float
        Interpixel distance for the image. Default value is 0.160 microns
        per pixel.
    return_coords : bool
        If True, the paired coordinates will be returned as well. Each entry
        has the form ((mark_x, mark_y), (cent_x, cent_y)). Default value is
        False.
    inplace : bool
        If True, the markers DataFrame is updated in place with the paired
        mask label, centroid, intensity and area. No distance column is
        added and no distance filter is applied in this mode.
    max_dist : float
        Maximum marker-centroid distance to keep, in microns. Default value
        is 5 microns.
    position_labels : tuple of str
        Labels of position markers in the markers DataFrame in the order
        of x position and y position. Default is `x_pos` and `y_pos`.

    Returns
    -------
    df : Pandas DataFrame
        Copy of `markers` with the columns `mask_label`, `label_cent_x`,
        `label_cent_y`, `intensity`, `area` and `dist` (in pixels) added,
        restricted to markers within `max_dist` of their object. If the mask
        contains no objects only `dist` is added (set to 1E6), so the result
        is empty. Not returned if `inplace` is True.
    coords : list of tuple
        A list of tuples containing the marker x,y positions and the
        coordinates of the associated segmentation centroid, for every
        marker (not filtered by distance). This is only returned if
        `return_coords` is True.
    """
    # Compute the properties from the segmentation mask.
    props = skimage.measure.regionprops(seg_mask, fluo_image)
    area = [prop.area * ip_dist**2 for prop in props]
    intensity = [prop.mean_intensity / ip_dist**2 for prop in props]
    labels = [prop.label for prop in props]
    centroids = [prop.centroid for prop in props]

    # Without a marker table, report every object as a non-survivor.
    if isinstance(markers, str):
        n_obj = len(labels)
        columns = ['survival', 'x_pos', 'y_pos', 'mask_label',
                   'label_cent_x', 'label_cent_y', 'intensity', 'area',
                   'dist']
        return pd.DataFrame({'survival': [False] * n_obj,
                             'x_pos': [0] * n_obj,
                             'y_pos': [0] * n_obj,
                             'mask_label': labels,
                             'label_cent_x': [0] * n_obj,
                             'label_cent_y': [0] * n_obj,
                             'intensity': intensity,
                             'area': area,
                             'dist': [0] * n_obj},
                            columns=columns)

    # Either annotate the caller's frame or a private deep copy.
    target = markers if inplace else markers.copy(deep=True)
    coords = []

    if len(centroids) == 0:
        # Nothing to link to: flag every marker as unreachably far away.
        if not inplace:
            target['dist'] = 1E6
    else:
        x = np.asarray(markers[position_labels[0]], dtype=float)
        y = np.asarray(markers[position_labels[1]], dtype=float)

        # regionprops centroids are (row, column), i.e. (y, x).
        cents = np.asarray(centroids, dtype=float)
        cent_x, cent_y = cents[:, 1], cents[:, 0]

        # Marker-by-centroid distance matrix and closest centroid per marker.
        dist = np.sqrt((x[:, None] - cent_x[None, :])**2 +
                       (y[:, None] - cent_y[None, :])**2)
        min_ind = np.argmin(dist, axis=1)

        coords = [((x[i], y[i]), (cent_x[k], cent_y[k]))
                  for i, k in enumerate(min_ind)]

        # Whole-column, positional assignment replaces the removed
        # `DataFrame.set_value` and does not depend on the index labels.
        target['mask_label'] = np.asarray(labels)[min_ind]
        target['label_cent_x'] = cent_x[min_ind]
        target['label_cent_y'] = cent_y[min_ind]
        target['intensity'] = np.asarray(intensity)[min_ind]
        target['area'] = np.asarray(area)[min_ind]
        if not inplace:
            target['dist'] = dist[np.arange(len(min_ind)), min_ind]

    if inplace:
        return coords if return_coords else None

    # Apply the distance filter (max_dist is in microns, dist in pixels).
    df = target[target['dist'] <= (max_dist / ip_dist)]
    if return_coords:
        return df, coords
    return df


def scrape_metadata(fname, channels=('Brightfield', 'GFP'), return_date=True):
    """
    Takes an image metadata file and returns the date and exposure times.

    Parameters
    ----------
    fname : str
        Path of the (JSON formatted) metadata file to parse.
    channels : tuple of str or str
        The channels from which to scrape the exposure time. A single channel
        name can be given as a string. Channel names are matched without
        regard to case. Default is ('Brightfield', 'GFP').
    return_date : bool
        If True, the date of the acquisition will also be returned.

    Returns
    -------
    exposure : dict, float, or None
        If a single channel is requested and `return_date` is False, the
        exposure time of that channel (None if the channel is not found).
        Otherwise a dictionary with one ``'<channel>_exp_ms'`` entry per
        channel found in the file and, if `return_date` is True, a ``'date'``
        entry holding the `Summary` date with the dashes removed. If several
        entries share a channel, the last one in the file is used.

    Raises
    ------
    TypeError
        If `channels` is neither a tuple nor a string.
    """

    # Open the metadata file.
    with open(fname, 'r') as f:
        metadata = json.load(f)

    # Normalize a single channel name to a one-element tuple.
    if isinstance(channels, str):
        channels = (channels,)
    elif not isinstance(channels, tuple):
        raise TypeError('desired channels must be a tuple or a string.')

    # Loop through each desired channel and scrape the exposure.
    exposure = {}
    for chan_name in channels:
        for entry in metadata.values():
            try:
                # Only record the exposure of entries whose channel matches.
                if entry['Channel'].lower() == chan_name.lower():
                    exposure[chan_name + '_exp_ms'] = entry['Exposure-ms']
            except (KeyError, TypeError, AttributeError):
                # Entries such as 'Summary' carry no channel information.
                continue

    if return_date:
        # Get the date from the Summary field.
        date = metadata['Summary']['Date'].split('-')
        exposure['date'] = ''.join(date)
        return exposure

    if len(channels) == 1:
        return exposure.get(channels[0] + '_exp_ms')
    return exposure


def save_seg(fname, image, mask, fill_contours=True, ip_dist=0.160,
             bar_length=10, title=None, colormap='hls'):
    """
    Writes an overlay of a segmentation mask on its source image to disk so
    the segmentation can be checked by eye.

    Parameters
    ----------
    fname : str
        Destination path of the saved figure.
    image : 2d-array, float
        The original image on which the segmentation outlines are drawn.
    mask : 2d-array, bool
        Segmentation mask of the original image. Any pixel greater than
        zero is treated as belonging to an object.
    fill_contours : bool
        If True, the outlined objects are also filled with a translucent
        color.
    ip_dist : float
        Interpixel distance for the image in microns per pixel, used to
        size the scalebar. Default value is 0.160 microns per pixel.
    bar_length : int
        The length of the desired scalebar in units of microns.
    title : str, optional
        Title for the image.
    colormap : str
        Name of the seaborn palette used to color the objects. Default is
        the high-contrast 'hls'.

    Return
    ------
    fig : Matplotlib Figure object
        The figure that was saved. It has already been closed.
    """
    # NOTE(review): `plt` and `sns` are not imported in the notebook's import
    # cell; they must be in scope for this function to run.

    # Work on private copies so the caller's arrays are left untouched.
    raw = np.copy(image)
    labels = np.copy(mask)

    # Normalize the image and burn a scalebar into its upper-left corner.
    bar_px = int(bar_length / ip_dist)
    image = (raw - raw.min()) /\
            (raw.max() - raw.min())
    image[10:20, 10:10 + bar_px] = 1.0

    # Reduce the mask to a boolean image.
    if type(mask) != bool:
        mask = labels > 0

    # Trace the outline of every object in the mask.
    outlines = skimage.measure.find_contours(mask, 0)

    with sns.axes_style('white'):
        fig = plt.figure()
        plt.imshow(image, cmap=plt.cm.Greys_r)

        # One palette color per outline.
        palette = sns.color_palette(colormap, n_colors=len(outlines))
        for shade, outline in zip(palette, outlines):
            rows, cols = outline[:, 0], outline[:, 1]
            plt.plot(cols, rows, color=shade, lw=0.75)
            if fill_contours is True:
                plt.fill(cols, rows, color=shade, alpha=0.5)

        # Hide the axis ticks.
        plt.xticks([])
        plt.yticks([])

        # Optional title.
        if title is not None:
            plt.title(title)

        # Tighten the layout, write the file, and release the figure.
        plt.tight_layout()
        plt.savefig(fname, bbox_inches='tight')
        plt.close()
    return fig


def show_connections(fname, image, data, title=None, bar_length=10,
                     ip_dist=0.16):
    """
    Saves the original phase contrast image with the segmented
    centroids and the manually recorded markers linked by lines.

    Parameters
    ----------
    fname : str
        Filename to save the image with shown connections between
        segmented object centroids and the markers.
    image : 2d-array
        Original phase contrast image over which the points will
        be drawn. It is min-max normalized if its maximum exceeds 1.
    data : Pandas DataFrame
        DataFrame containing the marker positions (`x_pos`, `y_pos`), the
        centroid positions (`label_cent_x`, `label_cent_y`) and a boolean
        `survival` column.
    title : str
        Title to be applied to the image. If not specified, none will
        be included.
    bar_length : int
        Length of the scalebar in units of microns. Default value
        is 10.
    ip_dist : float
        Interpixel distance of the image. This should be in units of
        microns per pixel. Default value is 0.16 microns per pixel.

    Returns
    -------
    fig : Matplotlib Figure
        Figure of the plot.
    """
    # NOTE(review): `plt` and `sns` are not imported in the notebook's import
    # cell; they must be in scope for this function to run.

    # Add the scale bar to a copy of the (normalized) image.
    if image.max() > 1:
        image = (image - image.min()) / (image.max() - image.min())
    num_pix = int(bar_length / ip_dist)
    image_copy = np.copy(image)
    image_copy[10:20, 10:10 + num_pix] = 1.0

    # Define the colors for survivors and corpses.
    colors = {False: '#D56C55', True: '#08AADE'}

    # Group the DataFrame by survival. The top-level `pd.groupby` function
    # no longer exists in current pandas, so the method is used instead.
    grouped = data.groupby('survival')

    # Show the image
    with sns.axes_style('white'):
        fig = plt.figure()
        plt.imshow(image_copy, cmap=plt.cm.Greys_r)

        # Empty plots that only serve to populate the legend.
        plt.plot([], [], '-o', ms=3, lw=1, color=colors[True],
                 label='survivor')
        plt.plot([], [], '-o', ms=3, lw=1, color=colors[False], label='goner')
        plt.legend(loc='lower left')

        for g, d in grouped:
            for i in range(len(d)):
                # Parse the positions
                m_x = d.iloc[i]['x_pos']
                m_y = d.iloc[i]['y_pos']
                c_x = d.iloc[i]['label_cent_x']
                c_y = d.iloc[i]['label_cent_y']
                # Plot the connections: filled dot for the marker, open dot
                # for the centroid, joined by a line.
                plt.plot((m_x, c_x), (m_y, c_y), '-', ms=3, lw=1,
                         color=colors[g])
                plt.plot(m_x, m_y, 'o', ms=3, lw=1, color=colors[g])
                plt.plot(c_x, c_y, 'o', ms=3, markerfacecolor='w',
                         markeredgecolor=colors[g], markeredgewidth=1)

        # Format the axes
        plt.xticks([])
        plt.yticks([])

        # Add a title if necessary.
        if title is not None:
            plt.title(title, fontsize=12)
        plt.savefig(fname, bbox_inches='tight')
    return fig

## Reprocessing the data

Life is pretty miserable with the current data structure. I'm going to reorganize all of the data I have into a tidier format with standardized names so that I don't have to include all of these separate special cases. The entire directory `/data/reorganized_data/` is set with two subdirectories, `shock_data` and `calibration_data`. The `shock_data` has all of the actual survival measurements (images + markers) with the following folder pattern:

* `YYYYMMDD_RBS_PRE/POST_PUMPRATE_SHOCKRATE_SETNUMBER`

There is a mix of `.ome.tiff` files and normal `.tiff` files, so I'll need to construct the loop to take care of that. The intensity calibration images have the following pattern:

* `YYYYMMDD_RBS_PRE/POST_SETNUMBER`


There is one special case I will have to deal with. For the shock data, there is one experiment `20170316_sd6_pre_100ulmin_1.00hz_0000` that only has three marker files. The rest of the fields had no survivors, so HJ didn't spend his time marking them. Unfortunately, I will have to deal with this one as a special case.



For total clarity, the pseudocode for the complete processing should be as follows. 

1. For each `folder` in `shock_data`
    1. Determine if sample is composed of `.ome.tiff` or `.tiff`
    2. Load all fluorescence images and flatten them.
    3. For each `image` in `set_images`:
        1. Segment via contouring in phase. 
        2. Compute mean background fluorescence.
        3. Compute background subtraction and compute areal intensity.
        4. Load `marker` files and match survival to intensity
        5. Compute and save a segmentation mask and connection figure.
        6. Store information in `survival_data` pandas DataFrame.
       

With the pseudocode out of the way, let's actually do it.  

##  Executing the processing.

In [160]:
# Define the data directory and the interpixel distance (microns per pixel).
# `ip_dist` is defined here so this cell does not depend on a later cell
# having been run first.
data_dir = 'data/reorganized_data/shock_data/'
ip_dist = 0.16


def _natural_key(path):
    """Sort key that orders embedded numbers numerically (Pos2 before Pos10)."""
    return [int(tok) if tok.isdigit() else tok
            for tok in re.split(r'(\d+)', path)]


# `glob` returns files in arbitrary order. Images, markers and metadata are
# paired by list index below, so every listing is sorted the same way.
shock_folders = sorted(glob.glob(data_dir + '2017*'), key=_natural_key)

dfs = []
for folder in shock_folders:
    # Determine the file identifiers from the folder name, which follows
    # YYYYMMDD_RBS_PRE/POST_PUMPRATE_SHOCKRATE_SETNUMBER.
    split_files = folder.split('/')[-1].split('_')
    DATE, RBS, _PREPOST, PUMPRATE, SHOCKRATE, _NUM = split_files

    # Determine if the subfolder is composed of `Pos` folders or `.ome.tif`.
    tif_files = sorted(glob.glob(folder + '/*.ome.tif*'), key=_natural_key)
    if len(tif_files) > 0:
        # Each .ome.tif holds the phase image first and the fluorescence second.
        _ims = skimage.io.ImageCollection(tif_files)
        phase_ims = [im[0] for im in _ims]
        fluo_ims = [im[1] for im in _ims]
        metadata = sorted(glob.glob(folder + '/*.txt'), key=_natural_key)
    else:
        bf_files = sorted(glob.glob(folder + '/Pos*/*Brightfield*.tif'),
                          key=_natural_key)
        gfp_files = sorted(glob.glob(folder + '/Pos*/*GFP*.tif'),
                           key=_natural_key)
        phase_ims = skimage.io.ImageCollection(bf_files)
        fluo_ims = skimage.io.ImageCollection(gfp_files)
        metadata = sorted(glob.glob(folder + '/Pos*/*.txt'), key=_natural_key)

    # Load the marker files
    markers = sorted(glob.glob(folder + '/*.xml'), key=_natural_key)

    # Generate the average illumination profile and flatten the images.
    ff_ims = median_flatfield(fluo_ims, medfilter=True)

    # Loop through each phase file and segment.
    print('Processing {0} from {1} with flow rate {2}.'.format(RBS, DATE, SHOCKRATE))
    for j, ph in enumerate(phase_ims):
        seg = contour_seg(ph)

        # Parse the marker file and link the markers to segmented cells.
        _markers = marker_parse(markers[j])
        _df = link_markers(_markers, seg, ff_ims[j])

        # Scrape the metadata for the exposure.
        exposure = scrape_metadata(metadata[j], return_date=False)

        # Compute the mean background intensity per square micron.
        mean_bg = compute_mean_bg(ph, ff_ims[j])
        mean_bg = mean_bg / ip_dist**2

        # Annotate the data frame with the experimental identifiers.
        _df['date'] = DATE
        _df['rbs'] = RBS
        _df['pump_rate'] = PUMPRATE.split('u')[0]
        _df['flow_rate'] = SHOCKRATE.split('hz')[0]
        _df['mean_bg'] = mean_bg
        _df['exposure_ms'] = exposure['GFP_exp_ms']

        # Append the data frame to the global list.
        dfs.append(_df)

# Combine everything and drop the bookkeeping columns used for linking.
df = pd.concat(dfs, axis=0)
df = df.drop(columns=['x_pos', 'y_pos', 'mask_label', 'label_cent_x',
                      'label_cent_y', 'dist'])



Processing 16sd0 from 20170324 with flow rate 0.02hz.
Processing sd1 from 20170421 with flow rate 00.71hz.


  warn("Only one label was provided to `remove_small_objects`. "


Processing sd1 from 20170424 with flow rate 1.92hz.
Processing 10sd1 from 20170426 with flow rate 0.005hz.
Processing 10sd1 from 20170428 with flow rate 01.88hz.
Processing sd2 from 20170502 with flow rate 00.02hz.
Processing sd2 from 20170503 with flow rate 00.2hz.
Processing sd2 from 20170509 with flow rate 02.20hz.
Processing sd2 from 20170517 with flow rate 02.17hz.
Processing 12sd2 from 20170518 with flow rate 02.00hz.
Processing 12sd2 from 20170519 with flow rate 00.50hz.
Processing sd1 from 20170525 with flow rate 00.01hz.
Processing sd4 from 20170714 with flow rate 00.018hz.


In [161]:
# Deal with the special case of sd6, where only some positions have a marker
# file. Positions without one are treated as containing no survivors.
data_dir = 'data/reorganized_data/special_case/20170316_sd6_pre_100ulmin_1.00hz_0000/'
ip_dist = 0.16


def _natural_key(path):
    """Sort key that orders embedded numbers numerically (Pos2 before Pos10)."""
    return [int(tok) if tok.isdigit() else tok
            for tok in re.split(r'(\d+)', path)]


# Images and metadata are paired by list index, so sort both listings.
files = sorted(glob.glob(data_dir + '/*ome.tif'), key=_natural_key)
_ims = skimage.io.ImageCollection(files)
phase_ims = [z[0] for z in _ims]
fluo_ims = [z[1] for z in _ims]
meta_data = sorted(glob.glob(data_dir + '/*metadata.txt'), key=_natural_key)

# Grab the markers and index them by the position name in their file name.
markers = glob.glob(data_dir + '/*.xml')
marker_by_pos = {m.split('/')[-1].split('.')[0]: m for m in markers}

# Determine the file identifiers from the folder name.
DATE, RBS, _PREPOST, PUMPRATE, SHOCKRATE, _NUM = data_dir.split('/')[-2].split('_')

# Flatten the fluorescence images.
ff_ims = median_flatfield(fluo_ims)

special_dfs = []
for i, ph in enumerate(phase_ims):
    seg = contour_seg(ph)

    # Compute the mean background intensity per square micron.
    mean_bg = compute_mean_bg(ph, ff_ims[i])
    mean_bg = mean_bg / ip_dist**2

    # Scrape the metadata for the exposure. This must read this folder's
    # `meta_data`, not the `metadata` list left over from the previous cell.
    exposure = scrape_metadata(meta_data[i], return_date=False)

    # Each image contributes exactly one data frame. Looping over every
    # marker file and appending per marker file would count the cells of an
    # image once for each marker file present.
    ph_pos = files[i].split('/')[-1].split('_')[-1].split('.')[0]
    if ph_pos in marker_by_pos:
        # Parse the marker file and link the markers to segmented cells.
        _markers = marker_parse(marker_by_pos[ph_pos])
        _df = link_markers(_markers, seg, ff_ims[i])
        _df = _df.drop(columns=['x_pos', 'y_pos', 'mask_label',
                                'label_cent_x', 'label_cent_y', 'dist'])
    else:
        # No marker file: every segmented cell is recorded as a non-survivor.
        props = skimage.measure.regionprops(seg, ff_ims[i])
        area = [prop.area * ip_dist**2 for prop in props]
        intensity = [prop.mean_intensity / ip_dist**2 for prop in props]
        _df = pd.DataFrame({'intensity': intensity, 'area': area},
                           columns=['intensity', 'area'])
        _df['survival'] = False

    # Annotate the data frame with the experimental identifiers.
    _df['date'] = DATE
    _df['rbs'] = RBS
    _df['pump_rate'] = PUMPRATE.split('u')[0]
    _df['flow_rate'] = SHOCKRATE.split('hz')[0]
    _df['mean_bg'] = mean_bg
    _df['exposure_ms'] = exposure['GFP_exp_ms']

    special_dfs.append(_df)

special_df = pd.concat(special_dfs, axis=0)



In [165]:
# Stack the bulk measurements and the sd6 special case into a single table.
data = pd.concat([df, special_df], axis=0, ignore_index=True)

# Remove the background from every cell intensity.
bg_subtracted = data['intensity'].sub(data['mean_bg'])
data['intensity'] = bg_subtracted

# Scale all intensities to the longest exposure time in the data set.
max_exp = data['exposure_ms'].unique().max()
exposure_scale = max_exp / data['exposure_ms']
data['rescaled_intensity'] = data['intensity'] * exposure_scale

# Write the table to disk.
data.to_csv('data/20170814_reprocessed_data.csv')

In [6]:
# Reload the saved survival data so the cells below can run without
# repeating the image processing.
data = pd.read_csv('data/20170814_reprocessed_data.csv')

# Longest exposure in the survival data; used as the common intensity scale.
max_exp = data['exposure_ms'].unique().max()

## Reprocessing the intensity calibration data.  

I should also reprocess the data used to calibrate the intensity and correct any errors in the measurement of some strains (such as `sd1` and `10sd1`).

In [27]:
# Load the files from the intensity calibration samples.
data_dir = 'data/reorganized_data/calibration_data/'
ip_dist = 0.16


def _natural_key(path):
    """Sort key that orders embedded numbers numerically (Pos2 before Pos10)."""
    return [int(tok) if tok.isdigit() else tok
            for tok in re.split(r'(\d+)', path)]


# `glob` returns files in arbitrary order. Images and metadata are paired by
# list index below, so every listing is sorted the same way.
cal_folders = sorted(glob.glob(data_dir + '2017*'), key=_natural_key)

dfs = []
for folder in cal_folders:
    # Determine the file identifiers from the folder name, which follows
    # YYYYMMDD_RBS_PRE/POST_SETNUMBER.
    split_files = folder.split('/')[-1].split('_')
    DATE, RBS, _PREPOST, _NUM = split_files

    # Determine if the subfolder is composed of `Pos` folders or `.ome.tif`.
    tif_files = sorted(glob.glob(folder + '/*.ome.tif*'), key=_natural_key)
    if len(tif_files) > 0:
        # Each .ome.tif holds the phase image first and the fluorescence second.
        _ims = skimage.io.ImageCollection(tif_files)
        phase_ims = [z[0] for z in _ims]
        fluo_ims = [z[1] for z in _ims]
        metadata = sorted(glob.glob(folder + '/*.txt'), key=_natural_key)
    else:
        bf_files = sorted(glob.glob(folder + '/Pos*/*Brightfield*.tif'),
                          key=_natural_key)
        gfp_files = sorted(glob.glob(folder + '/Pos*/*GFP*.tif'),
                           key=_natural_key)
        phase_ims = skimage.io.ImageCollection(bf_files)
        fluo_ims = skimage.io.ImageCollection(gfp_files)
        metadata = sorted(glob.glob(folder + '/Pos*/*.txt'), key=_natural_key)

    # Generate the average illumination profile and flatten the images.
    ff_ims = median_flatfield(fluo_ims, medfilter=True)

    # Segment and extract the important information.
    for j, ph in enumerate(phase_ims):
        seg = contour_seg(ph)
        seg, num_obj = skimage.measure.label(seg > 0, return_num=True)

        # Images without any approved object contribute nothing.
        if num_obj > 0:
            mean_bg = compute_mean_bg(ph, ff_ims[j])
            mean_bg = mean_bg / ip_dist**2
            exposure = scrape_metadata(metadata[j], return_date=False)

            # Compute the important properties.
            props = skimage.measure.regionprops(seg, ff_ims[j])
            intensity = [prop.mean_intensity / ip_dist**2 for prop in props]
            area = [prop.area * ip_dist**2 for prop in props]

            # Generate dataframe
            _df = pd.DataFrame({'intensity': intensity, 'area': area},
                               columns=['intensity', 'area'])
            _df['date'] = DATE
            _df['rbs'] = RBS
            _df['mean_bg'] = mean_bg
            _df['exposure_ms'] = exposure['GFP_exp_ms']
            dfs.append(_df)


# Prune and save the data frame. `max_exp` is the longest exposure time of
# the survival data and comes from the cell that loads the reprocessed data,
# so both data sets are rescaled to the same exposure.
cal_data = pd.concat(dfs, axis=0, ignore_index=True)
cal_data['intensity'] = cal_data['intensity'] - cal_data['mean_bg']
cal_data['rescaled_intensity'] = cal_data['intensity'] * (max_exp / cal_data['exposure_ms'])
cal_data.to_csv('data/20170814_calibration_data.csv', index=False)

  warn("Only one label was provided to `remove_small_objects`. "


## And with that... 

I can analyze it in another notebook.