In [None]:
import glob
import os

import numpy as np
import pandas as pd
import scipy.ndimage

import skimage
import skimage.filters
import skimage.io
import skimage.measure
import skimage.morphology
import skimage.restoration
import skimage.segmentation

import matplotlib
import matplotlib.pyplot as plt

import bebi103

import bokeh
import bokeh.io
import bokeh.layouts
bokeh.io.output_notebook()

# Code adapted from http://justinbois.github.io/bootcamp/2016/lessons/l40_practice_image_processing_solution.html
# and from the BE/Bi 103 (2017) tutorials 9a and 9b, both by Justin Bois.

In [None]:
# Function definitions

def bebi103_thresh(im, selem, white_true=True, k_range=(0.5, 1.5)):
    """
    Segment an image by comparing each pixel with k times its local mean.

    The local mean is computed with a morphological mean filter over the
    neighborhood given by `selem`. The multiplier k is chosen automatically:
    the number of selected pixels is counted for 100 evenly spaced values of
    k in `k_range`, and k is taken two samples away from the position where
    the discrete second derivative of that count is most extreme.

    Parameters
    ----------
    im : 2d-array
        Image to threshold. It is passed unchanged to
        skimage.filters.rank.mean (NOTE: the skimage rank filters are
        documented for integer images such as uint8/uint16 -- confirm the
        dtype of the images you feed in).
    selem : 2d-array
        Structuring element defining the neighborhood of the mean filter.
    white_true : bool, default True
        If True, pixels brighter than k * local mean are marked True
        (bright objects). If False, pixels darker than k * local mean are
        marked True (dark objects).
    k_range : tuple of floats, default (0.5, 1.5)
        Lower and upper limit of the multipliers k that are scanned.

    Returns
    -------
    im_bw : 2d-array, bool
        Thresholded image.
    k_opt : float
        The multiplier that was used to compute `im_bw`.
    """
    # Determine comparison operator and the sign of the curvature to look for
    if white_true:
        compare = np.greater
        sign = -1
    else:
        compare = np.less
        sign = 1

    # Do the mean filter
    im_mean = skimage.filters.rank.mean(im, selem)

    # Compute number of pixels in binary image as a function of k
    k = np.linspace(k_range[0], k_range[1], 100)
    n_pix = np.array([compare(im, k_i * im_mean).sum() for k_i in k], dtype=float)

    # Compute rough second derivative
    dn_pix_dk2 = np.diff(np.diff(n_pix))

    # Find index of maximal second derivative
    max_ind = np.argmax(sign * dn_pix_dk2)

    # Step two samples away from that index. Clamp to the valid range so that a
    # maximum at the very start of the scan cannot produce a negative index,
    # which would silently wrap around to the opposite end of k.
    k_ind = min(max(max_ind - sign * 2, 0), len(k) - 1)
    k_opt = k[k_ind]

    # Threshold with this k
    im_bw = compare(im, k_opt * im_mean)

    return im_bw, k_opt

def aei_filter(im, area_bounds, ecc_bounds, int_bounds, int_im):
    """
    Filters objects in a labeled image by area, eccentricity, and mean intensity.

    An object is kept only if all three of its properties lie strictly
    inside the corresponding bounds. The surviving objects are then
    relabeled, so kept objects that touch each other receive a common label.

    Parameters
    ----------
    im : 2d-array, int
        Labeled segmentation mask to be filtered.
    area_bounds : tuple of ints
        Range of areas in which acceptable objects exist. This should be
        provided in units of square pixels.
    ecc_bounds : tuple of floats
        Range of eccentricities in which acceptable objects exist. This should be
        provided on the range of 0 to 1.0.
    int_bounds : tuple of ints
        Range of mean intensities in which acceptable objects exist.
    int_im : 2d-array
        Intensity image, same shape as `im`, from which the mean intensity
        of each object is measured.

    Returns
    -------
    im_filt : 2d-array, int
        The relabeled, filtered image.
    """
    # Extract the region props of the objects.
    props = skimage.measure.regionprops(im, intensity_image=int_im)

    # Collect the properties used for filtering, one entry per object.
    labels = np.array([prop.label for prop in props])
    areas = np.array([prop.area for prop in props])
    eccs = np.array([prop.eccentricity for prop in props])
    mean_int = np.array([prop.mean_intensity for prop in props])

    # Strict inequalities: objects exactly on a bound are rejected.
    keep = (
        (areas > area_bounds[0]) & (areas < area_bounds[1])
        & (eccs > ecc_bounds[0]) & (eccs < ecc_bounds[1])
        & (mean_int > int_bounds[0]) & (mean_int < int_bounds[1])
    )

    # Build the mask of approved pixels in a single pass over the image
    # instead of comparing the whole image against every label in turn.
    im_approved = np.isin(im, labels[keep])

    # Relabel the image.
    im_filt = skimage.measure.label(im_approved)

    return im_filt

In [None]:
# Define constants

# Experiment metadata; these values are written into every row of the output table.
DATE = 20181002
RUN = 'r1'
TEMP = 37  # in C
CARBON = 'glucose'
OPERATOR = 'o2'
BASENAME = str(DATE)+str(TEMP)+CARBON+OPERATOR

# Input / output locations
folder = './snaps/'
extension = '.tif'
save_dir = './snaps_output/'
save_df_name = 'full_snaps_df.csv' # e.g. 'snaps_df_02.csv', 'snaps_df_3.csv', 'snaps_df_48.csv'

# Create the output directory if needed; exist_ok makes re-running this cell safe.
os.makedirs(save_dir, exist_ok=True)

ip = .065 # interpixel distance (presumably in microns per pixel -- TODO confirm)

# Filter bounds for accepting a segmented object as a cell
area_bounds = (1/ip**2, 10.0/ip**2) # acceptable area range, in square pixels
ecc_bounds = (0.8, 1.0) # eccentricity range
int_bounds = (0, 400) # intensity range, e.g. 250 for atc = 0-2, 400 for atc = 3, 500 for atc = 4-8


# Glob phase contrast images (file names ending in 'c1' + extension), sorted by name
im_list = np.sort(glob.glob(folder + '*c1' + extension))

### Segmentation Method 2: Thresholding

In [None]:
# Segment each phase contrast snapshot taken at one aTc concentration, save an
# overlay image of the accepted cells, and tabulate the area and mCherry
# intensity of every accepted cell.

ATC_TO_PROCESS = 3  # only snapshots whose aTc field equals this value are analyzed
DF_COLUMNS = ['date', 'run', 'temp', 'carbon', 'operator', 'strain', 'atc_ngml', 'position',
              'cell_id', 'area_um', 'area_pix', 'mean_intensity']

snap_dfs = []
for i, s in enumerate(im_list):
    print(i+1)  # progress counter

    # The last two '_'-separated fields of the file name are <strain> and <info>;
    # info starts with the two-character aTc concentration and holds the
    # position in characters 8-9.
    filename = os.path.basename(s)  # basename also handles Windows path separators
    strain, info = filename.split('_')[-2:]
    atc_conc = info[0:2]
    pos = info[8:10]

    if int(atc_conc) != ATC_TO_PROCESS:
        continue

    stem = filename.split('.tif')[0]
    mcherry_path = folder + stem[:-2] + 'c3.tif'  # same snapshot, channel 'c3' instead of 'c1'

    im = skimage.io.imread(s) # read in phase contrast image

    # Threshold to segment im (cells are dark in phase contrast, hence white_true=False)
    im_thresh, _ = bebi103_thresh(im, skimage.morphology.disk(25), white_true=False)

    # Morphological opening with a radius 2 disk removes specks smaller than the disk
    selem = skimage.morphology.disk(2)
    im_bw_opened = skimage.morphology.binary_opening(im_thresh, selem)

    im_bw = skimage.segmentation.clear_border(im_bw_opened, buffer_size=5) # clear border

    im_labeled = skimage.measure.label(im_bw, background=0) # Label the objects.

    # Read in mCherry image; used both by the filter and for the measurements below
    int_im = skimage.io.imread(mcherry_path)

    # Apply the area, eccentricity, and mean intensity bounds.
    im_filt = aei_filter(im_labeled, area_bounds, ecc_bounds, int_bounds, int_im)

    im_label, n_labels = skimage.measure.label(im_filt, background=0, return_num=True) # Relabel the image.

    # Make a stacked image, with phase contrast image and colored cell regions
    im_float = (im.astype(float) - im.min()) / (im.max() - im.min()) # normalize
    im_float = skimage.restoration.denoise_tv_chambolle(im_float, weight=0.05) # total variation filter

    # Copy so that painting the cells into the red channel leaves im_float,
    # which is reused for the green and blue channels, untouched.
    im_color = np.copy(im_float)
    im_color[im_label > 0] = 1 # Make the color image 1 wherever we have a bacterium
    im_rgb = np.dstack((im_color, im_float, im_float)) # stack the images, R, G, B

    # JPEG stores 8-bit samples; convert explicitly rather than relying on the
    # writer to accept a float array.
    im_rgb_u8 = (np.clip(im_rgb, 0, 1) * 255).round().astype(np.uint8)

    # save stacked img
    skimage.io.imsave(fname='{}{}'.format(save_dir, stem+'.jpg'), arr=im_rgb_u8)

    if n_labels == 0:
        print('skipped '+filename)
        continue

    # Measure properties of the accepted cells on the mCherry image
    im_r_props = skimage.measure.regionprops(im_label, intensity_image=int_im)

    # Build all rows for this image at once (DataFrame.append was removed in
    # pandas 2.0 and copies the whole frame on every call).
    records = [{'date': DATE, 'run': RUN, 'operator': OPERATOR, 'carbon': CARBON, 'temp': TEMP,
                'strain': strain, 'atc_ngml': atc_conc, 'position': pos, 'cell_id': prop.label,
                'area_um': prop.area*ip**2, 'area_pix': prop.area,
                'mean_intensity': int(prop.mean_intensity)}
               for prop in im_r_props]
    df = pd.DataFrame(records, columns=DF_COLUMNS)

    snap_dfs.append(df)

# pd.concat raises on an empty list, so fall back to an empty table with the
# expected columns when no image yielded any cells.
if snap_dfs:
    snap_df = pd.concat(snap_dfs, ignore_index=True)
else:
    snap_df = pd.DataFrame(columns=DF_COLUMNS)

snap_df.to_csv(save_dir+save_df_name, index=False)

In [None]:
# Optional shortcut: rather than re-running the segmentation above, reload a
# table written by a previous run (use save_df_name to load a different file).
snap_df = pd.read_csv('{}{}'.format(save_dir, 'full_snaps_df.csv'))

In [None]:
# Gather the three per-run tables (judging by the file names: aTc 0-2, 3, and 4-8)
# and combine them into one dataframe.
snap_df_02 = pd.read_csv('./snaps_output_0-2/'+'snaps_df_02.csv')
snap_df_3 = pd.read_csv('./snaps_output_3/'+'snaps_df_3.csv')
snap_df_48 = pd.read_csv('./snaps_output_4-8/'+'snaps_df_48.csv')

# ignore_index gives the combined table a unique 0..n-1 index instead of
# repeating each file's own row numbers.
snap_df = pd.concat([snap_df_02, snap_df_3, snap_df_48], ignore_index=True)

# Save full dataframe.
# NOTE(review): this writes to './snaps/', while the reload cell reads
# 'full_snaps_df.csv' from save_dir -- confirm which location is intended.
snap_df.to_csv('./snaps/'+'full_snaps_df.csv', index=False)

In [None]:
# Convert areas and intensity to floats
snap_df[['area_um','area_pix','mean_intensity']] = snap_df[['area_um','area_pix','mean_intensity']].astype(float)

# Group by date, strain, and atc concentration, and compute the mean and the
# standard error of the per-cell mean intensity over positions and cell regions
calc_df = (
    snap_df.groupby(['date','strain','atc_ngml'])
    .mean_intensity.agg(['mean', 'sem'])
    .reset_index()
    .rename(columns={'mean':'int_mean','sem':'int_sem'})
)

# Plot mean intensities vs atc concentration
# code adapted from analysis in fold_change_plots, lines 44-54
fig, ax = plt.subplots()

# One series per (date, strain): calc_df holds a row for every combination of
# date, strain and concentration, so drawing all rows as a single line would
# connect points that belong to different strains or days.
for (date, strain), series_df in calc_df.groupby(['date', 'strain']):
    ax.errorbar(series_df['atc_ngml'], series_df['int_mean'], yerr=series_df['int_sem'],
                marker='o', lw=1, label='{} {}'.format(date, strain))

ax.set_xlabel('ATC [ng/ml]')
ax.set_ylabel('Intensity')
ax.set_title('mCherry Intensity vs ATC Concentration')
ax.legend()
fig.savefig('./snaps/'+'mCherry_aTc_titration.png', bbox_inches='tight')

calc_df.to_csv('./snaps/'+'snaps_titration_df.csv', index=False)

In [None]:
# Display the summary table of mean intensity per date, strain, and aTc concentration.
calc_df

In [None]:
# ECDF plots of the mean_intensity of cell regions by image (one plot per
# strain / aTc concentration / position), for all images.
# To reveal issues in the segmentation/filtering.
# (Beware: 70 total images for 20181002)

# The full table snap_df is used here: the name previously referenced,
# snap_df_trouble, is not defined anywhere in this notebook and raised a
# NameError on a fresh kernel. To inspect only a subset, filter snap_df first.
grouped = snap_df.groupby(['strain','atc_ngml','position']).mean_intensity
plots = [bebi103.viz.ecdf(data, x_axis_label=str(group)+' intensity') for group, data in grouped]
bokeh.io.show(bokeh.layouts.gridplot(plots, ncols=2))