In [None]:
# Basic imports
import numpy as np
import pandas as pd
import skimage

import glob
import os

import numba
import time
import datetime

import cellseg

# High-throughput in vitro image segmentation workflow

The purpose of this code is to analyze a group of in vitro images for their overall transduction frequency and brightness, in order to compare different viruses based on their affinity to a receptor.

This code was written by David Goertsen in the Gradinaru Lab, 2022, to analyze images from the following publication: 

---

For the pipeline, files are organized in folders by Round (generally a group of experiments), Plate (which contains at most 96 conditions), and Well (a specific condition). To analyze this data, we first use a look-up table to obtain the `Round`, `Plate`, and `Well` values for each condition.  

In [None]:
# Load the look-up table into a pandas dataframe; lines starting with '#' are treated as comments
lookup_table_csv = 'data/quantification/20220829_data_lookup_table.csv'
df_lut = pd.read_csv(lookup_table_csv, comment='#')

If you are running your own images, or want to run a subset of the images, the following code block facilitates this sort of analysis. 

In [None]:
## To run a single round/plate/well, uncomment the next two lines:
# inds = ((df_lut['Round'] == 3) & (df_lut['Plate'] == 7) & (df_lut['Well'] == 59))
# df_lut = df_lut.loc[inds]

## To run only the rows up to a point in the table (here, the first 8023 rows), uncomment the next line:
# df_lut = df_lut[:8023]

Not all of the wells are filled, so we only keep the rows whose `Include` flag is set. If an imaging error occurred, you can exclude the affected wells by clearing `Include` for them in the look-up table.

In [None]:
# Keep only the rows of the look-up table that are flagged for inclusion
inds = df_lut['Include'].eq(True)
df_lut = df_lut[inds]

# Show the first rows of the filtered table
df_lut.head()

Next, we want to create the dataframe that will hold the results of the analysis. If you are continuing a previously interrupted analysis, you can instead load the dataframe that was saved by that earlier run.

In [None]:
# Column layout of the (initially empty) results table: one row per analyzed well
result_columns = [
    'Date',
    'Round',
    'Plate',
    'Well',
    'Count',
    'Cells Quantified',
    'Brightness List',
    'Bright Field Area',
    'Signal Area',
    'Total Brightness',
    'Median Cell Brightness',
    '90% Confidence Interval',
]
df = pd.DataFrame(columns=result_columns)

# # Alternatively, resume from a previously saved dataframe (FILL IN THE NAME)
#df = pd.read_csv('data/quantification/OLD-DATAFRAME-NAME.csv', comment='#')


Then, we want to cycle through the images, get the data from them, and add it to the dataframe that we've set. We will save the dataframe after each well (just in case something fails). 

In [None]:
# Quantify every well listed in the look-up table and checkpoint the results.
#
# For each row of `df_lut` this cell locates the well's image folder, reads the
# signal and bright-field images, runs them through
# `cellseg.quant.in_vitro_quantification`, appends one row to `df`, and rewrites
# the output CSV so that an interrupted run loses at most one well.

# Load the io submodule explicitly; a bare `import skimage` is not guaranteed
# to expose `skimage.io` on every scikit-image version.
import skimage.io

# Root directory for the images, which contains a series of folders such as
# 'data/images/round_1/plate_1/XY01/'
directory = 'data/images'

# Results file, rewritten after every completed well
output_csv = 'data/quantification/20220901-data.csv'

# Rows gathered so far (non-empty when resuming from a previously saved dataframe).
# `DataFrame.append` was removed in pandas 2.0, so rows are collected in a list
# and the dataframe is rebuilt from it after each well.
records = df.to_dict('records')

# Cycle through the rows in the look-up table
for _, row in df_lut.iterrows():

    round_id = int(row['Round'])
    plate_id = int(row['Plate'])
    well_id = int(row['Well'])

    # Well folders are zero-padded to at least two digits: XY01 ... XY09, XY10, ...
    well_directory = f'{directory}/round_{round_id}/plate_{plate_id}/XY{well_id:02d}/'

    # Collect all the images from the well folder. glob returns files in
    # arbitrary order, so sort them to make the positional channel selection
    # below deterministic.
    # NOTE(review): assumes the file names sort as signal, bright field, ... — confirm
    # against the microscope's naming scheme.
    file_list = sorted(glob.glob(well_directory + '*.tif'))

    # A well is only analyzed when its folder holds exactly four images;
    # anything else is reported and skipped.
    if len(file_list) != 4:
        print('Skipping round %s, plate %s, well %s: expected 4 .tif images in %s, found %d'
              % (round_id, plate_id, well_id, well_directory, len(file_list)))
        continue

    # Only the signal image (channel index 1 is used) and the bright-field
    # image are needed; the remaining images in the folder are not read.
    im_sig = skimage.img_as_float(skimage.io.imread(file_list[0])[:, :, 1])
    im_bf = skimage.img_as_float(skimage.io.imread(file_list[1]))

    # Collect data from the images by running them through the cellseg.quant
    # package. For more information, see that code.
    (n_cells, cell_list, cell_intensity_list, bf_area, signal_area,
     total_brightness, median, ninety_fifth, fifth) = cellseg.quant.in_vitro_quantification(im_bf, im_sig)

    # Write all the information as one tidy row
    records.append({'Date': datetime.datetime.now(),
                    'Round': row['Round'],
                    'Plate': row['Plate'],
                    'Well': row['Well'],
                    'Count': int(n_cells),
                    'Cells Quantified': cell_list,
                    'Brightness List': cell_intensity_list,
                    'Bright Field Area': bf_area,
                    'Signal Area': signal_area,
                    'Total Brightness': total_brightness,
                    'Median Cell Brightness': median,
                    '90% Confidence Interval': [fifth, ninety_fifth]})
    df = pd.DataFrame(records)

    # Save the dataframe after every well. This overwrites the output file;
    # change `output_csv` above to keep the results of an earlier run.
    df.to_csv(output_csv, index=False)

    # Print that the well was completed.
    print('Analysis for round %s, plate %s, well %s completed' % (round_id, plate_id, well_id))
