# Purpose: Pig Cell Analysis

### Purpose: To segment and quantify features of all the pig data

Created by: Hawley Helmbrecht

Creation Date: 06/4/2021

Last Update: 

Notes:
    
    I did this process 3 times. First only using one of the channels. Second time, using a max intensity projection of all the channels since they appear to all be GFAP. And the third time with the max intensity projection and then with the binary fill holes after the remove small objects.
    
    I will do a qualitative check of segmentation accuracy to chose between these three for the VAMPIRE analysis and data visualization later. The current notebook shows the process for the 3rd option.

*Step 1: Import Necessary Packages*

In [16]:
import numpy as np
import pandas as pd
from scipy import ndimage

import skimage.filters
from skimage import morphology
from skimage.measure import label, regionprops, regionprops_table
from skimage.color import label2rgb
from skimage import io
from skimage import measure 

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

import watermark
import os
from PIL import Image

*Step 2: User Inputs*

In [8]:
#replace the example path from my computer with the path to the image on your computer

cell_folder = '/Users/hhelmbre/Desktop/Hawley_Pig_Data'

image_type = '.tif'

*Step 3: Defining a Folder Cleaner Function to only Return Tif Images*

In [7]:
def folder_cleaner(folder, image_type):
    k=0
    for files in folder:
        if image_type in str(files):
            k+=1
        else:
            folder = np.delete(folder, np.argwhere(folder == str(files)))
    return folder

*Step 4: Get All Images in the Folder*

In [12]:
arr = os.listdir(cell_folder)
file_list = np.asarray(arr)
file_list = folder_cleaner(file_list, image_type)

In [13]:
file_list

array(['2347_PC_20x_F7.tif', '2205_PC_20x_F8.tif', '2441_PC_20x_F8.tif',
       '2346_PC_20x_F8.tif', '2347_PC_20x_F6.tif', '2347_PC_20x_F4.tif',
       '2347_PC_20x_F5.tif', '2347_PC_20x_F1.tif', '2347_PC_20x_F2.tif',
       '2206_PC_20x_F8.tif', '2347_PC_20x_F3.tif', '2413_PC_20x_F4.tif',
       '2317_PC_20x_F1.tif', '2414_PC_20x_F1.tif', '2413_PC_20x_F5.tif',
       '2313_PC_20x_F1.tif', '2313_PC_20x_F3.tif', '2413_PC_20x_F7.tif',
       '2306_PC_20x_F8.tif', '2317_PC_20x_F2.tif', '2414_PC_20x_F3.tif',
       '2414_PC_20x_F2.tif', '2317_PC_20x_F3.tif', '2413_PC_20x_F6.tif',
       '2313_PC_20x_F2.tif', '2413_PC_20x_F2.tif', '2313_PC_20x_F6.tif',
       '2312_PC_20x_F8.tif', '2317_PC_20x_F7.tif', '2414_PC_20x_F6.tif',
       '2414_PC_20x_F7.tif', '2316_PC_20x_F8.tif', '2317_PC_20x_F6.tif',
       '2313_PC_20x_F7.tif', '2413_PC_20x_F3.tif', '2413_PC_20x_F1.tif',
       '2313_PC_20x_F5.tif', '2317_PC_20x_F4.tif', '2414_PC_20x_F5.tif',
       '2414_PC_20x_F4.tif', '2317_PC_20x_F5.tif', 

*Step 5: Segmenting and Calculating Region Features on All Images*

In [6]:
properties_list = ('area', 'bbox_area', 'centroid', 'convex_area', 
                   'eccentricity', 'equivalent_diameter', 'euler_number', 
                   'extent', 'filled_area', 'major_axis_length', 
                   'minor_axis_length', 'orientation', 'perimeter', 'solidity')


In [92]:
j = 0
for names in file_list:
        
    try:
        cell_im = io.imread(str(cell_folder + '/' + names))
        print('name', names, 'shape', cell_im.shape)
        shape = cell_im.shape
        
        #Some of the images did not have 3 channels but still appear to include only the GFAP stain
        if len(shape) == 3:
            green_cell_im = np.max(cell_im, axis=2)
        else:
            green_cell_im = cell_im
    
        thresh_li = skimage.filters.threshold_li(green_cell_im)
        binary_li = green_cell_im > thresh_li
        new_binary_li = morphology.remove_small_objects(binary_li, min_size=500)
        new_binary_li = ndimage.binary_fill_holes(new_binary_li)
        label_image = label(new_binary_li)
        
        #Saving the Segmented Image
        cell_im_name = str(cell_folder + '/' + names)
        size = len(cell_im_name)
        cell_im_mod = cell_im_name[:size - 4]
        im_to_save = Image.fromarray(new_binary_li)
        im_to_save.save(str(cell_im_mod + '_' + 'segmented.png'))
        
        #Feel free to add them here as well. The computational time is pretty efficient
        props = measure.regionprops_table(label_image, properties=(properties_list))

        if j == 0:
            df = pd.DataFrame(props)
            df['filename'] = names
        else:
            df2 = pd.DataFrame(props)
            df2['filename'] = names
            df = df.append(df2)
            
    except FileNotFoundError:
        continue

    j = 1

name 2347_PC_20x_F7.tif shape (1456, 1936, 3)
name 2205_PC_20x_F8.tif shape (1456, 1936, 3)
name 2441_PC_20x_F8.tif shape (1456, 1936, 3)
name 2346_PC_20x_F8.tif shape (1456, 1936, 3)
name 2347_PC_20x_F6.tif shape (1456, 1936, 3)
name 2347_PC_20x_F4.tif shape (1456, 1936, 3)
name 2347_PC_20x_F5.tif shape (1456, 1936, 3)
name 2347_PC_20x_F1.tif shape (1456, 1936, 3)
name 2347_PC_20x_F2.tif shape (1456, 1936, 3)
name 2206_PC_20x_F8.tif shape (1456, 1936, 3)
name 2347_PC_20x_F3.tif shape (1456, 1936, 3)
name 2413_PC_20x_F4.tif shape (1456, 1936, 3)
name 2317_PC_20x_F1.tif shape (1456, 1936, 3)
name 2414_PC_20x_F1.tif shape (1456, 1936, 3)
name 2413_PC_20x_F5.tif shape (1456, 1936, 3)
name 2313_PC_20x_F1.tif shape (1456, 1936, 3)
name 2313_PC_20x_F3.tif shape (1456, 1936, 3)
name 2413_PC_20x_F7.tif shape (1456, 1936, 3)
name 2306_PC_20x_F8.tif shape (1456, 1936, 3)
name 2317_PC_20x_F2.tif shape (1456, 1936, 3)
name 2414_PC_20x_F3.tif shape (1456, 1936, 3)
name 2414_PC_20x_F2.tif shape (145

*Step 6: Caculating the Circularity*

In [93]:
df['circularity'] = 4*np.pi*df.area/df.perimeter**2

*Step 7: Calculating the Aspect Ratio*

In [94]:
df['aspect_ratio'] = df.major_axis_length/df.minor_axis_length

*Step 8: Add in a column for the ID*

In [95]:
df['sample_id'] = df.filename.str[:4]

*Step 9: Add in a column for the image number*

In [96]:
df['image_id'] = df.filename.str[-6:]

In [97]:
df['image_id'] = df.image_id.str[:2]

In [98]:
df

Unnamed: 0,area,bbox_area,centroid-0,centroid-1,convex_area,eccentricity,equivalent_diameter,euler_number,extent,filled_area,major_axis_length,minor_axis_length,orientation,perimeter,solidity,filename,circularity,aspect_ratio,sample_id,image_id
0,2214,7872,52.816621,438.181572,6072,0.756218,53.093807,1,0.281250,2214,98.905970,64.716083,-1.378342,712.973701,0.364625,2347_PC_20x_F7.tif,0.054732,1.528306,2347,F7
1,1860,10137,57.584409,1145.682796,6349,0.857788,48.664418,1,0.183486,1860,130.325187,66.987691,-0.955411,682.387914,0.292960,2347_PC_20x_F7.tif,0.050195,1.945509,2347,F7
2,2167,7920,78.638210,1412.350254,5505,0.724771,52.527232,1,0.273611,2167,96.617310,66.568297,0.838682,662.967604,0.393642,2347_PC_20x_F7.tif,0.061956,1.451401,2347,F7
3,4070,25122,133.615233,1566.195823,14829,0.608844,71.986700,1,0.162009,4070,157.049973,124.586121,-0.076239,1072.928066,0.274462,2347_PC_20x_F7.tif,0.044429,1.260574,2347,F7
4,635,2016,59.414173,1738.340157,1288,0.893850,28.434259,1,0.314980,635,56.228069,25.210805,-1.497795,250.450793,0.493012,2347_PC_20x_F7.tif,0.127215,2.230316,2347,F7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,2039,14688,1416.466405,373.740069,7474,0.932497,50.952286,1,0.138821,2039,174.500449,63.025564,-1.021974,904.506709,0.272812,2346_PC_20x_F4.tif,0.031319,2.768725,2346,F4
116,1240,7137,1399.333065,843.073387,4448,0.952026,39.734331,1,0.173742,1240,118.267536,36.191780,1.442191,529.635606,0.278777,2346_PC_20x_F4.tif,0.055549,3.267801,2346,F4
117,2650,11305,1410.807547,1515.081887,9121,0.903136,58.086873,1,0.234410,2650,148.643886,63.821065,-1.547570,715.724963,0.290538,2346_PC_20x_F4.tif,0.065007,2.329072,2346,F4
118,563,2562,1413.062167,16.083481,1311,0.941055,26.773753,1,0.219750,563,66.639468,22.540962,0.711355,248.001046,0.429443,2346_PC_20x_F4.tif,0.115030,2.956372,2346,F4


*Step 10: Saving as a CSV file*

In [99]:
df.to_csv('/Users/hhelmbre/Desktop/Hawley_Pig_Data/06_04_2021_allim_features_maxint_fill2.csv')

*Step 11: Print Dependencies and State*

In [73]:
%load_ext watermark

%watermark -v -m -p numpy,pandas,scipy,skimage,matplotlib,wget

%watermark -u -n -t -z

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Python implementation: CPython
Python version       : 3.7.4
IPython version      : 7.8.0

numpy     : 1.17.2
pandas    : 0.25.1
scipy     : 1.3.1
skimage   : 0.17.2
matplotlib: 3.1.1
wget      : not installed

Compiler    : Clang 4.0.1 (tags/RELEASE_401/final)
OS          : Darwin
Release     : 20.3.0
Machine     : x86_64
Processor   : i386
CPU cores   : 8
Architecture: 64bit

Last updated: Fri Jun 04 2021 10:17:05MST

