In [None]:
import scipy.misc
from scipy import ndimage
import sys
import os
import glob
import json
from collections import defaultdict
import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
from keras.utils import np_utils
from keras.models import Model, model_from_json
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score
from PIL import Image

%matplotlib inline

# Cell cropping from images

In [None]:
def file_names_confirmation(directory_path):
    """Record and print every entry whose path starts with `directory_path`.

    The matched paths are stored in the module-level `directory_names` and
    their base names in the module-level `directory_names_list`.

    Parameters
    ----------
    directory_path : str
        Path prefix; ``"*"`` is appended to it verbatim before globbing, so it
        should normally end with a path separator.
    """
    global directory_names, directory_names_list

    matched_paths = glob.glob(directory_path + "*")
    base_names = list(map(os.path.basename, matched_paths))
    directory_names, directory_names_list = matched_paths, base_names

    # Base names first, then the full paths.
    for listing in (directory_names_list, directory_names):
        print(listing)

In [None]:
def cell_cropper_list(directory_path):
    """Print the entries under `directory_path` and the files inside each one.

    The matched entry paths are also stored in the module-level
    `directory_names`.

    Parameters
    ----------
    directory_path : str
        Path prefix; ``"*"`` is appended to it verbatim before globbing.
    """
    global directory_names

    directory_names = glob.glob(directory_path + "*")
    print(directory_names)

    # One printed list per entry: everything matched directly inside it.
    for entry in directory_names:
        print(glob.glob(entry + "/*"))

In [None]:
def _binarize_channel(img, binimg_thred, chs, fluoro):
    """Blur channel `chs` of `img` and threshold it at its `binimg_thred`-th percentile (uint8 0/1 mask)."""
    blurred = cv2.GaussianBlur(cv2.split(img)[chs], (5, 5), 0)
    cutoff = np.percentile(blurred, binimg_thred)
    # `== False` (rather than `not fluoro`) keeps the original branch choice for every input value.
    if fluoro == False:
        mask = blurred < cutoff   # keep pixels darker than the percentile
    else:
        mask = blurred > cutoff   # keep pixels brighter than the percentile
    return mask.astype(np.uint8)


def _external_contours(binimg):
    """Return the external contours of a binary mask on any OpenCV major version."""
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x/4.x; the contours are always second to last.
    return cv2.findContours(binimg, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]


def cell_crop_single(single_img, binimg_thred = 5., min_area=50, scale_v=25, scale_h=25, chs=0, fluoro=False):
    """Crop fixed-size patches centred on the objects detected in one image.

    The selected channel is blurred and thresholded at a percentile of its own
    intensities. Every external contour whose area is at least `min_area`
    yields a candidate centre (the mean of its boundary points). A patch of
    ``2*scale_v`` rows by ``2*scale_h`` columns is cut around each centre that
    lies far enough from the image border, and the patch is kept only if the
    same binarization applied to the patch alone still finds a total contour
    area of at least `min_area`.

    Parameters
    ----------
    single_img : numpy.ndarray
        Image array; it is cast to uint8 and patches are reshaped to 3
        channels, so a (height, width, 3) array is expected.
    binimg_thred : float
        Percentile passed to ``np.percentile`` to obtain the threshold.
    min_area : float
        Minimum contour area, used both for detection and for the re-check.
    scale_v, scale_h : int
        Half of the patch height and half of the patch width, in pixels.
    chs : int
        Index of the channel used for binarization.
    fluoro : bool
        False keeps pixels below the threshold, True keeps pixels above it.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(n_cells, 2*scale_v, 2*scale_h, 3)``;
        ``n_cells`` is 0 when nothing is detected. The count is also printed.
    """
    patch_shape = (scale_v*2, scale_h*2, 3)

    img = single_img.astype(np.uint8)
    img_h, img_w = img.shape[0], img.shape[1]

    # Pass 1: candidate centres from the whole-image binarization.
    centres = []
    for contour in _external_contours(_binarize_channel(img, binimg_thred, chs, fluoro)):
        if cv2.contourArea(contour) < min_area:
            continue
        # CHAIN_APPROX_NONE keeps every boundary point; their mean is the centre.
        cx, cy = contour[:, 0, :].mean(axis=0)
        centres.append((cx, cy))

    # Pass 2: cut a patch around every centre that is far enough from the border.
    patches = []
    for cx, cy in centres:
        if (cy < scale_v) or (cy > img_h - scale_v) or (cx < scale_h) or (cx > img_w - scale_h):
            continue

        # The guard above guarantees the window lies fully inside the image,
        # so no clamping of the window is required.
        top = int(cy) - scale_v
        bottom = int(cy) + scale_v
        left = int(cx) - scale_h
        right = int(cx) + scale_h

        patch = img[top:bottom, left:right].copy().reshape(patch_shape)

        # Re-binarize the patch on its own and drop it if too little foreground remains.
        patch_contours = _external_contours(_binarize_channel(patch, binimg_thred, chs, fluoro))
        if sum(cv2.contourArea(c) for c in patch_contours) < min_area:
            continue

        patches.append(patch)

    # Stack once instead of growing an array patch by patch; float64 matches
    # the dtype callers have always received.
    if patches:
        cells = np.stack(patches).astype(np.float64)
    else:
        cells = np.empty((0,) + patch_shape)

    print("cropped_cell_count:", cells.shape[0])

    return cells

In [None]:
def cell_crop_from_each_pic(filenames):
    """Crop cells from every image file and stack them into one array.

    The cropping settings are not parameters: `binimg_thred`, `min_area`,
    `scale_v`, `scale_h`, `chs` and `fluoro` are read from module-level
    variables and forwarded to `cell_crop_single`.

    Parameters
    ----------
    filenames : iterable of str
        Paths of the image files to process.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(n_cells, 2*scale_v, 2*scale_h, 3)`` holding
        the crops of all files in order; ``n_cells`` is 0 for an empty list.

    Raises
    ------
    AssertionError
        If an image does not decode to a (height, width, 3) array.
    """
    patch_shape = (scale_v*2, scale_h*2, 3)

    # Seed with an empty float array so shape and dtype are fixed even when
    # no file contributes a cell; concatenating once avoids quadratic copying.
    per_image_cells = [np.empty((0,) + patch_shape)]

    for filename in filenames:
        print("filename:", filename)
        # scipy.misc.imread no longer exists in current SciPy; it was a thin
        # wrapper around Pillow, which this notebook already imports.
        with Image.open(filename) as pil_img:
            # Palette images are expanded so they also yield three channels.
            img = np.array(pil_img.convert("RGB") if pil_img.mode == "P" else pil_img)
        assert img.ndim == 3 and img.shape[2] == 3, "expected a 3-channel image: " + str(filename)
        cells = cell_crop_single(img, binimg_thred=binimg_thred, min_area=min_area,
                                 scale_v=scale_v, scale_h=scale_h, chs=chs, fluoro=fluoro)
        per_image_cells.append(cells)

    total_cells = np.concatenate(per_image_cells, axis=0)

    print("total_cropped_cell_count:", total_cells.shape[0])

    return total_cells


In [None]:
def do_cell_cropper(base_directory_name, directory_path, save_directory_npy):
    """Crop the cells of every entry under `directory_path` and save them as .npy files.

    For each entry matched by ``directory_path + "*"`` all files directly
    inside it are passed to `cell_crop_from_each_pic`, and the resulting array
    is written to
    ``save_directory_npy + base_directory_name + "_" + <entry name> + ".npy"``.

    Parameters
    ----------
    base_directory_name : str
        Prefix of every saved file name.
    directory_path : str
        Path prefix of the entries to process; ``"*"`` is appended verbatim.
    save_directory_npy : str
        Path prefix of the output files; it is concatenated directly with the
        file name, so it should end with a path separator.
    """
    # Resolve the entries from the argument instead of the notebook-level
    # `directory_names`, so the function also works on a fresh kernel and
    # always processes the directory it was asked to process. Sorting makes
    # the processing order (and the cell order inside each file) reproducible.
    for condition_dir in sorted(glob.glob(directory_path + "*")):
        filenames = sorted(glob.glob(condition_dir + "/*"))
        directory_name = os.path.basename(condition_dir)
        cells = cell_crop_from_each_pic(filenames)
        save_name = save_directory_npy + base_directory_name + "_" + directory_name
        np.save(save_name + ".npy", cells)

# Confirmation for cell cropping

In [None]:
def binarization_check(check_directory, binimg_thred = 5., fluoro=False, chs=None):
    """Show every image in a directory next to its binarized version.

    For each file directly inside `check_directory` the original picture is
    plotted, then the selected channel is blurred, thresholded at the
    `binimg_thred`-th percentile of its intensities and plotted with a
    colour bar.

    Parameters
    ----------
    check_directory : str
        Directory whose files are checked (``"/*"`` is appended).
    binimg_thred : float
        Percentile passed to ``np.percentile`` to obtain the threshold.
    fluoro : bool
        False keeps pixels below the threshold, True keeps pixels above it.
    chs : int or None
        Index of the channel to binarize. None (the default) falls back to the
        notebook-level `chs` variable, or to 0 when that is not defined.

    Raises
    ------
    AssertionError
        If an image does not decode to a (height, width, 3) array.
    """
    if chs is None:
        # Earlier versions always read the notebook-level `chs`; keep that as the default.
        chs = globals().get("chs", 0)

    for check_file in sorted(glob.glob(check_directory + "/*")):
        # scipy.misc.imread no longer exists in current SciPy; it was a thin
        # wrapper around Pillow, which this notebook already imports.
        with Image.open(check_file) as pil_img:
            picture = np.array(pil_img.convert("RGB") if pil_img.mode == "P" else pil_img)
        assert picture.ndim == 3 and picture.shape[2] == 3, "expected a 3-channel image: " + str(check_file)
        img = picture.astype(np.uint8)

        # Label the figures with the file being shown (not the whole file list).
        print(check_file)
        print("")
        print("##### original picture #####")
        plt.figure(figsize=(10,7.5))
        plt.imshow(img)
        plt.show()

        img_preprocessed = cv2.GaussianBlur(cv2.split(img)[chs],(5,5),0)
        cutoff = np.percentile(img_preprocessed, binimg_thred)
        if fluoro==False:
            binimg = img_preprocessed < cutoff
        else:
            binimg = img_preprocessed > cutoff
        binimg = binimg.astype(np.uint8)

        print("")
        print("##### post binarization #####")
        plt.figure(figsize=(10,7.5))
        plt.imshow(binimg)
        plt.colorbar()
        plt.show()

In [None]:
def cell_crop_check_from_npy(save_directory_npy, npy_file_name):
    """Display 50 randomly drawn, intensity-inverted crops from each saved .npy file.

    Parameters
    ----------
    save_directory_npy : str
        Path prefix of the .npy files; concatenated directly with `npy_file_name`.
    npy_file_name : str
        File name or glob pattern of the files to inspect.

    Notes
    -----
    Crops are drawn with replacement, so a file with fewer than 50 crops is
    still shown as a full 5 x 10 grid. Files that contain no crops are
    reported and skipped.
    """
    test_data_path = sorted(glob.glob(save_directory_npy + npy_file_name))
    file_names = [os.path.basename(r) for r in test_data_path]
    print(file_names)

    for path, file_name in zip(test_data_path, file_names):
        npy = np.load(path)
        print(file_name)

        if len(npy) == 0:
            # np.random.randint(0, 0, ...) raises, so an empty file is skipped.
            print("no cropped cells to display")
            continue

        rand = np.random.randint(0, len(npy), 50)

        for i, cell_index in enumerate(rand):
            # imshow expects RGB floats in 0..1 or integers in 0..255; the
            # crops are stored as floats in 0..255, so cast before plotting.
            rimg = np.clip(255 - npy[cell_index], 0, 255).astype(np.uint8)
            plt.subplot(5,10,i+1, xticks=[], yticks=[])
            plt.imshow(rimg)

        plt.show()


# Senescence scoring


In [None]:
def model_validation_list(save_directory_result, save_directory_npy):
    """Prepare and print the labels, input files and output prefix for scoring.

    Reads the module-level `directory_path`, `npy_file_name` and
    `base_directory_name`, and stores three module-level results:

    * `index_list` - sorted base names of the entries under `directory_path`
    * `test_data_path` - sorted paths matching ``save_directory_npy + npy_file_name``
    * `save_name_result` - ``save_directory_result + base_directory_name``

    The two lists are sorted independently of each other.

    Parameters
    ----------
    save_directory_result : str
        Path prefix for the result files.
    save_directory_npy : str
        Path prefix of the .npy files to score.
    """
    global index_list, test_data_path, save_name_result

    condition_names = map(os.path.basename, glob.glob(directory_path + "*"))
    index_list = sorted(condition_names)
    print(index_list)

    npy_pattern = save_directory_npy + npy_file_name
    test_data_path = sorted(glob.glob(npy_pattern))
    print(test_data_path)

    save_name_result = save_directory_result + base_directory_name
    print(save_name_result)

In [None]:
def preprocess_input(x0):
    """Rescale values linearly so that 0 maps to -1 and 255 maps to 1."""
    unit_scaled = x0 / 255.
    centered = unit_scaled - 0.5
    return centered * 2.

In [None]:
def _bar_per_condition(values, index_list, title, ylabel):
    """Draw one bar per condition on a new 15 x 5 figure and print the condition labels."""
    positions = list(range(1, len(index_list) + 1))
    plt.figure(figsize=(15,5))
    plt.bar(positions, values, tick_label=index_list, align="center")
    plt.title(title)
    plt.xlabel("Condition")
    plt.ylabel(ylabel)
    plt.grid(True)
    print(index_list)


def model_test(test_data_path, save_name_result, index_list):
    """Score every .npy dataset with the trained model, save CSVs and plot summaries.

    The model architecture is read from the module-level `json_file_path` and
    its weights from the module-level `h5_file_path`. Each dataset is loaded,
    rescaled with `preprocess_input` and predicted once; the class of each cell
    is the index of its largest output, and column 1 of the output is the
    per-cell probability that is exported.

    Files written (all prefixed with `save_name_result`):
    ``_with_class.csv``, ``_ratio.csv``, ``_probs_with_class.csv`` and
    ``_probes_mean.csv``.

    Module-level variables set: `classes_ratio` (mean predicted class per
    dataset), `probs` (raw model outputs per dataset) and `probs_mean`
    (per-dataset mean of the raw outputs).

    Parameters
    ----------
    test_data_path : sequence of str
        Paths of the .npy files to score.
    save_name_result : str
        Path prefix of the CSV files.
    index_list : sequence of str
        Row labels, one per entry of `test_data_path` and in the same order.
    """
    global classes_ratio, probs_mean, probs

    # Close the architecture file deterministically instead of leaking the handle.
    with open(json_file_path) as json_file:
        model = model_from_json(json_file.read())
    model.load_weights(h5_file_path)

    classes = []
    classes_ratio = []
    probs = []
    probs_mean = []
    probs_list = []
    probs_list_mean = []

    for data_path in test_data_path:
        data = preprocess_input(np.load(data_path))

        # Predict once per dataset. `predict_classes` (absent from current
        # Keras and from functional models) returned the argmax of exactly
        # these outputs, so the classes are derived here instead of running
        # the network a second time.
        prob = model.predict(data, batch_size=100, verbose=1)
        pred_class = np.argmax(prob, axis=-1)

        classes.append(pred_class)
        classes_ratio.append(np.mean(pred_class))

        probs.append(prob)
        probs_mean.append(np.mean(prob, axis=0))

        # Column 1 is the score that is exported and plotted.
        probs_list.append(prob[:,1])
        probs_list_mean.append(np.mean(prob[:,1], axis=0))

    df_classes_ratio = pd.DataFrame({'mean':classes_ratio}, index=index_list)
    df_classes = pd.DataFrame(classes, index=index_list)
    classes_data = pd.concat([df_classes_ratio, df_classes], axis=1)
    classes_data.to_csv(save_name_result + '_with_class' + '.csv')
    df_classes_ratio.to_csv(save_name_result + '_ratio' + '.csv')

    df_probs_list_mean = pd.DataFrame({'mean':probs_list_mean}, index=index_list)
    df_probs_list = pd.DataFrame(probs_list, index=index_list)
    probs_data_list = pd.concat([df_probs_list_mean, df_probs_list], axis=1)
    probs_data_list.to_csv(save_name_result + '_probs_with_class' + '.csv')
    # NOTE(review): '_probes_mean' looks like a typo for '_probs_mean'; the name
    # is kept because existing result files and downstream readers may use it.
    df_probs_list_mean.to_csv(save_name_result + '_probes_mean' + '.csv')

    print('classes_ratio: {0}'.format(classes_ratio))
    print('probs_list_mean: {0}'.format(probs_list_mean))

    _bar_per_condition(classes_ratio, index_list, "classes_ratio", "Classes_ratio")
    _bar_per_condition(probs_list_mean, index_list, "probs_mean", "probs_mean")
    

# Parameters

In [None]:
# Directory and file names
# NOTE: the path values below are joined to file names by plain string
# concatenation, so every directory prefix must keep its trailing slash.

base_directory_name = 'test_data'    # Name of the directory to be tested; also the prefix of saved .npy and result files
npy_file_name = 'test_*.npy'    # Glob pattern of the .npy files to score (appended to save_directory_npy)
save_directory_npy = '/home/Demo/npy/scoring/'      # Directory path of the .npy files
directory_path = "/home/Demo/pics/scoring/" + base_directory_name + "/"    # Directory path to be tested
save_directory_result = '/home/Demo/save_data/scoring/'    # Directory path to save results

# Entries currently present under directory_path: full paths and base names.
directory_names = glob.glob(directory_path + "*")
directory_names_list = [os.path.basename(r) for r in directory_names]

In [None]:
# Parameters for cell cropping

binimg_thred = 5   # Percentile of the channel intensities used as the binarization threshold (recommended value: 1 - 10)
n_chan=3               # Number of channels
chs = 0                    # Channel used for image binarization (0:Red, 1:Green, 2:Blue)
fluoro = False         # True: binarization for fluorescent images (keep pixels above the threshold), False: binarization for phase contrast images (keep pixels below it)
min_area = 50       # Minimum contour area for cell detection (excludes noise)

scale_v = 25         # Cropped image height/2 (px)
scale_h = 25         # Cropped image width/2 (px)

In [None]:
# Trained CNN files: model architecture (JSON) and model weights (HDF5)
json_file_path = '/home/Demo/save_data/training_save_data/test.json'
h5_file_path = '/home/Demo/save_data/training_save_data/test.h5'






# Do senescence scoring


In [None]:
# Confirm the entries under directory_path that will be tested (prints their names and paths)
file_names_confirmation(directory_path)

In [None]:
# Confirm the image files that will be cropped (prints the files found inside each entry)
cell_cropper_list(directory_path)

In [None]:
# Do cell cropping: one .npy file of cropped cells is saved per condition folder
do_cell_cropper(base_directory_name, directory_path, save_directory_npy)

In [None]:
# Do senescence scoring
# model_validation_list stores index_list, test_data_path and save_name_result
# as notebook-level variables; they are passed to model_test below.
model_validation_list(save_directory_result, save_directory_npy)

print(index_list)
model_test(test_data_path, save_name_result, index_list)