# Cell segmentation and image generation

The cells present several difficulties, such as inconsistent intensity and size, as well as the lack of color differentiation between the cytoplasms that surround the cells.

## Installations and imports

Check CUDA version and GPU first

In [1]:
!python --version

Python 3.7.12


In [2]:
!nvcc --version
!nvidia-smi

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Mon_Oct_12_20:09:46_PDT_2020
Cuda compilation tools, release 11.1, V11.1.105
Build cuda_11.1.TC455_06.29190527_0
Tue Jan 18 14:51:13 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.46       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+------------

Install cellpose -- by default the torch GPU version is installed in COLAB notebook.

In [3]:
#!pip install folium==0.2.1
#!pip install imgaug==0.2.5
#!pip install --upgrade numpy
!pip install cellpose 
#!pip install numpy==1.19.5



Import libraries and check the GPU (the first time you import cellpose, the models will be downloaded).

In [4]:
import numpy as np
import pandas as pd

import time, os, sys
from urllib.parse import urlparse
import skimage.io 
import matplotlib.pyplot as plt
import matplotlib as mpl

!pip install opencv-python-headless==4.1.2.30
import cv2

%matplotlib inline
mpl.rcParams['figure.dpi'] = 300

from urllib.parse import urlparse

from cellpose import models, io, core

# Ask cellpose whether a GPU can be used; the flag is later passed to the
# model constructor in run_cellpose.
use_GPU = core.use_gpu()
print('>>> GPU activated? %d'%use_GPU)

creating new log file
2022-01-18 14:51:26,376 [INFO] WRITING LOG OUTPUT TO /root/.cellpose/run.log
2022-01-18 14:51:34,783 [INFO] ** TORCH CUDA version installed and working. **
>>> GPU activated? 1


If an error occurs in the cell above, press Ctrl+M and run it again.

In [5]:
import zipfile
from google_drive_downloader import GoogleDriveDownloader as gdd
import glob
import cv2
import time

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

from skimage import data, img_as_float
from skimage import exposure

Connect Drive

In [6]:
# Colab helper used to attach Google Drive to this runtime.
from google.colab import drive

# Mount the drive at /content/drive; every data path used below lives under it.
drive.mount('/content/drive')
# If prompted, open the URL shown by the mount call and enter the code it gives you.

Mounted at /content/drive


Go through files of each class, run the code of segmentation, then proceed with image treatment on them.

Obs: We created two different types of treatment
- One is to normalize and pretreat the images before going through cellpose
- The other consists of pretreating only the cells generated by cellpose on the original images.

## Image treatment functions 

In [7]:
def apply_cyto_mask(img, mask, mask_with_borders, export_G_img):
    """Crop every labelled cell out of ``img`` and blank the pixels around it.

    For each non-zero label in ``mask`` the bounding box of that label is
    copied from ``img``. Inside the crop, pixels that do not carry the label
    get channels 1 and 2 set to zero; channel 0 is then overwritten with
    channel 1 for the whole crop. When ``export_G_img`` is truthy, channel 2
    is overwritten with channel 1 as well, so all three channels are equal.

    Args:
        img: Image array indexed as ``[row, col, channel]`` with at least
            three channels. It is not modified.
        mask: 2-D label array aligned with ``img``; 0 means background.
        mask_with_borders: Unused; kept so existing callers keep working.
        export_G_img: If truthy, copy channel 1 into channel 2 of each crop.

    Returns:
        A ``(n_cells, cells)`` tuple. ``cells`` holds one crop per label in
        ascending label order and ``n_cells`` equals ``len(cells)``.
    """
    cells = []
    labels = np.unique(mask)
    for label in labels[labels != 0]:
        rows, cols = np.where(mask == label)
        # Tight bounding box around the current label.
        window = (slice(rows.min(), rows.max() + 1),
                  slice(cols.min(), cols.max() + 1))
        cell = np.copy(img[window])
        # Boolean-mask assignment replaces the former per-pixel Python loop.
        outside = mask[window] != label
        cell[outside, 1] = 0
        cell[outside, 2] = 0
        cell[:, :, 0] = cell[:, :, 1]
        if export_G_img:
            cell[:, :, 2] = cell[:, :, 1]
        cells.append(cell)
    # The count used to be np.amax(mask) + 1, which counts the background and
    # any label numbers that are absent from the mask (for example labels
    # removed because they touched the border). Report the real number.
    return len(cells), cells

def exclude_borders(mask):
    """Return a copy of ``mask`` without the labels that touch its outer frame.

    A label is dropped everywhere in the copy (set to 0) as soon as at least
    one of its pixels lies in the first/last row or first/last column. The
    input array itself is left untouched.
    """
    cleaned = np.array(mask, copy=True)
    # Start from "every labelled pixel", then forget the interior so that only
    # labelled pixels on the outer frame remain flagged.
    on_frame = cleaned != 0
    on_frame[1:-1, 1:-1] = False
    frame_labels = np.unique(cleaned[on_frame])
    cleaned[np.isin(cleaned, frame_labels)] = 0
    return cleaned
  

In [8]:
def get_cyto_image(img, mask):
    """Equalize channel 1 of ``img`` separately inside and outside ``mask``.

    Channel 1 is split into two planes: the pixels where ``mask`` is non-zero
    (the nucleus part) and the pixels where it is zero (the cytoplasm part).
    Each plane goes through the same CLAHE operator. In the returned copy of
    ``img`` channel 1 holds the equalized nucleus plane, zeroed again outside
    the mask, and channel 2 holds the equalized cytoplasm plane. Channel 0 and
    the input image are left unchanged.
    """
    source_plane = img[:, :, 1]
    in_mask = mask != 0
    equalizer = cv2.createCLAHE(clipLimit=10, tileGridSize=(8, 8))

    # Cytoplasm plane: everything outside the mask.
    cytoplasm = np.copy(source_plane)
    cytoplasm[in_mask] = 0
    cytoplasm = equalizer.apply(cytoplasm)

    # Nucleus plane: everything inside the mask. It is cleared outside the
    # mask a second time after equalization.
    nucleus = np.copy(source_plane)
    nucleus[~in_mask] = 0
    nucleus = equalizer.apply(nucleus)
    nucleus[~in_mask] = 0

    separated = np.copy(img)
    separated[:, :, 1] = nucleus
    separated[:, :, 2] = cytoplasm
    return separated

def create_images(class_path, class_name, export_G_img):
    """Load and pre-treat every ``jpg`` image found directly in ``class_path``.

    The k-th ``jpg`` of the sorted directory listing is given the name
    ``<class_name>_<k>`` and its mask is loaded from
    ``<class_path>/Masks/<name>.npy``.
    NOTE(review): the name comes from the position in the sorted listing, not
    from the file name itself -- confirm the masks were numbered the same way.

    Returns:
        ``(images, names, imagesG)``: the images after ``get_cyto_image``,
        their names, and -- only when ``export_G_img`` is True -- copies of
        the images as read from disk, in the same order (otherwise empty).
    """
    images, names, imagesG = [], [], []
    jpg_paths = [
        candidate
        for candidate in sorted(glob.glob(class_path + '/*'))
        if candidate.endswith('jpg')
    ]
    for position, img_path in enumerate(jpg_paths, start=1):
        img = cv2.imread(img_path)
        name = class_name + '_' + str(position)
        mask = np.load(class_path + '/Masks/' + name + '.npy')
        if export_G_img == True:
            # Keep the image as read from disk next to the treated one.
            imagesG.append(img.copy())
        images.append(get_cyto_image(img, mask))
        names.append(name)
    return images, names, imagesG


In [9]:
def rename_images(class_path, class_name):
    """Rename the ``jpg`` files of ``class_path`` to ``<class_name>_<n>.jpg``.

    Files are numbered from 1 following the sorted directory listing. Entries
    whose path does not end in ``jpg`` are left alone. Nothing is returned.

    The rename is done in two passes through temporary names. Renaming in
    place could overwrite a file that had not been processed yet: re-running
    on ``x_1.jpg, x_10.jpg, x_2.jpg`` sorts ``x_10`` before ``x_2``, so
    ``x_10.jpg -> x_2.jpg`` would replace the real ``x_2.jpg``.
    """
    sources = [p for p in sorted(glob.glob(class_path + '/*')) if p[-3:] == 'jpg']

    # Work out every move first and drop the ones that are already in place.
    plan = []
    for count, source in enumerate(sources, start=1):
        target = class_path + '/' + class_name + '_' + str(count) + '.jpg'
        if source != target:
            plan.append((source, target))
    if not plan:
        return

    # Pass 1: park each file under a unique temporary name. The name still
    # ends in "jpg", so an interrupted run is picked up by the next call.
    token = os.urandom(8).hex()
    staged = []
    for index, (source, target) in enumerate(plan):
        parked = class_path + '/__renaming_' + token + '_' + str(index) + '.jpg'
        os.rename(source, parked)
        staged.append((parked, target))

    # Pass 2: every final name is free now, so no file can be overwritten.
    for parked, target in staged:
        os.rename(parked, target)
            
def delete_folder_content(folder):
    """Delete everything inside ``folder`` while keeping the folder itself.

    Files and symbolic links are unlinked; sub-folders are removed with their
    whole content. This is best effort: an entry that cannot be removed is
    reported on stdout and the loop continues with the next entry.
    """
    # shutil is not imported by the notebook's import cells. Without this
    # import, every sub-folder hit a NameError that the handler below reported
    # as a failed delete, so folders were never removed.
    import shutil

    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print('Failed to delete %s. Reason: %s' % (file_path, e))

## Cellpose functions

In [10]:
def run_cellpose(images, names, dest_path, class_path, d, export_G_img, imgG):
    """Segment ``images`` with Cellpose and save the cropped cells per image.

    For every image the labels touching the image border are discarded
    (``exclude_borders``), the remaining cells are cropped with
    ``apply_cyto_mask`` and the crops are written to one ``.npy`` file named
    after the image. The running cell count is printed at the end.

    Args:
        images: List of images handed to Cellpose.
        names: Output base names, parallel to ``images``.
        dest_path: Output folder, used as a plain string prefix (callers pass
            it with a trailing ``/``). Created if missing.
        class_path: Currently unused.
        d: Cell diameter in pixels passed to Cellpose.
        export_G_img: If truthy, cells are cropped from ``imgG`` instead of
            ``images`` and each file name gets a ``G`` before ``.npy``.
        imgG: Untreated images parallel to ``images``; only read when
            ``export_G_img`` is truthy.
    """
    model = models.Cellpose(gpu=use_GPU, model_type='cyto2')

    # Cellpose channel codes: grayscale=0, R=1, G=2, B=3, given as
    # [cytoplasm, nucleus]; the second entry is 0 when there is no nucleus
    # channel.
    # NOTE(review): create_images builds its inputs with get_cyto_image, which
    # stores the equalized in-mask plane at array index 1 and the out-of-mask
    # plane at index 2. [3, 2] selects exactly those planes only if Cellpose
    # counts channels 1-based in array order -- confirm.
    channels = [3, 2]

    # diameter=None would make Cellpose estimate the size per image; here the
    # caller always supplies it. Only the masks are used afterwards.
    masks, flows, styles, diams = model.eval(images, diameter=d, channels=channels)

    # Create the destination once (including missing parents), not per image.
    os.makedirs(dest_path, exist_ok=True)

    if export_G_img:
        source_images, suffix = imgG, 'G.npy'
    else:
        source_images, suffix = images, '.npy'

    print("Saving images")
    n_cells = 0
    # len() works for one image and for masks of different sizes alike, so no
    # special case for a single image is needed.
    n_images = len(masks)
    for i, full_mask in enumerate(masks):
        print(i, '-', n_images)
        inner_mask = exclude_borders(full_mask)
        aux, cells = apply_cyto_mask(source_images[i], inner_mask, full_mask, export_G_img)
        np.save(dest_path + names[i] + suffix, _cells_to_array(cells))
        n_cells += aux
    print(f'Total number of cells: {n_cells}')


def _cells_to_array(cells):
    """Pack a list of cell crops into a single array that ``np.save`` accepts.

    Crops of identical shape are stacked as before. Crops of different shapes
    become a 1-D object array with one crop per element; this is built
    explicitly because recent NumPy versions refuse to create such ragged
    arrays implicitly through ``np.asarray``.
    """
    if len({np.shape(cell) for cell in cells}) <= 1:
        return np.asarray(cells)
    packed = np.empty(len(cells), dtype=object)
    for index, cell in enumerate(cells):
        packed[index] = cell
    return packed
      

In [12]:
def apply_cellpose(path, dest, diameter, interrupt=0, export_G_img = False):
    """Run the Cellpose pipeline on every class folder found in ``path``.

    Args:
        path: Folder whose direct children are the class folders.
        dest: Output prefix; each class is written to ``dest + class_name + '/'``.
        diameter: Path of a CSV file with the columns ``name`` (class name)
            and ``nucleus_diameter``.
        interrupt: If truthy, stop after the first class.
        export_G_img: Set to True to export untreated cells with the green
            channel copied into the other channels.

    Raises:
        ValueError: If a class folder does not have exactly one row in the CSV.
    """
    print(f'Green images: {export_G_img}')
    mean_diameter = pd.read_csv(diameter)
    class_paths = sorted(glob.glob(path + '/*'))
    n_class = len(class_paths)
    for i, class_path in enumerate(class_paths, start=1):
        class_name = os.path.basename(class_path)

        # Boolean-mask lookup instead of feeding np.where positions to .loc
        # and calling int() on the resulting Series; a missing or duplicated
        # class now produces a readable error.
        matches = mean_diameter.loc[mean_diameter['name'] == class_name, 'nucleus_diameter']
        if len(matches) != 1:
            raise ValueError(
                f"Expected exactly one 'nucleus_diameter' row for class "
                f"{class_name!r} in {diameter}, found {len(matches)}"
            )
        d = int(matches.iloc[0])

        # Convert the nucleus diameter into the diameter given to Cellpose:
        # scaled by 145/90 and capped at 145.
        # NOTE(review): the origin of 145 and 90 is not documented here.
        if d > 90:
            d = 145
        else:
            d = (145*d)/90

        images, names, imagesG = create_images(class_path=class_path, class_name=class_name, export_G_img = export_G_img)
        print("Running for:", class_name," - ",  i, "/" ,n_class)
        run_cellpose(images, names, dest+class_name+'/', class_path, d, export_G_img, imagesG)
        if interrupt:
            break


## Run cellpose on 2D sample images
Here we run the cellpose algorithm on all the images of every class to generate their cells, while saving them to the drive, untreated.

In [None]:
# do not run again, takes a long time and would be unnecessary
# Inputs: one folder per class, and the CSV with the nucleus diameter per class.
path = "/content/drive/MyDrive/Images/Images by class"
dest = "/content/drive/MyDrive/Images/Cells/Full_image_cells/"
diameter = '/content/drive/MyDrive/Dicts/Avarage_Nucleus.csv'

# makedirs also creates missing parent folders (os.mkdir raised when the
# parent "Cells" folder did not exist yet) and is a no-op when dest exists.
os.makedirs(dest, exist_ok=True)
# start is read by the next cell to report the elapsed time.
start = time.time()
# interrupt=0: process every class; export_G_img=True: export untreated cells.
apply_cellpose(path, dest, diameter, 0, True)

In [14]:
# Elapsed time since `start` was set in the previous cell, in whole minutes.
# The value is only meaningful when this cell runs right after that one.
end = time.time()
print(f"Classified images time: {int((end - start)/60)} min")

Classified images time: 0 min


Then we run it on the given pattern cells.

In [None]:
# Inputs for the pattern images: folder with the patterns and the CSV with
# their nucleus diameters.
path = "/content/drive/MyDrive/Images/Patterns"
dest = "/content/drive/MyDrive/Images/Cells/CytoCells_pattern/"
diameter = '/content/drive/MyDrive/Dicts/Avarage_Pattern_Nucleus.csv'

# makedirs also creates missing parent folders (os.mkdir raised when the
# parent "Cells" folder did not exist yet) and is a no-op when dest exists.
os.makedirs(dest, exist_ok=True)

# isn't slow
start = time.time()
# The run itself is disabled; uncomment the call below to regenerate the cells.
#apply_cellpose(path, dest, diameter, 0, True)

In [None]:
# Elapsed time since `start` was set in the previous cell, in whole minutes.
# The apply_cellpose call in that cell is commented out, so unless it is
# re-enabled this only measures the gap between running the two cells.
end = time.time()
print(f"Patterns time: {int((end - start)/60)} min" )